{"created":"2024-11-13T08:35:54.192857+00:00","id":2000356,"links":{},"metadata":{"_buckets":{"deposit":"70accb55-09da-4b57-9dee-099ce9733fe6"},"_deposit":{"created_by":25,"id":"2000356","owner":"25","owners":[25],"pid":{"revision_id":0,"type":"depid","value":"2000356"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:02000356","sets":["245:1731477595303"]},"author_link":[],"item_10003_biblio_info_32":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2024","bibliographicIssueDateType":"Issued"},"bibliographicNumberOfPages":"18","bibliographicPageEnd":"76","bibliographicPageStart":"59","bibliographicVolumeNumber":"1","bibliographic_titles":[{"bibliographic_title":"言語資源ワークショップ発表論文集","bibliographic_titleLang":"ja"},{"bibliographic_title":"Proceedings of Language Resources Workshop","bibliographic_titleLang":"en"}]}]},"item_10003_description_27":{"attribute_name":"会議概要(会議名, 開催地, 会期, 主催者等)","attribute_value_mlt":[{"subitem_description":"会議名: 言語資源ワークショップ2024, 開催地: オンライン, 会期: 2024年8月28日-29日, 主催: 国立国語研究所 言語資源開発センター, 共催: 言語資源協会, 後援: 国立情報学研究所","subitem_description_language":"ja","subitem_description_type":"Other"}]},"item_10003_description_29":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"本稿では、LLM を用いてカタカナ語の文脈中の意味分類を行った手法と結果について報告する。ChatGPT などの生成 AI の学習に用いられる資源の多くは英語で占められており、日本語の資源はあまり使用されていない。そのため日本語に含まれるカタカナ語は対応する英単語の意味と異なる場合があり、文脈中の意味分類が正しく行われない可能性が高い。そこで『現代日本語書き言葉均衡コーパス』(BCCWJ) に含まれる文章からカタカナ語を含む文章を抽出し、その中から数個の単語を対象として、gpt-3.5-turbo, gpt-4o, gpt-4o-mini, Gemini-Pro, Swallow の 5 つの LLM を用いて Few-shot Learning を行った。実験 1 と実験 2 では生成 AIが作成した意味区分を利用した際の意味分類とプロンプト中で役割を与えることによる影響を、実験 3 では人間の定義した Wiktionary の意味区分を利用した場合の意味分類を上記のLLM で検証した。結果として生成 AI、Wiktionary どちらの意味区分を扱った意味分類でも gpt-4o が最も平均正解率が高く、gpt-4o と Gemini-Pro は役割を与えることでほとんどのプロンプトで回答精度が向上したことが確認できた。また gpt-4o-mini と Gemini-Pro ではWiktionary の意味区分を利用したほうが平均正解率が 20% 以上高くなった。さらに単語による各 LLM 間での正解率の差異もみられ、gpt-4o, gpt-4o-mini, Gemini-Pro 間で顕著であった。","subitem_description_language":"ja","subitem_description_type":"Abstract"}]},"item_10003_description_43":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_language":"ja","subitem_description_type":"Other"}]},"item_10003_description_51":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"茨城大学大学院","subitem_description_language":"ja","subitem_description_type":"Other"},{"subitem_description":"茨城大学","subitem_description_language":"ja","subitem_description_type":"Other"}]},"item_10003_description_52":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"Ibaraki University","subitem_description_language":"en","subitem_description_type":"Other"},{"subitem_description":"Ibaraki University","subitem_description_language":"en","subitem_description_type":"Other"}]},"item_10003_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/0002000356","subitem_identifier_reg_type":"JaLC"}]},"item_10003_publisher_45":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所","subitem_publisher_language":"ja"}]},"item_10003_relation_40":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_language":"ja","subitem_relation_name_text":"言語資源ワークショップ2024"}],"subitem_relation_type":"isSupplementedBy","subitem_relation_type_id":{"subitem_relation_type_id_text":"https://clrd.ninjal.ac.jp/lrw2024.html","subitem_relation_type_select":"URI"}}]},"item_10003_version_type_44":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"小滝, 主紀","creatorNameLang":"ja"},{"creatorName":"Kodaki, Kazuki","creatorNameLang":"en"}]},{"creatorNames":[{"creatorName":"佐々木, 稔","creatorNameLang":"ja"},{"creatorName":"Sasaki, Minoru","creatorNameLang":"en"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2024-11-13"}],"displaytype":"detail","filename":"LRW2024_05-o04s.pdf","filesize":[{"value":"510.4 KB"}],"format":"application/pdf","mimetype":"application/pdf","url":{"label":"LRW2024_05-o04s.pdf","url":"https://repository.ninjal.ac.jp/record/2000356/files/LRW2024_05-o04s.pdf"},"version_id":"a2a2fc6b-7dc2-4f61-95fe-b69fd3b3272e"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"conference paper","resourceuri":"http://purl.org/coar/resource_type/c_5794"}]},"item_title":"カタカナ語の意味分類に対する大規模言語モデルの有効性検証","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"カタカナ語の意味分類に対する大規模言語モデルの有効性検証","subitem_title_language":"ja"},{"subitem_title":"Validation of a Large-Scale Linguistic Model for Semantic Classification of Katakana Words","subitem_title_language":"en"}]},"item_type_id":"10003","owner":"25","path":["1731477595303"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2024-11-13"},"publish_date":"2024-11-13","publish_status":"0","recid":"2000356","relation_version_is_last":true,"title":["カタカナ語の意味分類に対する大規模言語モデルの有効性検証"],"weko_creator_id":"25","weko_shared_id":-1},"updated":"2024-11-14T05:50:54.105584+00:00"}