{"created":"2023-05-15T14:24:10.999789+00:00","id":2591,"links":{},"metadata":{"_buckets":{"deposit":"7953f929-31d1-4105-97f5-9c943c45cd05"},"_deposit":{"created_by":3,"id":"2591","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"2591"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00002591","sets":["245:372"]},"author_link":["4607","9000","8999"],"item_10003_biblio_info_32":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2019","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"250","bibliographicPageStart":"238","bibliographicVolumeNumber":"4","bibliographic_titles":[{"bibliographic_title":"言語資源活用ワークショップ発表論文集"},{"bibliographic_title":"Proceedings of Language Resources Workshop","bibliographic_titleLang":"en"}]}]},"item_10003_description_27":{"attribute_name":"会議概要(会議名, 開催地, 会期, 主催者等)","attribute_value_mlt":[{"subitem_description":"会議名: 言語資源活用ワークショップ2019, 開催地: 国立国語研究所, 会期: 2019年9月2日−4日, 主催: 国立国語研究所 コーパス開発センター","subitem_description_type":"Other"}]},"item_10003_description_29":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"国語研で構築中の『日本語日常会話コーパス』(CEJC)の短単位解析作業について報告する。CEJCにおける短単位情報は、アノテーションの一つであるにとどまらず、(i)発音に関する情報を唯一持つ、(ii)他のアノテーション(長単位・韻律)の初期値作成の際の入力となる、(iii)転記誤りを発見する際の有力な手掛かりとなる、などの点で重要なアノテーションであり、高い精度が求められる。作業は次のように進める。まず、MeCab+UniDicで自動解析したのち、短単位付加情報の一つである「発音形」を、音を聴取しながら人手で修正する。これにより、発音形の精度向上を図る。さらに、修正された発音形を尊重しつつ再び形態素解析を行なうことにより、発音形以外の短単位情報(境界・付加情報)の精度向上をも図る(例:初出店「ショシュツ/テン」→「ハツ/シュッテン」)。その後、短単位解析結果を、形態論情報管理ツール「大納言」で検索・修正できるようにし、引き続き解析誤りを修正していく。修正が進んだ段階で、境界・付加情報に揺れがないかを系統的にチェックする(例:「ミリ/メートル」「ミリ=メートル」)。","subitem_description_type":"Abstract"}]},"item_10003_description_43":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10003_description_51":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"国立国語研究所","subitem_description_type":"Other"},{"subitem_description":"国立国語研究所","subitem_description_type":"Other"}]},"item_10003_description_52":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"National Institute for Japanese Language and Linguistics","subitem_description_type":"Other"},{"subitem_description":"National Institute for Japanese Language and Linguistics","subitem_description_type":"Other"}]},"item_10003_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/00002575","subitem_identifier_reg_type":"JaLC"}]},"item_10003_publisher_45":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所"}]},"item_10003_relation_40":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"言語資源活用ワークショップ2019"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://pj.ninjal.ac.jp/corpus_center/lrw2019.html","subitem_relation_type_select":"URI"}}]},"item_10003_version_type_44":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"西川, 賢哉"},{"creatorName":"ニシカワ, ケンヤ","creatorNameLang":"ja-Kana"},{"creatorName":"Nishikawa, Ken'ya","creatorNameLang":"en"}],"nameIdentifiers":[{},{},{}]},{"creatorNames":[{"creatorName":"渡邊, 友香"},{"creatorName":"ワタナベ, ユカ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Watanabe, Yuka","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2020-02-13"}],"displaytype":"detail","filename":"LRW2019_27_P-3-2-E.pdf","filesize":[{"value":"932.5 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"LRW2019_27_P-3-2-E.pdf","url":"https://repository.ninjal.ac.jp/record/2591/files/LRW2019_27_P-3-2-E.pdf"},"version_id":"658fa3f7-26e2-42bf-a1c0-42c46389e4df"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"UniDic","subitem_subject_scheme":"Other"},{"subitem_subject":"日本語日常会話コーパス(CEJC)","subitem_subject_scheme":"Other"},{"subitem_subject":"UniDic","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Corpus of Everyday Japanese Conversation (CEJC)","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"conference paper","resourceuri":"http://purl.org/coar/resource_type/c_5794"}]},"item_title":"『日本語日常会話コーパス』の短単位解析:作業工程を中心に","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"『日本語日常会話コーパス』の短単位解析:作業工程を中心に"},{"subitem_title":"Morphological Analysis of the Corpus of Everyday Japanese Conversation","subitem_title_language":"en"}]},"item_type_id":"10003","owner":"3","path":["372"],"pubdate":{"attribute_name":"公開日","attribute_value":"2020-02-06"},"publish_date":"2020-02-06","publish_status":"0","recid":"2591","relation_version_is_last":true,"title":["『日本語日常会話コーパス』の短単位解析:作業工程を中心に"],"weko_creator_id":"3","weko_shared_id":-1},"updated":"2023-05-15T15:32:32.499513+00:00"}