{"created":"2023-05-15T14:24:56.075883+00:00","id":3514,"links":{},"metadata":{"_buckets":{"deposit":"f7d6318d-4f99-4e5d-8660-6b8120351b86"},"_deposit":{"created_by":3,"id":"3514","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"3514"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00003514","sets":["245:516"]},"author_link":["4607","11749","11748"],"item_10003_biblio_info_32":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2021","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"239","bibliographicPageStart":"226","bibliographicVolumeNumber":"6","bibliographic_titles":[{"bibliographic_title":"言語資源活用ワークショップ発表論文集"},{"bibliographic_title":"Proceedings of Language Resources Workshop","bibliographic_titleLang":"en"}]}]},"item_10003_description_27":{"attribute_name":"会議概要(会議名, 開催地, 会期, 主催者等)","attribute_value_mlt":[{"subitem_description":"会議名: 言語資源活用ワークショップ2021, 開催地: オンライン, 会期: 2021年9月13日-14日, 主催: 国立国語研究所 コーパス開発センター","subitem_description_type":"Other"}]},"item_10003_description_29":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"『日本語日常会話コーパス』(CEJC)の短単位情報付与作業では、次の4段階の作業工程、(i)転記をMeCab(解析器)+UniDic(解析辞書)で自動解析、(ii)音声を聴取しながら、付加情報の一つである「発音形」のみを人手修正、(iii)人手修正された発音形を尊重しつつ再び自動解析、(iv)短単位情報(境界情報、発音形以外の付加情報)を人手修正、を踏んでいる。今後の(iv)人手修正作業の参考とするため、人手修正済みデータを対象に、複数の版の現代話し言葉UniDic(Ver2.2.0, 2.3.0, 3.0.1, 3.1.0)を用いて(i)-(iii)を自動で実施し、その出力と人手修正結果とを比較した。その結果、UniDicの版が新しくなるにつれて誤解析の頻度が低下し、向上が見られたものの、誤りやすい個所がなお残っていることがわかった。特に、品詞が 「記号」「代名詞」「接続詞」「名詞-助動詞語幹」「名詞-固有名詞-人名-一般」「名詞-固有名詞-一般」となるべき語は、UniDicの版が新しくなっても別の品詞として解析される、短単位境界を誤るなど、誤解析が起こりやすい。","subitem_description_type":"Abstract"}]},"item_10003_description_43":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10003_description_51":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"国立国語研究所","subitem_description_type":"Other"},{"subitem_description":"国立国語研究所","subitem_description_type":"Other"}]},"item_10003_description_52":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"National Institute for Japanese Language and Linguistics","subitem_description_type":"Other"},{"subitem_description":"National Institute for Japanese Language and Linguistics","subitem_description_type":"Other"}]},"item_10003_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/00003497","subitem_identifier_reg_type":"JaLC"}]},"item_10003_publisher_45":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所"}]},"item_10003_relation_40":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"言語資源活用ワークショップ2021"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://ccd.ninjal.ac.jp/lrw2021.html","subitem_relation_type_select":"URI"}}]},"item_10003_version_type_44":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"渡邊, 友香"},{"creatorName":"ワタナベ, ユカ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"西川, 賢哉"},{"creatorName":"ニシカワ, ケンヤ","creatorNameLang":"ja-Kana"},{"creatorName":"NISHIKAWA, Ken'ya","creatorNameLang":"en"}],"nameIdentifiers":[{},{},{}]},{"creatorNames":[{"creatorName":"WATANABE, Yuka","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2022-01-04"}],"displaytype":"detail","filename":"LRW2021_22-p3-2.pdf","filesize":[{"value":"821.7 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"LRW2021_22-p3-2.pdf","url":"https://repository.ninjal.ac.jp/record/3514/files/LRW2021_22-p3-2.pdf"},"version_id":"2632decf-19a0-4326-8aef-eeafb8a7f50e"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"UniDic","subitem_subject_scheme":"Other"},{"subitem_subject":"日本語日常会話コーパス(CEJC)","subitem_subject_scheme":"Other"},{"subitem_subject":"UniDic","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Corpus of Everyday Japanese Conversation (CEJC)","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"conference paper","resourceuri":"http://purl.org/coar/resource_type/c_5794"}]},"item_title":"『日本語日常会話コーパス』での形態素解析:誤解析箇所の分析","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"『日本語日常会話コーパス』での形態素解析:誤解析箇所の分析"},{"subitem_title":"Morphological Analysis of the Corpus of Everyday Japanese Conversation : An error analysis","subitem_title_language":"en"}]},"item_type_id":"10003","owner":"3","path":["516"],"pubdate":{"attribute_name":"公開日","attribute_value":"2022-01-07"},"publish_date":"2022-01-07","publish_status":"0","recid":"3514","relation_version_is_last":true,"title":["『日本語日常会話コーパス』での形態素解析:誤解析箇所の分析"],"weko_creator_id":"3","weko_shared_id":-1},"updated":"2023-05-15T14:52:47.661733+00:00"}