{"created":"2023-05-15T14:23:29.190056+00:00","id":1660,"links":{},"metadata":{"_buckets":{"deposit":"13dd0329-9c67-4a4d-8007-3e33cbbd3789"},"_deposit":{"created_by":3,"id":"1660","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"1660"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00001660","sets":["245:268"]},"author_link":["5506","5501","5507","5504","5508","5499","5502","5503","5500","5505"],"item_10003_biblio_info_32":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2018","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"129","bibliographicPageStart":"118","bibliographicVolumeNumber":"3","bibliographic_titles":[{"bibliographic_title":"言語資源活用ワークショップ発表論文集"},{"bibliographic_title":"Proceedings of Language Resources Workshop","bibliographic_titleLang":"en"}]}]},"item_10003_description_27":{"attribute_name":"会議概要(会議名, 開催地, 会期, 主催者等)","attribute_value_mlt":[{"subitem_description":"会議名: 言語資源活用ワークショップ2018, 開催地: 国立国語研究所, 会期: 2018年9月4日-5日, 主催: 国立国語研究所 コーパス開発センター","subitem_description_type":"Other"}]},"item_10003_description_29":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"我々は,汎用的な日本語形態素解析器『Sudachi』とその辞書を開発した。本稿では,Sudachiの辞書開発内容について述べる。我々は,まず,UniDicをベースとして,見出し表記,品詞,各種パラメータ等,形態素解析をするための辞書情報を整えた。次に,実用上UniDicに不足している語句を見出しとして追加した。これには,NEologdから取り込んだ膨大な固有名称も含まれる。さらに,登録見出しについて,アプリケーションが利用しやすい形態素単位の整備,表記のゆれを同一視するための正規化表記の整備等を行い,辞書内容を充実させた。また,形態素解析精度の向上のため,UniDic由来の見出しについても,弊害となる見出しの抑制や間違いの修正,形態素単位の調整を行った。我々のこれまでの成果は,最新版の辞書ソースに反映しOSSとして公開している。","subitem_description_type":"Abstract"}]},"item_10003_description_43":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10003_description_51":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"株式会社ワークスアプリケーションズ ワークス徳島人工知能NLP研究所","subitem_description_type":"Other"},{"subitem_description":"株式会社ワークスアプリケーションズ ワークス徳島人工知能NLP研究所","subitem_description_type":"Other"},{"subitem_description":"株式会社ワークスアプリケーションズ ワークス徳島人工知能NLP研究所","subitem_description_type":"Other"},{"subitem_description":"株式会社ワークスアプリケーションズ ワークス徳島人工知能NLP研究所","subitem_description_type":"Other"},{"subitem_description":"株式会社ワークスアプリケーションズ ワークス徳島人工知能NLP研究所","subitem_description_type":"Other"}]},"item_10003_description_52":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"WAP Tokushima Laboratory of AI and NLP","subitem_description_type":"Other"},{"subitem_description":"WAP Tokushima Laboratory of AI and NLP","subitem_description_type":"Other"},{"subitem_description":"WAP Tokushima Laboratory of AI and NLP","subitem_description_type":"Other"},{"subitem_description":"WAP Tokushima Laboratory of AI and NLP","subitem_description_type":"Other"},{"subitem_description":"WAP Tokushima Laboratory of AI and NLP","subitem_description_type":"Other"}]},"item_10003_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/00001644","subitem_identifier_reg_type":"JaLC"}]},"item_10003_publisher_45":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所"}]},"item_10003_relation_40":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"言語資源活用ワークショップ2018"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://pj.ninjal.ac.jp/corpus_center/lrw2018.html","subitem_relation_type_select":"URI"}}]},"item_10003_version_type_44":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"坂本, 美保"},{"creatorName":"サカモト, ミホ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"5499","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"川原, 典子"},{"creatorName":"カワハラ, ノリコ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"5500","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"久本, 空海"},{"creatorName":"ヒサモト, ソラミ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"5501","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"髙岡, 一馬"},{"creatorName":"タカオカ, カズマ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"5502","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"内田, 佳孝"},{"creatorName":"ウチダ, ヨシタカ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"5503","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"SAKAMOTO, Miho","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"5504","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"KAWAHARA, Noriko","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"5505","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"HISAMOTO, Sorami","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"5506","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"TAKAOKA, Kazuma","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"5507","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"UCHIDA, Yoshitaka","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"5508","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2019-02-14"}],"displaytype":"detail","filename":"LRW-2018-13-P-1-08.pdf","filesize":[{"value":"403.1 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"LRW-2018-13-P-1-08.pdf","url":"https://repository.ninjal.ac.jp/record/1660/files/LRW-2018-13-P-1-08.pdf"},"version_id":"52c948dd-0e60-4221-8fcf-368aece85e90"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"UniDic","subitem_subject_scheme":"Other"},{"subitem_subject":"現代日本語書き言葉均衡コーパス(BCCWJ)","subitem_subject_scheme":"Other"},{"subitem_subject":"形態素解析","subitem_subject_scheme":"Other"},{"subitem_subject":"UniDic","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Balanced Corpus of Contemporary Written Japanese (BCCWJ)","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Morphological Analysis","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"conference paper","resourceuri":"http://purl.org/coar/resource_type/c_5794"}]},"item_title":"形態素解析器『Sudachi』のための大規模辞書開発","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"形態素解析器『Sudachi』のための大規模辞書開発"},{"subitem_title":"Large Scale Dictionary Development for Sudachi","subitem_title_language":"en"}]},"item_type_id":"10003","owner":"3","path":["268"],"pubdate":{"attribute_name":"公開日","attribute_value":"2019-02-14"},"publish_date":"2019-02-14","publish_status":"0","recid":"1660","relation_version_is_last":true,"title":["形態素解析器『Sudachi』のための大規模辞書開発"],"weko_creator_id":"3","weko_shared_id":-1},"updated":"2023-05-16T10:26:59.308849+00:00"}