{"created":"2023-05-15T14:24:09.331476+00:00","id":2552,"links":{},"metadata":{"_buckets":{"deposit":"fc9d2d3c-182f-4dea-98e4-fc99118d105e"},"_deposit":{"created_by":3,"id":"2552","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"2552"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00002552","sets":["320:324"]},"author_link":["8483","8484"],"item_10001_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2019-09-15","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"3","bibliographicPageEnd":"652","bibliographicPageStart":"635","bibliographicVolumeNumber":"26","bibliographic_titles":[{"bibliographic_title":"自然言語処理"},{"bibliographic_title":"Journal of Natural Language Processing","bibliographic_titleLang":"en"}]}]},"item_10001_description_19":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf ","subitem_description_type":"Other"}]},"item_10001_description_24":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"国立国語研究所","subitem_description_type":"Other"}]},"item_10001_description_25":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"National Institute for Japanese Language and Linguistics","subitem_description_type":"Other"}]},"item_10001_description_26":{"attribute_name":"抄録(英)","attribute_value_mlt":[{"subitem_description":"The concept of surprisal was proposed by Hale as a psycholinguistic model of sentence processing costs based on the information theory. Surprisal measures a word's negative log probability in context and can be used to model the difficulty in processing a sentence. If this difficulty is estimated using the eye-tracking method, the reading time can be estimated using base phrase units in Japanese. In addition, word probability is estimated from the frequency of morphemes or word units in Japanese.We introduced word embeddings to address the discrepancy in units, which makes it difficult to model surprisal in Japanese. The additive property of skip-gram word embeddings enabled us to compose a base phrase vector from word vectors in the base phrase. We confirmed that the cosine similarity between two adjacent base phrase vectors can be used to model the contextual probability of the bi-gram of the base phrase and found that the norm of the base phrase correlates with reading time in Japanese.","subitem_description_type":"Other"}]},"item_10001_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"ヒトの文処理のモデル化としてHaleによりサプライザルが提案されている。サプライザルは文処理の負荷に対する情報量基準に基づいた指標で,当該単語の文脈中の負の対数確率が文処理の困難さをモデル化するとしている。日本語において眼球運動測定を用いて文処理の負荷をモデル化する際に,統語における基本単位である文節単位の読み時間を集計する。一方,単語の文脈中の生起確率は形態素や単語といった単位で評価し,この齟齬が直接的なサプライザルのモデル化を難しくしていた。本論文では,この問題を解決するために単語埋め込みを用いる。skip-gramの単語埋め込みの加法構成性に基づき,文節構成語のベクトルから文節のベクトルを構成し,隣接文節間のベクトルのコサイン類似度を用いて,文脈中の隣接尤度をモデル化できることを確認した。さらに,skip-gramの単語埋め込みに基づいて構成した文節のベクトルのノルムが,日本語の読み時間のモデル化に寄与することを発見した。","subitem_description_type":"Abstract"}]},"item_10001_publisher_8":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"言語処理学会"}]},"item_10001_relation_14":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type":"isIdenticalTo","subitem_relation_type_id":{"subitem_relation_type_id_text":"10.5715/jnlp.26.635","subitem_relation_type_select":"DOI"}}]},"item_10001_source_id_9":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"1340-7619","subitem_source_identifier_type":"ISSN"},{"subitem_source_identifier":"2185-8314","subitem_source_identifier_type":"ISSN"}]},"item_10001_version_type_20":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"浅原, 正幸"},{"creatorName":"アサハラ, マサユキ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Asahara, Masayuki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2019-12-20"}],"displaytype":"detail","filename":"jnlp_26_635.pdf","filesize":[{"value":"713.4 kB"}],"format":"application/pdf","licensetype":"license_6","mimetype":"application/pdf","url":{"label":"jnlp_26_635.pdf","url":"https://repository.ninjal.ac.jp/record/2552/files/jnlp_26_635.pdf"},"version_id":"1bb991fb-147d-4bc6-8344-b5502dba5851"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"リーダビリティ評価","subitem_subject_scheme":"Other"},{"subitem_subject":"読み時間","subitem_subject_scheme":"Other"},{"subitem_subject":"単語埋め込み","subitem_subject_scheme":"Other"},{"subitem_subject":"サプライザル","subitem_subject_scheme":"Other"},{"subitem_subject":"Readability","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Reading Time","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Word Embeddings","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Surprisal","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"単語埋め込みに基づくサプライザル","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"単語埋め込みに基づくサプライザル"},{"subitem_title":"Surprisal through Word Embeddings","subitem_title_language":"en"}]},"item_type_id":"10001","owner":"3","path":["324"],"pubdate":{"attribute_name":"公開日","attribute_value":"2019-12-21"},"publish_date":"2019-12-21","publish_status":"0","recid":"2552","relation_version_is_last":true,"title":["単語埋め込みに基づくサプライザル"],"weko_creator_id":"3","weko_shared_id":3},"updated":"2023-05-15T14:45:05.012138+00:00"}