{"created":"2026-01-21T02:06:46.172149+00:00","id":2000598,"links":{},"metadata":{"_buckets":{"deposit":"c55cc54b-3ce5-4b3c-bb2c-921ebde33afd"},"_deposit":{"created_by":26,"id":"2000598","owner":"26","owners":[26],"pid":{"revision_id":0,"type":"depid","value":"2000598"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:02000598","sets":["38:1768960040272"]},"author_link":[],"control_number":"2000598","item_10002_biblio_info_40":{"attribute_name":"bibliographic_information","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2026-01","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"120","bibliographicPageStart":"109","bibliographicVolumeNumber":"30","bibliographic_titles":[{"bibliographic_title":"国立国語研究所論集","bibliographic_titleLang":"ja"},{"bibliographic_title":"NINJAL Research Papers","bibliographic_titleLang":"en"}]}]},"item_10002_description_36":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"自然言語の普遍的な性質の代表的なものとしてZipf則がある。自然言語のデータを任意の関数でフィットし,Zipf則の妥当性を定量的に評価する場合,その事前準備として正確な誤差を見積もっておく必要がある。本稿ではBrown Corpusの英語データを用いて,英単語の出現数分布を作成し,その標準偏差から単語の出現数に付随する誤差(修正誤差)を評価した。その結果,修正誤差はポアソン誤差よりも有意に大きい値を持つことが分かった。また,英単語の出現順位と出現数のプロットのデータ点に修正誤差とポアソン誤差を付与し,Zipf則の関数でフィットを行った。そして,データとフィットの一致度合いをχ²検定とKS(Kolmogorov-Smirnov)検定を用いて定量的に比較した。その結果,ポアソン誤差は英単語の出現数の不定性を過少評価していることが明らかになった。一方,フィット関数の形状はどちらの誤差を使用しても概ね同様であることが分かった。","subitem_description_language":"ja","subitem_description_type":"Abstract"}]},"item_10002_description_37":{"attribute_name":"抄録(英)","attribute_value_mlt":[{"subitem_description":"Zipf's law is one of the well-known universal characteristics of natural language. To evaluate the validity of Zipf's law quantitatively, it is necessary to evaluate the statistical errors of the word frequencies in a text (referred to as \"true error\"). In this study, true errors in English texts were examined using standard deviations of the word frequency distributions obtained from the Brown Corpus. It was found that the true error is significantly larger than the Poisson error. We performed fits of word frequencies as a function of the frequency rank using Zipf's law, assigning either the true or Poisson errors to the data points. We then applied the χ² and Kolmogorov-Smirnov tests to compare consistency between the data and Zipf's law. The results show that the error's size was underestimated when using Poisson errors. In addition, the overall shape of the fitted function was similar, regardless of the type of error used.","subitem_description_language":"en","subitem_description_type":"Abstract"}]},"item_10002_description_51":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_language":"ja","subitem_description_type":"Other"}]},"item_10002_identifier_registration":{"attribute_name":"identifier_registration","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/0002000598","subitem_identifier_reg_type":"JaLC"}]},"item_10002_publisher_39":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所","subitem_publisher_language":"ja"}]},"item_10002_source_id_41":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2186-1358","subitem_source_identifier_type":"EISSN"}]},"item_10002_version_type_52":{"attribute_name":"出版タイプ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorAffiliations":[{"affiliationNames":[{"affiliationName":"新居浜工業高等専門学校","affiliationNameLang":"ja"},{"affiliationName":"National Institute of Technology, Niihama College","affiliationNameLang":"en"}]},{"affiliationNames":[{"affiliationName":"高エネルギー加速器研究機構","affiliationNameLang":"ja"},{"affiliationName":"High Energy Accelerator Research Organization","affiliationNameLang":"en"}]}],"creatorNames":[{"creatorName":"田窪, 洋介","creatorNameLang":"ja"},{"creatorName":"TAKUBO, Yosuke","creatorNameLang":"en"}]},{"creatorAffiliations":[{"affiliationNames":[{"affiliationName":"新居浜工業高等専門学校 専攻科","affiliationNameLang":"ja"},{"affiliationName":"Advanced Engineering Course Student, National Institute of Technology, Niihama College","affiliationNameLang":"en"}]}],"creatorNames":[{"creatorName":"窪田, 葵","creatorNameLang":"ja"},{"creatorName":"KUBOTA, Aoi","creatorNameLang":"en"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_access","date":[{"dateType":"Available","dateValue":"2026-01-23"}],"displaytype":"detail","fileDate":[{"fileDateType":"Issued","fileDateValue":"2026-01"}],"filename":"papers3006.pdf","filesize":[{"value":"1.1 MB"}],"format":"application/pdf","licensetype":"license_4","mimetype":"application/pdf","url":{"label":"papers3006.pdf","url":"https://repository.ninjal.ac.jp/record/2000598/files/papers3006.pdf"},"version_id":"82b84416-d0e7-48d0-8b22-63c02bd0bdea"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Brown Corpus","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"統計的不定性","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"Zipf則","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"χ²検定","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"KS検定","subitem_subject_language":"ja","subitem_subject_scheme":"Other"},{"subitem_subject":"Brown Corpus","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Zipf's law","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"statistical uncertainty","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"χ² test","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"KS test","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"item_resource_type","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"英語テキストに含まれる単語の出現頻度に付随する不定性の評価","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"英語テキストに含まれる単語の出現頻度に付随する不定性の評価","subitem_title_language":"ja"},{"subitem_title":"Evaluating Uncertainty on Word Frequencies in English Texts","subitem_title_language":"en"}]},"item_type_id":"10002","owner":"26","path":["1768960040272"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2026-01-23"},"publish_date":"2026-01-23","publish_status":"0","recid":"2000598","relation_version_is_last":true,"title":["英語テキストに含まれる単語の出現頻度に付随する不定性の評価"],"weko_creator_id":"26","weko_shared_id":-1},"updated":"2026-01-22T07:29:35.899736+00:00"}