{"created":"2023-05-15T14:22:55.475952+00:00","id":805,"links":{},"metadata":{"_buckets":{"deposit":"54ea707d-422d-4f6d-8ce8-1cf8fc03c648"},"_deposit":{"created_by":3,"id":"805","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"805"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00000805","sets":["51:57"]},"author_link":["6551","6550"],"item_10002_biblio_info_40":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2015-06","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"1","bibliographicPageEnd":"10","bibliographicPageStart":"1","bibliographicVolumeNumber":"6","bibliographic_titles":[{"bibliographic_title":"国語研プロジェクトレビュー"},{"bibliographic_title":"NINJAL Project Review","bibliographic_titleLang":"en"}]}]},"item_10002_description_34":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"国立国語研究所言語資源研究系・コーパス開発センター","subitem_description_type":"Other"}]},"item_10002_description_36":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"国立国語研究所コーパス開発センターでは2011年より超大規模コーパス構築プロジェクトとして,Webを母集団とした100億語規模のコーパスの構築を進めている。構築にあたっては,工程を収集・組織化・利活用・保存の4つに分割して実装を進めている。2012年第4四半期より3か月ごとに1億URLのクロールを繰り返し実施している。本稿では構築されたコーパスデータの基礎統計量を示し,本コーパスを用いて,どのような理論的・応用的研究が可能になると考えられるかを論じる。","subitem_description_type":"Abstract"}]},"item_10002_description_37":{"attribute_name":"抄録(英)","attribute_value_mlt":[{"subitem_description":"In 2011, the National Institute for Japanese Language and Linguistics launched a corpus compilation project with the aim of constructing a ten-billion-word Web corpus. The project was split into the following four sub-projects: page collection, linguistic annotation, release, and preservation. In the page collection stage, crawling began during the fourth quarter of 2012. We crawled 100 million URLs every three months as fixed-point observations. This paper presents the basic statistics of the crawled data and discusses possible theoretical and practical implications of these language resources.","subitem_description_type":"Other"}]},"item_10002_description_51":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10002_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/00000796","subitem_identifier_reg_type":"JaLC"}]},"item_10002_publisher_39":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所"}]},"item_10002_source_id_41":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2185-0100","subitem_source_identifier_type":"ISSN"},{"subitem_source_identifier":"2185-0119","subitem_source_identifier_type":"ISSN"}]},"item_10002_source_id_44":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA12480598","subitem_source_identifier_type":"NCID"}]},"item_10002_version_type_52":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"浅原, 正幸"},{"creatorName":"アサハラ, マサユキ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"ASAHARA, Masayuki","creatorNameLang":"en"}],"nameIdentifiers":[{}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2019-02-20"}],"displaytype":"detail","filename":"review060101.pdf","filesize":[{"value":"1.3 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"review060101.pdf","url":"https://repository.ninjal.ac.jp/record/805/files/review060101.pdf"},"version_id":"d2e91d5f-492f-4134-ad61-39f31908a460"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"〈プロジェクト紹介〉超大規模コーパス構築プロジェクト 日本語Webコーパスの構築 : 利活用","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"〈プロジェクト紹介〉超大規模コーパス構築プロジェクト 日本語Webコーパスの構築 : 利活用"},{"subitem_title":"Building NINJAL Web Japanese Corpus : Use and Application","subitem_title_language":"en"}]},"item_type_id":"10002","owner":"3","path":["57"],"pubdate":{"attribute_name":"公開日","attribute_value":"2015-10-30"},"publish_date":"2015-10-30","publish_status":"0","recid":"805","relation_version_is_last":true,"title":["〈プロジェクト紹介〉超大規模コーパス構築プロジェクト 日本語Webコーパスの構築 : 利活用"],"weko_creator_id":"3","weko_shared_id":-1},"updated":"2023-05-16T10:08:54.419326+00:00"}