{"created":"2023-05-15T14:22:47.574439+00:00","id":525,"links":{},"metadata":{"_buckets":{"deposit":"4ea246ad-81b2-4016-8cbe-8987f24694a7"},"_deposit":{"created_by":3,"id":"525","owners":[3],"pid":{"revision_id":0,"type":"depid","value":"525"},"status":"published"},"_oai":{"id":"oai:repository.ninjal.ac.jp:00000525","sets":["38:65"]},"author_link":["6359","6362","6361","6360"],"item_10002_biblio_info_40":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2013-11","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"6","bibliographicPageEnd":"181","bibliographicPageStart":"163","bibliographic_titles":[{"bibliographic_title":"国立国語研究所論集"},{"bibliographic_title":"NINJAL Research Papers","bibliographic_titleLang":"en"}]}]},"item_10002_description_34":{"attribute_name":"著者所属","attribute_value_mlt":[{"subitem_description":"国立国語研究所 コーパス開発センター 非常勤研究員(元)","subitem_description_type":"Other"},{"subitem_description":"東京農工大学 博士課程","subitem_description_type":"Other"}]},"item_10002_description_35":{"attribute_name":"著者所属(英)","attribute_value_mlt":[{"subitem_description":"[former] Adjunct Researcher, Center for Corpus Development, NINJAL","subitem_description_type":"Other"},{"subitem_description":"Doctoral Student, Tokyo University of Agriculture and Technology","subitem_description_type":"Other"}]},"item_10002_description_36":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"国立国語研究所で計画されている『日本語歴史コーパス』の構築にあたっては活字書籍化された古典資料のコーパス化を基本とし,その際には国内規格JIS X0213文字集合を用いて活字を電子化することが予定されている。本稿ではJIS X0213を古典資料の活字書籍に適用した場合の効果を検証するため,小学館新全集『今昔物語集』での漢字活字を調査し,のべ字数にして99.86%の活字がJIS X0213でカバーできることを明らかにし,JIS X0213の有効性を確認した。また,JIS X0213では表現できない活字に関しては,コーパスとしての利便性を鑑み,「〓」表示せずJIS X0213の範囲内の別字で代用しつつ,原資料での字形の情報を保持する方針を考案した。別字代用によりほぼ9割の外字は解消されるが,「〓」表示を完全になくすためには,文字レベルではなく,語の表記というレベルでの代用を考えなければならなくなる。末尾には小学館新全集『今昔物語集』で代用処理の対象となる特殊活字の一覧を付した。","subitem_description_type":"Abstract"}]},"item_10002_description_37":{"attribute_name":"抄録(英)","attribute_value_mlt":[{"subitem_description":"Digitizing characters not included in the standard set is an urgent problem for electronic corpora of historical documents. Such non-standard characters have hitherto been replaced with the symbol \"〓\" in digital corpora, which is quite inconvenient for users. In constructing the Corpus of Historical Japanese, the current Japanese standard for character codes, JIS X0213, will be adopted for the digitization of printed documents. This paper first examines the efficacy of JIS X0213 for typeset versions of old texts. A thorough investigation of the Shogakukan (SNKBZ) edition of the Konjaku Monogatarishu found that JIS X0213 covers 99.86% of the total character tokens. The paper then proposes a substitution system for the remaining 0.14% of the characters not covered by JIS X0213. The idea is to replace these non-standard characters with similar characters that are included in JIS X0213 while retaining information about the original characters for reference. All the non-standard characters in the Shogakukan (SNKBZ) edition of the Konjaku Monogatarishu are listed at the end of the paper along with their replacements.","subitem_description_type":"Other"}]},"item_10002_description_51":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10002_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15084/00000516","subitem_identifier_reg_type":"JaLC"}]},"item_10002_publisher_39":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"国立国語研究所"}]},"item_10002_source_id_41":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2186-134X","subitem_source_identifier_type":"ISSN"},{"subitem_source_identifier":"2186-1358","subitem_source_identifier_type":"ISSN"}]},"item_10002_source_id_44":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA12536262","subitem_source_identifier_type":"NCID"}]},"item_10002_version_type_52":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"須永, 哲矢"},{"creatorName":"スナガ, テツヤ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"6359","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"堤, 智昭"},{"creatorName":"ツツミ, トモアキ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"6360","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"SUNAGA, Tetsuya","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"6361","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"TSUTSUMI, Tomoaki","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"6362","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2019-02-20"}],"displaytype":"detail","filename":"papers0609.pdf","filesize":[{"value":"1.4 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"papers0609.pdf","url":"https://repository.ninjal.ac.jp/record/525/files/papers0609.pdf"},"version_id":"fca1cfcb-a5d5-4d10-9f66-22f0231223bc"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"コーパス構築","subitem_subject_scheme":"Other"},{"subitem_subject":"JIS X0213","subitem_subject_scheme":"Other"},{"subitem_subject":"外字処理","subitem_subject_scheme":"Other"},{"subitem_subject":"今昔物語集","subitem_subject_scheme":"Other"},{"subitem_subject":"construction of electronic corpora","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"JIS X0213","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"non-standard character processing","subitem_subject_language":"en","subitem_subject_scheme":"Other"},{"subitem_subject":"Konjaku Monogatarishu","subitem_subject_language":"en","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"departmental bulletin paper","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"『日本語歴史コーパス』のための書籍活字の電子化 : 小学館新全集『今昔物語集』を事例として","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"『日本語歴史コーパス』のための書籍活字の電子化 : 小学館新全集『今昔物語集』を事例として"},{"subitem_title":"Digitization of Typeset Books in Constructing the Corpus of Historical Japanese : The Case of the Shogakukan (SNKBZ) Edition of the Konjaku Monogatarishu","subitem_title_language":"en"}]},"item_type_id":"10002","owner":"3","path":["65"],"pubdate":{"attribute_name":"公開日","attribute_value":"2015-10-30"},"publish_date":"2015-10-30","publish_status":"0","recid":"525","relation_version_is_last":true,"title":["『日本語歴史コーパス』のための書籍活字の電子化 : 小学館新全集『今昔物語集』を事例として"],"weko_creator_id":"3","weko_shared_id":-1},"updated":"2023-05-16T10:12:19.237068+00:00"}