% NINJAL repository record 00001241: first part (前編) of the 1957 vocabulary
% survey of cultural reviews. The author is a corporate author given in
% Japanese and in English, so each name is wrapped in its own brace pair to
% keep it from being split into personal-name parts. The note holds the
% English title gloss followed by the English abstract for Reports 12 and 13;
% yomi is the katakana reading of the Japanese author name.
% NOTE(review): the export appended a MIME type to the note text; it is kept
% in the separate format field (ignored by styles) -- confirm this is intended.
@book{oai:repository.ninjal.ac.jp:00001241,
  author    = {{国立国語研究所} and {The National Language Research Institute}},
  title     = {現代語の語彙調査 : 総合雑誌の用語 前編},
  publisher = {国立国語研究所},
  year      = {1957},
  month     = mar,
  note      = {[Research on vocabulary in cultural reviews Part 1, 2 (Reports 12, 13)]
This is the second report of the successive studies of vocabulary beginning with Report 4. The “universe” here was the complete text of thirteen cultural reviews, such as Sekai, Tyûôkôron, Kaizô, and Bungei Syunzyû, from the issues of July, 1954, to the issues of June, 1955. These reviews were divided into three classes by their characteristics. At the first stage 1,120 pages were drawn from the total of 23,000 pages in proportion to the size of each stratum, and at the second stage a half of each pages was chosen. The running number of words in our “universe” was estimated to be about nine million as measured in β-units, as we defined them anew. The numbers of running and different words in our sample were some 230,000 and some 23,000 respectively.
Two kinds of word lists were published in Report 12; one is arranged in the order of kana-syllabary, and the other in the order of frequency. Entries were limited to words whose sample frequencies were equal to or larger than 7. Each list contains 4,181 words. It can be said to be a remarkable feature of this study that, for the about one thousand most frequently used words, both their intervals of confidence coefficient (95\%) and their estimation precisions were calculated.
Report 13 contains chapters on the method of the survey, statistical and semantical analysis of the structure of vocabulary, and an analysis of word-construction. In the statistical analysis two problems are discussed. One is the problem of estimating the amount of vocabulary, viz. the statistical inference of the different number of words in the complete “universe”. The estimate was 43,669 ± 1,616; it was obtained by the curve fitting of a sort of exponential function deducted from some theorems on the relation between the number of running words and the number of different words in the sample using a mathematical model. The other is an approach to the distribution function of the relative frequencies of words, where an approximate function,
F(p) = p(ap+b),
is applied. In semantic analysis some 16,000 words were listed by revised semantic classification. In the analysis of word-construction, the combination powers of words and the construction patterns inside the words were investigated. In this survey we tried to make use of a linear discriminative function for the judgement of whether two items were the same word or two different words; a discussion of this problem as it arose in an experiment is appended to Report 13.},
  format    = {application/pdf},
  yomi      = {コクリツ コクゴ ケンキュウジョ}
}