% Conference paper describing the concept and design of the Miraikan SC Corpus.
% The source export packed the event details, MIME type, author affiliations and
% abstract into a single `note`; they are kept here as separate fields so that
% only the event details are printed. `abstract`, `affiliation` and `format` are
% annotation fields that the standard BibTeX styles ignore.
@inproceedings{oai:repository.ninjal.ac.jp:00001930,
  author      = {Bono, Mayumi and Sakaida, Rui and Makino, Ryosaku and Joh, Ayami},
  title       = {{Miraikan} {SC} corpus: A trial for data collection in a semi-open and semi-controlled environment},
  booktitle   = {Proceedings of the {LREC} 2018 Special Speech Sessions},
  pages       = {30--34},
  publisher   = {Center for Corpus Development, National Institute for Japanese Language and Linguistics},
  year        = {2018},
  month       = may,
  note        = {{LREC} 2018 Special Speech Sessions ``Speech Resources Collection in Real-World Situations''; Phoenix Seagaia Conference Center, Miyazaki; 2018-05-09},
  affiliation = {National Institute of Informatics/SOKENDAI, National Institute of Informatics, Waseda University, The University of Shiga Prefecture},
  format      = {application/pdf},
  abstract    = {This paper shows the concept and design of our Miraikan SC Corpus. A well-structured and well-prepared corpus would be useful to engineers for understanding the mechanism of speech production and the nature of social interaction with regard to informing the design of their systems. Applications of the corpus range from speech recognition and dialogue processing to human-agent interaction systems, among others. We started collecting audio-visual data using multiple video cameras and microphones in October 2012 at a science museum in Tokyo, Japan. In this paper, we describe the reason why we chose the museum as a research field for data collection, how we audio-video-recorded the interactions, and how we dealt with personal information in the data set, such as participants' names, jobs, and places of residence.},
}