% Conference paper in the SETE 2016 symposium proceedings (Springer LNCS series).
% NOTE(review): `address` holds a country; BibTeX expects the publisher's city -- confirm and replace.
% NOTE(review): no `volume` is recorded for the LNCS series number -- add once verified.
% NOTE(review): month/day (1 Jan) look like an export placeholder -- confirm against the publisher record.
@inproceedings{5e4112b7d4e94b73a64c097fd9f7a91d,
  author    = {Flanagan, Brendan and Hirokawa, Sachio and Kaneko, Emiko and Izumi, Emi},
  title     = {Classification of speaking proficiency level by machine learning and feature selection},
  booktitle = {Emerging Technologies for Education - 1st International Symposium, {SETE} 2016 Held in Conjunction with {ICWL} 2016, Revised Selected Papers},
  editor    = {Gennari, Rosella and Cao, Yiwei and Huang, Yueh-Min and Wu, Wu and Xie, Haoran},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  address   = {Germany},
  pages     = {677--682},
  year      = {2017},
  month     = jan,
  day       = {1},
  doi       = {10.1007/978-3-319-52836-6_72},
  isbn      = {9783319528359},
  language  = {English},
  note      = {1st International Symposium on Emerging Technologies for Education, SETE 2016 Held in Conjunction with ICWL 2016 ; Conference date: 26-10-2016 Through 29-10-2016},
  abstract  = {Analysis of publicly available language learning corpora can be useful for extracting characteristic features of learners from different proficiency levels. This can then be used to support language learning research and the creation of educational resources. In this paper, we classify the words and parts of speech of transcripts from different speaking proficiency levels found in the NICT-JLE corpus. The characteristic features of learners who have the equivalent spoken proficiency of CEFR levels A1 through to B2 were extracted by analyzing the data with the support vector machine method. In particular, we apply feature selection to find a set of characteristic features that achieve optimal classification performance, which can be used to predict spoken learner proficiency.},
}