czesl.bib

@article{Bedrichova:etal:2011,
  author   = {Zuzanna Bedřichová and Karel Šebesta and Kateřina Šormová},
  title    = {{ROMI} -- první rozsáhlá databanka romského etnolektu češtiny},
  journal  = {Lidé města},
  volume   = {13},
  number   = {1},
  year     = {2011},
  url      = {http://lidemesta.cz/index.php?id=769},
  keywords = {akces, ROMi, CzeSL},
}
@inproceedings{Bedrichova:etal:2011b,
  address = {Praha},
  author = {Bedřichová, Zuzanna and Šebesta, Karel and Škodová, Svatava and Šormová, Kateřina},
  booktitle = {Korpusová lingvistika Praha 2011: 2 -- Výzkum a výstavba korpusů},
  editor = {František Čermák},
  keywords = {akces, CzeSL, ROMi},
  organization = {Ústav Českého národního korpusu},
  pages = {93--104},
  publisher = {Nakladatelství Lidové noviny},
  series = {Studie z korpusové lingvistiky},
  title = {Podoba a využití korpusu jinojazyčných a romských mluvčích češtiny: {CZESL} a {ROMi} [{F}orm and Utilization of a Corpus of Non-Native and {R}omany Speakers of {C}zech: {CZESL} and {ROMi}]},
  volume = {15},
  volumetitle = {Korpusová lingvistika Praha 2011: 2 -- Výzkum a výstavba korpusů},
  year = {2011}
}
@inproceedings{hana:etal:10,
  author       = {Jirka Hana and Alexandr Rosen and Svatava Škodová and Barbora Štindlová},
  title        = {Error-tagged learner corpus of {C}zech},
  booktitle    = {Proceedings of the Fourth Linguistic Annotation Workshop},
  organization = {Association for Computational Linguistics},
  address      = {Uppsala, Sweden},
  pages        = {11--19},
  year         = 2010,
  isbn         = {978-1-932432-72-5},
  url          = {http://www.aclweb.org/anthology/W10-1802},
  keywords     = {akces; CzeSL},
}
@inproceedings{Hana:etal:2012,
  address = {Istanbul, Turkey},
  author = {Jirka Hana and Alexandr Rosen and Barbora Štindlová and Petr Jäger},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Mehmet Uğur Doğan and Bente Maegaard and Joseph Mariani and Jan Odijk and Stelios Piperidis},
  eventdate = {2012-05-23/2012-05-25},
  isbn = {978-2-9517408-7-7},
  keywords = {learner corpus; akces; CzeSL},
  language = {english},
  pages = {3228--3232},
  publisher = {European Language Resources Association (ELRA)},
  title = {Building a learner corpus},
  url = {http://www.lrec-conf.org/proceedings/lrec2012/pdf/992_Paper.pdf},
  year = {2012},
}
@article{Hana:etal:2014,
  abstract = {The need for data about the acquisition of Czech by non-native learners prompted the compilation of the first learner corpus of Czech. After introducing its basic design and parameters, including a multi-tier manual annotation scheme and error taxonomy, we focus on the more technical aspects: transcription of hand-written source texts, process of annotation, and options for exploiting the result, together with tools used for these tasks and decisions behind the choices. To support or even substitute manual annotation we assign some error tags automatically and use automatic annotation tools (tagger, spell checker).},
  author = {Jirka Hana and Alexandr Rosen and Barbora Štindlová and Jan Štěpánek},
  doi = {10.1007/s10579-014-9278-z},
  issn = {1574-020X},
  journal = {Language Resources and Evaluation},
  keywords = {learner corpus; error annotation; Czech; akces; CzeSL},
  number = {4},
  pages = {741--752},
  publisher = {Springer Netherlands},
  title = {Building a learner corpus},
  url = {https://www.researchgate.net/publication/265164699_Building_a_learner_corpus},
  volume = {48},
  year = {2014},
}
@techreport{Hana:etal:2018,
  author      = {Jirka Hana and Filip Hana and Barbora Hladká and Jana Vitoušová-Alferyová},
  title       = {Annotation guidelines for {UD} annotation of {CzeSL}},
  institution = {Charles University},
  address     = {Prague},
  year        = {2018},
  url         = {http://bit.ly/UDCheat},
  local-url   = {http://utkl.ff.cuni.cz/~rosen/public/2019_UD_Cheatsheet.pdf},
  keywords    = {CzeSL},
}
@inproceedings{Hana:Hladka:2018,
  author    = {Jirka Hana and Barbora Hladká},
  title     = {Universal {D}ependencies and Non-Native {C}zech},
  booktitle = {Proceedings of the 17th International Workshop on Treebanks and Linguistic Theories (TLT 2018), December 13--14, 2018, Oslo University, Norway},
  editor    = {Dag Haug and Stephan Oepen and Lilja Øvrelid and Marie Candito and Jan Hajič},
  publisher = {Linköping University Electronic Press, Linköpings universitet},
  number    = {155},
  pages     = {105--114},
  year      = {2018},
  isbn      = {978-91-7685-137-1},
  issn      = {1650-3740},
  url       = {http://www.ep.liu.se/ecp/155/011/ecp18155011.pdf},
  keywords  = {learner corpus, second language, syntax annotation, Universal Dependencies, second language acquisition, CzeSL},
  abstract  = {CzeSL is a learner corpus of texts produced by non-native speakers of Czech. Such corpora are a great source of information about specific features of learners' language, helping language teachers and researchers in the area of second language acquisition. In our project, we have focused on syntactic annotation of the non-native text within the framework of Universal Dependencies. As far as we know, this is a first project annotating a richly inflectional non-native language. Our ideal goal has been to annotate according to the non-native grammar in the mind of the author, not according to the standard grammar. However, this brings many challenges. First, we do not have enough data to get reliable insights into the grammar of each author. Second, many phenomena are far more complicated than they are in native languages. We believe that the most important result of this project is not the actual annotation, but the guidelines and principles that can be used as a basis for other non-native languages.},
}
@inproceedings{Hana:Hladka:2018b,
  abstract = {Our goal has been to annotate the CzeSL corpus according to the non-native grammar in the mind of the author, not according to the standard grammar. However, this brings many challenges. First, we do not have enough data to get reliable insights into the grammar of each author. Second, many phenomena are far more complicated than they are in native languages.},
  address = {Hong Kong},
  author = {Jirka Hana and Barbora Hladká},
  booktitle = {Proceedings of the International Conference on Bilingual Learning and Teaching},
  keywords = {CzeSL},
  publisher = {The Open University of Hong Kong},
  title = {Syntactic annotation of a second-language learner corpus},
  url = {https://ufal.mff.cuni.cz/~hladka/2019/docs/hana-hladka-blt-2018_0.pdf},
  year = {2018},
}
@incollection{Hrdlicka:2012,
  address = {Liberec},
  author = {Milan Hrdlička},
  booktitle = {Čeština -- cílový jazyk a korpusy},
  editor = {Karel Šebesta and Svatava Škodová},
  keywords = {CzeSL},
  pages = {89--108},
  publisher = {Technická univerzita v Liberci, Fakulta přírodovědně-humanitní a pedagogická},
  title = {Jazyková chyba a práce s ní v jazykovém vyučování},
  year = {2012}
}
@inproceedings{Hudouskova:2013,
  address = {Lüdenscheid, Germany},
  author = {Andrea Hudousková},
  booktitle = {Proceedings of the Seventh International Conference Slovko 2013; Natural Language Processing, Corpus Linguistics, E-learning},
  editor = {Katarína Gajdošová and Adriána Žáková},
  isbn = {978-3-942303-18-7},
  keywords = {CzeSL-SGT, akces, CzeSL},
  pages = {100--107},
  publisher = {RAM-Verlag},
  title = {The Corpus {CzeSL} in the Service of Teaching {C}zech for Foreigners -- Errors in the Use of the Pronoun \textit{který}},
  url = {https://korpus.sk/~slovko/2013/Proceedings_Slovko_2013.pdf},
  year = {2013},
}
@incollection{Hudouskova:2014,
  author    = {Andrea Hudousková},
  title     = {Jmenné koncovky v češtině pro cizince -- distribuce, frekvence a fonetika. {P}rvní sonda},
  booktitle = {Radost z jazyků. {S}borník k 75. narozeninám prof. {F}rantiška {Č}ermáka},
  editor    = {Vladimír Petkevič and Ana Adamovičová and Václav Cvrček},
  series    = {Studie z korpusové lingvistiky},
  volume    = {20},
  publisher = {Nakladatelství Lidové noviny, ÚČNK FF UK},
  address   = {Praha},
  pages     = {215--230},
  year      = {2014},
  keywords  = {CzeSL-SGT, akces, CzeSL, book1}
}
@unpublished{Janssen:Rosen:2018,
  abstract = {Different ways of tagging errors in learner corpora

The most distinctive feature of learner corpora is the fact that texts written by language learners contain errors, or deviant forms, or non-native variants of the target language if you wish. To provide a systematic analysis of those errors, learner corpora typically include error annotation, indicating errors in the text. This is traditionally done by an error code assigned to the incorrect part of the text, optionally accompanied by a target hypothesis, i.e. a reformulation of the error in the native standard of the target language. While standards are emerging for linguistic annotation of corpora including standard native language, choosing appropriate categories for annotating errors is not easy. The codes usually reflect their interpretation in terms of a standard grammar (spelling, morphological paradigms, morphosyntactic categories, agreement, government or valency, etc.), and thus their design and application to various phenomena of a non-native language is far from trivial. Moreover, the interplay of categories presumably responsible for the phenomena is not easily represented by tags assigned to the linear text of the original.

We compare the traditional linear approach to error annotation with two alternative approaches: (i) the approach introduced in the COPLE2 corpus (del Río et al. 2016), in which errors are not indicated by a code, but rather the error is provided with an orthographic, a morphosyntactic, and a lexical correction, which together provide detailed information about the type of error; and (ii) the approach introduced in the CzeSL corpus, in which the erroneous sentence is aligned with two corrected versions of the sentence, with alignments between the words in the three variants, as well as error codes (Hana et al. 2014).

For the comparison, we will describe what the various options provide, using the two tools in their respective project as examples: feat1 for the parallel scheme approach, and TEITOK (Janssen 2016) for the multi-layered correction approach. We look at how the different approaches can represent complex cases of overlapping errors (where a group of words is involved in various distinct errors), discontinuous errors (where the words involved in an error are not next to each other), word order errors, and secondary errors (where the correction of one error leads to another error). We will not only discuss how the three different paradigms can represent the complex error cases, but also how once the errors are correctly represented in the corpus, they can be used for concrete search queries to answer research questions.
Error annotation of learner corpora is often combined with linguistic annotation, as it is applied in corpora of standard (native) language. If a corrected version of the text is available, standard tools (such as tokenizers, taggers, and parsers) can be used to apply such tools with high accuracy, and the result can even be projected to or linked with the original uncorrected text. Annotating the uncorrected original text is typically a more challenging task, both in terms of accuracy of existing tools, and in terms of missing concepts. We will show how the paradigms discussed above integrate with annotations of this type.

1 https://bitbucket.org/jhana/feat-hg/wiki/Home

References
del Río, I., Antunes, S., Mendes, A., and Janssen, M. (2016). Towards error annotation in a learner corpus of portuguese. In 5th NLP4CALL and 1st NLP4LA workshop in Sixth Swedish Language Technology Conference (SLTC), Umeå University, Sweden.
Hana, J., Rosen, A., Štindlová, B., and Štěpánek, J. (2014). Building a learner corpus. Language Resources and Evaluation, 48(4):741–752.
Janssen, M. (2016). TEITOK: Text Faithful Corpora. In: Proceedings of LREC 2016. ELRA. Portorož, Slovenia, pp. 4037–4043.},
  author = {Maarten Janssen and Alexandr Rosen},
  booktitle = {Grammar \& Corpora 2018},
  keywords = {akces; CzeSL},
  local-url = {https://docs.google.com/presentation/d/1LdtMiKKif6PFLFwqi6LU3VYFR_K7-5xzLT6KhN4uAWw/edit#slide=id.g44349cf621_0_122},
  note = {A talk presented at the 7th conference Grammar and Corpora (GaC 2018) in Paris, November 2018},
  title = {Different ways of tagging errors in learner corpora},
  url = {http://drehu.linguist.univ-paris-diderot.fr/gac-2018/?fichier=programme},
  year = {2018},
}
@inproceedings{Jelinek:2017,
  author    = {Tomáš Jelínek},
  title     = {Errors in Inflection in {C}zech as a Second Language and Their Automatic Classification},
  booktitle = {Text, Speech, and Dialogue 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017},
  editor    = {Ekštein, Kamil and Matoušek, Václav},
  series    = {Lecture Notes in Artificial Intelligence series},
  publisher = {Springer International Publishing},
  pages     = {263--271},
  year      = {2017},
  keywords  = {akces; CzeSL},
}
@incollection{Jelinek:etal:2012a,
  author    = {Jelínek, Tomáš and Štindlová, Barbora and Rosen, Alexandr and Hana, Jirka},
  title     = {Combining Manual and Automatic Annotation of a Learner Corpus},
  booktitle = {Text, Speech and Dialogue: 15th International Conference, TSD 2012, Brno, Czech Republic, September 3-7, 2012. Proceedings},
  editor    = {Sojka, Petr and Horák, Aleš and Kopeček, Ivan and Pala, Karel},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  pages     = {127--134},
  year      = {2012},
  isbn      = {978-3-642-32790-2},
  doi       = {10.1007/978-3-642-32790-2_15},
  url       = {http://dx.doi.org/10.1007/978-3-642-32790-2_15},
  keywords  = {CzeSL},
}
@unpublished{Kaczmarska:Gawronska:2019,
  author   = {Elżbieta Kaczmarska and Gabriela Gawrońska},
  title    = {Specifics of the acquisition of a closely related language in a corpus of {C}zech produced by {P}olish learners},
  note     = {Poster presented at the 5th International Learner Corpus Research Conference (LCR2019) in Warsaw, 12–14 September 2019},
  year     = {2019},
  url      = {https://lcr2019.ils.uw.edu.pl/files/2019/09/Conference-schedule-final-colour.pdf},
  abstract = {Elżbieta Kaczmarska and Gabriela Gawrońska
Specifics of the acquisition of a closely related language in a corpus of Czech produced by Polish learners

With Polish (L1) as a language closely related to Czech (L2), a strong L1 interference is observed at all levels – pronunciation, morphology, syntax, lexicon, including phraseology (false friends), and metalinguistic communication.  To make teaching (and learning) more efficient, we need to focus on specific weaknesses and strengths of the learner on any level. To identify them, both incorrect and correct use of Czech by the learners should be studied.  For this purpose, we build a corpus of Czech texts produced by Polish students by extending the L1 Polish – L2 Czech subcorpus of CzeSL (Czech as a Second Language), a learner corpus built at Charles University in Prague.

Before the start of our project, the Polish–Czech subcorpus of CzeSL was quite small (77 texts, 15 thousand words). Currently the Polish–Czech subcorpus of CzeSL is significantly larger (200 texts, 60 thousand words).},
}
@article{Ludwicki:Skodova2018,
  abstract = {The paper deals with the use of the expression ty vole (that can be translated as ``man/dude/shit'') in the current Czech language. Through the qualitative analysis of the material of spoken Czech corpora, the contribution demonstrates that this originally vulgar expression in contemporary Czech has lost its mischievous content in some contexts and it is used also with other meanings than vulgar or depreciative. At the same time, it is possible to observe the shift in its morphological classification, at present it is not always possible to characterize its form as a vocative of the substance of a word vůl/ox, i.e. also its position in the contacting function as addressing a communication partner, but it is formally ceased and it functions as a particle. Parallelly with it, it acquires new communication functions. The aim of the analysis is to describe the communication acquires new communicative functions. Corpus analysis is accompanied by an elicitation research probe among foreign students of Bohemistics. The aim of the elicitation is the individual interpretation of contexts and goals of using the vocabulary of ty vole.},
  annote = {ERIH PLUS},
  author = {Antoni Ludwicki and Svatava Škodová},
  internal-note = {NOTE(review): the page range 291--246 is descending (end page lower than start page); verify against the printed issue},
  issn = {1642-9893},
  journal = {Bohemistyka},
  keywords = {Pragmatics; ty vole; contemporary Czech; informal Czech; particles; communication; vulgarity; vulgarisms; CzeSL},
  number = {3},
  pages = {291--246},
  title = {Ty vole jako multifunkční komunikační prostředek v současné češtině (`{Ty} vole' as a multifunctional communication means in contemporary {C}zech)},
  url = {http://utkl.ff.cuni.cz/~rosen/public/TY%20VOLE_fin_tisk.pdf},
  year = {2018},
}
@incollection{Meurers:2015,
  author = {Detmar Meurers},
  booktitle = {The Cambridge Handbook of Learner Corpus Research},
  editor = {Sylviane Granger and Gaëtanelle Gilquin and Fanny Meunier},
  keywords = {learner corpora; CzeSL},
  pages = {537--566},
  publisher = {Cambridge University Press},
  title = {Learner Corpora and Natural Language Processing},
  url = {http://www.sfs.uni-tuebingen.de/~dm/papers/meurers-15.pdf},
  year = {2015},
}
@mastersthesis{Naplava:2017,
  author   = {Jakub Náplava},
  title    = {Natural Language Correction},
  school   = {Charles University, Faculty of Mathematics and Physics},
  address  = {Praha},
  year     = {2017},
  url      = {https://is.cuni.cz/webapps/zzp/detail/188260/},
  keywords = {language correction, spell checking, natural language processing, deep learning, CzeSL},
  abstract = {The goal of this thesis is to explore the area of natural language correction and to design and implement neural network models for a range of tasks ranging from general grammar correction to the specific task of diacritization. The thesis opens with a description of existing approaches to natural language correction. Existing datasets are reviewed and two new datasets are introduced: a manually annotated dataset for grammatical error correction based on CzeSL (Czech as a Second Language) and an automatically created spelling correction dataset. The main part of the thesis then presents design and implementation of three models, and evaluates them on several natural language correction datasets. In comparison to existing statistical systems, the proposed models learn all knowledge from training data; therefore, they do not require an error model or a candidate generation mechanism to be manually set, neither they need any additional language information such as a part of speech tags. Our models significantly outperform existing systems on the diacritization task. Considering the spelling and basic grammar correction tasks for Czech, our models achieve the best results for two out of the three datasets. Finally, considering the general grammatical correction for English, our models achieve results which are slightly worse, but still comparable to the previous state-of-the-art model.},
}
@inproceedings{Naplava:Straka:2019,
  author    = {Jakub Náplava and Milan Straka},
  title     = {Grammatical Error Correction in Low-Resource Scenarios},
  booktitle = {Proceedings of the 5th Workshop on Noisy User-generated Text (W-NUT 2019)},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  pages     = {346--356},
  year      = 2019,
  isbn      = {978-1-950737-84-0},
  url       = {https://www.aclweb.org/anthology/D19-5545/},
  local-url = {http://ufal.mff.cuni.cz/biblio/attachments/2019-naplava-p7438659577534366726.pdf},
  keywords  = {CzeSL},
}
@phdthesis{Peceny:2017,
  abstract = {Over the last few years two learner corpora of authentic texts of non-native speakers of Czech have originated (MERLIN Corpus and CzeSL Corpus), giving linguists an important source of data for researching Czech as a foreign language. Ergo, for the first time it is possible to carry out the language analysis of non-native speakers of Czech using tools of corpus linguistics to formulate evidence-based research findings. The presented thesis uses that as well, focussing on the study and description of the use of connectors in the written text production of non-native speakers of Czech, being primarily based on evidence from the learner MERLIN Corpus, which as opposed to other corpora is characterized by linking written text production reliably to the proficiency levels of the Common European Framework of Reference for Languages (CEFR), including the levels of A2–B2. At the same time, it also contains a text error annotation, thus enabling to ascertain what effect the language proficiency has on the extent of the repertoire and frequency of using connectors, as well as to determine how the proficiency is reflected in using them correctly. The theoretical part of the thesis summarizes the approaches to the classification of connectors in Czech, pointing out some difficulties in their classification. Furthermore, it deals with the description of language proficiency within the CEFR with respect to using connectors. Last but not least, it presents the basic principles of an error analysis and the interlanguage theory. The second part of the dissertation characterises the used corpora, explaining the procedures for retrieving and processing data employed for the subsequent analysis, which is included in the third part of the thesis. Firstly, the repertoire of connectors that the corpus contains is described there. Subsequently, the tendencies in their use are researched.
Using a correspondence analysis, the relationship of connectors to individual proficiency levels of the CEFR is shown. The description is in the end completed with a basic quantitative-qualitative error analysis.},
  address = {Praha},
  author = {Pavel Pečený},
  keywords = {connectors, Czech as a foreign language, interlanguage, error analysis, language proficiency level, CEFR, MERLIN corpus, CzeSL},
  school = {Institute of Czech Language and Theory of Communication, Faculty of Arts, Charles University},
  title = {Užití spojovacích prostředků v textech nerodilých mluvčích češtiny [The use of Connectors in the Texts of Non-Native Speakers of Czech]},
  url = {https://is.cuni.cz/webapps/zzp/detail/105300/},
  year = {2017},
}
@incollection{Petkevic:etal:2012,
  author    = {Vladimír Petkevič and Alexandr Rosen and Barbora Štindlová and Tomáš Jelínek and Milena Hnátková and Petr Jäger},
  title     = {Anotace chybových textů v českém žákovském korpusu},
  booktitle = {Čeština -- cílový jazyk a korpusy},
  editor    = {Karel Šebesta and Svatava Škodová},
  chapter   = {4},
  publisher = {Technická univerzita v Liberci},
  address   = {Liberec},
  pages     = {61--88},
  year      = {2012},
  isbn      = {978-80-7372-848-9},
  url       = {http://utkl.ff.cuni.cz/~rosen/public/czesl_monografie.pdf},
  keywords  = {learner corpora; akces; CzeSL},
}
@incollection{Petkevic:Stindlova:2011,
  author    = {Vladimír Petkevič and Barbora Štindlová},
  title     = {Chybová anotace žákovského korpusu {CzeSL}},
  booktitle = {Sborník AUČCJ 2011},
  editor    = {Z. Hajíčková},
  publisher = {AUČCJ, Akropolis},
  address   = {Praha},
  pages     = {99--114},
  year      = {2011},
  keywords  = {akces, CzeSL}
}
@inproceedings{Ramasamy:etal:2015,
  author    = {Loganathan Ramasamy and Alexandr Rosen and Pavel Straňák},
  title     = {Improvements to {K}orektor: A case study with native and non-native {C}zech},
  booktitle = {ITAT 2015: Information technologies -- Applications and Theory / SloNLP 2015},
  editor    = {Jakub Yaghob},
  publisher = {Charles University in Prague},
  address   = {Prague},
  pages     = {73--80},
  year      = {2015},
  isbn      = {978-1515120650},
  issn      = {1613-0073},
  url       = {http://ceur-ws.org/Vol-1422/73.pdf},
  keywords  = {akces; CzeSL},
}
@inproceedings{Richter:etal:2012,
  abstract = {We present Korektor -- a flexible and powerful purely statistical text correction tool for Czech that goes beyond a traditional spell checker. We use a combination of several language models and an error model to offer the best ordering of correction proposals and also to find errors that cannot be detected by simple spell checkers, namely spelling errors that happen to be homographs of existing word forms. Our system works also without any adaptation as a diacritics generator with the best reported results for Czech text. The design of Korektor contains no language-specific parts other than trained statistical models, which makes it highly suitable to be trained for other languages with available resources. The evaluation demonstrates that the system is a state-of-the-art tool for Czech, both as a spell checker and as a diacritics generator. We also show that these functions combine into a potential aid in the error annotation of a learner corpus of Czech.},
  address = {Mumbai, India},
  author = {Richter, Michal and Straňák, Pavel and Rosen, Alexandr},
  booktitle = {Proceedings of COLING 2012: Posters},
  keywords = {spellchecking; diacritics completion; language model; error model; akces; CzeSL; grammar checker},
  month = dec,
  pages = {1019--1028},
  publisher = {The COLING 2012 Organizing Committee},
  title = {{K}orektor -- A System for Contextual Spell-Checking and Diacritics Completion},
  url = {http://www.aclweb.org/anthology/C12-2099},
  year = {2012},
}
@inproceedings{Rosen:2014,
  address = {Poznań, Poland},
  author = {Alexandr Rosen and Barbora Štindlová and Svatava Škodová and Jirka Hana},
  booktitle = {SLE 2014 --- 47th Annual Meeting of the Societas Linguistica Europaea, Workshop on Interlanguage Annotation},
  doi = {10.13140/2.1.1588.9600},
  keywords = {akces; CzeSL},
  organization = {Adam Mickiewicz University},
  title = {Using a cross-classifying taxonomy of non-standard forms to analyze non-native {C}zech},
  url = {https://www.researchgate.net/profile/Alexandr_Rosen/publication/268123637_Using_a_cross-classifying_taxonomy_of_non-standard_forms_to_analyze_non-native_Czech/links/5461db480cf2c1a63c001fef.pdf?ev=pub_int_doc_dl&origin=publication_list&inViewer=true},
  year = {2014},
}
@techreport{Rosen:2014c,
  author = {Alexandr Rosen},
  institution = {Univerzita Karlova v Praze},
  keywords = {akces, CzeSL},
  title = {{CzeSL-SGT} -- korpus češtiny nerodilých mluvčích s automaticky provedenou anotací},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2014-czesl-sgt-cs.pdf},
  year = {2014},
}
@techreport{Rosen:2014d,
  author = {Alexandr Rosen},
  institution = {Charles University},
  keywords = {akces, CzeSL},
  title = {{CzeSL-SGT} -- a corpus of non-native speakers' {C}zech with automatic annotation},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2014-czesl-sgt-en.pdf},
  year = {2014},
}
@techreport{Rosen:2015,
  author      = {Alexandr Rosen},
  title       = {{CzeSL-MAN} -- a corpus of non-native speakers' {C}zech with manual annotation},
  institution = {Charles University in Prague},
  year        = {2015},
  url         = {http://utkl.ff.cuni.cz/~rosen/public/2015-czesl-man-en.pdf},
  keywords    = {akces, CzeSL},
}
@inproceedings{Rosen:2016a,
  abstract = {Investigating language acquisition by non-native learners helps to understand important linguistic issues and to develop teaching methods, better suited both to the specific target language and to the learner. These tasks can now be based on empirical evidence from learner corpora including Czech. They are equipped with morphological and syntactic annotation, together with the detection and categorization of non-standard linguistic phenomena. After an overview of existing resources we propose solutions to several issues inherent to the process of compiling, annotating and using such corpora, including automatic identification of errors, design and application of error taxonomy, and user-friendly search tool. Finally, we deal with the question to what extent resources and tools available for standard language can be applied to the language of non-native learners.},
  address = {Bratislava, Slovakia},
  author = {Alexandr Rosen},
  booktitle = {Proceedings of the 16th {ITAT}: Slovenskočeský {NLP} workshop (Slo{NLP} 2016)},
  editor = {Broňa Brejová},
  isbn = {978-1537016740},
  issn = {1613-0073},
  keywords = {akces; learner corpus; language acquisition; error annotation; spelling checker; grammar checker; morphology; syntax; Czech; CzeSL},
  organization = {Comenius University in Bratislava, Faculty of Mathematics, Physics and Informatics},
  pages = {80--87},
  publisher = {CreateSpace Independent Publishing Platform},
  series = {{CEUR} Workshop Proceedings},
  title = {Building and using corpora of non-native {C}zech},
  url = {http://ceur-ws.org/Vol-1649/80.pdf},
  volume = {1649},
  year = {2016},
}
@inproceedings{Rosen:2016b,
  abstract = {A specific language as used by different speakers and in different situations
    has a number of more or less distant varieties. Extending the notion of
    non-standard language to varieties that do not fit an explicitly or implicitly
    assumed norm or pattern, we look for methods and tools that could be applied to
    this domain. The needs start from the theoretical side: categories usable for
    the analysis of non-standard language are not readily available, and continue
    to methods and tools required for its detection and diagnostics. A general
    discussion of issues related to non-standard language is followed by two case
    studies. The first study presents a taxonomy of morphosyntactic categories as
    an attempt to analyse non-standard forms produced by non-native learners of
    Czech. The second study focusses on the role of a rule-based grammar and
    lexicon in the process of building and
    using a parsebank.},
  address = {Osaka, Japan},
  author = {Rosen, Alexandr},
  booktitle = {Proceedings of the Workshop on Grammar and Lexicon: interactions and interfaces (GramLex)},
  editor = {Eva Hajičová and Igor Boguslavski},
  isbn = {978-4-87974-706-8},
  keywords = {CzeSL; treebank},
  month = dec,
  pages = {120--131},
  publisher = {The COLING 2016 Organizing Committee},
  title = {Modeling non-standard language},
  url = {http://aclweb.org/anthology/W16-3815},
  year = {2016},
}
@inproceedings{Rosen:2017,
  address = {Frankfurt am Main, Bern, Bruxelles, New York, Oxford, Warszawa, Wien},
  author = {Alexandr Rosen},
  booktitle = {Language, Corpora and Cognition},
  doi = {10.3726/b10717},
  editor = {Piotr Pezik and Jacek Walinski and Krzysztof Kosecki},
  isbn = {9783631707098},
  keywords = {akces; CzeSL; CzeSL-SGT},
  pages = {163--180},
  publisher = {Peter Lang},
  title = {Introducing a corpus of non-native {C}zech with automatic annotation},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2016_SGT_lodz.pdf},
  year = {2017},
}
@article{Rosen:2017a,
  author   = {Alexandr Rosen},
  title    = {Valency Patterns in {C}zech Learner Corpora},
  journal  = {Prace Filologiczne},
  volume   = {LXX},
  pages    = {377--397},
  year     = {2017},
  keywords = {akces; CzeSL},
}
@inproceedings{Rosen:2017b,
  abstract = {A collection of essays, written by non-native learners of Czech, has been used to build several corpora, with or without metadata, in a dedicated multi-level format or structured according to a commonly used search tool. Its manual or automatic annotation includes correction, lemmas and standard morphosyntactic tags assigned to the source or the corrected target, and error labels based on a formally defined or grammar-based taxonomy. An overview of the annotation and the corpora themselves will be presented. The results suggest a question: to what extent they meet expectations of existing or prospective users and what can be done to better suit their needs.},
  author = {Alexandr Rosen},
  booktitle = {SWE-CLARIN: Workshop on interoperability of L2 resources and tools},
  keywords = {akces; CzeSL},
  note = {Presentation},
  title = {Trying to make a learner corpus user happy: from annotation to search tools},
  year = {2017},
}
@inproceedings{Rosen:2018,
  address = {Heidelberg},
  author = {Alexandr Rosen},
  booktitle = {Grammar and Corpora 2016},
  doi = {10.17885/heiup.361.509},
  editor = {Eric Fuß and Marek Konopka and Beata Trawinski and Ulrich H. Waßner},
  isbn = {978-3-946054-84-9},
  keywords = {akces; treebank; CzeSL},
  pages = {271--287},
  publisher = {Heidelberg University Publishing},
  title = {Coping with unruly language: non-standard usage in a corpus},
  url = {https://heiup.uni-heidelberg.de/catalog/book/361},
  year = {2018},
}
@article{Rosen:etal:2013,
  author = {Rosen, Alexandr and Hana, Jirka and Štindlová, Barbora and Feldman, Anna},
  doi = {10.1007/s10579-013-9226-3},
  issn = {1574-020X},
  journal = {Language Resources and Evaluation -- Special Issue: Resources for language learning},
  keywords = {Learner corpus; Error annotation; Second language acquisition; Czech; akces; CzeSL},
  language = {English},
  month = mar,
  number = {1},
  pages = {65--92},
  publisher = {Springer Netherlands},
  title = {Evaluating and automating the annotation of a learner corpus},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2011-czesl-lrej_prefinal.pdf},
  volume = {48},
  year = {2014},
}
@book{Rosen:etal:2020,
  address = {Praha},
  author = {Alexandr Rosen and Jiří Hana and Barbora Hladká and Tomáš Jelínek and Svatava Škodová and Barbora Štindlová},
  booktitle = {Compiling and annotating a learner corpus for a morphologically rich language -- {CzeSL}, a corpus of non-native {C}zech},
  keywords = {CzeSL},
  publisher = {Karolinum, Charles University Press},
  title = {Compiling and annotating a learner corpus for a morphologically rich language -- {CzeSL}, a corpus of non-native {C}zech},
  url = {https://dspace.cuni.cz/handle/20.500.11956/123103},
  year = {2020}
}
@article{Sebesta:2010,
  author = {Šebesta, Karel},
  journal = {Studie z aplikované lingvistiky/Studies in Applied Linguistics},
  keywords = {akces; CzeSL},
  pages = {11--34},
  title = {Korpusy češtiny a osvojování jazyka [{C}orpora of {C}zech and Language Acquisition]},
  volume = {1},
  year = {2010}
}
@incollection{Sebesta:2011,
  author    = {Šebesta, Karel},
  title     = {Čeština cizinců v korpusu},
  booktitle = {Přednášky z 54. běhu LŠSS},
  publisher = {Filozofická fakulta UK v Praze},
  address   = {Praha},
  year      = {2011},
  keywords  = {akces; CzeSL},
}
@incollection{Sebesta:2011a,
  author    = {Šebesta, Karel},
  title     = {Akviziční korpusy},
  booktitle = {Minulost, přítomnost a budoucnost v jazyce a v literatuře. Ústí nad Labem 1.--3. 9. 2010},
  editor    = {Mitter, Patrik and Menšíková, Zdeňka},
  pages     = {282--285},
  publisher = {PF UJEP},
  address   = {Ústí nad Labem},
  year      = {2011},
  keywords  = {akces; CzeSL},
}
@incollection{Sebesta:2012,
  author    = {Šebesta, Karel},
  title     = {Learner Corpora and {C}zech Language},
  booktitle = {Intercultural Inspirations for Language Education. Spaces for understanding.},
  editor    = {Semrádová, Ilona},
  pages     = {74--89},
  publisher = {Univerzita Hradec Králové},
  address   = {Hradec Králové},
  year      = {2012},
  keywords  = {akces; CzeSL},
}
@incollection{Sebesta:etal:2011,
  address = {Praha},
  author = {Karel Šebesta and Zuzanna Bedřichová and Kateřina Šormová and Svatava Škodová},
  booktitle = {Korpusová lingvistika Praha 2011: 2 -- Výzkum a výstavba korpusů},
  editor = {František Čermák},
  keywords = {akces; CzeSL; ROMi},
  pages = {93--104},
  publisher = {Nakladatelství Lidové noviny},
  title = {Podoba a využití korpusu jinojazyčných a romských mluvčích češtiny: {CzeSL} a {ROMi}},
  url = {http://akces.ff.cuni.cz/system/files/KL-BEDR-SEB-SORM-nov.pdf},
  year = {2011},
}
@misc{Sebesta:etal:2012,
  author    = {Šebesta, Karel and Bedřichová, Zuzanna and Štindlová, Barbora and Hrdlička, Milan and Hrdličková, Tereza and Hana, Jiří and Rosen, Alexandr and Petkevič, Vladimír and Jelínek, Tomáš and Škodová, Svatava and Janeš, Petr and Lundáková, Kateřina and Skoumalová, Hana and Šťastný, Klement and Sládek, Šimon},
  title     = {{AKCES} 4},
  year      = {2012},
  note      = {{LINDAT}/{CLARIAH}-{CZ} digital library at the Institute of Formal and Applied Linguistics ({ÚFAL}), Faculty of Mathematics and Physics, Charles University},
  copyright = {Attribution-{NonCommercial}-{NoDerivs} 3.0 Unported ({CC} {BY}-{NC}-{ND} 3.0)},
  url       = {http://hdl.handle.net/11858/00-097C-0000-000C-2293-0},
  keywords  = {CzeSL},
}
@incollection{Sebesta:Skodova:2011,
  author    = {Šebesta, Karel and Škodová, Svatava},
  title     = {Žákovský korpus a jeho využití pro češtinu jako druhý jazyk},
  booktitle = {20 let vývoje didaktiky cizích jazyků},
  publisher = {TUL},
  address   = {Liberec},
  year      = {2011},
  keywords  = {akces; CzeSL},
}
@book{Sebesta:Skodova:2012,
  editor         = {Šebesta, Karel and Škodová, Svatava},
  title          = {Čeština -- cílový jazyk a korpusy},
  booktitle      = {Čeština -- cílový jazyk a korpusy},
  publisher      = {Technická univerzita v Liberci},
  address        = {Liberec},
  year           = {2012},
  isbn           = {978-80-7372-848-9},
  url            = {http://utkl.ff.cuni.cz/~rosen/public/czesl_monografie.pdf},
  listofcontents = {http://utkl.ff.cuni.cz/~rosen/public/czesl_obsah_monografie.png},
  keywords       = {learner corpora; akces; CzeSL},
}
@book{Sindelarova:Skodova:2013,
  author    = {Šindelářová, Jaromíra and Škodová, Svatava},
  title     = {Čeština jako cílový jazyk},
  booktitle = {Čeština jako cílový jazyk},
  volume    = {I, II},
  publisher = {MŠMT},
  address   = {Praha},
  year      = {2013},
  isbn      = {978-80-87601-20-4; 978-80-87601-18-1},
  keywords  = {CzeSL},
}
@electronic{Sindelarova:Skodova:2013a,
  address = {Praha},
  annote = {Využití korpusů se jeví jako jeden z moderních technologických prostředků, které dobře vyhovují jednak měnícímu se výukovému paradigmatu.

Článek byl vytvořen na základě příručky Metodika práce s žáky-cizinci v základní škole, která vznikla v roce 2012 pod patronací MŠMT. Všechny kapitoly z příručky budou součástí uceleného seriálu článků. Příručka je jako příloha ke stažení přiložena k úvodnímu článku celého seriálu.

Některé části kapitol vycházejí ze zkušeností pedagogů, kteří se účastnili e-learningového kurzu v rámci projektu z ESF s názvem Sociokulturní kompetence pro pracovníky škol a školských zařízení (CZ.1.07/1.2.00/08.0104 ). Informace týkající se využití národního korpusu a žákovského korpusu CzeSL pro výuku cizinců vycházejí z projektu Inovace vzdělávání v oboru čeština jako druhý jazyk (CZ.1.07/2.2.00/07.0259).},
  author = {Jaromíra Šindelářová and Svatava Škodová},
  year = {2013},
  keywords = {integrace, žák-cizinec, žáci-cizinci, jazykový korpus, český národní korpus, CzeSL},
  publisher = {Metodický portál RVP – inspirace a zkušenosti učitelů},
  title = {Práce s korpusy ve výuce žáků-cizinců},
  url = {https://clanky.rvp.cz/clanek/c/z/17481/PRACE-S-KORPUSY-VE-VYUCE-ZAKU-CIZINCU.html/},
  urldate = {2021-03-31},
}
@article{Skodova:2013,
  author   = {Škodová, Svatava},
  title    = {{CzeSL} -- Korpus češtiny nerodilých mluvčích [{CzeSL -- a Corpus of Non-Native Speakers' Czech}]},
  journal  = {Korpus -- gramatika -- axiologie},
  volume   = {8},
  pages    = {82--89},
  year     = {2013},
  keywords = {learner corpora, CzeSL},
}
@article{Skodova:2017,
  abstract = {Implementation of Selected Communication Functions in Comparison of Non-Native and Native Speakers of Czech. The goal of the paper is to present analysis of a personal apology letter with formal features written by Ukrainian examinees during the Examination of the Czech Language for Permanent Residence in the Czech Republic. The study does not present a complex analysis of the letters, but it concentrates on selected linguistic phenomena tied up to the functions of an introductory greeting, apology from a meeting and proposal of a new meeting, expression of farewell. The investigation presented in the paper aims to (1) make an inventory of linguistic means used by non-native speakers to achieve the task given, (2) analyse their use and to (3) compare the results with those reported on the basis of a similar analysis conducted on a native speakers' corpus. It seeks to answer the following questions: (1) What type of means do non-native speakers use to express the communicative functions under the interest? (2) What is the frequency of the means? Does it create any pattern of the interlanguage? (3) How do the non-native speakers express the function in comparison to the native speaker use? The corpus built for the analysis consists of letters selected during the Examination of the Czech Language for Permanent Residence in the CR in years 2013--2014. The participants selected were the Ukrainian native speakers, all living and working in the Czech Republic for 5 years at least. There are 141 letters finally included into the corpus. The letters were written on the basis of a short printed verbal and visual prompt in an essay examination. The same task was given to a group of native speakers. This group included 23 Czech native speakers of various age, level of education and occupation. We analysed the letters of this group at the same way as the letters of the examinees, the result served as the prototype of the Czech native speakers and allowed the comparison and the frame for description of variants in the learners' language.},
  author = {Svatava Škodová},
  journal = {Studie z aplikované lingvistiky},
  keywords = {akces; CzeSL; book1},
  pages = {121--135},
  title = {Realizace vybraných komunikačních funkcí v porovnání nerodilých a rodilých mluvčích češtiny},
  url = {https://sites.ff.cuni.cz/studiezaplikovanelingvistiky/wp-content/uploads/sites/19/2017/11/Svatava_Skodova_121-135.pdf},
  volume = {8},
  year = {2017},
}
@incollection{Skodova:2018,
  abstract = {The aim of the text is to introduce an analysis of the verb jít/go in the written texts of non-native speakers of Czech. The quantitative and qualitative analysis is based on data excerpted from the Czech National Corpus: the subcorpus CzeSL. The aim of the text is not an error analysis, but rather, a semantic analysis. The text aims to contribute to the description of the acquisition of motion events by non-native speakers of Czech.},
  address = {Praha},
  author = {Svatava Škodová},
  booktitle = {Čeština jako cizí jazyk v průsečíku pohledů},
  editor = {Svatava Škodová and Milan Hrdlička},
  isbn = {978-80-7308-884-2},
  keywords = {CzeSL, verb jít/go, motion event, learner corpus, book1},
  publisher = {Filozofická fakulta UK v Praze},
  title = {Sloveso {JÍT} v zrcadle užití nerodilými mluvčími češtiny ({T}he verb {JÍT} `to go' as used by non-native speakers of {C}zech)},
  year = {2018}
}
@unpublished{Skodova:2020,
  author = {Svatava Škodová},
  keywords = {CzeSL},
  note = {In prep},
  title = {Genitivní a lokální vazby sloves v češtině nerodilých mluvčích},
  year = {2020}
}
@article{Skodova:2020a,
  author   = {Škodová, Svatava},
  title    = {Sloveso {JÍT} jako reprezentant pohybové události v prostoru},
  journal  = {Studie z aplikované lingvistiky},
  volume   = {11},
  number   = {2},
  year     = {2020},
  keywords = {CzeSL},
}
@inproceedings{Skodova:etal:2011,
  address = {Praha},
  author = {Svatava Škodová and Barbora Štindlová and Jirka Hana and Alexandr Rosen},
  booktitle = {Korpusová lingvistika Praha 2011: 3 -- Gramatika a značkování korpusů},
  editor = {Vladimír Petkevič and Alexandr Rosen},
  isbn = {978-80-7422-116-3},
  keywords = {akces; CzeSL},
  organization = {Ústav Českého národního korpusu},
  pages = {208--225},
  publisher = {Nakladatelství Lidové noviny},
  series = {Studie z korpusové lingvistiky},
  title = {Víceúrovňová anotace českého žákovského korpusu},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2011-czesl-korpling.pdf},
  volume = {16},
  year = {2011},
}
@techreport{Skodova:etal:2016,
  author      = {Škodová, Svatava and Štindlová, Barbora and Hana, Jirka and Rosen, Alexandr},
  title       = {Building and annotating corpora of non-native {C}zech},
  type        = {presentation},
  institution = {ELC UC},
  address     = {Louvain-la-Neuve},
  year        = {2016},
  keywords    = {akces; CzeSL},
}
@techreport{Skodova:etal:2019,
  author      = {Škodová, Svatava and Štindlová, Barbora and Rosen, Alexandr and Jelínek, Tomáš and Hladká, Barbora},
  title       = {Příručka k morfologické anotaci češtiny nerodilých mluvčích ({A} Guide for Morphological Annotation of Non-native {C}zech)},
  type        = {Manual},
  edition     = {1.3},
  institution = {Univerzita Karlova},
  address     = {Praha},
  year        = {2019},
  doi         = {10.13140/RG.2.2.34952.78080},
  url         = {https://docs.google.com/document/d/1-CXo7CY8gAE3qtflqA6Orl3VfaP5Sf9N4pEDDEqfP2E/edit?usp=sharing},
  keywords    = {CzeSL},
}
@article{Skodova:etal:2019a,
  address = {Seoul, Korea},
  author = {Svatava Škodová and Kateřina Rysová and Magdaléna Rysová},
  doi = {10.30530/JSL.2019.04.24.1.93},
  journal = {Journal of Slavic Languages},
  keywords = {CzeSL},
  organization = {The Korean Association of Slavic Languages},
  pages = {93--102},
  title = {Comparison of Automatic and Human Evaluation of {L2} Texts in {C}zech},
  year = {2019},
}
@incollection{Skodova:Stindlova:2007,
  author    = {Škodová, Svatava and Štindlová, Barbora},
  title     = {Modifikace principů přímé metody pro potřeby výuky gramatiky češtiny jako cizího jazyka},
  booktitle = {Sborník Asociace učitelů češtiny jako cizího jazyka (AUČCJ) 2006-2007},
  editor    = {Štindlová, Barbora and Čemusová, Jana},
  pages     = {55--64},
  publisher = {Akropolis},
  address   = {Praha},
  year      = {2007},
  keywords  = {CzeSL},
}
@inproceedings{Skodova:Stindlova:2011,
  author    = {Škodová, Svatava and Štindlová, Barbora},
  title     = {Žákovské korpusy, {CzeSL} a čeština jako druhý jazyk},
  booktitle = {Sborník vědecké konference s mezinárodní účastí Sapere Aude -- evropské a české vzdělávání. Hradec Králové 21.--25. 3. 2011},
  pages     = {664--674},
  publisher = {Magnanimitas},
  address   = {Hradec Králové},
  year      = {2011},
  keywords  = {akces, CzeSL},
}
@phdthesis{Stindlova:2011,
  author   = {Štindlová, Barbora},
  title    = {Evaluace chybové anotace v žákovském korpusu češtiny [{E}valuation of Error Mark-Up in a Learner Corpus of {C}zech]},
  school   = {Charles University, Faculty of Arts},
  address  = {Prague},
  year     = {2011},
  url      = {https://is.cuni.cz/webapps/zzp/detail/25046/},
  keywords = {akces; CzeSL},
}
@incollection{Stindlova:2011a,
  address = {Ústí nad Labem},
  author = {Barbora Štindlová},
  booktitle = {Minulost, přítomnost a budoucnost v jazyce a v literatuře. Ústí nad Labem 1.--3. 9. 2010},
  editor = {Patrik Mitter and Zdeňka Menšíková},
  keywords = {akces; CzeSL},
  publisher = {PF UJEP},
  title = {Žákovský korpus. {B}udoucnost pro poznávání akvizice cizího jazyka},
  year = {2011}
}
@inproceedings{Stindlova:2011b,
  abstract = {The paper describes a corpus of texts produced by non-native speakers of Czech. We discuss its annotation scheme, consisting of three interlinked levels, designed to handle a wide range of error types present in the input. Each level corrects different types of errors; links between the levels allow capturing errors in word order and complex discontinuous expressions. Errors are not only corrected, but also classified. The annotation scheme is tested on a doubly-annotated sample of approx. 10,000 words with fair inter-annotator agreement results. We also explore the possibility of applying automated linguistic annotation tools (taggers, spell checkers and grammar checkers) to the learner text to support or even substitute manual annotation.},
  address = {Frankfurt am Main},
  annote = {PALC 2011 -- Practical Applications in Language and Computers, Łódź 13--15 April 2011},
  author = {Barbora Štindlová and Alexandr Rosen and Jirka Hana and Svatava Škodová},
  booktitle = {Corpus Data across Languages and Disciplines},
  editor = {Piotr Pęzik},
  isbn = {978-3-631-62547-7},
  issn = {1437-5281},
  keywords = {learner corpus; error annotation; second language acquisition; Czech; akces; CzeSL},
  pages = {21--32},
  publisher = {Peter Lang},
  series = {Łódź Studies in Language},
  title = {{CzeSL} -- an error tagged corpus of {C}zech as a second language},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2011-czesl-palc.pdf},
  volume = {28},
  year = {2012},
}
@incollection{Stindlova:2011c,
  address = {Praha},
  author = {Barbora Štindlová},
  booktitle = {Sborník z mezinárodního semináře „Informační a komunikační technologie ve výuce cizích jazyků (včetně češtiny pro cizince)“ Poděbrady 21.--22. 6. 2011},
  keywords = {akces; CzeSL},
  pages = {123--131},
  publisher = {ÚJOP UK},
  title = {K přepisu textů nerodilých mluvčích češtiny pro potřeby žákovského korpusu},
  year = {2011}
}
@article{Stindlova:2011d,
  author = {Barbora Štindlová},
  journal = {Studie z aplikované lingvistiky},
  keywords = {akces; CzeSL},
  number = {2},
  pages = {37--60},
  title = {Evaluace chybové anotace navržené pro žákovský korpus češtiny},
  volume = {2},
  year = {2011}
}
@book{Stindlova:2013,
  author    = {Štindlová, Barbora},
  title     = {Žákovský korpus češtiny a evaluace jeho chybové anotace [A Learner Corpus of Czech and Evaluation of its Error Annotation]},
  booktitle = {Žákovský korpus češtiny a evaluace jeho chybové anotace [A Learner Corpus of Czech and Evaluation of its Error Annotation]},
  publisher = {Univerzita Karlova v Praze, Filozofická fakulta},
  address   = {Praha},
  year      = {2013},
  isbn      = {978-80-7308-463-9},
  keywords  = {akces; CzeSL},
  abstract  = {Žákovské korpusy se staly významným zdrojem pro poznání žákovského mezijazyka a významným stimulem pro různé oblasti studia a výuky cizího, resp. druhého jazyka. Studie Žákovský korpus češtiny a evaluace jeho chybové anotace se obecně týká tématu češtiny jako cizího jazyka a částečně zasahuje do oblasti korpusové lingvistiky, neboť se zabývá problematikou žákovských korpusů, především pak otázkami jejich chybového značkování a možnostmi evaluace anotačních schémat. Zaměřuje se především na specifické problémy při elektronizaci rukopisů, na možnosti chybové anotace projevů nerodilých mluvčích a problematiku její evaluace. Zároveň věnuje značný prostor i metodologii, koncepci a účelu budování žákovských korpusů.},
}
@inproceedings{Stindlova:2015,
  author    = {Štindlová, Barbora},
  title     = {K parcelaci gramatiky češtiny pro nerodilé mluvčí},
  booktitle = {Gramatika ve výuce a testování cizích jazyků (včetně češtiny pro cizince) -- Sborník z mezinárodní konference Poděbrady, 23.--24. 6. 2015},
  editor    = {Švrčinová, Martina and Vlasáková, Zuzana},
  pages     = {198--209},
  publisher = {Ústav jazykové a odborné přípravy UK},
  address   = {Praha},
  year      = {2015},
  url       = {https://www.researchgate.net/profile/Svatava_Skodova/publication/279765891_Gramatika_ve_vuce_etiny_jako_cizho_jazyka_z_pohledu_modernch_lingvistickch_pstup_Grammar_in_Teaching_Czech_as_a_Foreign_Language_from_the_Perspective_of_Modern_Linguistic_Approaches/links/559a782a08ae5d8f39371ec3.pdf?disableCoverPage=true},
  keywords  = {CzeSL-SGT, akces, CzeSL},
}
@inproceedings{Stindlova:etal:2010,
  address = {Frankfurt am Main},
  author = {Barbora Štindlová and Svatava Škodová and Alexandr Rosen and Jirka Hana},
  booktitle = {Slavic Languages in Formal Grammar. Proceedings of FDSL 8.5, Brno 2010},
  editor = {Markéta Ziková and Mojmír Dočekal},
  isbn = {978-3-631-63609-1},
  keywords = {akces; CzeSL},
  pages = {205--219},
  publisher = {Peter Lang},
  title = {Annotating foreign learners' {C}zech},
  url = {http://utkl.ff.cuni.cz/~rosen/public/2010-czesl-fdsl-prefinal.pdf},
  year = {2012},
}
@inproceedings{Stindlova:etal:2013,
  address = {Louvain-la-Neuve},
  author = {Barbora Štindlová and Svatava Škodová and Jirka Hana and Alexandr Rosen},
  booktitle = {Twenty Years of Learner Corpus Research: Looking back, Moving ahead},
  editor = {Sylviane Granger and Gaëtanelle Gilquin and Fanny Meunier},
  keywords = {learner corpora; error annotation; akces; CzeSL},
  month = sep,
  publisher = {Presses Universitaires de Louvain},
  series = {Corpora and Language in Use -- Proceedings 1},
  title = {A learner corpus of {C}zech: current state and future directions},
  url = {http://utkl.ff.cuni.cz/~rosen/public/LCR2011_proceedings_Stindlova-et-al_prefinal.pdf},
  year = {2013},
}
@unpublished{Stindlova:Rosen:2012,
  author = {Barbora Štindlová and Alexandr Rosen},
  doi = {10.13140/RG.2.2.24106.64968},
  institution = {Technical University Liberec and Charles University Prague},
  keywords = {learner corpora, error annotation, akces, CzeSL},
  note = {[Annotation manual for the CzeSL learner corpus].},
  title = {Návod k anotaci chybového korpusu},
  url = {http://utkl.ff.cuni.cz/~rosen/public/anotace.pdf},
  urldate = {2012-06-13},
  year = {2012},
}
@inproceedings{Stindlova:Rosen:2016,
  address = {Praha},
  author = {Barbora Štindlová and Alexandr Rosen},
  booktitle = {VIII. mezinárodní sympozium},
  keywords = {akces; CzeSL},
  note = {Presentation},
  organization = {Ústav bohemistických studií FF UK},
  title = {Analýza genitivu v jazyce nerodilých mluvčích na základě žákovského korpusu},
  year = {2016},
}
@mastersthesis{Svak:2013,
  address = {Praha},
  author = {Jiří Svák},
  keywords = {learner corpora, CzeSL},
  school = {Filozofická fakulta Univerzity Karlovy},
  title = {Možnosti chybové anotace češtiny nerodilých mluvčích},
  type = {Bachelor thesis},
  url = {https://is.cuni.cz/webapps/zzp/detail/108472/},
  year = {2013},
}
@mastersthesis{Tydlitatova:2016,
  author   = {Tydlitátová, Ludmila},
  title    = {Native Language Identification of {L2} Speakers of {C}zech},
  school   = {Faculty of Mathematics and Physics, Charles University},
  address  = {Prague},
  year     = {2016},
  url      = {http://ufal.mff.cuni.cz/~hana/bib/tydlitatova-2016-BScThesis.pdf},
  keywords = {akces; CzeSL},
}
@mastersthesis{Vokacova:2016,
  author   = {Vokáčová, Martina},
  title    = {Vliv gramatických profilů českých substantiv na jejich osvojování nerodilými mluvčími},
  school   = {Filozofická fakulta Univerzity Karlovy},
  address  = {Praha},
  year     = {2016},
  url      = {https://is.cuni.cz/webapps/zzp/detail/178323/},
  keywords = {akces, grammatical profiles, frequency, nouns, second language acquisition, CzeSL},
  abstract = {Předkládaná práce zkoumá vliv frekvenčních charakteristik českých substantiv na jejich osvojování nerodilými mluvčími. První, teoretická část shrnuje diskuzi o významu frekvence pro reprezentaci gramatických kategorií v mysli, která se odehrává v rámci zahraniční kognitivní lingvistiky a psycholingvistiky. Druhá část představuje metodiku výzkumu, způsob získávání a anotace vzorku 20 lemmat z korpusu nerodilých mluvčích češtiny CzeSL-SGT. Ve třetí části je provedena analýza vybraných substantiv s ohledem na jejich gramatické profily (tvořené dvěma až třemi v korpusu SYN2015 nejfrekventovanějšími pádovými tvary). Ukazuje, že produkce nerodilých mluvčích má tendenci se od gramatických profilů odvíjet, což se projevuje jednak ve vysoké korespondenci frekvenčních charakteristik substantiv, jednak v nízkém zastoupení morfologických chyb v jejich nejčetnějších tvarech. Případy, kdy se chybovost od tohoto modelu odchyluje, vysvětluje pomocí typové frekvence -- souběžně působícího efektu produktivity určitých deklinačních vzorů -- a vyšší relevancí nominativu, jakožto základního tvaru, pro nerodilé mluvčí.

The present thesis examines the influence of Czech nouns frequency features on their acquisition by non-native speakers. The first theoretical part summarizes the ongoing discussion regarding the importance of frequency for the entrenchment of grammatical categories in one's mind as outlined by cognitive and psycho-linguists abroad. The second introduces the research methodology, collection method and annotation of 20 lemmas obtained from non-native Czech speaker corpus CzeSL-SGT. Subsequently, an analysis of selected nouns is carried out with regard to their grammatical profiles (comprising two or three most frequent case forms as found in SYN2015). Based on its results, the production of non-native speakers shows a tendency to follow grammatical profiles as demonstrated by high correspondence of frequency features of nouns on the one hand and by low error rate in morphology of most frequent forms on the other. Additionally, cases where lower rate does not correspond to the above mentioned model can be explained using type frequency, i.e. the productivity of particular declension models having simultaneously more far-reaching effect, and higher relevance of nominative as the default form for non-native speakers.

},
}
@inproceedings{Zasina:Skodova:2020,
  author    = {Zasina, Adrian Jan and Škodová, Svatava},
  title     = {Konvence a variabilita v užívání prefixů u sloves pohybu nerodilými mluvčími češtiny},
  booktitle = {Konvence a kreativita v českém jazyce a literatuře, 2--4 September 2019, Cieszyn},
  editor    = {Balowski, Mieczysław},
  publisher = {Uniwersytet Adama Mickiewicza},
  address   = {Poznań},
  year      = {2020},
  note      = {In print},
  keywords  = {CzeSL},
}

This file was generated by bibtex2html 1.96.