Publications
2024
- Kushal Tatariya, Artur Kulmizev, Wessel Poelman, Esther Ploeger, Marcel Bollmann, Johannes Bjerva, Jiaming Luo, Heather Lent, and Miryam de Lhoneux. 2024. How Good is Your Wikipedia? arXiv:2411.05527.
@misc{tatariya2024goodwikipedia,
  author        = {Tatariya, Kushal and Kulmizev, Artur and Poelman, Wessel and Ploeger, Esther and Bollmann, Marcel and Bjerva, Johannes and Luo, Jiaming and Lent, Heather and de Lhoneux, Miryam},
  title         = {How Good is Your {Wikipedia}?},
  year          = {2024},
  eprint        = {2411.05527},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2411.05527},
}
- Heather Lent, Kushal Tatariya, Raj Dabre, Yiyi Chen, Marcell Fekete, Esther Ploeger, Li Zhou, Ruth-Ann Armstrong, Abee Eijansantos, Catriona Malau, Hans Erik Heje, Ernests Lavrinovics, Diptesh Kanojia, Paul Belony, Marcel Bollmann, Loïc Grobol, Miryam de Lhoneux, Daniel Hershcovich, Michel DeGraff, Anders Søgaard, and Johannes Bjerva. 2024. CreoleVal: Multilingual Multitask Benchmarks for Creoles. Transactions of the Association for Computational Linguistics, 12:950–978. BibTeX
@article{lent-etal-2024-creoleval,
  author   = {Lent, Heather and Tatariya, Kushal and Dabre, Raj and Chen, Yiyi and Fekete, Marcell and Ploeger, Esther and Zhou, Li and Armstrong, Ruth-Ann and Eijansantos, Abee and Malau, Catriona and Heje, Hans Erik and Lavrinovics, Ernests and Kanojia, Diptesh and Belony, Paul and Bollmann, Marcel and Grobol, Lo{\"i}c and de Lhoneux, Miryam and Hershcovich, Daniel and DeGraff, Michel and S{\o}gaard, Anders and Bjerva, Johannes},
  title    = {{CreoleVal}: Multilingual Multitask Benchmarks for {Creoles}},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {12},
  pages    = {950--978},
  year     = {2024},
  month    = sep,
  abstract = {Creoles represent an under-explored and marginalized group of languages, with few available resources for NLP research. While the genealogical ties between Creoles and a number of highly resourced languages imply a significant potential for transfer learning, this potential is hampered due to this lack of annotated data. In this work we present CreoleVal, a collection of benchmark datasets spanning 8 different NLP tasks, covering up to 28 Creole languages; it is an aggregate of novel development datasets for reading comprehension relation classification, and machine translation for Creoles, in addition to a practical gateway to a handful of preexisting benchmarks. For each benchmark, we conduct baseline experiments in a zero-shot setting in order to further ascertain the capabilities and limitations of transfer learning for Creoles. Ultimately, we see CreoleVal as an opportunity to empower research on Creoles in NLP and computational linguistics, and in general, a step towards more equitable language technology around the globe.},
  issn     = {2307-387X},
  doi      = {10.1162/tacl_a_00682},
  url      = {https://doi.org/10.1162/tacl_a_00682},
  eprint   = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00682/2468651/tacl\_a\_00682.pdf},
}
2023
- Marcel Bollmann, Nathan Schneider, Arne Köhn, and Matt Post. 2023. Two Decades of the ACL Anthology: Development, Impact, and Open Challenges. In Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023), pages 83–94, Singapore. Association for Computational Linguistics. BibTeX Poster
@inproceedings{bollmann-etal-2023-two-decades,
  author      = {Bollmann, Marcel and Schneider, Nathan and K{\"o}hn, Arne and Post, Matt},
  title       = {Two Decades of the {ACL} {A}nthology: Development, Impact, and Open Challenges},
  booktitle   = {Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)},
  pages       = {83--94},
  year        = {2023},
  address     = {Singapore},
  publisher   = {Association for Computational Linguistics},
  url         = {https://aclanthology.org/2023.nlposs-1.10},
  abstract    = {The ACL Anthology is a prime resource for research papers within computational linguistics and natural language processing, while continuing to be an open-source and community-driven project. Since Gildea et al. (2018) reported on its state and planned directions, the Anthology has seen major technical changes. We discuss what led to these changes and how they impact long-term maintainability and community engagement, describe which open-source data and software tools the Anthology currently provides, and provide a survey of literature that has used the Anthology as a main data source.},
  hugo_attach = {Poster»/pub/2023.nlposs-1.10.Poster.pdf},
}
2021
- Marcel Bollmann and Anders Søgaard. 2021. Error Analysis and the Role of Morphology. In Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume, pages 1887–1900, Online. Association for Computational Linguistics. BibTeX Code Talk Best Paper Award
@inproceedings{bollmann-sogaard2021-error,
  author      = {Bollmann, Marcel and S{\o}gaard, Anders},
  title       = {Error Analysis and the Role of Morphology},
  booktitle   = {Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume},
  pages       = {1887--1900},
  year        = {2021},
  address     = {Online},
  publisher   = {Association for Computational Linguistics},
  url         = {https://www.aclweb.org/anthology/2021.eacl-main.162},
  abstract    = {We evaluate two common conjectures in error analysis of NLP models: (i) Morphology is predictive of errors; and (ii) the importance of morphology increases with the morphological complexity of a language. We show across four different tasks and up to 57 languages that of these conjectures, somewhat surprisingly, only (i) is true. Using morphological features does improve error prediction across tasks; however, this effect is less pronounced with morphologically complex languages. We speculate this is because morphology is more discriminative in morphologically simple languages. Across all four tasks, case and gender are the morphological features most predictive of error.},
  hugo_attach = {Code»https://github.com/coastalcph/eacl2021-morpherror|Talk»https://slideslive.com/38954442/},
}
- Marcel Bollmann, Rahul Aralikatte, Héctor Murrieta Bello, Daniel Hershcovich, Miryam de Lhoneux, and Anders Søgaard. 2021. Moses and the Character-Based Random Babbling Baseline: CoAStaL at AmericasNLP 2021 Shared Task. In Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas, pages 248–254, Online. Association for Computational Linguistics. BibTeX
@inproceedings{bollmann-etal2021-moses,
  author    = {Bollmann, Marcel and Aralikatte, Rahul and Murrieta Bello, H{\'e}ctor and Hershcovich, Daniel and de Lhoneux, Miryam and S{\o}gaard, Anders},
  title     = {{M}oses and the Character-Based Random Babbling Baseline: {C}o{AS}ta{L} at {A}mericas{NLP} 2021 Shared Task},
  booktitle = {Proceedings of the First Workshop on Natural Language Processing for Indigenous Languages of the Americas},
  pages     = {248--254},
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://www.aclweb.org/anthology/2021.americasnlp-1.28},
  abstract  = {We evaluated a range of neural machine translation techniques developed specifically for low-resource scenarios. Unsuccessfully. In the end, we submitted two runs: (i) a standard phrase-based model, and (ii) a random babbling baseline using character trigrams. We found that it was surprisingly hard to beat (i), in spite of this model being, in theory, a bad fit for polysynthetic languages; and more interestingly, that (ii) was better than several of the submitted systems, highlighting how difficult low-resource machine translation for polysynthetic languages is.},
}
- Rahul Aralikatte, Héctor Ricardo Murrieta Bello, Miryam de Lhoneux, Daniel Hershcovich, Marcel Bollmann, and Anders Søgaard. 2021. How far can we get with one GPU in 100 hours? CoAStaL at MultiIndicMT Shared Task. In Proceedings of the 8th Workshop on Asian Translation (WAT2021), pages 205–211, Online. Association for Computational Linguistics. BibTeX
@inproceedings{aralikatte-etal2021-howfar,
  author    = {Aralikatte, Rahul and Murrieta Bello, H{\'e}ctor Ricardo and de Lhoneux, Miryam and Hershcovich, Daniel and Bollmann, Marcel and S{\o}gaard, Anders},
  title     = {How far can we get with one {GPU} in 100 hours? {C}o{AS}ta{L} at {M}ulti{I}ndic{MT} Shared Task},
  booktitle = {Proceedings of the 8th Workshop on Asian Translation (WAT2021)},
  pages     = {205--211},
  year      = {2021},
  address   = {Online},
  publisher = {Association for Computational Linguistics},
  url       = {https://aclanthology.org/2021.wat-1.24/},
  abstract  = {This work shows that competitive translation results can be obtained in a constrained setting by incorporating the latest advances in memory and compute optimization. We train and evaluate large multilingual translation models using a single GPU for a maximum of 100 hours and get within 4-5 BLEU points of the top submission on the leaderboard. We also benchmark standard baselines on the PMI corpus and re-discover well-known shortcomings of translation systems and metrics.},
}
2020
- Marcel Bollmann and Desmond Elliott. 2020. On Forgetting to Cite Older Papers: An Analysis of the ACL Anthology. In Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics, pages 7819–7827, Online. Association for Computational Linguistics. BibTeX Code and Data Talk
@inproceedings{bollmann-elliott2020-forgetting,
  author      = {Bollmann, Marcel and Elliott, Desmond},
  title       = {On Forgetting to Cite Older Papers: An Analysis of the {ACL} {A}nthology},
  booktitle   = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
  pages       = {7819--7827},
  year        = {2020},
  address     = {Online},
  publisher   = {Association for Computational Linguistics},
  url         = {https://www.aclweb.org/anthology/2020.acl-main.699},
  abstract    = {The field of natural language processing is experiencing a period of unprecedented growth, and with it a surge of published papers. This represents an opportunity for us to take stock of how we cite the work of other researchers, and whether this growth comes at the expense of {``}forgetting{''} about older literature. In this paper, we address this question through bibliographic analysis. By looking at the age of outgoing citations in papers published at selected ACL venues between 2010 and 2019, we find that there is indeed a tendency for recent papers to cite more recent work, but the rate at which papers older than 15 years are cited has remained relatively stable.},
  hugo_attach = {Code and Data»https://github.com/coastalcph/acl-citations|Talk»https://slideslive.com/38929066/on-forgetting-to-cite-older-papers-an-analysis-of-the-acl-anthology},
}
2019
- Marcel Bollmann, Natalia Korchagina, and Anders Søgaard. 2019. Few-Shot and Zero-Shot Learning for Historical Text Normalization. In Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019), pages 104–114, Hong Kong, China. Association for Computational Linguistics. BibTeX Poster
@inproceedings{bollmann-etal2019-fewshot,
  author      = {Bollmann, Marcel and Korchagina, Natalia and S{\o}gaard, Anders},
  title       = {Few-Shot and Zero-Shot Learning for Historical Text Normalization},
  booktitle   = {Proceedings of the 2nd Workshop on Deep Learning Approaches for Low-Resource NLP (DeepLo 2019)},
  pages       = {104--114},
  year        = {2019},
  address     = {Hong Kong, China},
  publisher   = {Association for Computational Linguistics},
  doi         = {10.18653/v1/D19-6112},
  url         = {https://www.aclweb.org/anthology/D19-6112},
  abstract    = {Historical text normalization often relies on small training datasets. Recent work has shown that multi-task learning can lead to significant improvements by exploiting synergies with related datasets, but there has been no systematic study of different multi-task learning architectures. This paper evaluates 63 multi-task learning configurations for sequence-to-sequence-based historical text normalization across ten datasets from eight languages, using autoencoding, grapheme-to-phoneme mapping, and lemmatization as auxiliary tasks. We observe consistent, significant improvements across languages when training data for the target task is limited, but minimal or no improvements when training data is abundant. We also show that zero-shot learning outperforms the simple, but relatively strong, identity baseline.},
  hugo_attach = {Poster»/pub/poster-deeplo-2019.pdf},
}
- Meriem Beloucif, Ana Valeria Gonzalez, Marcel Bollmann, and Anders Søgaard. 2019. Naive Regularizers for Low-Resource Neural Machine Translation. In Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019), pages 102–111, Varna, Bulgaria. INCOMA Ltd. BibTeX
@inproceedings{beloucif-etal2019-naive,
  author    = {Beloucif, Meriem and Gonzalez, Ana Valeria and Bollmann, Marcel and S{\o}gaard, Anders},
  title     = {Naive Regularizers for Low-Resource Neural Machine Translation},
  booktitle = {Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)},
  pages     = {102--111},
  year      = {2019},
  address   = {Varna, Bulgaria},
  publisher = {INCOMA Ltd},
  doi       = {10.26615/978-954-452-056-4_013},
  url       = {https://www.aclweb.org/anthology/R19-1013},
  abstract  = {Neural machine translation models have little inductive bias, which can be a disadvantage in low-resource scenarios. Neural models have to be trained on large amounts of data and have been shown to perform poorly when only limited data is available. We show that using naive regularization methods, based on sentence length, punctuation and word frequencies, to penalize translations that are very different from the input sentences, consistently improves the translation quality across multiple low-resource languages. We experiment with 12 language pairs, varying the training data size between 17k to 230k sentence pairs. Our best regularizer achieves an average increase of 1.5 BLEU score and 1.0 TER score across all the language pairs. For example, we achieve a BLEU score of 26.70 on the IWSLT15 English{--}Vietnamese translation task simply by using relative differences in punctuation as a regularizer.},
}
- Simon Flachs, Marcel Bollmann, and Anders Søgaard. 2019. Historical Text Normalization with Delayed Rewards. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 1614–1619, Florence, Italy. Association for Computational Linguistics. BibTeX
@inproceedings{flachs-etal2019-historical,
  author    = {Flachs, Simon and Bollmann, Marcel and S{\o}gaard, Anders},
  title     = {Historical Text Normalization with Delayed Rewards},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages     = {1614--1619},
  year      = {2019},
  address   = {Florence, Italy},
  publisher = {Association for Computational Linguistics},
  doi       = {10.18653/v1/P19-1157},
  url       = {https://www.aclweb.org/anthology/P19-1157},
  abstract  = {Training neural sequence-to-sequence models with simple token-level log-likelihood is now a standard approach to historical text normalization, albeit often outperformed by phrase-based models. Policy gradient training enables direct optimization for exact matches, and while the small datasets in historical text normalization are prohibitive of from-scratch reinforcement learning, we show that policy gradient fine-tuning leads to significant improvements across the board. Policy gradient training, in particular, leads to more accurate normalizations for long or unseen words.},
}
- Marcel Bollmann. 2019. A Large-Scale Comparison of Historical Text Normalization Systems. In Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), pages 3885–3898, Minneapolis, Minnesota. Association for Computational Linguistics. BibTeX Code and Datasets Poster
@inproceedings{bollmann2019-largescale,
  author      = {Bollmann, Marcel},
  title       = {A Large-Scale Comparison of Historical Text Normalization Systems},
  booktitle   = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
  pages       = {3885--3898},
  year        = {2019},
  address     = {Minneapolis, Minnesota},
  publisher   = {Association for Computational Linguistics},
  url         = {https://www.aclweb.org/anthology/N19-1389},
  hugo_attach = {Code and Datasets»https://github.com/coastalcph/histnorm|Poster»/pub/poster-naacl2019.pdf},
}
2018
- Marcel Bollmann. 2018. Normalization of Historical Texts with Neural Network Models. Bochumer Linguistische Arbeitsberichte, 22. Revised and updated version of PhD thesis. BibTeX Slides (from an invited talk at Inria Paris, 21.09.2018)
@article{bollmann2018-normalization,
  author      = {Bollmann, Marcel},
  title       = {Normalization of Historical Texts with Neural Network Models},
  journal     = {Bochumer Linguistische Arbeitsberichte},
  volume      = {22},
  year        = {2018},
  url         = {https://www.linguistics.rub.de/forschung/arbeitsberichte/22.pdf},
  note        = {Revised and updated version of PhD thesis},
  hugo_attach = {Slides (from an invited talk at Inria Paris, 21.09.2018)»/pub/talk-histnorm-2018.pdf},
}
- Marcel Bollmann, Anders Søgaard, and Joachim Bingel. 2018. Multi-Task Learning for Historical Text Normalization: Size Matters. In Proceedings of the Workshop on Deep Learning Approaches for Low-Resource NLP, pages 19–24. Association for Computational Linguistics. BibTeX Poster
@inproceedings{bollmann-etal2018-multitask,
  author      = {Bollmann, Marcel and S{\o}gaard, Anders and Bingel, Joachim},
  title       = {Multi-Task Learning for Historical Text Normalization: Size Matters},
  booktitle   = {Proceedings of the Workshop on Deep Learning Approaches for Low-Resource {NLP}},
  pages       = {19--24},
  year        = {2018},
  publisher   = {{Association for Computational Linguistics}},
  url         = {https://aclweb.org/anthology/W18-3403},
  hugo_attach = {Poster»/pub/poster-deeplo-2018.pdf},
}
2017
- Erik Tjong Kim Sang, Marcel Bollmann, Remko Boschker, Francisco Casacuberta, Feike Dietz, Stefanie Dipper, Miguel Domingo, Rob van der Goot, Marjo van Koppen, Nikola Ljubešić, Robert Östling, Florian Petran, Eva Pettersson, Yves Scherrer, Marijn Schraagen, Leen Sevens, Jörg Tiedemann, Tom Vanallemeersch, and Kalliopi Zervanou. 2017. The CLIN27 Shared Task: Translating Historical Text to Contemporary Language for Improving Automatic Linguistic Annotation. Computational Linguistics in the Netherlands Journal, 7:53–64. BibTeX Poster (from the Bochum team)
@article{tjongkimsang-etal2017-clin27,
  author      = {Tjong Kim Sang, Erik and Bollmann, Marcel and Boschker, Remko and Casacuberta, Francisco and Dietz, Feike and Dipper, Stefanie and Domingo, Miguel and {van der Goot}, Rob and {van Koppen}, Marjo and Ljube{\v{s}}i{\'c}, Nikola and {\"O}stling, Robert and Petran, Florian and Pettersson, Eva and Scherrer, Yves and Schraagen, Marijn and Sevens, Leen and Tiedemann, J{\"o}rg and Vanallemeersch, Tom and Zervanou, Kalliopi},
  title       = {The {CLIN27} Shared Task: Translating Historical Text to Contemporary Language for Improving Automatic Linguistic Annotation},
  journal     = {Computational Linguistics in the Netherlands Journal},
  volume      = {7},
  pages       = {53--64},
  year        = {2017},
  issn        = {2211-4009},
  url         = {https://clinjournal.org/clinj/article/view/68/61},
  hugo_attach = {Poster (from the Bochum team)»/pub/Poster-CLIN27.pdf},
}
- Marcel Bollmann, Joachim Bingel, and Anders Søgaard. 2017. Learning Attention for Historical Text Normalization by Learning to Pronounce. In Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pages 332–344. Association for Computational Linguistics. BibTeX Code Slides
@inproceedings{bollmann-etal2017-learning,
  author      = {Bollmann, Marcel and Bingel, Joachim and S{\o}gaard, Anders},
  title       = {Learning Attention for Historical Text Normalization by Learning to Pronounce},
  booktitle   = {Proceedings of the 55th Annual Meeting of the {A}ssociation for {C}omputational {L}inguistics (Volume 1: Long Papers)},
  pages       = {332--344},
  year        = {2017},
  publisher   = {{Association for Computational Linguistics}},
  doi         = {10.18653/v1/P17-1031},
  url         = {https://www.aclweb.org/anthology/P17-1031/},
  hugo_attach = {Code»https://bitbucket.org/mbollmann/acl2017|Slides»/pub/slides-acl2017.pdf},
}
2016
- Florian Petran, Marcel Bollmann, Stefanie Dipper, and Thomas Klein. 2016. ReM: A Reference Corpus of Middle High German — Corpus Compilation, Annotation, and Access. Journal for Language Technology and Computational Linguistics (JLCL), 31(2):1–15. BibTeX
@article{petran-etal2016-rem,
  author  = {Petran, Florian and Bollmann, Marcel and Dipper, Stefanie and Klein, Thomas},
  editor  = {Hoenen, Armin and Mehler, Alexander and Gippert, Jost},
  title   = {{ReM}: A Reference Corpus of {M}iddle {H}igh {G}erman --- Corpus Compilation, Annotation, and Access},
  journal = {Journal for Language Technology and Computational Linguistics ({JLCL})},
  volume  = {31},
  number  = {2},
  pages   = {1--15},
  year    = {2016},
  url     = {https://jlcl.org/content/2-allissues/4-Heft2-2016/01Petran.pdf},
}
- Marcel Bollmann and Anders Søgaard. 2016. Improving Historical Spelling Normalization with Bi-Directional LSTMs and Multi-Task Learning. In Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics, Osaka, Japan. BibTeX Slides
@inproceedings{bollmann-sogaard2016-improving,
  author      = {Bollmann, Marcel and S{\o}gaard, Anders},
  title       = {Improving Historical Spelling Normalization with Bi-Directional {LSTM}s and Multi-Task Learning},
  booktitle   = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics},
  year        = {2016},
  address     = {Osaka, Japan},
  url         = {https://aclweb.org/anthology/C16-1013},
  hugo_attach = {Slides»/pub/slides-coling2016.pdf},
}
- Marcel Bollmann, Stefanie Dipper, and Florian Petran. 2016. Evaluating Inter-Annotator Agreement on Historical Spelling Normalization. In Proceedings of the 10th Linguistic Annotation Workshop Held in Conjunction with ACL 2016 (LAW-X 2016), pages 89–98, Berlin, Germany. Association for Computational Linguistics. BibTeX
@inproceedings{bollmann-etal2016-evaluating,
  author    = {Bollmann, Marcel and Dipper, Stefanie and Petran, Florian},
  title     = {Evaluating Inter-Annotator Agreement on Historical Spelling Normalization},
  booktitle = {Proceedings of the 10th Linguistic Annotation Workshop Held in Conjunction with {ACL} 2016 ({LAW-X} 2016)},
  pages     = {89--98},
  year      = {2016},
  address   = {Berlin, Germany},
  publisher = {{Association for Computational Linguistics}},
  url       = {https://aclweb.org/anthology/W16-1711},
}
2015
- Julia Krasselt, Marcel Bollmann, Stefanie Dipper, and Florian Petran. 2015. Guidelines für die Normalisierung historischer deutscher Texte / Guidelines for Normalizing Historical German Texts. Bochumer Linguistische Arbeitsberichte, 15. BibTeX
@article{krasselt-etal2015-guidelines,
  author  = {Krasselt, Julia and Bollmann, Marcel and Dipper, Stefanie and Petran, Florian},
  title   = {{G}uidelines f{\"u}r die {N}ormalisierung historischer deutscher {T}exte / {G}uidelines for Normalizing Historical {G}erman Texts},
  journal = {Bochumer Linguistische Arbeitsberichte},
  volume  = {15},
  year    = {2015},
  url     = {https://www.linguistics.rub.de/forschung/arbeitsberichte/15.pdf},
}
2014
- Marcel Bollmann, Florian Petran, Stefanie Dipper, and Julia Krasselt. 2014. CorA: A Web-Based Annotation Tool for Historical and Other Non-Standard Language Data. In Proceedings of the 8th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH), pages 86–90, Gothenburg, Sweden. BibTeX Code Poster
@inproceedings{bollmann-etal2014-cora,
  author      = {Bollmann, Marcel and Petran, Florian and Dipper, Stefanie and Krasselt, Julia},
  title       = {{CorA}: A Web-Based Annotation Tool for Historical and Other Non-Standard Language Data},
  booktitle   = {Proceedings of the 8th Workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities ({LaTeCH})},
  pages       = {86--90},
  year        = {2014},
  address     = {Gothenburg, Sweden},
  url         = {https://aclweb.org/anthology/W14-0612},
  hugo_attach = {Code»https://github.com/comphist/cora|Poster»/pub/poster-cora.pdf},
}
- Marcel Bollmann, Florian Petran, and Stefanie Dipper. 2014. Applying Rule-Based Normalization to Different Types of Historical Texts—An Evaluation. In Zygmunt Vetulani and Joseph Mariani, editors, Human Language Technology. Challenges for Computer Science and Linguistics. 5th Language and Technology Conference, LTC 2011. Revised Selected Papers, pages 166–177. Springer International Publishing. BibTeX
@incollection{bollmann-etal2014-applying,
  author    = {Bollmann, Marcel and Petran, Florian and Dipper, Stefanie},
  editor    = {Vetulani, Zygmunt and Mariani, Joseph},
  title     = {Applying Rule-Based Normalization to Different Types of Historical Texts---An Evaluation},
  booktitle = {Human Language Technology. Challenges for Computer Science and Linguistics. 5th Language and Technology Conference, {LTC} 2011. Revised Selected Papers},
  series    = {Lecture Notes in Artificial Intelligence},
  volume    = {8387},
  edition   = {First},
  pages     = {166--177},
  year      = {2014},
  publisher = {Springer International Publishing},
  doi       = {10.1007/978-3-319-08958-4},
  url       = {https://doi.org/10.1007/978-3-319-08958-4},
}
- Marcel Bollmann, Stefanie Dipper, Mario Frank, Julia Krasselt, Florian Petran, and Tom Ruette. 2014. ANNIS-Hist: Historische deutschsprachige Korpora in ANNIS. Poster presented at: 36. Jahrestagung der Deutschen Gesellschaft für Sprachwissenschaft (DGfS). Marburg, Germany. Poster
@unpublished{bollmann-etal2014-annis,
  author      = {Bollmann, Marcel and Dipper, Stefanie and Frank, Mario and Krasselt, Julia and Petran, Florian and Ruette, Tom},
  title       = {{ANNIS-Hist}: Historische deutschsprachige {K}orpora in {ANNIS}},
  year        = {2014},
  note        = {Poster presented at: 36. {J}ahrestagung der {D}eutschen {G}esellschaft f{\"u}r {S}prachwissenschaft ({DGfS}). {M}arburg, {G}ermany},
  hugo_attach = {Poster»/pub/poster-dgfs2014.pdf},
}
2013
- Marcel Bollmann. 2013. POS Tagging for Historical Texts with Sparse Training Data. In Proceedings of the 7th Linguistic Annotation Workshop and Interoperability in Discourse, pages 11–18, Sofia, Bulgaria. BibTeX
@inproceedings{bollmann2013-pos,
  author    = {Bollmann, Marcel},
  title     = {{POS} Tagging for Historical Texts with Sparse Training Data},
  booktitle = {Proceedings of the 7th Linguistic Annotation Workshop and Interoperability in Discourse},
  pages     = {11--18},
  year      = {2013},
  address   = {Sofia, Bulgaria},
  url       = {https://aclweb.org/anthology/W13-2302},
}
- Marcel Bollmann. 2013. Automatic Normalization for Linguistic Annotation of Historical Language Data. Bochumer Linguistische Arbeitsberichte, 13. Revised version of M.A. thesis. BibTeX
@article{bollmann2013-automatic,
  author  = {Bollmann, Marcel},
  title   = {Automatic Normalization for Linguistic Annotation of Historical Language Data},
  journal = {Bochumer Linguistische Arbeitsberichte},
  volume  = {13},
  year    = {2013},
  url     = {https://www.linguistics.rub.de/forschung/arbeitsberichte/13.pdf},
  note    = {Revised version of M.A. thesis},
}
2012
- Marcel Bollmann. 2012. (Semi-)Automatic Normalization of Historical Texts Using Distance Measures and the Norma Tool. In Proceedings of the Second Workshop on Annotation of Corpora for Research in the Humanities (ACRH-2), Lisbon, Portugal. BibTeX Code Slides
@inproceedings{bollmann2012-semi,
  author      = {Bollmann, Marcel},
  title       = {({S}emi-)Automatic Normalization of Historical Texts Using Distance Measures and the {N}orma Tool},
  booktitle   = {Proceedings of the Second Workshop on Annotation of Corpora for Research in the Humanities ({ACRH}-2)},
  year        = {2012},
  address     = {Lisbon, Portugal},
  url         = {https://marcel.bollmann.me/pub/acrh12.pdf},
  hugo_attach = {Code»https://github.com/comphist/norma|Slides»/pub/slides-acrh.pdf},
}
- Marcel Bollmann, Stefanie Dipper, Julia Krasselt, and Florian Petran. 2012. Manual and Semi-Automatic Normalization of Historical Spelling – Case Studies from Early New High German. In LThist 2012: First International Workshop on Language Technology for Historical Text(s), pages 342–350, Vienna, Austria. BibTeX
@inproceedings{bollmann-etal2012-manual,
  author    = {Bollmann, Marcel and Dipper, Stefanie and Krasselt, Julia and Petran, Florian},
  title     = {Manual and Semi-Automatic Normalization of Historical Spelling -- Case Studies from {E}arly {N}ew {H}igh {G}erman},
  booktitle = {{LThist} 2012: First International Workshop on Language Technology for Historical Text(s)},
  pages     = {342--350},
  year      = {2012},
  address   = {Vienna, Austria},
  url       = {https://marcel.bollmann.me/pub/lthist12.pdf},
}
- Marcel Bollmann, Stefanie Dipper, Julia Krasselt, and Florian Petran. 2012. The Anselm Project: Tools for Automatic Analysis of a Parallel Corpus in Early New High German. Poster presented at: 34. Jahrestagung der Deutschen Gesellschaft für Sprachwissenschaft (DGfS). Frankfurt, Germany. Poster
@unpublished{bollmann-etal2012-anselm,
  author      = {Bollmann, Marcel and Dipper, Stefanie and Krasselt, Julia and Petran, Florian},
  title       = {The {A}nselm Project: Tools for Automatic Analysis of a Parallel Corpus in {E}arly {N}ew {H}igh {G}erman},
  year        = {2012},
  note        = {Poster presented at: 34. {J}ahrestagung der {D}eutschen {G}esellschaft f{\"u}r {S}prachwissenschaft ({DGfS}). {F}rankfurt, {G}ermany},
  hugo_attach = {Poster»/pub/poster-dgfs2012.pdf},
}
2011
- Marcel Bollmann, Florian Petran, and Stefanie Dipper. 2011. Applying Rule-Based Normalization to Different Types of Historical Texts — An Evaluation. In Zygmunt Vetulani, editor, Proceedings of the 5th Language & Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics (LTC 2011), pages 339–344, Poznan, Poland. BibTeX
@inproceedings{bollmann-etal2011-applying,
  author    = {Bollmann, Marcel and Petran, Florian and Dipper, Stefanie},
  editor    = {Vetulani, Zygmunt},
  title     = {Applying Rule-Based Normalization to Different Types of Historical Texts --- An Evaluation},
  booktitle = {Proceedings of the 5th Language \& Technology Conference: Human Language Technologies as a Challenge for Computer Science and Linguistics ({LTC} 2011)},
  pages     = {339--344},
  year      = {2011},
  address   = {Poznan, Poland},
  url       = {https://marcel.bollmann.me/pub/ltc11.pdf},
}
- Marcel Bollmann, Florian Petran, and Stefanie Dipper. 2011. Rule-Based Normalization of Historical Texts. In Proceedings of the International Workshop on Language Technologies for Digital Humanities and Cultural Heritage, pages 34–42, Hissar, Bulgaria. BibTeX
@inproceedings{bollmann-etal2011-rulebased,
  author    = {Bollmann, Marcel and Petran, Florian and Dipper, Stefanie},
  title     = {Rule-Based Normalization of Historical Texts},
  booktitle = {Proceedings of the International Workshop on Language Technologies for Digital Humanities and Cultural Heritage},
  pages     = {34--42},
  year      = {2011},
  address   = {Hissar, Bulgaria},
  url       = {https://marcel.bollmann.me/pub/ranlp11.pdf},
}
- Marcel Bollmann. 2011. Adapting SimpleNLG to German. In Proceedings of the 13th European Workshop on Natural Language Generation (ENLG 2011), pages 133–138, Nancy, France. BibTeX Poster Software
@inproceedings{bollmann2011-adapting,
  author      = {Bollmann, Marcel},
  title       = {Adapting {SimpleNLG} to {G}erman},
  booktitle   = {Proceedings of the 13th {E}uropean Workshop on Natural Language Generation ({ENLG} 2011)},
  pages       = {133--138},
  year        = {2011},
  address     = {Nancy, France},
  url         = {https://aclweb.org/anthology/W11-2817},
  hugo_attach = {Poster»/pub/PosterENLG.pdf|Software»/software/simplenlg.html},
}