% Conference paper from the 8th Iberian Conference on Pattern Recognition and
% Image Analysis (2017); record exported from refbase.
% Keywords and notes are each merged into ONE field: BibTeX keeps only the
% first occurrence of a repeated field name and drops the rest with a warning.
% "optnote" keeps its "opt" prefix so bibliography styles continue to ignore it.
% NOTE(review): volume 10255 is presumably a book-series volume number, but the
% record has no series/publisher field -- confirm and add them if so.
@InProceedings{VeronicaRomero2017,
  author    = {Romero, Veronica and Fornes, Alicia and Vidal, Enrique and Sanchez, Joan Andreu},
  editor    = {Alexandre, L. A. and Sanchez, J. Salvador and Rodriguez, Joao M. F.},
  title     = {Information Extraction in Handwritten Marriage Licenses Books Using the {MGGI} Methodology},
  booktitle = {8th Iberian Conference on Pattern Recognition and Image Analysis},
  year      = {2017},
  volume    = {10255},
  pages     = {287--294},
  keywords  = {Handwritten Text Recognition, Information extraction, Language modeling, MGGI, Categories-based language model},
  abstract  = {Historical records of daily activities provide intriguing insights into the life of our ancestors, useful for demographic and genealogical research. For example, marriage license books have been used for centuries by ecclesiastical and secular institutions to register marriages. These books follow a simple structure of the text in the records with a evolutionary vocabulary, mainly composed of proper names that change along the time. This distinct vocabulary makes automatic transcription and semantic information extraction difficult tasks. In previous works we studied the use of category-based language models and how a Grammatical Inference technique known as MGGI could improve the accuracy of these tasks. In this work we analyze the main causes of the semantic errors observed in previous results and apply a better implementation of the MGGI technique to solve these problems. Using the resulting language model, transcription and information extraction experiments have been carried out, and the results support our proposed approach.},
  optnote   = {DAG; 602.006; 600.097; 600.121; exported from refbase (http://refbase.cvc.uab.es/show.php?record=2952), last updated on Mon, 07 Dec 2020 14:30:17 +0100},
  isbn      = {978-3-319-58837-7},
  file      = {:http://refbase.cvc.uab.es/files/RFV2017.pdf:PDF},
}