% Journal article (Neurocomputing, vol. 150, part A); exported from refbase record 2531.
% The opt-prefixed fields are non-standard names, which BibTeX styles skip as unknown fields.
% NOTE(review): "Ramos Terrades" is entered as a compound surname -- confirm with the author list.
@article{FranciscoAlvaro2015,
  author      = {Alvaro, Francisco and Cruz, Francisco and Sanchez, Joan Andreu and Ramos Terrades, Oriol and Benedi, Jose Miguel},
  title       = {Structure Detection and Segmentation of Documents Using {2D} Stochastic Context-Free Grammars},
  journal     = {Neurocomputing},
  year        = {2015},
  volume      = {150},
  number      = {A},
  pages       = {147--154},
  optkeywords = {document image analysis; stochastic context-free grammars; text classification features},
  abstract    = {In this paper we define a bidimensional extension of Stochastic Context-Free Grammars for structure detection and segmentation of images of documents. Two sets of text classification features are used to perform an initial classification of each zone of the page. Then, the document segmentation is obtained as the most likely hypothesis according to a stochastic grammar. We used a dataset of historical marriage license books to validate this approach. We also tested several inference algorithms for Probabilistic Graphical Models and the results showed that the proposed grammatical model outperformed the other methods. Furthermore, grammars also provide the document structure along with its segmentation.},
  optnote     = {DAG; 601.158; 600.077; 600.061; exported from refbase (http://refbase.cvc.uab.es/show.php?record=2531), last updated on Tue, 25 Feb 2020 10:00:30 +0100},
  file        = {:http://refbase.cvc.uab.es/files/ACS2014.pdf:PDF},
}