% Journal article record exported from refbase (record 2523).
% The citation key is the ASCII form of "Marcal Rusinol" + year: the exported
% key embedded LaTeX accent macros and a space, which BibTeX cannot scan as a key.
% Fields prefixed with "opt" are not read by standard styles; they carry
% refbase bookkeeping (keywords, group/project tags, export provenance).
% Repeated optkeywords/optnote fields were merged into one field each, because
% BibTeX keeps only one occurrence of a field name per entry.
% NOTE(review): the surname "Llados" may be missing an accent (Llad{\'o}s) -- confirm
% against the publisher record before changing it.
@article{MarcalRusinol2014,
  author      = {Rusi{\~n}ol, Mar{\c{c}}al and Frinken, Volkmar and Karatzas, Dimosthenis and Bagdanov, Andrew and Llados, Josep},
  title       = {Multimodal page classification in administrative document image streams},
  journal     = {International Journal on Document Analysis and Recognition},
  year        = {2014},
  publisher   = {Springer Berlin Heidelberg},
  volume      = {17},
  number      = {4},
  pages       = {331--341},
  issn        = {1433-2833},
  doi         = {10.1007/s10032-014-0225-8},
  abstract    = {In this paper, we present a page classification application in a banking workflow. The proposed architecture represents administrative document images by merging visual and textual descriptions. The visual description is based on a hierarchical representation of the pixel intensity distribution. The textual description uses latent semantic analysis to represent document content as a mixture of topics. Several off-the-shelf classifiers and different strategies for combining visual and textual cues have been evaluated. A final step uses an n-gram model of the page stream allowing a finer-grained classification of pages. The proposed method has been tested in a real large-scale environment and we report results on a dataset of 70,000 pages.},
  optkeywords = {Digital mail room; Multimodal page classification; Visual and textual document description},
  optnote     = {DAG; LAMP; 600.056; 600.061; 601.240; 601.223; 600.077; 600.079; exported from refbase (http://refbase.cvc.uab.es/show.php?record=2523), last updated on Wed, 04 Feb 2015 16:52:34 +0100},
}