% Journal article: "Real-time Lexicon-free Scene Text Retrieval",
% Pattern Recognition, vol. 110 (2021), article number 107656.
% Exported from refbase. Fields carrying the "opt" prefix are not standard
% BibTeX fields, so standard styles are expected to ignore them; they hold
% bookkeeping data only. Each field name appears exactly once: the refbase
% provenance text lives in "optexportnote" so that it no longer collides
% with the group/project codes stored in "optnote".
@article{AndresMafla2021,
  author        = {Mafla, Andres
                   and Tito, Ruben
                   and Dey, Sounak
                   and Gomez, Lluis
                   and Rusi{\~n}ol, Mar{\c{c}}al
                   and Valveny, Ernest
                   and Karatzas, Dimosthenis},
  title         = {Real-time Lexicon-free Scene Text Retrieval},
  journal       = {Pattern Recognition},
  year          = {2021},
  volume        = {110},
  pages         = {107656},
  abstract      = {In this work, we address the task of scene text retrieval: given a text query, the system returns all images containing the queried text. The proposed model uses a single shot CNN architecture that predicts bounding boxes and builds a compact representation of spotted words. In this way, this problem can be modeled as a nearest neighbor search of the textual representation of a query over the outputs of the CNN collected from the totality of an image database. Our experiments demonstrate that the proposed model outperforms previous state-of-the-art, while offering a significant increase in processing speed and unmatched expressiveness with samples never seen at training time. Several experiments to assess the generalization capability of the model are conducted in a multilingual dataset, as well as an application of real-time text spotting in videos.},
  optnote       = {DAG; 600.121; 600.129; 601.338},
  optexportnote = {exported from refbase (http://refbase.cvc.uab.es/show.php?record=3493), last updated on Tue, 26 Jan 2021 13:32:15 +0100},
  opturl        = {https://www.sciencedirect.com/science/article/pii/S0031320320304593},
}