@inproceedings{FatemehNoroozi2016,
  author    = {Noroozi, Fatemeh and Marjanovic, Marina and Njegus, Angelina and Escalera, Sergio and Anbarjafari, Gholamreza},
  title     = {Fusion of Classifier Predictions for Audio-Visual Emotion Recognition},
  booktitle = {23rd International Conference on Pattern Recognition Workshops},
  year      = {2016},
  abstract  = {In this paper is presented a novel multimodal emotion recognition system which is based on the analysis of audio and visual cues. MFCC-based features are extracted from the audio channel and facial landmark geometric relations are computed from visual data. Both sets of features are learnt separately using state-of-the-art classifiers. In addition, we summarise each emotion video into a reduced set of key-frames, which are learnt in order to visually discriminate emotions by means of a Convolutional Neural Network. Finally, confidence outputs of all classifiers from all modalities are used to define a new feature space to be learnt for final emotion prediction, in a late fusion/stacking fashion. The conducted experiments on eNTERFACE{\textquoteright}05 database show significant performance improvements of our proposed system in comparison to state-of-the-art approaches.},
  optnote   = {HuPBA;MILAB; exported from refbase (http://refbase.cvc.uab.es/show.php?record=2839), last updated on Mon, 21 Jan 2019 14:14:32 +0100},
}