% ----------------------------------------------------------------------
% String macros for recurring venue and journal names.
% Macro names are unchanged so existing references keep working; only
% the expansions were corrected:
%   - jst: the expansion contained a doubled backslash (a line break)
%     where an acute accent was meant.
%   - ieeeasr: "Proceeding" corrected to "Proceedings".
%   - NSI, MIDL, jst, jep: accents written as braced LaTeX escapes so the
%     macros also work with 8-bit classic BibTeX.
% ----------------------------------------------------------------------
@string{arpaslt = {Proceedings of ARPA Workshop on Spoken Language Technology}}
@string{arpahlt = {Proceedings of ARPA Workshop on Human Language Technology}}
@string{arpasltw = {Proceedings of ARPA Spoken Language Technology Workshop}}
@string{darpasnlw = {Proceedings of DARPA Speech and Natural Language Workshop}}
@string{darpabntuw = {Proceedings of DARPA Broadcast News Transcription and Understanding Workshop}}
@string{icassp = {Proceedings of ICASSP}}
@string{ieeeicassp = {Proceedings of the IEEE-ICASSP}}
@string{ieeeasr = {Proceedings of IEEE Workshop on Automatic Speech Recognition}}
@string{ijcnn = {IEEE joint conference on neural networks}}
@string{eurospeech = {Proceedings of the European Conference on Speech Technology, EuroSpeech}}
@string{escaieee = {Proceedings of ESCA/IEEE Workshop on Speech Synthesis}}
@string{atttj = {AT\&T Technical Journal}}
@string{csl = {Computer Speech and Language}}
@string{sv = {Springer Verlag}}
@string{ASJ = {Meeting of the Acoustical Society of Japan}}
@string{ASJSPRING = "Spring " # ASJ}
@string{CNS = {Computation in Neurons and Neural Systems}}
@string{ESCA = {European Conference on Speech Communication and Technology}}
@string{NSI = {Neurosciences et Sciences de l'Ing{\'e}nieur}}
@string{ieeetsa = {IEEE transactions on Speech and Audio}}
@string{spcom = {Speech Communication}}
@string{ieice = {The Transactions of the Institute of Electronics, Information and Communication Engineers}}
@string{ivtta = {IEEE Workshop on Interactive Voice Technology for Telecommunications Applications}}
@string{icslp = {International Conference on Speech and Language Processing}}
@string{jst = {Journ{\'e}es Scientifiques et Techniques du R{\'e}seau Francophone d'Ing{\'e}nierie de la Langue de l'AUPELF-UREF}}
@string{lrec = {International Conference on Language Resources and Evaluation}}
@string{lrecWp = {International Conference on Language Resources and Evaluation - Workshop on The Evaluation of Parsing Systems}}
@string{lrecWb = {International Conference on Language Resources and Evaluation - Workshop on Speech Database Development for Central and Eastern European Language}}
@string{jasa = {The Journal of the Acoustical Society of America}}
@string{rla2c = {La Reconnaissance du Locuteur et ses Applications Commerciales et Criminalistiques - Speaker Recognition and its Commercial and Forensic Applications}}
@string{MIDL = {Identification des langues et des vari{\'e}t{\'e}s dialectales par les humains et par les machines}}
@string{TAL = {Traitement Automatique des Langues}}
@string{tcstar06 = {TC-STAR Workshop on Speech-to-Speech Translation}}
@string{jep = {Journ{\'e}es d'{\'e}tude sur la Parole}}
@string{Odyssey = {IEEE, The Speaker and Language Recognition Workshop}}
@string{taln = {Traitement Automatique des Langues Naturelles}}
@string{acl = {Association for Computational Linguistics}}
@string{iwslt = {International Workshop on Spoken Language Translation}}

%%%%%%%%%% 2021 section
%% Pending items kept as plain text (not yet BibTeX entries):
%%\bibitem{DGfS2021} M. Hutin, A. Jatteau, I. Vasilescu, {\bf L. Lamel}, Y. Wu, M. Adda-Decker, ``Modelling the realization of variable word-final schwa in Standard French,'' Free Variation 2021. submitted
%%\bibitem{croissant} P. Boula de Mare\"uil, G. Adda, {\bf L. Lamel}, ``Comparaisons dialectom\'etriques de parlers du Croissant avec d'autres parlers d'oc et d'oïl,'' L. Esher, M. Gu\'erin, N. Quint, M. Russo (eds), {\it Le Croissant Linguistique : nouvelles perspectives aux confins oc/oïl}, L'Harmattan, Paris, 2020. (to appear)
%%%%%%%%%% 2021 section

% Interspeech 2021 paper; name separators are written in lowercase so
% that case-sensitive .bib tools also split the author list correctly.
@inproceedings{elie21_interspeech,
  title = {{Modeling the effect of military oxygen masks on speech characteristics}},
  author = {Elie, B. and Gauvain, J. and Gauvain, J. L.
AND Lamel, L.}, URL = {http://www-tlp.limsi.fr/public/elie21_interspeech.pdf}, BOOKTITLE = {{Interspeech 2021}}, ADDRESS = {Brno, Czechia}, SERIES = {Proceedings of Interspeech 2021}, YEAR = {2021}, MONTH = Aug, DOI = {10.21437/Interspeech.2021-1650}, KEYWORDS = {oxygen mask, speech recognition, data augmentation} } @misc{wu:hal-03153413, TITLE = {{Tone realization in Mandarin speech: a large corpus based study of disyllabic words}}, AUTHOR = {Wu, Yaru and Lamel, Lori and Adda-Decker, Martine}, URL = {https://hal.archives-ouvertes.fr/hal-03153413}, abstactNOTE = {This study aims to increase our knowledge about tone realiza- tion in disyllabic words in continuous Mandarin speech. Au- tomatic alignments of large speech corpora were carried out to enable the study of potential tone variants, with a special focus on variation factors such as prosodic position and right tonal context. The alignments without tone variants (V0, phono- logical representation) show that Tone 4 is more frequent in phrase-final position than in other prosodic positions, support- ing the ''declination line'' pattern often observed in speech pro- duction. Tone 4 is also the most frequent lexical tone (>50\%) in all prosodic positions. Alignments permitting tone variants (V1, phonetic realization) show an increase of Tone 1 in phrase- initial position, compared to V0. Tone realization is observed to be related not only to the prosodic position, but also to the within-word right tonal context. Unsurprisingly, the most no- table change in tone realization happens for Tone 3 in the first syllable of disyllabic words when followed by another Tone 3 because of the well-known ''tone sandhi rule'' in which T3T3 disyllabic words become T2T3. Cross-word right tonal context is found to impact only Tone 3. 
However, the results in this study show that Tone 3 sandhi rule is more a tendency than an absolute rule.}, YEAR = {2021}, MONTH = Jan, KEYWORDS = {Mandarin ; tone realization ; large corpora ; continuous speech}, PDF = {https://hal.archives-ouvertes.fr/hal-03153413/file/ISCSLP_2021_final.pdf}, HAL_ID = {hal-03153413}, HAL_VERSION = {v1}, } %%%%%%%%%% 2020 section @article{Wu_Adda-Decker_Lamel_2020, author={Wu, Yaru and Adda-Decker, Martine and Lamel, Lori}, title={Schwa Deletion in Word-Initial Syllables of Polysyllabic Words: Investigations Using Large French Speech Corpora}, volume={2}, url={https://journal.equinoxpub.com/JMBS/article/view/17311}, DOI={10.1558/jmbs.17311}, abstractNote={<p>This study aims to analyse factors that could influence schwa deletion in word-initial syllables of polysyllabic words in continuous French speech. Both phonological and extralinguistic factors were considered: number of consonants, post-lexical context, speech style, sex and profession. Three large corpora covering different speech styles were explored using forced alignment with optional schwa variants. Formal journalistic ESTER corpus, conversational journalistic ETAPE corpus and casual speech NCCFr corpus were used in this study. We observe that schwa tends to be deleted more for 2C-words than for 3C-words. Words preceded by a consonant or a pause tend to prevent schwa deletion whereas words preceded by a vowel tend to facilitate schwa deletion. The less formal the speech style is, the more schwas are deleted. Males tend to delete schwas more frequently than females. 
Interestingly, journalists tend to delete more schwas than politicians in our data.</p>},
  number = {2},
  journal = {Journal of Monolingual and Bilingual Speech},
  year = {2020},
  month = dec,
  pages = {269--289}
}

% Author list fixed: "Antoine Laurentand Carine Klein-Peschanski" lacked
% the space before "and", which fused two authors into one name. Names
% are given as "Last, First" and the accent is written as a LaTeX escape.
@article{gagnepain20,
  author = {Gagnepain, Pierre and Vall{\'e}e, Thomas and Heiden, Serge and Decorde, Matthieu and Gauvain, Jean-Luc and Laurent, Antoine and Klein-Peschanski, Carine and Viader, Fausto and Peschanski, Denis and Eustache, Francis},
  title = {{Collective memory shapes the organization of individual memories in the medial prefrontal cortex}},
  year = {2020},
  pages = {189--200},
  journal = {{Nature Human Behaviour}},
  volume = {4},
  url = {https://rdcu.be/b1RGS}
}

% HAL record for the SLTU-CCURL 2020 workshop paper.
@inproceedings{hutin:hal-02571909,
  title = {{Lenition and Fortition of Stop Codas in Romanian}},
  author = {Hutin, Mathilde and Niculescu, Oana and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02571909},
  booktitle = {{SLTU-CCURL}},
  address = {Marseille, France},
  year = {2020},
  month = may,
  keywords = {Romanian ; lenition ; fortition ; automatic alignment ; pronunciation variant},
  pdf = {https://hal.archives-ouvertes.fr/hal-02571909/file/Hutin_SLTU2020_LenitionFortition.pdf},
  hal_id = {hal-02571909},
  hal_version = {v1},
}

% HAL record for an Interspeech 2020 paper; the keyword "Large corpora"
% was listed twice and is now listed once.
@inproceedings{hutin:hal-02977812,
  title = {{Ongoing phonologization of word-final voicing alternations in two Romance languages: Romanian and French}},
  author = {Hutin, Mathilde and Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02977812},
  booktitle = {{Interspeech 2020}},
  address = {Shanghai, China},
  series = {Proceedings of Interspeech 2020},
  year = {2020},
  month = oct,
  doi = {10.21437/Interspeech.2020-1460},
  keywords = {Large corpora ; forced alignment ; phonologization ; lenition ; fortition ; voicing ; devoicing ; assimilation ; French ; Romanian},
  pdf =
{https://hal.archives-ouvertes.fr/hal-02977812/file/Hutin_et_al._2020_Ongoing_Phonologization_IS2020.pdf},
  hal_id = {hal-02977812},
  hal_version = {v1},
}

% Interspeech 2020 paper; page range uses the en-dash form.
@inproceedings{wu:hal-03153402,
  title = {{Mandarin Lexical Tones: A Corpus-Based Study of Word Length, Syllable Position and Prosodic Position on Duration}},
  author = {Wu, Yaru and Adda-Decker, Martine and Lamel, Lori},
  url = {https://hal.archives-ouvertes.fr/hal-03153402},
  booktitle = {{Interspeech 2020}},
  address = {Shanghai, China},
  publisher = {{ISCA}},
  pages = {1908--1912},
  year = {2020},
  month = oct,
  doi = {10.21437/Interspeech.2020-1614},
  keywords = {Mandarin ; tone duration ; word length ; prosodic categories ; large corpora ; continuous speech},
  pdf = {https://hal.archives-ouvertes.fr/hal-03153402/file/1614%281%29.pdf},
  hal_id = {hal-03153402},
  hal_version = {v1},
}

% Workshop system description; keyword typo "identifiation" corrected and
% names given as "Last, First".
@inproceedings{Barras20,
  author = {Barras, Claude and Le, Viet-Bac and Gauvain, Jean-Luc},
  title = {{Vocapia-LIMSI System for 2020 Shared Task on Code-switched Spoken Language Identification}},
  booktitle = {{First Workshop on Speech Technologies for Code-switching in Multilingual Communities}},
  year = {2020},
  address = {China},
  keywords = {Code-switching, language identification},
  url = {http://www-tlp.limsi.fr/public/code-switching-2020.pdf}
}

% CMLF 2020 paper published in SHS Web of Conferences. The export had the
% series name in the volume field and the volume (78) in the number
% field, which standard styles print as "volume SHS Web of Conferences".
% The series is now SHS Web of Conferences with volume 78, and the
% article number is stored without the "id." prefix. The former series
% text ("Actes du 7e Congres Mondial de Linguistique Francaise") repeated
% the booktitle and was dropped.
@inproceedings{hutin:hal-02931786,
  title = {{Le schwa final en fran{\c c}ais standard est-il un ``lubrifiant phon{\'e}tique''?}},
  author = {Hutin, Mathilde and Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02931786},
  booktitle = {{7e Congr{\`e}s Mondial de Linguistique Fran{\c c}aise - CMLF 2020}},
  address = {Montpellier, France},
  series = {SHS Web of Conferences},
  volume = {78},
  pages = {09004},
  year = {2020},
  month = jul,
  doi = {10.1051/shsconf/20207809004},
  keywords = {Schwa ; Epenthesis ; Standard French ; Large corpora ; Automatic alignment ; {\'e}penth{\`e}se ; Fran{\c c}ais standard ; TAL Traitement Automatique des Langues},
  pdf = {https://hal.archives-ouvertes.fr/hal-02931786/file/shsconf_cmlf2020_09004.pdf},
  hal_id = {hal-02931786},
  hal_version = {v1},
}

% JEP-TALN-RECITAL 2020 paper (HAL record, version 3).
@inproceedings{hutin:hal-02798551,
  title = {{L{\'e}nition et fortition des occlusives en coda finale dans deux langues romanes : le fran{\c c}ais et le roumain}},
  author = {Hutin, Mathilde and Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02798551},
  booktitle = {{6e conf{\'e}rence conjointe Journ{\'e}es d'{\'E}tudes sur la Parole (JEP, 33e {\'e}dition), Traitement Automatique des Langues Naturelles (TALN, 27e {\'e}dition), Rencontre des {\'E}tudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (R{\'E}CITAL, 22e {\'e}dition).
Volume 1 : Journ{\'e}es d'{\'E}tudes sur la Parole}},
  address = {Nancy, France},
  editor = {Benzitoun, Christophe and Braud, Chlo{\'e} and Huber, Laurine and Langlois, David and Ouni, Slim and Pogodalla, Sylvain and Schneider, St{\'e}phane},
  publisher = {{ATALA}},
  pages = {289--298},
  year = {2020},
  keywords = {large corpora ; forced alignment ; fortition ; lenition ; voicing ; devoicing ; French ; Romanian ; automatic alignment ; voisement ; fran{\c c}ais ; roumain ; d{\'e}voisement ; grands corpus ; alignement automatique ; alignement forc{\'e} ; l{\'e}nition},
  pdf = {https://hal.archives-ouvertes.fr/hal-02798551v3/file/JEP-TALN-RECITAL-2020_paper_73.pdf},
  hal_id = {hal-02798551},
  hal_version = {v3},
}

% Book chapter. The pages field had a hard-coded "pp." prefix (styles add
% their own) and a single hyphen. Editors are given as "Last, First".
% NOTE(review): the third editor was exported as "Habermann"; the volume's
% editor is believed to be Pascale Hadermann -- confirm against the book.
% NOTE(review): publisher (a required field for this entry type) was
% missing; "Peter Lang" is assumed from the series name -- confirm.
@incollection{candea:hal-02505343,
  title = {{Merci$\times$chh, entendu$\times$chh : variation phon{\'e}tique ancienne ou {\'e}mergence d'une proto-particule en voie de stabilisation ?}},
  author = {Candea, Maria and Wottawa, Jane and Adda-Decker, Martine and Lamel, Lori},
  url = {https://hal.archives-ouvertes.fr/hal-02505343},
  booktitle = {{Le Fran{\c c}ais innovant}},
  editor = {Di{\'e}moz, Federica and Dostie, Ga{\'e}tane and Hadermann, Pascale and Lefeuvre, Florence},
  publisher = {Peter Lang},
  series = {Sciences pour la Communication},
  volume = {130},
  pages = {291--308},
  year = {2020},
  keywords = {sociophon{\'e}tique ; fran{\c c}ais ; consonnes fricatives ; stylistique},
  pdf = {https://hal.archives-ouvertes.fr/hal-02505343/file/article%20Candea%20et%20alii-1_pre-print-auteur.pdf},
  hal_id = {hal-02505343},
  hal_version = {v1},
}

% TAL journal article (HAL record); no volume or pages in the export.
@article{vasilescu:hal-03040177,
  title = {{Alternances de voisement et processus de l{\'e}nition et de fortition : une {\'e}tude automatis{\'e}e de grands corpus en cinq langues romanes}},
  author = {Vasilescu, Ioana and Wu, Yaru and Jatteau, Ad{\`e}le and Adda-Decker, Martine and Lamel, Lori},
  url = {https://hal.archives-ouvertes.fr/hal-03040177},
  journal = {{Traitement Automatique des Langues (TAL)}},
  year = {2020},
  pdf = {https://hal.archives-ouvertes.fr/hal-03040177/file/lenition_fortition_TAL_Vasilescuetal.pdf},
  hal_id = {hal-03040177},
  hal_version = {v1},
}

% Poster presented at Going Romance (HAL-SHS record).
@misc{hutin:halshs-03215964,
  title = {{Estimer la variabilit{\'e} de la liaison facultative en fran{\c{c}}ais standard gr{\^a}ce aux m{\'e}thodes automatis{\'e}es.
Preuve de concept dans le contexte verbe `` {\^e}tre '' conjugu{\'e} + `` un / une ''}},
  author = {Hutin, Mathilde and Wu, Yaru and Kondo, Nori and Ruvoletto, Samantha and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://halshs.archives-ouvertes.fr/halshs-03215964},
  note = {Poster},
  howpublished = {{Going Romance}},
  year = {2020},
  month = nov,
  hal_id = {halshs-03215964},
  hal_version = {v1},
}
% In the entry above, the fourth author was exported as
% "Samantha, Ruvoletto" (given name in the surname slot); it is now
% "Ruvoletto, Samantha". NOTE(review): confirm with the authors' list.

%%%%%%%%%% 2019 section

% Interspeech 2019 paper. The export stored the DOI in the pages field
% ("DOI: 10.21437/..."); it now lives in the doi field and the bogus
% pages value is gone. "ISCA" was also listed as an editor, which styles
% print as "ISCA, editor"; it remains in organization only. The title
% opened with closing quotes; it now uses TeX opening quotes.
@inproceedings{jatteau:hal-02336119,
  title = {{``Gra[f]e!'' Word-final devoicing of obstruents in Standard French: An acoustic study based on large corpora}},
  author = {Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine and Audibert, Nicolas},
  url = {https://hal.archives-ouvertes.fr/hal-02336119},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  address = {Graz, Austria},
  organization = {{ISCA}},
  series = {Proceedings of Interspeech},
  doi = {10.21437/Interspeech.2019-2329},
  year = {2019},
  month = sep,
  keywords = {Standard French ; acoustics ; voicing ; large corpora ; forced alignment},
  hal_id = {hal-02336119},
  hal_version = {v1},
}

% NOTE(review): "/?/" in the title below looks like an IPA symbol lost in
% an encoding conversion; left unchanged until the intended symbol is
% confirmed against the HAL record.
@inproceedings{wu:hal-03041044,
  title = {{Impact of post-lexical context and speech style on word-final /?/ realization in French using large corpora and automatic speech processing}},
  author = {Wu, Yaru and Adda-Decker, Martine and Gendrot, C{\'e}dric and Lamel, Lori},
  url = {https://hal.archives-ouvertes.fr/hal-03041044},
  booktitle = {{R-atics 6}},
  address = {Paris, France},
  year = {2019},
  month = nov,
  hal_id = {hal-03041044},
  hal_version = {v1},
}

% Interspeech 2019 satellite workshop paper (HAL record).
@inproceedings{wu:hal-03041066,
  title = {{Variation in Pluricentric Mandarin Using Large Corpus: a forced alignment-based duration and tone frequency study}},
  author = {Wu, Yaru and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-03041066},
  booktitle = {{Pluricentric Languages in Speech Technology - Satellite Workshop at
Interspeech 2019}},
  address = {Graz, Austria},
  year = {2019},
  month = sep,
  hal_id = {hal-03041066},
  hal_version = {v1},
}

% ICPhS 2019 paper. Title typo "COnsonantal" corrected. The month field
% held an ISO date; it now uses the month macro and the full date is kept
% in eventdate (ignored by classic BibTeX styles, understood by biblatex).
@inproceedings{PubLIMSI-7694,
  author = {Amazouz, Djegdjiga and Adda-Decker, Martine and Lamel, Lori and Gauvain, Jean-Luc},
  title = {{Exploring Consonantal Variation in French-Arabic Code Switching Speech: The Case of Gemination}},
  booktitle = {{International Congress of Phonetic Sciences}},
  year = {2019},
  pages = {5},
  month = aug,
  eventdate = {2019-08-05},
  address = {Melbourne - AU},
  keywords = {Code-switching, consonant variation, gemination, automatic speech alignment, French, Arabic}
}

% ICPhS 2019 paper (HAL record). The editor list ended with "(Eds.)",
% which BibTeX parses as part of the last editor's name, and the same
% four editors were repeated in the organization field. "(Eds.)" and the
% organization field were removed.
@inproceedings{boulademareuil:hal-02387368,
  title = {{A Speaking Atlas of Minority Languages of France: Collection and Analyses of Dialectical Data}},
  author = {Boula De Mare{\"u}il, Philippe and Adda, Gilles and Lamel, Lori and Rilliard, Albert and Vernier, Fr{\'e}d{\'e}ric},
  url = {https://hal.archives-ouvertes.fr/hal-02387368},
  booktitle = {{International Congress of Phonetic Sciences}},
  address = {Melbourne, Australia},
  editor = {Calhoun, Sasha and Escudero, Paola and Tabain, Marija and Warren, Paul},
  year = {2019},
  month = aug,
  keywords = {speaking atlas ; language documentation ; underresourced languages ; dialect crowdsourcing},
  hal_id = {hal-02387368},
  hal_version = {v1},
}

% ICPhS 2019 paper (HAL record); editors given as "Last, First".
@inproceedings{jatteau:hal-02270089,
  title = {{Final devoicing of fricatives in French: Studying variation in large-scale corpora with automatic alignment}},
  author = {Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02270089},
  booktitle = {{19th International Congress of Phonetic Sciences}},
  address = {Melbourne, Australia},
  editor = {Calhoun, Sasha and Escudero, Paola and Tabain, Marija and Warren, Paul},
  publisher = {{Australasian Speech Science and Technology Association Inc.}},
  series = {Proceedings of the 19th International Congress
of Phonetic Sciences},
  pages = {295--299},
  year = {2019},
  keywords = {Phonetics ; Automatic alignment ; Phonology ; French linguistics ; Phon{\'e}tique ; Alignement automatique ; Phonologie ; Linguistique fran{\c c}aise},
  hal_id = {hal-02270089},
  hal_version = {v1},
}

% PaPE 2019 paper (HAL record). Keyword typo "variantion" corrected; the
% quoted phrase in the title now opens with a TeX opening quote.
@inproceedings{jatteau:hal-02336112,
  title = {{Final devoicing in the `pool of variation': A large-scale corpora approach with automatic alignment}},
  author = {Jatteau, Ad{\`e}le and Vasilescu, Ioana and Lamel, Lori and Adda-Decker, Martine},
  url = {https://hal.archives-ouvertes.fr/hal-02336112},
  booktitle = {{Phonetics and Phonology in Europe Conference}},
  address = {Lecce, Italy},
  year = {2019},
  month = jun,
  keywords = {French ; Final devoicing ; variation},
  hal_id = {hal-02336112},
  hal_version = {v1},
}

% LSRL 2019 talk (HAL record).
@inproceedings{vasilescu:hal-02336116,
  title = {{Synchronic variation and sound change in Romance languages: a corpus-based study of lenition phenomena in Romanian and Spanish}},
  author = {Vasilescu, Ioana and Lamel, Lori},
  url = {https://hal.archives-ouvertes.fr/hal-02336116},
  booktitle = {{Linguistic Symposium on Romance Languages}},
  address = {Athens, United States},
  year = {2019},
  month = may,
  keywords = {synchronic variation ; lenition ; corpus studies},
  hal_id = {hal-02336116},
  hal_version = {v1},
}

% MMM 2019 paper. The month field held a date range; it now uses the
% month macro and the range is kept in eventdate. A missing comma between
% the last two keywords was restored.
@inproceedings{PubLIMSI-7657,
  author = {Gauvain, Jodie and Lamel, Lori and Le, Viet Bac and Despres, Julien and Gauvain, Jean-Luc and Messaoudi, Abdelkhalek and Vieru, Bianca and Ben Kheder, Waad},
  title = {{Challenges in Audio Processing of Terrorist-Related Data}},
  booktitle = {{International Conference on Multimedia Modeling}},
  year = {2019},
  month = jan,
  eventdate = {2019-01-08/2019-01-11},
  address = {Thessaloniki - GR},
  keywords = {Automatic speech recognition, Acoustic event detection, Language identification, Code switching}
}

%%%%%%%%%% 2018 section

% Edited volume exported from the lab publication database.
@book{PubLIMSI-7144,
  author = {Diab, Mona and Villavicencio, Aline and Apidianaki, Marianna and Kordoni, Valia and Nakov, Preslav and Stevenson, Mark},
  title =
{{Essays in Lexical Semantics and Computational Lexicography - In Honor of Adam Kilgarriff}},
  year = {2018},
  pages = {200},
  publisher = {Springer Publishing},
  keywords = {lexical semantics, computational lexicography},
}
% The entry above no longer carries the empty editor field, which only
% produced an "empty field" warning.

% Conventions for the database-exported entries below:
%   - name separators are lowercase "and";
%   - date ranges that were stored in month now use the month macro, with
%     the full range preserved in eventdate;
%   - page ranges use a double hyphen;
%   - stray backslash-space sequences left by a line-wrapping tool inside
%     words ("G\ uillaume", "spani\ sh") were removed.

% Odyssey 2018 paper.
@inproceedings{PubLIMSI-7173,
  author = {Patino, Jose and Yin, Ruiqing and Delgado, H{\'e}ctor and Bredin, Herv{\'e} and Komaty, Alain and Wisniewski, Guillaume and Barras, Claude and Evans, Nicholas and Marcel, S{\'e}bastien},
  title = {{Low-latency speaker spotting with online diarization and detection}},
  booktitle = {{The Speaker and Language Recognition Workshop}},
  year = {2018},
  pages = {140--146},
  month = jun,
  eventdate = {2018-06-26/2018-06-29},
  address = {Les Sables d'Olonne - FR},
}

% CICLing 2018 paper. The second author's surname was exported with a
% grave accent instead of an umlaut.
@inproceedings{PubLIMSI-7165,
  author = {Godard, Pierre and L{\"o}ser, Kevin and Allauzen, Alexandre and Besacier, Laurent and Yvon, Fran{\c{c}}ois},
  title = {{Unsupervised Word Segmentation: does tone matter ?}},
  booktitle = {{International Conference on Intelligent Text Processing and Computational Linguistics}},
  year = {2018},
  pages = {11p},
  month = mar,
  eventdate = {2018-03-18/2018-03-24},
  address = {Hano{\"i} - VN},
  keywords = {Unwritten Language; Tonal Language; Unsupervised Morphology; Word discovery},
}

% TextGraphs workshop paper.
@inproceedings{PubLIMSI-7154,
  author = {Zhang, Zheng and Yin, Ruiqing and Zweigenbaum, Pierre},
  title = {{Efficient Generation and Processing of Word Co-occurrence Networks Using corpus2graph}},
  booktitle = {{Workshop on Graph-Based Natural Language Processing}},
  year = {2018},
  pages = {5p},
  month = jun,
  eventdate = {2018-06-06},
  address = {New Orleans, LA - US},
  keywords = {Natural Language Processing; Graph of Words; Open Source; Python package; Graph Construction},
}

% LabPhon 2018 abstract. The last title word was split by a wrapping
% artifact and is restored (and capitalised as a language name).
% NOTE(review): "America" may be a typo for "American" -- left as exported.
@inproceedings{PubLIMSI-6778,
  author = {Hernandez, Nidia and Vasilescu, Ioana and Dimulescu-Vieru, Bianca and Lamel, Lori},
  title = {{An automatic study of lenition of intra-lexical intervocalic /bdg/ and coda -s in Peninsular vs America Spanish}},
  booktitle = {{Laboratory Phonology Conference}},
  year = {2018},
  pages = {2p},
  month = jun,
  eventdate = {2018-06-19/2018-06-22},
  address = {Lisbonne - PT},
  keywords = {lenition, Spanish, pronunciation variants, ASR},
}

% TALN 2018 paper. The accented i in the first author's surname now uses
% the dotless-i form.
@inproceedings{PubLIMSI-7143,
  author = {Gar{\'\i}, Aina and Apidianaki, Marianna and Allauzen, Alexandre},
  title = {{A comparative study of word embeddings and other features for lexical complexity detection in French}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2018},
  pages = {9p},
  month = may,
  eventdate = {2018-05-14/2018-05-18},
  address = {Rennes - FR},
  keywords = {complexit{\'e} lexicale, simplification, plongement lexical},
}

% NAACL-HLT 2018 papers (two entries).
@inproceedings{PubLIMSI-7142,
  author = {Apidianaki, Marianna and Wisniewski, Guillaume and Cocos, Anne and Callison-Burch, Chris},
  title = {{Automated Paraphrase Lattice Creation for HyTER Machine Translation Evaluation}},
  booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2018},
  pages = {480--485},
  month = jun,
  eventdate = {2018-06-01/2018-06-06},
  address = {Nouvelle Orl{\'e}ans - US},
  keywords = {paraphrase, lattice, MT evaluation, HyTER},
}

@inproceedings{PubLIMSI-7141,
  author = {Cocos, Anne and Apidianaki, Marianna and Callison-Burch, Chris},
  title = {{Comparing Constraints for Taxonomic Organization}},
  booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2018},
  pages = {323--333},
  month = jun,
  eventdate = {2018-06-01/2018-06-06},
  address = {Nouvelle Orl{\'e}ans - US},
  keywords = {taxonomy, paraphrases},
}

% Speech Prosody 2018 paper.
@inproceedings{PubLIMSI-7128,
  author = {Lee, Tan and Ma, Matthew K. H.
and Rilliard, Albert and Mixdorff, Hansj{\"o}rg and H{\"o}nemann, Angelika},
  title = {{Free Labeling of Audio-visual Attitudinal Expressions in Cantonese}},
  booktitle = {{International Conference on Speech Prosody}},
  year = {2018},
  pages = {483--487},
  month = jun,
  eventdate = {2018-06-13/2018-06-16},
  address = {Pozna{\'n} - PL},
  keywords = {attitudes, auditory-visual speech, free labeling},
}
% In the entry above, two names were exported with a grave accent where
% an umlaut belongs, and the city name used a raw non-ASCII character.

% LREC 2018 paper. The separator before the last author had been split
% into "AN\ D" by a wrapping artifact, so BibTeX read the last two
% authors as a single name; it is a plain "and" again.
@inproceedings{PubLIMSI-7126,
  author = {Goldman, Jean-Philippe and Scherrer, Yves and Glikman, Julie and Avanzi, Mathieu and Benzitoun, Christophe and Boula De Mare{\"u}il, Philippe},
  title = {{Crowdsourcing Regional Variation Data and Automatic Geolocalisation of Speakers of European French}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2018},
  pages = {3336--3342},
  month = may,
  eventdate = {2018-05-07/2018-05-12},
  address = {Miyazaki - JP},
  keywords = {linguistique},
}

% LREC 2018 paper. The first author's surname is spelled with the
% diaeresis, as in the HAL-exported entries of this file.
@inproceedings{PubLIMSI-7125,
  author = {Boula De Mare{\"u}il, Philippe and Vernier, Fr{\'e}d{\'e}ric and Rilliard, Albert},
  title = {{A Speaking Atlas of the Regional Languages of France}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2018},
  pages = {4133--4138},
  month = may,
  eventdate = {2018-05-07/2018-05-12},
  address = {Miyazaki - JP},
  keywords = {linguistique},
}

% NAACL-HLT 2018 paper; a stray backslash-space in the booktitle was
% removed.
@inproceedings{PubLIMSI-7120,
  author = {Aufrant, Lauriane and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title = {{Exploiting Dynamic Oracles to Train Projective Dependency Parsers on Non-Projective Trees}},
  booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2018},
  pages = {7p},
  month = jun,
  eventdate = {2018-06-01/2018-06-06},
  address = {New Orleans - US},
  keywords = {dependency parsing; non-projective dependencies},
}

% LREC 2018 paper (lab database export).
@inproceedings{PubLIMSI-7102,
  author = {Godard, Pierre and Adda, Gilles and Adda-Decker, Martine and Benjumea, Juan and Besacier, Laurent and Cooper-Leavitt, Jamison and Kouarata, Guy-No{\"e}l and Lamel, Lori and Maynard,
H{\'e}l{\`e}ne and Mueller, Markus and Rialland, Annie and Stueker, Sebastian and Yvon, Fran{\c{c}}ois and Zanon Boito, Marcely},
  title = {{A Very Low Resource Language Speech Corpus for Computational Language Documentation Experiments}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2018},
  pages = {3366--3370},
  month = may,
  eventdate = {2018-05-07/2018-05-12},
  address = {Miyazaki - JP},
  keywords = {Speech Resource/Database, Endangered Languages, Corpus (Creation, Annotation, Etc.)},
}

% In the entries of this group, date ranges that were stored in month now
% use the month macro (the range is preserved in eventdate), page ranges
% use a double hyphen, and name separators are lowercase "and".

% LREC 2018 paper.
@inproceedings{PubLIMSI-7077,
  author = {Mariani, Joseph-Jean and Francopoulo, Gil and Paroubek, Patrick},
  title = {{Measuring Innovation in Speech and Language Processing Publications}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2018},
  pages = {1890--1895},
  month = may,
  eventdate = {2018-05-07/2018-05-12},
  address = {Miyazaki - JP},
  keywords = {speech and language processing, bibliometry, scientometry, information extraction}
}

% TALN 2018 papers (two entries).
@inproceedings{PubLIMSI-7059,
  author = {Burlot, Franck and Yvon, Fran{\c{c}}ois},
  title = {{Evaluation morphologique pour la traduction automatique : adaptation au fran{\c{c}}ais}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2018},
  pages = {14p},
  month = may,
  eventdate = {2018-05-16/2018-05-18},
  address = {Rennes - FR},
  keywords = {Traduction automatique, {\'e}valuation de la TA, morphologie},
}

@inproceedings{PubLIMSI-7058,
  author = {Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title = {{Divergences entre annotations dans le projet UD et leur impact sur l'{\'e}valuation des performance d'{\'e}tiquetage morpho-syntaxique}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2018},
  pages = {7p},
  month = may,
  eventdate = {2018-05-16/2018-05-18},
  address = {Rennes - FR},
  keywords = {Analyse morpho-syntaxique; adaptation au domaine; mod{\`e}les statistiques}
}

% ICASSP 2018 paper.
@inproceedings{PubLIMSI-7057,
  author = {Ondel, Lucas and Godard, Pierre and Besacier, Laurent and
Larsen, Elin and Hasegawa-Johnson, Mark and Scharenborg, Odette and Dupoux, Emmanuel and Burget, Lukas and Yvon, Fran{\c{c}}ois and Khudanpur, Sanjeev},
  title = {{Bayesian Models for Unit Discovery on a very Low Resource Language}},
  booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}},
  aeres = {ACTI},
  group = {LIMSI,TLP},
  year = {2018},
  pages = {6p},
  month = apr,
  eventdate = {2018-04-15/2018-04-20},
  address = {Calgary - CA},
  keywords = {low resource language; unsupervised speech recognition; unit discovery},
}
% In the entry above, the given name "Odettte" (three t's) was corrected.

% NAACL-HLT 2018 paper.
@inproceedings{PubLIMSI-7056,
  author = {Wisniewski, Guillaume and Lacroix, Oph{\'e}lie and Yvon, Fran{\c{c}}ois},
  title = {{Automatically Selecting the Best Dependency Annotation Design with Dynamic Oracles}},
  booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2018},
  pages = {401--406},
  month = jun,
  eventdate = {2018-06-02/2018-06-04},
  address = {New Orleans - US},
  keywords = {dependency parsing; annotation; universal dependencies}
}

% JADT 2018 paper; a trailing backslash-space at the end of the title was
% removed.
% NOTE(review): the author list is identical to PubLIMSI-7059 and looks
% unrelated to this title -- verify against the publication database.
@inproceedings{PubLIMSI-5790,
  author = {Burlot, Franck and Yvon, Fran{\c{c}}ois},
  title = {{Extraction d'interactions entre aliment et m{\'e}dicament : Etat de l'art et premiers r{\'e}sultats}},
  booktitle = {{Journ{\'e}es internationales d'Analyse statistique des Donn{\'e}es Textuelles}},
  year = {2018},
  pages = {14p},
  month = may,
  eventdate = {2018-05-14/2018-05-18},
  address = {Rennes - FR},
  keywords = {Interaction aliment-m{\'e}dicament, Relation s{\'e}mantique, Corpus de sp{\'e}cialit{\'e}, Classification supervis{\'e}e},
}

% Journal article; the word "automatique" in the title had been split by
% a wrapping artifact, and the double issue number uses a double hyphen.
@article{PubLIMSI-7235,
  author = {Mariani, Joseph-Jean and Francopoulo, Gil and Paroubek, Patrick},
  title = {{Le corpus NLP4NLP pour l'analyse bibliom{\'e}trique de 50 ann{\'e}es de recherches en traitement automatique de la parole et du langage naturel}},
  year = {2018},
  pages = {48 p.},
  journal = {{Document Num{\'e}rique}},
  volume = {20},
  number = {2--3},
}

% Newspaper column.
@article{PubLIMSI-7164,
  author = {Yvon, Fran{\c{c}}ois},
  title = {{Langue et
IA}},
  year = {2018},
  pages = {2p},
  journal = {{Le Monde}},
  number = {26-06-2018},
  keywords = {Traduction automatique; Intelligence artificielle}
}

% Journal article. The title contained two literal "\u2019" escapes (a
% JSON-style apostrophe that LaTeX reads as a breve accent) and a word
% split by a wrapping artifact ("comp\ araisons"); both are repaired.
@article{PubLIMSI-7124,
  author = {Boula De Mare{\"u}il, Philippe and Barbosa, Plinio},
  title = {{Caract{\'e}risation de styles de parole et d'accents {\'e}trangers {\`a} travers l'imitation : comparaisons entre fran{\c{c}}ais et portugais br{\'e}silien}},
  year = {2018},
  pages = {31--44},
  journal = {{Revue Fran{\c{c}}aise de Linguistique Appliqu{\'e}e}},
  volume = {23},
  keywords = {linguistique, prosodie, imitation},
}

% Magazine article.
@article{PubLIMSI-7090,
  author = {Mariani, Joseph-Jean},
  title = {{Quand les machines apprennent les langues}},
  year = {2018},
  pages = {3p},
  journal = {{Culture et recherche}},
  number = {37}
}

% JASA article; page range uses a double hyphen.
@article{PubLIMSI-6602,
  author = {Rilliard, Albert and d'Alessandro, Christophe and Evrard, Marc},
  title = {{Paradigmatic variation of vowels in expressive speech: Acoustic description and dimensional analysis}},
  year = {2018},
  pages = {109--122},
  journal = {{Journal of the Acoustical Society of America}},
  volume = {143},
  number = {1}
}

% Journal article. The separator before "Niculescu" was written "AND\ ",
% which BibTeX does not recognise as a name separator, so two authors
% were merged into one; it is a plain "and" again.
@article{PubLIMSI-6771,
  author = {Vasilescu, Ioana and Chitoran, Ioana and Dimulescu-Vieru, Bianca and Adda-Decker, Martine and Lamel, Lori and Niculescu, Oana and Langlais, P.},
  title = {{Studying variation in Romanian: deletion of the definite article -l in continuous speech}},
  year = {2018},
  pages = {17p},
  journal = {{Linguistic Vanguard}},
  keywords = {Romanian, phonetics, morpho-phonology, definite article, ASR},
}

% Interspeech 2018 paper; "Spanish" is braced so sentence-casing styles
% keep its capital, and names are given as "Last, First".
@inproceedings{is18lenition,
  author = {Vasilescu, Ioana and Hernandez, Nidia and Vieru, Bianca and Lamel, Lori},
  title = {Exploring Temporal Reduction in Dialectal {Spanish}: a Large-scale Study of Lenition of Voiced Stops and Coda-s},
  year = 2018,
  booktitle = {Proc.
Interspeech 2018},
  url = {http://www-tlp.limsi.fr/public/is2018-1256-lenition.pdf}
}
% url={http://dx.doi.org/10.21437/Interspeech.2017-1280}

% Interspeech 2018 papers. In this group the empty pages field (which
% only triggers an "empty field" warning) was dropped, names are given as
% "Last, First", and language names in titles are braced so
% sentence-casing styles keep their capitals.
@inproceedings{is18codeswitching,
  author = {Wottawa, Jane and Amazouz, Djegdjiga and Adda-Decker, Martine and Lamel, Lori},
  title = {Studying Vowel Variation in {French-Algerian} {Arabic} Code-switched Speech},
  year = 2018,
  booktitle = {Proc. Interspeech 2018},
  url = {http://www-tlp.limsi.fr/public/is2018-2381-codeswitching.pdf}
}

@inproceedings{is18yin,
  author = {Yin, Ruiqing and Bredin, Herv{\'e} and Barras, Claude},
  title = {Neural Speech Turn Segmentation and Affinity Propagation for Speaker Diarization},
  year = 2018,
  booktitle = {Proc. Interspeech 2018},
  url = {http://www-tlp.limsi.fr/public/is18-1750-yin.pdf}
}

% "Marcely Zanon Boito" in "First Last" order is parsed with surname
% "Boito"; the comma form keeps the compound surname "Zanon Boito", as in
% the other entries of this file.
@inproceedings{is18godard,
  author = {Godard, Pierre and Zanon Boito, Marcely and Ondel, Lucas and Berard, Alexandre and Yvon, Fran{\c{c}}ois and Villavicencio, Aline and Besacier, Laurent},
  title = {Unsupervised Word Segmentation from Speech with Attention},
  year = 2018,
  booktitle = {Proc.
Interspeech 2018}, pages={}, url={http://www-tlp.limsi.fr/public/is18-1308-godard.pdf} } @InProceedings{HAMLAOUI18.948, author = {Fatima Hamlaoui and Emmanuel-Moselly Makasso and Markus Müller and Jonas Engelmann and Gilles Adda and Alex Waibel and Sebastian Stüker}, title = {BULBasaa: A Bilingual Basaa-French Speech Corpus for the Evaluation of Language Documentation Tools}, booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, year = {2018}, month = {may}, date = {7-12}, location = {Miyazaki, Japan}, editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {979-10-95546-00-9}, language = {english}, url={http://www-tlp.limsi.fr/public/lrec2018-948-basaa.pdf} } @InProceedings{RIALLAND18.635, author = {Annie Rialland and Martine Adda-Decker and Guy-Noël Kouarata and Gilles Adda and Laurent Besacier and Lori Lamel and Elodie Gauthier and Pierre Godard and Jamison Cooper-Leavitt}, title = {Parallel Corpora in Mboshi (Bantu C25, Congo-Brazzaville)}, booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, year = {2018}, month = {may}, date = {7-12}, location = {Miyazaki, Japan}, editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {979-10-95546-00-9}, language = {english}, 
url={http://www-tlp.limsi.fr/public/lrec2018-635-rialland.pdf} } @InProceedings{GODARD18.694, author = {Pierre Godard and Gilles Adda and Martine Adda-Decker and Juan Benjumea and Laurent Besacier and Jamison Cooper-Leavitt and Guy-Noel Kouarata and Lori Lamel and Hélène Maynard and Markus Mueller and Annie Rialland and Sebastian Stueker and François Yvon and Marcely Zanon Boito}, title = {A Very Low Resource Language Speech Corpus for Computational Language Documentation Experiments}, booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, year = {2018}, month = {may}, date = {7-12}, location = {Miyazaki, Japan}, editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga}, publisher = {European Language Resources Association (ELRA)}, address = {Paris, France}, isbn = {979-10-95546-00-9}, language = {english}, url={http://www-tlp.limsi.fr/public/lrec-2018-694-goddard.pdf} } @InProceedings{DJEGDJIGA18.801, author = {Amazouz, Djegdjiga AND Adda-Decker, Martine AND Lamel, Lori}, title = {The French-Algerian Code-Switching Triggered audio corpus (FACST)}, booktitle = {Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)}, year = {2018}, month = {may}, date = {7-12}, location = {Miyazaki, Japan}, keywords= {Code-switching, bilingual speakers, oral speech data, French, Arabic}, editor = {Nicoletta Calzolari (Conference chair) and Khalid Choukri and Christopher Cieri and Thierry Declerck and Sara Goggi and Koiti Hasida and Hitoshi Isahara and Bente Maegaard and Joseph Mariani and Hélène Mazo and Asuncion Moreno and Jan Odijk and Stelios Piperidis and Takenobu Tokunaga}, publisher = {European Language Resources Association (ELRA)}, 
address = {Paris, France}, isbn = {979-10-95546-00-9}, language = {english}, url={http://www-tlp.limsi.fr/public/lrec2018-801-amazouz.pdf} } @article{DBLP:journals/csl/LileikyteLGG18, author = {Rasa Lileikyte and Lori Lamel and Jean{-}Luc Gauvain and Arseniy Gorin}, title = {Conversational telephone speech recognition for Lithuanian}, journal = {Computer Speech {\&} Language}, volume = {49}, pages = {71--82}, year = {2018}, timestamp = {Tue, 23 Jan 2018 15:02:56 +0100}, biburl = {https://dblp.org/rec/bib/journals/csl/LileikyteLGG18}, bibsource = {dblp computer science bibliography, https://dblp.org}, url={http://www-tlp.limsi.fr/public/CSL_2018-Lileikyte.pdf} } @article{DBLP:journals/taslp/GellyG18, author = {Gregory Gelly and Jean{-}Luc Gauvain}, title = {Optimization of RNN-based Speech Activity Detection}, journal = {{IEEE/ACM} Trans. Audio, Speech {\&} Language Processing}, volume = {26}, number = {3}, pages = {646--656}, year = {2018}, timestamp = {Tue, 13 Feb 2018 17:28:35 +0100}, biburl = {https://dblp.org/rec/bib/journals/taslp/GellyG18}, bibsource = {dblp computer science bibliography, https://dblp.org}, url={http://www-tlp.limsi.fr/public/talsp2018-gelly08100927.pdf} } %%%%%%%%%% 2017 section @book{PubLIMSI-6651 , author = {Devillers, Laurence}, author+AN = {1=LIMSI , TLP ; }, title = {{Des robots et des hommes}}, AERES = {OV}, GROUP = {LIMSI,TLP}, year = {2017}, pages = {288}, publisher = {Plon}, ISBN= {2259252273}, } @inproceedings{PubLIMSI-7233 , author = {Mariani, Joseph-Jean AND Francopoulo, Gil AND Paroubek, Patrick AND Vernier, Fr{\'e}d{\'e}ric}, title = {{Rediscovering 50 Years of Discoveries in Speech and Language Processing: A Survey}}, booktitle = {{Oriental COCOSDA}}, year = {2017}, pages = {23p}, month= {2017-11-01 / 2018-07-03}, address= {Seoul - KR}, } @inproceedings{PubLIMSI-7097 , author = {Rollet, Nicolas AND Jain, Varun AND Licoppe, Christian AND Devillers, Laurence}, title = {{Towards Interactional Symbiosis: Epistemic Balance and 
Co-presence in a Quantified Self Experiment}}, booktitle = {{International Workshop, Symbiotic}}, year = {2017}, pages = {143-154}, month= {2017-12-18 / 2017-12-19}, address= {Eindhoven - NL}, keywords= {Quantified self, Conversational analysis, Epistemics}, } @inproceedings{PubLIMSI-7096 , author = {B{\'e}chade, Lucile AND El Haddad, K{\'e}vin AND Bourquin, Juliette AND Dupont, St{\'e}phane AND Devillers, Laurence}, title = {{A corpus for experimental study of affect bursts in human-robot interaction}}, booktitle = {{International Workshop on Investigating Social Interactions with Artificial Agents}}, year = {2017}, pages = {20-21}, month= {2017-11-13}, address= {Glasgow - GB}, keywords= {human-robot interaction, affects bursts, JOKER Chistera Project}, } @inproceedings{PubLIMSI-6822 , author = {Gelly, Gregory AND Gauvain, Jean-Luc}, title = {{Spoken Language Identification Using LSTM-Based Angular Proximity}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {2566-2570}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, url = {http://www-tlp.limsi.fr/public/lid-is2017.pdf} } @inproceedings{PubLIMSI-6786 , author = {Peter, Jan-Thorsten AND Ney, Hermann AND Bojar, Ond{\v{r}}ej AND Pham, Ngoc-Quan AND Niehues, Jan AND Waibel, Alexander AND Burlot, Franck AND Yvon, Fran\c{c}ois AND Pinnis, M{\={a}}rcis AND Sics, Walter AND Bastings, Joost AND Rios, Miguel AND Aziz, Wilker AND Williams, Philip AND Blain, Fr{\'e}d{\'e}ric AND Specia, Lucia}, title = {{The QT21 Combined Machine Translation System for English to Latvian}}, booktitle = {{Conference on Machine Translation}}, year = {2017}, pages = {348--357}, month= {2017-09-07 / 2017-09-08}, address= {Copenhagen - DK}, keywords= {Machine Translation, system combination}, } @inproceedings{PubLIMSI-6716 , author = {Cooper-Leavitt, Jamison AND Lamel, Lori AND Rialland, Annie AND Adda-Decker, Martine AND Adda, Gilles}, title = {{Corpus base 
linguistic exploration via forced alignments with a 'light-weight' ASR tool}}, booktitle = {{Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics}}, year = {2017}, pages = {312-316}, month= {2017-11-17 / 2017-11-19}, address= {Poznań - PL}, } @inproceedings{PubLIMSI-6705 , author = {Lileikyt{\'e}, Rasa AND Fraga-Silva, Thiago AND Lamel, Lori AND Gauvain, Jean-Luc AND Laurent, Antoine AND Huang, Guangpu}, title = {{Effective keyword search for low-resourced conversational speech}}, booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2017}, pages = {5785-5789}, month= {2017-03-05 / 2017-03-09}, address= {New Orleans - US}, keywords= {Speech recognition, keyword search, text augmentation, language modeling, low-resourced languages}, url = {http://www-tlp.limsi.fr/public/lileikyte17.pdf} } @inproceedings{PubLIMSI-6709 , author = {Amazouz, Djegdjiga AND Adda-Decker, Martine AND Lamel, Lori}, title = {{Addressing Code-Switching in French/Algerian Arabic Speech}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {62-66}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {Code-switching, Language Identification, Algerian Arabic, French.}, url={http://www-tlp.limsi.fr/public/is2017-1373-amazouz.PDF} } % url={http://dx.doi.org/10.21437/Interspeech.2017-1373} @inproceedings{PubLIMSI-6708 , author = {Wu, Yaru AND Adda-Decker, Martine AND Fougeron, C{\'e}cile AND Lamel, Lori}, title = {{Schwa Realization in French: Using Automatic Speech Processing to Study Phonological and Socio-linguistic Factors in Large Corpora}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3782-3786}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {schwa, large corpora, forced 
alignment, speechstyle, pre-boundary context, socio-linguistic factor, mixed mode}, url={http://www-tlp.limsi.fr/public/is2017-1470-yaru.PDF} } % url={http://dx.doi.org/10.21437/Interspeech.2017-1470} @inproceedings{PubLIMSI-6707 , author = {Cooper-Leavitt, Jamison AND Lamel, Lori AND Rialland, Annie AND Adda-Decker, Martine AND Adda, Gilles}, title = {{Developing an Embosi (Bantu C25) Speech Variant Dictionary to Model Vowel Elision and Morpheme Deletion}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3911-3915}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {phonetics, phonology, language modeling, under-resourced languages}, url={http://www-tlp.limsi.fr/public/is2017-1280-jcl.PDF} } % url={http://dx.doi.org/10.21437/Interspeech.2017-1280} @inproceedings{PubLIMSI-6706 , author = {Huang, Guangpu AND Fraga Da Silva, Thiago AND Lamel, Lori AND Gauvain, Jean-Luc AND Gorin, Arseniy AND Laurent, Antoine AND Lileikyt{\'e}, Rasa AND Messaoudi, Abdelkhalek}, title = {{An investigation into language model data augmentation for low-resourced STT and KWS}}, booktitle = ieeeicassp, year = {2017}, pages = {5790-5794}, month= {2017-03-05 / 2017-03-09}, address= {New Orleans - US}, keywords= {multilingual, low resourced languages,speech recognition, keyword search}, url = {http://www-tlp.limsi.fr/public/huang17.pdf} } @inproceedings{PubLIMSI-6684 , author = {Vernade, Claire AND Capp{\'e}, Olivier AND Perchet, Vianney}, title = {{Stochastic Bandit Models for Delayed Conversions}}, booktitle = {{Conference on Uncertainty in Artificial Intelligence}}, year = {2017}, pages = {1-10}, month= {2017-08-12 / 2017-08-14}, address= {Sydney - AU}, keywords= {Multi-armed bandit; delayed feedback; online advertising}, } @inproceedings{PubLIMSI-6683 , author = {Lagr{\'e}e, Paul AND Capp{\'e}, Olivier AND Cautis, Bogdan AND Maniu, Silviu}, title = {{Effective Large-Scale Online Influence Maximization}}, 
booktitle = {{IEEE International Conference on Data Mining}}, year = {2017}, pages = {937-942}, month= {2017-11-18 / 2017-11-21}, address= {New Orleans, LA - US}, keywords= {Influence Maximization; Machine Learnig}, } @inproceedings{PubLIMSI-6659 , author = {Lavergne, Thomas AND Yvon, Fran\c{c}ois}, title = {{Learning the Structure of Variable-Order CRFs: a finite-state perspective}}, booktitle = {{Conference on Empirical Methods in Natural Language Processing}}, year = {2017}, pages = {433-439}, month= {2017-09-09 / 2017-09-11}, address= {Copenhagen - DK}, keywords= {Conditional Random Fields; Sequence Models}, } @inproceedings{PubLIMSI-6571 , author = {Labeau, Matthieu AND Allauzen, Alexandre}, title = {{Character and Subword-Based Word Representation for Neural Language Modeling Prediction}}, booktitle = {{Workshop on Subword and Character Level Models in NLP}}, year = {2017}, pages = {11}, month= {2017-09-07}, address= {Copenhagen - DK}, } @inproceedings{PubLIMSI-6520 , author = {Letard, Vincent AND Illouz, Gabriel AND Rosset, Sophie}, title = {{NELIDA : assistant op{\'e}rationnel g{\'e}n{\'e}rique apprenant incr{\'e}mentalement par l'interaction}}, booktitle = {{Conférence Nationale sur les Applications Pratiques de l'Intelligence Artificielle}}, year = {2017}, pages = {85-87}, month= {2017-07-03 / 2017-07-04}, address= {Caen - FR}, keywords= {Syst{\`e}me de dialogue, Apprentissage incr{\'e}mental, Raisonnement par analogie formelle, Syst{\`e}me assistant}, } @inproceedings{PubLIMSI-6519 , author = {Bredin, Herv{\'e}}, title = {{Pyannote.metrics: a toolkit for reproducible evaluation, diagnostic, and error analysis of speaker diarization systems}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3587-3591}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {evaluation, speaker diarization, reproducible research, open-source software}, } @inproceedings{PubLIMSI-6518 , author = 
{Wisniewski, Guillaume AND Bredin, Herv{\'e} AND Gelly, Gregory AND Barras, Claude}, title = {{Combining Speaker Turn Embedding and Incremental Structure Prediction for Low-Latency Speaker Diarization}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3582-3586}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, } @inproceedings{PubLIMSI-6517 , author = {Yin, Ruiqing AND Bredin, Herv{\'e} AND Barras, Claude}, title = {{Speaker Change Detection in Broadcast TV using Bidirectional Long Short-Term Memory Networks}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3827-3831}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {speaker diarization, speaker change detection, sequence labeling, recurrent neural network, LSTM}, } @inproceedings{PubLIMSI-6516 , author = {Le, Nam AND Bredin, Herv{\'e} AND Sargent, Gabriel AND India, Miquel AND Lopez-Otero, Paula AND Barras, Claude AND Guinaudeau, Camille AND Gravier, Guillaume AND da Fonseca, Gabriel Barbosa AND Freire, Izabela Lyon AND Patrocinio, Jr AND Guimaraes, Silvio Jamil F. 
AND Marti, Gerard AND Morros, Josep Ramon AND Hernando, Javier AND Docio-Fernandez, Laura AND Garcia-Mateo, Carmen AND Meignier, Sylvain AND Odobez, Jean-Marc}, title = {{Towards Large Scale Multimedia Indexing: a Case Study on Person Discovery in Broadcast News}}, booktitle = {{International Workshop on Content-Based Multimedia Indexing}}, year = {2017}, pages = {18:1-18:6}, month= {2017-06-19 / 2017-06-21}, address= {Florence - IT}, } @inproceedings{PubLIMSI-6509 , author = {Wijaya, Derry AND Callahan, Brendan AND Hewitt, John AND Gao, Jie AND Ling, Xiao AND Apidianaki, Marianna AND Callison-Burch, Chris}, title = {{Learning Translations via Matrix Completion}}, booktitle = {{Conference on Empirical Methods in Natural Language Processing}}, year = {2017}, pages = {1453-1464}, month= {2017-09-07 / 2017-09-11}, address= {Copenhagen - DK}, keywords= {bilingual lexicon induction, matrix completion, Bayesian Personalized Ranking}, } @inproceedings{PubLIMSI-6508 , author = {Mechanic, Ross AND Fulgoni, Dean AND Cutler, Hannah AND Rajana, Sneha AND Liu, Zheyuan AND Jackson, Bradley AND Cocos, Anne AND Callison-Burch, Chris AND Apidianaki, Marianna}, title = {{KnowYourNyms? 
A Game of Semantic Relationships}}, booktitle = {{Conference on Empirical Methods in Natural Language Processing}}, year = {2017}, pages = {37-42}, month= {2017-09-07 / 2017-09-11}, address= {Copenhagen - DK}, keywords= {semantic relationships, game with a purpose}, } @inproceedings{PubLIMSI-6507 , author = {Rajana, Sneha AND Callison-Burch, Chris AND Apidianaki, Marianna AND Shwartz, Vered}, title = {{Learning Antonyms with Paraphrases and a Morphology-aware Neural Network}}, booktitle = {{Conference on Lexical and Computational Semantics}}, year = {2017}, pages = {12-21}, month= {2017-08-03 / 2017-08-04}, address= {Vancouver - CA}, keywords= {antonym, paraphrasing, neural network}, } @inproceedings{PubLIMSI-6506 , author = {Cocos, Anne AND Apidianaki, Marianna AND Callison-Burch, Chris}, title = {{Mapping the Paraphrase Database to WordNet}}, booktitle = {{Conference on Lexical and Computational Semantics}}, year = {2017}, pages = {84-90}, month= {2017-08-03 / 2017-08-04}, address= {Vancouver - CA}, keywords= {Paraphrase Database, WordNet}, } @inproceedings{PubLIMSI-6504 , author = {Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {{LIMSI@CoNLL'17: UD Shared Task}}, booktitle = {{Conference on Computational Natural Language Learning}}, year = {2017}, pages = {163-173}, month= {2017-08-03 / 2017-08-04}, address= {Vancouver - CA}, keywords= {Dependency parsing; low-resource languages}, } @inproceedings{PubLIMSI-6503 , author = {Apidianaki, Marianna AND Cocos, Anne AND Callison-Burch, Chris}, title = {{Word Sense Filtering Improves Embedding-Based Lexical Substitution}}, booktitle = {{Conference of the European Chapter of the Association for Computational Linguistics}}, year = {2017}, pages = {110-119}, month= {2017-04-04}, address= {Valencia - ES}, keywords= {lexical substitution, sense filtering}, } @inproceedings{PubLIMSI-6499 , author = {Burlot, Franck AND Yvon, Fran\c{c}ois}, title = {{Normalisation automatique du vocabulaire source 
pour traduire depuis une langue {\`a} morphologie riche}}, booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}}, year = {2017}, pages = {16p}, month= {2017-06-26 / 2017-06-30}, address= {Orl{\'e}ans - FR}, keywords= {traduction automatique; langue morphologiquement riche; classification}, } @inproceedings{PubLIMSI-6498 , author = {Bartenlian, El{\'e}onore AND Lacour, Margot AND Labeau, Matthieu AND Allauzen, Alexandre AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {{Adaptation au domaine pour l'analyse morpho-syntaxique}}, booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}}, year = {2017}, pages = {8p}, month= {2017-06-26 / 2017-06-30}, address= {Orl{\'e}ans - FR}, keywords= {Analyse morpho-syntaxique; adaptation au domaine; mod{\`e}les statistiques; UGC}, } @inproceedings{PubLIMSI-6477 , author = {Bawden, Rachel}, author+AN = {1=student, LIMSI , TLP ; }, title = {{Machine Translation, it's a question of style, innit? 
The case of English tag questions}}, booktitle = {{Conference on Empirical Methods in Natural Language Processing}}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2017}, pages = {2497-2502}, month= {2017-09-06 / 2017-09-11}, address= {Copenhagen - DK}, keywords= {MT, machine translation, post-editing, discourse, TA, traduction automatique, post-{\'e}dition,discours, tag questions}, } @inproceedings{PubLIMSI-6475 , author = {Bawden, Rachel}, title = {{Machine Translation of Speech-Like Texts: Strategies for the Inclusion of Context}}, booktitle = {{Rencontres des Etudiants Chercheurs en Informatique pour le Traitement Automatique des Langues}}, year = {2017}, pages = {1-14}, address= {Orl{\'e}ans - FR}, keywords= {MT, machine translation, discourse, context, pronouns, gender, TA, traduction automatique, discours, contexte, parole, pronoms, genre}, } @inproceedings{PubLIMSI-6467 , author = {Burlot, Franck AND Safari, Pooyan AND Labeau, Matthieu AND Allauzen, Alexandre AND Yvon, Fran\c{c}ois}, title = {{LIMSI@WMT'17}}, booktitle = {{Conference on Machine Translation}}, year = {2017}, pages = {257-264}, month= {2017-09-07 / 2017-09-08}, address= {Copenhagen - DK}, keywords= {machine translation, morphologically rich languages, target morphology}, } @inproceedings{PubLIMSI-6466 , author = {Burlot, Franck AND Yvon, Fran\c{c}ois}, title = {{Evaluating the morphological competence of Machine Translation Systems}}, booktitle = {{Conference on Machine Translation}}, year = {2017}, pages = {43-55}, month= {2017-09-07 / 2017-09-08}, address= {Copenhagen - DK}, keywords= {machine translation, evaluation, morphology}, } @inproceedings{PubLIMSI-6465 , author = {Burlot, Franck AND Garcia-Martinez, Mercedes AND Bougares, Fethi AND Barrault, Lo{\"i}c AND Yvon, Fran\c{c}ois}, title = {{Word Representations in Factored Neural Machine Translation}}, booktitle = {{Conference on Machine Translation}}, year = {2017}, pages = {20-31}, month= {2017-09-07 / 2017-09-08}, address= 
{Copenhagen - DK}, keywords= {machine translation, morphology, factored machine translation}, } @inproceedings{PubLIMSI-6449 , author = {Vitale, Marilisa AND Boula De Mareuil, Philippe AND De Meo, Anna}, title = {{Chiedere in italiano: le domande polari e lo sviluppo della competenza prosodica in parlanti cinesi di italiano L2}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {142-159}, address= {Stockholm - SE}, } @inproceedings{PubLIMSI-6448 , author = {Barbosa, Plinio AND Madureira, Sandra AND Boula De Mareuil, Philippe}, title = {{Cross-Linguistic Distinctions Between Professional and Non-Professional Speaking Styles}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2017}, pages = {3921-3925}, month= {2017-08-20 / 2017-08-24}, address= {Stockholm - SE}, keywords= {linguistique}, } @inproceedings{PubLIMSI-6428 , author = {Labeau, Matthieu AND Allauzen, Alexandre}, title = {{Repr{\'e}sentations continues d{\'e}riv{\'e}es des caract{\`e}res pour un mod{\`e}le de langue neuronal {\`a} vocabulaire ouvert}}, booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}}, year = {2017}, pages = {32-46}, month= {2017-06-26 / 2017-06-30}, address= {Orl{\'e}ans - FR}, keywords= {Mod{\`e}le de langue neuronal, Repr{\'e}sentations continues d{\'e}riv{\'e}es des caract{\`e}res, Traduction automatique par approche statistique}, } @inproceedings{PubLIMSI-6427 , author = {Labeau, Matthieu AND Allauzen, Alexandre}, title = {{An experimental analysis of Noise-Contrastive Estimation: the noise distribution matters}}, booktitle = {{Conference of the European Chapter of the Association for Computational Linguistics}}, year = {2017}, pages = {15-20}, address= {Valencia - ES}, } @inproceedings{PubLIMSI-6395 , author = {Boula De Mareuil, Philippe AND Scherrer, Yves AND Goldman, Jean-Philippe}, title = {{Combien d'accents en fran\c{c}ais? 
Focus sur la France, la Belgique et la Suisse}}, booktitle = {{VALS-ASLA}}, year = {2017}, pages = {91-103}, month= {2016-01-20 / 2016-01-22}, address= {Gen{\`e}ve - CH}, keywords= {Linguistique}, } @inproceedings{PubLIMSI-6385 , author = {Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {{Don't Stop Me Now! Using Global Dynamic Oracles to Correct Training Biases of Transition-Based Dependency Parsers}}, booktitle = {{Conference of the European Chapter of the ACL}}, year = {2017}, pages = {318-323}, address= {Valencia - ES}, keywords= {dependency parsing; transition-based system; machine learning}, } @inproceedings{PubLIMSI-6379 , author = {Dubuisson Duplessis, Guillaume AND Charras, Franck AND Letard, Vincent AND Ligozat, Anne-Laure AND Rosset, Sophie}, title = {{Utterance Retrieval based on Recurrent Surface Text Patterns}}, booktitle = {{European Conference on Information Retrieval}}, year = {2017}, pages = {199-211}, address= {Aberdeen - GB}, keywords= {Dialogue utterance retrieval; Example-based dialogue modelling; Open-domain dialogue system; Evaluation}, } @inproceedings{PubLIMSI-6217 , author = {Bredin, Herv{\'e}}, title = {{TristouNet: Triplet Loss for Speaker Turn Embedding}}, booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}}, year = {2017}, pages = {5430-5434}, month= {2017-03-05 / 2017-03-09}, address= {New Orleans - US}, } @article{PubLIMSI-7095 , author = {Grinbaum, Alexei AND Chatila, Raja AND Devillers, Laurence AND Ganascia, Jean-Gabriel AND Tessier, Catherine AND Dauchet, Max}, title = {{Ethics in Robotics Research: CERNA Mission and Context}}, year = {2017}, pages = {139-145}, journal = {{IEEE Robotics and Automation Magazine}}, volume = {24}, number = {3}, keywords= {{\'e}thique, robotique, CERNA} } @article{PubLIMSI-6772 , author = {Vasilescu, Ioana}, title = {{De la variation dans la production et la perception de la parole : analyses de grands corpus en lien avec le 
traitement automatique}}, year = {2017}, pages = {49-52}, journal = {{L'information grammaticale}}, volume = {154}, keywords= {grands corpus, production perception, acoustique, reconnaissance vocale}, } @article{PubLIMSI-6696 , author = {Boula De Mareuil, Philippe AND Vernier, Fr{\'e}d{\'e}ric AND Rilliard, Albert}, title = {{Enregistrements et transcriptions pour un atlas sonore des langues r{\'e}gionales de France}}, year = {2017}, pages = {23-48}, journal = {{G{\'e}olinguistique}}, volume = {17} } @article{PubLIMSI-6628 , author = {Mariani, Joseph-Jean AND Francopoulo, Gil AND Paroubek, Patrick}, title = {{Reuse and Plagiarism in Speech and Natural Language Processing}}, year = {2017}, pages = {1-14}, journal = {{International Journal on Digital Libraries}}, volume = {18}, keywords= {Plagiarism Detection Text reuse Natural Language Processing Speech Processing Scientometrics Informetrics} } @article{PubLIMSI-6570 , author = {Lauly, Stanislas AND Zheng, Yin AND Allauzen, Alexandre AND Larochelle, Hugo}, title = {{Document Neural Autoregressive Distribution Estimation}}, year = {2017}, pages = {24}, journal = {{Journal of Machine Learning Research}}, volume = {18}, number = {113}, keywords= {Deep-Learning, Neural Network, Natural Language processing} } @article{PubLIMSI-6543 , author = {Costa-Jussa, Marta AND Allauzen, Alexandre AND Barrault, Lo{\"i}c AND Cho, Kyunghun AND Schwenk, Holger}, title = {{Introduction to the special issue on deep learning approaches for machine translation}}, year = {2017}, pages = {367-373}, journal = {{Computer Speech and Language}}, volume = {46} } @article{PubLIMSI-6515 , author = {Poignant, Johann AND Bredin, Herv{\'e} AND Barras, Claude}, title = {{Multimodal Person Discovery in Broadcast TV: Lessons Learned from MediaEval 2015}}, year = {2017}, pages = {22547-22567}, journal = {{Multimedia Tools and Applications}}, volume = {76}, number = {21}, keywords= {Benchmark, Information retrieval, Unsupervised person recognition, 
Multimodal fusion, Error analysis} } @article{PubLIMSI-6497 , author = {Allauzen, Alexandre AND Do, Quoc Khanh AND Yvon, Fran\c{c}ois}, title = {{A comparison of discriminative training criteria for continuous space translation models}}, year = {2017}, pages = {19-33}, journal = {{Machine Translation}}, volume = {31}, number = {1-2}, keywords= {Statistical machine translation; Neural network translation model; Discriminative training} } @article{PubLIMSI-6432 , author = {Avanzi, Mathieu AND Boula De Mareuil, Philippe}, title = {{Identification of regional French accents in (northern) France, Belgium, and Switzerland}}, year = {2017}, pages = {17-40}, journal = {{Journal of Linguistic Geography}}, volume = {5}, keywords= {linguistique} } @article{PubLIMSI-6405 , author = {Burlot, Franck AND Yvon, Fran\c{c}ois}, title = {{Learning Morphological Normalization for Translation from and into Morphologically Rich Languages}}, year = {2017}, pages = {49-60}, journal = {{Prague Bulletin of Mathematical Linguistics}}, number = {108}, keywords= {Machine Translation, Morphologically Rich Languages, Reinflection} } @article{PubLIMSI-5824 , author = {P{\'e}cheux, Nicolas AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {{Reassessing the value of resources for cross-lingual transfer of POS tagging models}}, year = {2017}, pages = {927-960}, journal = {{Language Resources and Evaluation}}, volume = {51}, number = {4}, keywords= {Part-of-Speech Tagging, Cross-Lingual Transfer} } %%%%%%%%%% 2016 section @book{PubLIMSI-6240 , author = {Fort, Kar{\"e}n AND Adda, Gilles AND Cohen, Kevin}, title = {{TAL et {\'e}thique}}, AERES = {DO}, year = {2016}, pages = {107}, volume = {57-2}, publisher = {ATALA}, ISSN= {1965-0906}, keywords= {{\'e}thique ; d{\'e}ontologie ; traitement du langage naturel}, } @book{PubLIMSI-6061 , author = {Adda, Gilles AND Barbu Mititelu, Verginica AND Mariani, Joseph-Jean AND Tufi{\c{s}}, Dan AND Vasilescu, Ioana}, title = {{Errors by human and machines in 
Multimedia, Multimodal and multilingual data processing}}, year = {2016}, pages = {223}, publisher = {Editura Academiei Romane}, ISBN= {978-973-2600-6}, } @inproceedings{PubLIMSI-6779 , author = {Vasilescu, Ioana AND Renwick, Margaret AND Dutrey, Camille AND Lamel, Lori}, title = {{R{\'e}alisation phon{\'e}tique et contraste phonologique marginal: une {\'e}tude automatique des voyelles du roumain}}, booktitle = {{Journ{\'e}es d'{\'E}tudes sur la Parole}}, year = {2016}, pages = {597-607}, month= {2016-07-04 / 2016-07-08}, address= {Paris - FR}, keywords= {voyelles, phon{\'e}tique, contraste marginal, Roumain}, } @inproceedings{PubLIMSI-6774 , author = {Renwick, Margaret AND Vasilescu, Ioana AND Dutrey, Camille AND Lamel, Lori AND Dimulescu-Vieru, Bianca}, title = {{A phonologically weak contrast can induce phonetic overlap}}, booktitle = {{Laboratory Phonology Conference}}, year = {2016}, pages = {2p}, month= {2016-07-13 / 2016-07-17}, address= {Ithaca - US}, keywords= {Romanian vowels, marginal contrast, acoustic}, } @inproceedings{PubLIMSI-6329 , author = {Povolny, Filip AND Matejka, Pavel AND Hradis, Michal AND Popkova, Anna AND Otrusina, Lubomir AND Smrz, Pavel AND Wood, Ian AND Robin, Cecile AND Lamel, Lori}, title = {{Multimodal Emotion Recognition for AVEC 2016 Challenge}}, booktitle = {{Audio/Visual Emotion Challenge}}, year = {2016}, pages = {75-82}, month= {2016-10-16}, address= {Amsterdam - NL}, keywords= {emotion recognition, valence, arousal, bottleneck features, neural networks, regression, speech transcription, word embedding}, } @inproceedings{PubLIMSI-6327 , author = {Vasilescu, Ioana AND Renwick, Margaret EL AND Dimulescu-Vieru, Bianca AND Lamel, Lori}, title = {{On the Phonemic Status of the Romanian Vowels Ă [Ʌ] and Î [Ɨ]}}, booktitle = {{Linguistic Resources and Tools for Processing the Romanian Language}}, year = {2016}, pages = {205-207}, address= {Malini - RO}, keywords= {automatic speech recognition, acoustic analysis, duration, formants, 
marginal contrast, phonology, Romanian vowels}, } @inproceedings{PubLIMSI-6326 , author = {Renwick, Margaret EL AND Vasilescu, Ioana AND Dutrey, Camille AND Lamel, Lori AND Dimulescu-Vieru, Bianca}, title = {{Marginal contrast among Romanian vowels: evidence from ASR and functional load}}, booktitle = {{Annual Conference of the International Speech Communication Association}}, year = {2016}, pages = {2433-2437}, address= {San Francisco - US}, keywords= {phonology, Romanian vowels, marginal contrast, frequency distribution, functional load, automatic speech recognition, pronunciation variants.}, } @inproceedings{PubLIMSI-6280 , author = {Lileikyt{\'e}, Rasa AND Gorin, Arseniy AND Lamel, Lori AND Gauvain, Jean-Luc AND Fraga-Silva, Thiago}, title = {{Lithuanian Broadcast Speech Transcription Using Semi-Supervised Acoustic Model Training}}, booktitle = {{International Workshop on Spoken Languages Technologies for Under-resourced languages}}, year = {2016}, pages = {107-113}, month= {2016-05-09 / 2016-05-12}, address= {Yogyakarta - ID}, keywords= {automatic speech recognition, low-resourced languages, semi-supervised training, neural networks, Lithuanian language}, } @inproceedings{PubLIMSI-6218 , author = {Bruneau, Pierrick AND Stefas, Micka{\"e}l AND Poignant, Johann AND Bredin, Herv{\'e} AND Barras, Claude}, title = {{Post-Hoc Interactive Analytics of Errors in the Context of a Person Discovery Task}}, booktitle = {{IEEE International Symposium on Multimedia 2016}}, year = {2016}, pages = {435-438}, address= {San Jose - US}, } @inproceedings{PubLIMSI-6270 , author = {Mariani, Joseph-Jean AND Francopoulo, Gil AND Paroubek, Patrick}, title = {{A Study of Reuse and Plagiarism in Speech and Natural Language Processing papers}}, booktitle = {{BIRNDL 2016}}, year = {2016}, pages = {72-83}, address= {Newark - US}, keywords= {Plagiarism Detection, Text reuse, Natural Language Processing, Speech Processing, Scientometrics, Informetrics}, } @inproceedings{PubLIMSI-6264 , 
author = {St{\"u}ker, Sebastian AND Adda, Gilles AND Adda-Decker, Martine AND Ambouroue, Odette AND Besacier, Laurent AND Blachon, David AND Maynard, H{\'e}l{\`e}ne AND Gauthier, Elodie AND Godard, Pierre AND Hamlaoui, Fatima AND Idiatov, Dmitry AND Kouarata, Guy-No{\"e}l AND Lamel, Lori AND Makasso, Emmanuel-Moselly AND Muller, Markus AND Rialland, Annie AND Van de Velde, Mark AND Yvon, Fran\c{c}ois AND Zerbian, Sabine},
  title = {{Innovative Technologies for Under-resourced Language Documentation: The BULB Project}},
  booktitle = {{Collaboration and Computing for Under-Resourced Languages}},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,P2I},
  year = {2016},
  pages = {59--66},
  address= {Portoroz - SI},
  keywords= {Language documentation, automatic phonetic transcription, unwritten languages, automatic alignment},
}
@inproceedings{PubLIMSI-6249 ,
  author = {Apidianaki, Marianna},
  title = {{Vector-space Models for PPDB Paraphrase Ranking in Context}},
  booktitle = {{Conference on Empirical Methods in Natural Language Processing}},
  year = {2016},
  pages = {2028--2034},
  address= {Austin - US},
  keywords= {paraphrases, lexical substitution},
}
@inproceedings{PubLIMSI-6262 ,
  author = {Gauthier, Elodie AND Blachon, David AND Besacier, Laurent AND Kouarata, Guy-No{\"e}l AND Adda-Decker, Martine AND Rialland, Annie AND Adda, Gilles AND Bachman, Gr{\'e}goire},
  title = {{LIG-AIKUMA: a Mobile App to Collect Parallel Speech for Under-Resourced Language Studies}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2016},
  pages = {2p},
  address= {San Francisco - US},
  keywords= {speech data collection, mobile app, unwritten languages, fieldwork linguistics, language documentation},
}
@inproceedings{PubLIMSI-6219 ,
  author = {Cohen, Kevin AND Fort, Kar{\"e}n AND Adda, Gilles AND Zhou, Sophia AND Farri, Dimeji},
  title = {{Ethical Issues in Corpus Linguistics And Annotation: Pay Per Hit Does Not Affect Effective Hourly Rate For Linguistic Resource Development On
Amazon Mechanical Turk}},
  booktitle = {{ETHics In Corpus Collection, Annotation \& Application}},
  year = {2016},
  pages = {5p},
  address= {Portoroz - SI},
  keywords= {ethics ; corpus linguistics ; corpus annotation ; Amazon Mechanical Turk ; crowdsourcing},
}
@inproceedings{PubLIMSI-6222 ,
  author = {Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {{Zero-resource Dependency Parsing: Boosting Delexicalized Cross-lingual Transfer with Linguistic Knowledge}},
  booktitle = {{International Conference on Computational Linguistics}},
  year = {2016},
  pages = {119--130},
  address= {Osaka - JP},
  keywords= {dependency parsing; cross-language transfer},
}
@inproceedings{PubLIMSI-6184 ,
  author = {Poignant, Johann AND Bredin, Herv{\'e} AND Barras, Claude AND Stefas, Micka{\"e}l AND Bruneau, Pierrick AND Tamisier, Thomas},
  title = {{Benchmarking multimedia technologies with the CAMOMILE platform: the case of Multimodal Person Discovery at MediaEval 2015}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {5p},
  address= {Portoroz - SI},
  keywords= {evaluation campaign, collaborative annotation, multimedia},
}
@inproceedings{PubLIMSI-6185 ,
  author = {Bredin, Herv{\'e} AND Barras, Claude AND Guinaudeau, Camille},
  title = {{Multimodal Person Discovery in Broadcast TV at MediaEval 2016}},
  booktitle = {{MediaEval}},
  year = {2016},
  pages = {3p},
  address= {Hilversum - NL},
}
@inproceedings{PubLIMSI-6102 ,
  author = {Bawden, Rachel AND Crabb{\'e}, Beno{\^i}t},
  title = {{Boosting for Efficient Model Selection for Syntactic Parsing}},
  booktitle = {{International Conference on Computational Linguistics}},
  year = {2016},
  pages = {1--11},
  address= {Osaka - JP},
  keywords= {parsing,syntax,model selection,boosting},
}
@inproceedings{PubLIMSI-6151 ,
  author = {Boula De Mareuil, Philippe AND Goldman, Jean-Philippe AND Rilliard, Albert AND Scherrer, Yves AND Vernier, Fr{\'e}d{\'e}ric},
  title = {{Cartopho : un site web de
cartographie de variantes de prononciation en fran\c{c}ais}},
  booktitle = {{Journ{\'e}es d'{\'E}tudes sur la Parole}},
  year = {2016},
  pages = {119--127},
  address= {Paris - FR},
  keywords= {g{\'e}olinguistique, dialectologie, cartographie, visualisation, variantes de prononciation},
}
@inproceedings{PubLIMSI-6070 ,
  author = {Francopoulo, Gil AND Mariani, Joseph-Jean AND Paroubek, Patrick AND Vernier, Fr{\'e}d{\'e}ric},
  title = {{Providing and Analyzing NLP Terms for our Community}},
  booktitle = {{International Workshop on Computational Terminology}},
  year = {2016},
  pages = {94--103},
  address= {Osaka - JP},
}
@inproceedings{PubLIMSI-6100 ,
  author = {Bojar, Ond{\v{r}}ej AND Chatterjee, Rajen AND Federmann, Christian AND Graham, Yvette AND Haddow, Barry AND Huck, Matthias AND Jimeno Yepes, Antonio AND Koehn, Philipp AND Logacheva, Varvara AND Monz, Christof AND Negri, Matteo AND N{\'e}v{\'e}ol, Aur{\'e}lie AND Neves, Mariana AND Popel, Martin AND Post, Matt AND Rubino, Raphael AND Scarton, Carolina AND Specia, Lucia AND Turchi, Marco AND Verspoor, Karin AND Zampieri, Marcos},
  title = {{Findings of the 2016 Conference on Machine Translation.}},
  booktitle = {{Conference on Machine Translation}},
  AERES = {ACTI},
  year = {2016},
  pages = {131--198},
  month= {2016-08-11 / 2016-08-12},
  address= {Berlin - DE},
}
@inproceedings{PubLIMSI-6064 ,
  author = {Mariani, Joseph-Jean},
  title = {{Technologies de la Langue : Etat des Lieux}},
  booktitle = {{Colloque Technologies de la Langue pour les Langues R{\'e}gionales de France}},
  year = {2016},
  pages = {27--39},
  address= {Meudon - FR},
}
@inproceedings{PubLIMSI-6066 ,
  author = {Francopoulo, Gil AND Mariani, Joseph-Jean AND Paroubek, Patrick},
  title = {{Text Mining for Notability Computation}},
  booktitle = {{Cross-Platform Text Mining and Natural Language Processing Interoperability Workshop}},
  year = {2016},
  pages = {52--56},
  address= {Portoroz - SI},
}
@inproceedings{PubLIMSI-6068 ,
  author = {Francopoulo, Gil AND Mariani, Joseph-Jean AND
Paroubek, Patrick},
  title = {{Linking Language Resources and NLP Papers}},
  booktitle = {{Workshop on Research Results Reproducibility and Resources Citation in Science and Technology of Language}},
  year = {2016},
  pages = {24--32},
  address= {Portoroz - SI},
}
@inproceedings{PubLIMSI-6052 ,
  author = {Burlot, Franck AND Labeau, Matthieu AND Knyazeva, Elena AND Lavergne, Thomas AND Allauzen, Alexandre AND Yvon, Fran\c{c}ois},
  title = {{LIMSI@IWSLT: MT Track}},
  booktitle = {{International Workshop on Spoken Language Translation}},
  year = {2016},
  pages = {7p},
  address= {Seattle - US},
  keywords= {Machine translation ; Morphology},
}
@inproceedings{PubLIMSI-6053 ,
  author = {Burlot, Franck AND Knyazeva, Elena AND Lavergne, Thomas AND Yvon, Fran\c{c}ois},
  title = {{Two-Step MT: Predicting Target Morphology}},
  booktitle = {{International Workshop on Spoken Language Translation}},
  year = {2016},
  pages = {8p},
  address= {Seattle - US},
  keywords= {Machine translation ; Morphology},
}
@inproceedings{PubLIMSI-5977 ,
  author = {Ive, Julia AND Yvon, Fran\c{c}ois},
  title = {{Parallel Sentence Compression}},
  booktitle = {{International Conference on Computational Linguistics}},
  year = {2016},
  pages = {1503--1513},
  month= {2016-12-11 / 2016-12-16},
  address= {Osaka - JP},
  keywords= {compression simplification bilingual methods},
}
@inproceedings{PubLIMSI-6030 ,
  author = {Devillers, Laurence AND Dubuisson Duplessis, Guillaume},
  title = {{Toward a Context-based Approach to Assess Engagement in Human-Robot Social Interaction}},
  booktitle = {{International Workshop on Spoken Dialogue Systems}},
  year = {2016},
  pages = {7p},
  address= {Saariselk{\"a} - FI},
  keywords= {Human-Robot Interaction, Social Dialogue, Communication Accommodation Theory, Engagement},
}
@inproceedings{PubLIMSI-5928 ,
  author = {Letard, Vincent AND Rosset, Sophie AND Illouz, Gabriel},
  title = {{Incremental Learning From Scratch Using Analogical Reasoning}},
  booktitle = {{IEEE International Conference on Tools with Artificial
Intelligence}},
  year = {2016},
  pages = {8p},
  address= {San Jose - US},
  keywords= {formal analogical reasoning, incremental learning, language transfer, low-resourced},
}
@inproceedings{PubLIMSI-5915 ,
  author = {Poignant, Johann AND Budnik, M. AND Bredin, Herv{\'e} AND Barras, Claude AND Stefas, Micka{\"e}l AND Bruneau, Pierrick AND Adda, Gilles AND Mariani, Joseph-Jean AND Rosset, Sophie},
  title = {{The CAMOMILE Collaborative Annotation Platform for Multi-modal, Multi-lingual and Multi-media Documents}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {5p},
  address= {Portoroz - SI},
  keywords= {Annotation tool, collaborative annotation, multimedia, active learning, person annotation.},
}
@inproceedings{PubLIMSI-5926 ,
  author = {Charras, Franck AND Dubuisson Duplessis, Guillaume AND Letard, Vincent AND Ligozat, Anne-Laure AND Rosset, Sophie},
  title = {{Comparing System-response Retrieval Models for Open-domain and Casual Conversational Agent}},
  booktitle = {{Second Workshop on Chatbots and Conversational Agent Technologies}},
  year = {2016},
  pages = {12p},
  address= {Los Angeles - US},
  keywords= {Example-based dialogue modelling; Open-domain dialogue system; Human-Machine dialogue corpus; Evaluation},
}
@inproceedings{PubLIMSI-5927 ,
  author = {Letard, Vincent AND Illouz, Gabriel AND Rosset, Sophie},
  title = {{Reducing Noise Sensitivity of Formal Analogical Reasoning applied to Language Transfer}},
  booktitle = {{Computational Analogy Workshop at ICCBR}},
  year = {2016},
  pages = {11},
  address= {Atlanta - US},
  keywords= {formal analogical reasoning, example based language transfer, constraint relaxation},
}
@inproceedings{PubLIMSI-5924 ,
  author = {Letard, Vincent AND Illouz, Gabriel AND Rosset, Sophie},
  title = {{{\'E}valuation de l'apprentissage incr{\'e}mental par analogie}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2016},
  pages = {13p},
  address= {Paris - FR},
  keywords=
{apprentissage incr{\'e}mental, raisonnement analogique, transfert de langage},
}
@inproceedings{PubLIMSI-5925 ,
  author = {Charras, Franck AND Dubuisson Duplessis, Guillaume AND Letard, Vincent AND Ligozat, Anne-Laure AND Rosset, Sophie},
  title = {{Un syst{\`e}me automatique de s{\'e}lection de r{\'e}ponse en domaine ouvert int{\'e}grable {\`a} un syst{\`e}me de dialogue social}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2016},
  pages = {3p},
  address= {Paris - FR},
  keywords= {Mod{\`e}le de dialogue bas{\'e} sur des exemples ; Syst{\`e}me de dialogue en domaine ouvert.},
}
@inproceedings{PubLIMSI-5921 ,
  author = {Galibert, Olivier AND Kahn, Juliette AND Rosset, Sophie},
  title = {{Comparaison de listes d'erreurs de transcription automatique de la parole : quelle compl{\'e}mentarit{\'e} entre les diff{\'e}rentes m{\'e}triques ?}},
  booktitle = {{Journ{\'e}es d'{\'E}tudes sur la Parole}},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2016},
  pages = {9p},
  address= {Paris - FR},
  keywords= {Reconnaissance automatique de la parole, M{\'e}triques d'{\'e}valuation, Analyse d'erreurs.},
}
@inproceedings{PubLIMSI-5923 ,
  author = {Galibert, Olivier AND Camelin, Nathalie AND Del{\'e}glise, Paul AND Rosset, Sophie},
  title = {{Estimation de la qualit{\'e} d'un syst{\`e}me de reconnaissance de la parole pour une t{\^a}che de compr{\'e}hension}},
  booktitle = {{Journ{\'e}es d'{\'E}tudes sur la Parole}},
  year = {2016},
  pages = {9p},
  address= {Paris - FR},
  keywords= {reconnaissance de la parole, compr{\'e}hension, m{\'e}trique d'{\'e}valuation},
}
@inproceedings{PubLIMSI-5917 ,
  author = {Ehrmann, Maud AND Nouvel, Damien AND Rosset, Sophie},
  title = {{Named Entity Resources - Overview and Outlook}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {8p},
  address= {Portoroz - SI},
  keywords= {named entity, linguistic resources, NE typologies, annotated corpora, evaluation, linked data},
}
% NOTE(review): PubLIMSI-5918 gives Portoroz for a workshop that
% PubLIMSI-5926 (same booktitle) places in Los Angeles -- confirm the venue.
@inproceedings{PubLIMSI-5918 ,
  author = {Dubuisson Duplessis, Guillaume AND Letard, Vincent AND Ligozat, Anne-Laure AND Rosset, Sophie},
  title = {{Joker Chatterbot}},
  booktitle = {{Second Workshop on Chatbots and Conversational Agent Technologies}},
  year = {2016},
  pages = {2p},
  address= {Portoroz - SI},
  abstract= {The Joker chatterbot is an example-based system that uses a database of indexed dialogue examples automatically built from a television drama subtitle corpus to manage social open-domain dialogue.},
  keywords= {dialog, data collection},
}
@inproceedings{PubLIMSI-5914 ,
  author = {Dubuisson Duplessis, Guillaume AND Letard, Vincent AND Ligozat, Anne-Laure AND Rosset, Sophie},
  title = {{Purely Corpus-based Automatic Conversation Authoring}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {8p},
  address= {Portoroz - SI},
  keywords= {dialogue, evaluation, corpus},
}
@inproceedings{PubLIMSI-5916 ,
  author = {Galibert, Olivier AND Ben Jannet, Mohamed Ameur AND Kahn, Juliette AND Rosset, Sophie},
  title = {{Generating Task-Pertinent sorted Error Lists for Speech Recognition}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {7},
  address= {Portoroz - SI},
  keywords= {Automatic Speech Recognition, Metrics, Error Analysis},
}
@inproceedings{PubLIMSI-5894 ,
  author = {Francopoulo, Gil AND Mariani, Joseph-Jean AND Paroubek, Patrick},
  title = {{A Study of Reuse and Plagiarism in LREC papers}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {1890--1897},
  address= {Portoroz - SI},
  keywords= {Plagiarism Detection, Natural Language Processing},
}
@inproceedings{PubLIMSI-5896 ,
  author = {Francopoulo, Gil AND Mariani, Joseph-Jean AND Paroubek, Patrick},
  title = {{Predictive modeling: guessing the NLP terms of tomorrow}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages =
{336--343},
  month= {2016-05-23 / 2016-05-28},
  address= {Portoroz - SI},
  keywords= {Predictive Modeling, Predictive Analytics, Term Extraction, Natural Language Processing},
}
@inproceedings{PubLIMSI-6259 ,
  author = {Adda, Gilles AND St{\"u}ker, Sebastian AND Adda-Decker, Martine AND Ambouroue, Odette AND Besacier, Laurent AND Blachon, David AND Maynard, H{\'e}l{\`e}ne AND Godard, Pierre AND Hamlaoui, Fatima AND Idiatov, Dmitry AND Kouarata, Guy-No{\"e}l AND Lamel, Lori AND Makasso, Emmanuel-Moselly AND Rialland, Annie AND Van de Velde, Mark AND Yvon, Fran\c{c}ois AND Zerbian, Sabine},
  title = {{Breaking the Unwritten Language Barrier: The BULB Project}},
  booktitle = {{International Workshop on Spoken Languages Technologies for Under-resourced languages}},
  year = {2016},
  pages = {8--14},
  month= {2016-05-09 / 2016-05-12},
  address= {Yogyakarta - ID},
  keywords= {Language documentation ; automatic phonetic transcription ; unwritten languages ; automatic alignment},
}
@inproceedings{PubLIMSI-5848 ,
  author = {Bawden, Rachel},
  title = {{Cross-lingual Pronoun Prediction with Linguistically Informed Features}},
  booktitle = {{Conference on Machine Translation}},
  year = {2016},
  pages = {564--570},
  month= {2016-08-11 / 2016-08-12},
  address= {Berlin - DE},
  keywords= {cross-lingual pronoun prediction, wmt, pronoun},
}
@inproceedings{PubLIMSI-5884 ,
  author = {Godard, Pierre AND Adda, Gilles AND Adda-Decker, Martine AND Allauzen, Alexandre AND Besacier, Laurent AND Maynard, H{\'e}l{\`e}ne AND Kouarata, Guy-No{\"e}l AND L{\"o}ser, Kevin AND Rialland, Annie AND Yvon, Fran\c{c}ois},
  title = {{Preliminary Experiments on Unsupervised Word Discovery in Mboshi}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2016},
  pages = {3539--3543},
  address= {San Francisco - US},
  keywords= {automatic alignment, automatic transcription, machine translation, Bantu languages, language documentation},
}
@inproceedings{PubLIMSI-5822 ,
  author = {Aufrant, Lauriane
AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {{Ne nous arr{\^e}tons pas en si bon chemin : am{\'e}liorations de l'apprentissage global d'analyseurs en d{\'e}pendances par transition}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2016},
  pages = {248--261},
  address= {Paris - FR},
  keywords= {Analyse en d{\'e}pendances, Analyse par transition, Oracle},
}
@inproceedings{PubLIMSI-5847 ,
  author = {Bawden, Rachel AND Wisniewski, Guillaume AND Maynard, H{\'e}l{\`e}ne},
  title = {{Investigating gender adaptation for speech translation}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2016},
  pages = {490--497},
  address= {Paris - FR},
  keywords= {Speech translation, SMT, gender, adaptation, parallel corpus},
}
@inproceedings{PubLIMSI-5817 ,
  author = {Lacroix, Oph{\'e}lie AND Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {{Frustratingly Easy Cross-Lingual Transfer for Transition-Based Dependency Parsing}},
  booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2016},
  pages = {1058--1063},
  address= {San Diego - US},
}
@inproceedings{PubLIMSI-5819 ,
  author = {Lacroix, Oph{\'e}lie AND Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {{Apprentissage d'analyseur en d{\'e}pendances cross-lingue par projection partielle de d{\'e}pendances}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2016},
  pages = {1--14},
  month= {2016-07-04 / 2016-07-08},
  address= {Paris - FR},
  keywords= {Transfert cross-lingue, Analyse en d{\'e}pendances, Annotations partielles},
}
@inproceedings{PubLIMSI-5805 ,
  author = {Yvon, Fran\c{c}ois AND Xu, Yong AND Apidianaki, Marianna AND Pillias, Cl{\'e}ment AND Cubaud, Pierre},
  title = {{TransRead: Designing a Bilingual Reading Experience with Machine Translation Technologies}},
booktitle = {{Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies}},
  year = {2016},
  pages = {27--31},
  address= {San Diego - US},
  keywords= {Machine translation, statistical alignment, electronic books},
}
@inproceedings{PubLIMSI-5813 ,
  author = {Ive, Julia AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois},
  title = {{LIMSI's Contribution to the WMT Biomedical Translation Task}},
  booktitle = {{Conference on Machine Translation}},
  year = {2016},
  pages = {8p},
  month= {2016-08-11 / 2016-08-12},
  address= {Berlin - DE},
  keywords= {statistical machine translation, biomedical domain, confusion network decoding},
}
@inproceedings{PubLIMSI-5782 ,
  author = {Pontiki, Maria AND Galanis, Dimitrios AND Papageorgiou, Haris AND Androutsopoulos, Ion AND Manandhar, Suresh AND Al-Smadi, Mohammad AND Al-Ayyoub, Mahmoud AND Zhao, Yanyan AND Qin, Bing AND De Clercq, Orph{\'e}e AND Hoste, V{\'e}ronique AND Apidianaki, Marianna AND Tannier, Xavier},
  title = {{SemEval-2016 Task 5: Aspect Based Sentiment Analysis}},
  booktitle = {{International Workshop on Semantic Evaluation}},
  year = {2016},
  pages = {12p},
  address= {San Diego - US},
}
@inproceedings{PubLIMSI-5769 ,
  author = {Campillos, Leonardo AND Bouamor, Dhouha AND Zweigenbaum, Pierre AND Rosset, Sophie},
  title = {{Managing Linguistic and Terminological Variation in a Medical Dialogue System}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {3167--3173},
  address= {Portoroz - SI},
  keywords= {medical terminology, natural language understanding, virtual patient consultation},
}
@inproceedings{PubLIMSI-5756 ,
  author = {Laurent, Antoine AND Fraga Da Silva, Thiago AND Lamel, Lori AND Gauvain, Jean-Luc},
  title = {{Investigating Techniques for Low Resource Conversational Speech Recognition}},
  booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}},
  year = {2016},
  pages = {5975--5979},
  month=
{2016-03-20 / 2016-03-25},
  address= {Shanghai - CN},
  keywords= {low-resource languages, speech recognition, keyword spotting, conversational speech},
}
% NOTE(review): mis-encoded / misspelled venue names below are normalised to
% "Portoroz", the spelling used by the other entries in this file.
@inproceedings{PubLIMSI-5752 ,
  author = {Ive, Julia AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois AND Ravaud, Philippe},
  title = {{Diagnosing High-Quality Statistical Machine Translation Using Traces of Post-Edition Operations}},
  booktitle = {{International Conference on Language Resources and Evaluation - Workshop on Translation Evaluation: From Fragmented Tools and Data Sets to an Integrated Ecosystem}},
  year = {2016},
  pages = {8p},
  address= {Portoroz - SI},
  keywords= {MT evaluation, high-quality SMT, post-edition},
}
@inproceedings{PubLIMSI-5744 ,
  author = {Xu, Yong AND Yvon, Fran\c{c}ois},
  title = {{Novel elicitation and annotation schemes for sentential and sub-sentential alignments of bitexts}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {628--635},
  month= {2016-05-23 / 2016-05-28},
  address= {Portoroz - SI},
  keywords= {Parallel corpora, Sentence Alignments, Word Alignments, Confidence Estimation},
}
@inproceedings{PubLIMSI-5742 ,
  author = {Xu, Yong AND Yvon, Fran\c{c}ois},
  title = {{A 2D CRF Model for Sentence Alignment}},
  booktitle = {{Workshop on Building and Using Comparable Corpora}},
  year = {2016},
  pages = {1--10},
  month= {2016-05-26},
  address= {Portoroz - SI},
  keywords= {Sentence Alignment, Conditional Random Fields},
}
@inproceedings{PubLIMSI-5702 ,
  author = {Aufrant, Lauriane AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {{Cross-lingual and supervised models for morphosyntactic annotation: a comparison on Romanian}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {7p},
  address= {Portoroz - SI},
  keywords= {Cross-Lingual transfer, Part-of-Speech tagging, Dependency parsing, Romanian},
}
@inproceedings{PubLIMSI-5685 ,
  author = {Apidianaki, Marianna AND Tannier, Xavier AND Richart,
C{\'e}cile},
  title = {{Datasets for Aspect-Based Sentiment Analysis in French}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {5p},
  address= {Portoroz - SI},
}
@inproceedings{PubLIMSI-5672 ,
  author = {Bouamor, Dhouha AND Campillos, Leonardo AND Ligozat, Anne-Laure AND Rosset, Sophie AND Zweigenbaum, Pierre},
  title = {{Transfer-based learning-to-rank assessment of medical term technicality}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year = {2016},
  pages = {4p},
  address= {Portoroz - SI},
  keywords= {Natural Language Processing; Consumer Vocabulary; Learning to Rank},
}
@inproceedings{PubLIMSI-5888 ,
  author = {Gelly, Gregory AND Gauvain, Jean-Luc AND Lamel, Lori AND Laurent, Antoine AND Le Viet, Bac AND Messaoudi, Abdelkhalek},
  title = {{Language Recognition for Dialects and Closely Related Languages}},
  booktitle = {{The Speaker and Language Recognition Workshop}},
  year = {2016},
  pages = {124--131},
  month= {2016-06-21 / 2016-06-24},
  address= {Bilbao - ES},
  keywords= {language and dialect identification, phonotactic, RNN, BLSTM, i-vectors, lexical},
  url = {http://www-tlp.limsi.fr/public/odyssey2016_52.pdf}
}
@inproceedings{PubLIMSI-5550 ,
  author = {Huang, Guangpu AND Gorin, Arseniy AND Gauvain, Jean-Luc AND Lamel, Lori},
  title = {{Machine Translation Based Data Augmentation for Cantonese Keyword Spotting}},
  booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}},
  year = {2016},
  pages = {6020--6024},
  address= {Shanghai - CN},
  keywords= {keyword spotting, data augmentation, language modelling, neural networks, low-resourced languages},
}
@inproceedings{PubLIMSI-5889 ,
  author = {Bredin, Herv{\'e} AND Gelly, Gregory},
  title = {{Improving Speaker Diarization of TV Series using Talking-Face Detection and Clustering}},
  booktitle = {{ACM Multimedia 2016}},
  year = {2016},
  pages = {157--161},
  address= {Amsterdam - NL},
  keywords= {speaker diarization; face
clustering; talking-face detection},
}
@article{PubLIMSI-6648 ,
  author = {Yvon, Fran\c{c}ois},
  title = {{Traduire comme on joue au Go ?}},
  year = {2016},
  journal = {{Le Journal du CNRS}},
  keywords= {Traduction automatique; intelligence artificielle}
}
@article{PubLIMSI-6396 ,
  author = {Kolly, Marie-Jos{\'e} AND Boula De Mareuil, Philippe AND Leemann, Adrian AND Dellwo, Volker},
  title = {{Listeners use temporal information to identify French- and English-accented speech}},
  year = {2016},
  pages = {121--134},
  journal = {{Speech Communication}},
  volume = {86},
  keywords= {linguistique}
}
@article{PubLIMSI-6344 ,
  author = {Hua, Jiewen AND Filaire, Edith AND Giraud, Tom AND Gomez Jauregui, David Antonio AND Soury, Mariette AND Martin, Jean-Claude AND Devillers, Laurence AND Le Scanff, Christine},
  title = {{Predicting a Failure of Public Speaking Performance Using Multidimensional Assessment}},
  AERES = {ACL},
  GROUP = {LIMSI,CPU,TLP},
  year = {2016},
  pages = {197--209},
  journal = {{Journal of Sports Sciences}},
  number = {4},
  keywords= {Public speaking performance, trait anxiety, coping, appraisal, the ratio of cortisol/DHEA}
}
@article{PubLIMSI-6333 ,
  author = {Eyben, Florian AND Scherer, Klaus R. AND Schuller, Bj{\"o}rn W.
AND Sundberg, Johan AND Andr{\'e}, Elisabeth AND Busso, Carlos AND Devillers, Laurence AND Epps, Julien AND Laukka, Petri AND Narayanan, Shrikanth AND Truong, Khiet},
  title = {{The Geneva Minimalistic Acoustic Parameter Set (GeMAPS) for Voice Research and Affective Computing.}},
  year = {2016},
  pages = {190--202},
  journal = {{IEEE Transactions on Affective Computing}},
  volume = {7},
  number = {2},
  keywords= {Affective Computing, Acoustic Features, Standard, Emotion Recognition, Speech Analysis, Geneva Minimalistic Parameter Set},
}
@article{PubLIMSI-6221 ,
  author = {Do, Quoc Khanh AND Allauzen, Alexandre AND Yvon, Fran\c{c}ois},
  title = {{Apprentissage discriminant de mod{\`e}les neuronaux pour la traduction automatique}},
  year = {2016},
  pages = {111--135},
  journal = {{TAL (Traitement Automatique des Langues)}},
  volume = {57},
  number = {1},
  keywords= {mod{\`e}les neuronaux, traduction automatique statistique, apprentissage discriminant},
}
@article{PubLIMSI-6220 ,
  author = {Fort, Kar{\"e}n AND Adda, Gilles AND Cohen, Kevin},
  title = {{{\'E}thique et traitement automatique des langues et de la parole : entre truismes et tabous}},
  year = {2016},
  pages = {7--19},
  journal = {{TAL (Traitement Automatique des Langues)}},
  volume = {57},
  number = {2},
  keywords= {{\'e}thique ; d{\'e}ontologie ; {\'e}valuation},
}
% NOTE(review): the keywords of PubLIMSI-6063 were fused into one run with no
% separators; the split below is a reconstruction -- confirm the term list.
@article{PubLIMSI-6063 ,
  author = {Mariani, Joseph-Jean AND Paroubek, Patrick AND Francopoulo, Gil AND Hamon, Olivier},
  title = {{Rediscovering 15+2 Years of Discoveries in Language Resources and Evaluation}},
  year = {2016},
  pages = {165--220},
  journal = {{Language Resources and Evaluation}},
  volume = {50},
  number = {2},
  keywords= {ELRA Anthology, Language resources, Language processing systems evaluation, Text analytics, Social networks, ISLRN, Bibliometrics, Scientometrics},
}
@article{PubLIMSI-6040 ,
  author = {Boula De Mareuil, Philippe},
  title = {{Les accents du fran\c{c}ais : une po{\'e}sie mais aussi une science}},
  year = {2016},
  pages = {11--12},
  journal = {{Ha{\"i}ti Monde}},
number = {35-36},
  keywords= {linguistique}
}
@article{PubLIMSI-5675 ,
  author = {Tahon, Marie AND Devillers, Laurence},
  title = {{Towards a small set of robust acoustic features for emotion recognition}},
  year = {2016},
  pages = {16--28},
  journal = {{IEEE/ACM Transactions on Audio, Speech, and Language Processing}},
  volume = {24},
  number = {1},
  keywords= {emotion detection, cross-corpus, affective and social dimension in spoken interaction}
}
@article{PubLIMSI-5823 ,
  author = {P{\'e}cheux, Nicolas AND Allauzen, Alexandre AND Niehues, Jan AND Yvon, Fran\c{c}ois},
  title = {{Reordering Space Design in Statistical Machine Translation}},
  year = {2016},
  pages = {375--410},
  journal = {{Language Resources and Evaluation}},
  volume = {50},
  number = {2},
  keywords= {Statistical Machine Translation, Reordering Models}
}
@article{PubLIMSI-5815 ,
  author = {Boula De Mareuil, Philippe AND Rilliard, Albert AND Ivent, Fanny AND Kozhevina, Varvara},
  title = {{Une {\'e}tude prosodique comparative des questions en fran\c{c}ais en contact avec l'occitan et le catalan}},
  year = {2016},
  pages = {75--91},
  journal = {{Langages}},
  volume = {202},
  keywords= {Prosodie en contact, accents m{\'e}ridionaux en fran\c{c}ais, langues en danger, occitan, catalan}
}
@article{PubLIMSI-5810 ,
  author = {McCarthy, Diana AND Apidianaki, Marianna AND Erk, Katrin},
  title = {{Word Sense Clustering and Clusterability}},
  year = {2016},
  pages = {245--275},
  journal = {{Computational Linguistics}},
  volume = {42},
  number = {2},
  keywords= {word sense clustering, clusterability}
}
@article{PubLIMSI-5566,
  author = {Rehm, Georg AND Uszkoreit, Hans AND Ananiadou, Sophia AND Bel, Nuria AND Bieleviciene, Audrone AND Borin, Lars AND Branco, Ant{\'o}nio AND Budin, Gerhard AND Calzolari, Nicoletta AND Daelemans, Walter AND Garabik, Radovan AND Grobelnik, Marko AND Garcia-Mateo, Carmen AND Genabith, Josef AND Haji{\v{c}}, Jan AND Hernaez, Inma AND Judge, John AND Koeva, Svetla AND Krek, Simon AND Krstev, Cvetana AND Linden, Krister AND
Magnini, Bernardo AND Mariani, Joseph-Jean AND McNaught, John AND Melero, Maite AND Monachini, Monica AND Moreno, Asunci{\'o}n AND Odijk, Jan AND Ogrodniczuk, Maciej AND P{\k{e}}zik, Piotr AND Piperidis, Stelios AND Przepiorkowski, Adam AND R{\"o}gnvaldsson, Eir{\'i}kur AND Rosner, Mike AND Pedersen, Bolette Sandford AND Skadi{\c{n}}a, Inguna AND Smedt, Koenraad AND Tadic, Marko AND Thompson, Paul AND Tufi{\c{s}}, Dan AND Varadi, Tamas AND Vasi{\c{l}}jevs, Andrejs AND Vider, Kadri AND Zabarskaite, Jolanta},
  title = {{The Strategic Impact of META-NET on the Regional, National and International Level}},
  year = {2016},
  pages = {351--374},
  journal = {{Language Resources and Evaluation}},
  volume = {50},
  number = {2},
  keywords= {Language technology Multilingual technologies Machine translation Language resources META-NET META-SHARE},
}
% NOTE(review): the author list above had accent commands with the backslash
% stripped (e.g. "Pkezik", "Tufics"); they are restored here. {\k{e}} (ogonek)
% presumably needs a T1 font encoding in the citing document -- confirm.
% NOTE(review): the next entry describes the same paper as PubLIMSI-6329
% (same title and pages) under a second key -- confirm which key is cited.
@inproceedings{Povolny:2016:MER:2988257.2988268,
  author = {Povolny, Filip and Matejka, Pavel and Hradis, Michal and Popkov{\'a}, Anna and Otrusina, Lubomir and Smrz, Pavel and Wood, Ian and Robin, Cecile and Lamel, Lori},
  title = {Multimodal Emotion Recognition for AVEC 2016 Challenge},
  booktitle = {Proceedings of the 6th International Workshop on Audio/Visual Emotion Challenge},
  series = {AVEC '16},
  year = {2016},
  isbn = {978-1-4503-4516-3},
  location = {Amsterdam, The Netherlands},
  pages = {75--82},
  numpages = {8},
  url = {http://www-tlp.limsi.fr/public/avec2016.pdf},
  acmid = {2988268},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {arousal, bottleneck features, emotion recognition, neural networks, regression, speech transcription, valence, word embedding},
}
@inproceedings{Wanner:2016:TMK:2927006.2927011,
  author = {Wanner, Leo and Blat, Josep and Dasiopoulou, Stamatia and Dom{\'\i}nguez, M{\'o}nica and Llorach, Gerard and Mille, Simon and Sukno, Federico and Kamateri, Eleni and Vrochidis, Stefanos and Kompatsiaris, Ioannis and Andr{\'e}, Elisabeth and Lingenfelser, Florian and Mehlmann, Gregor and Stam, Andries and Stellingwerff, Ludo and
Vieru, Bianca and Lamel, Lori and Minker, Wolfgang and Pragst, Louisa and Ultes, Stefan},
  title = {Towards a Multimedia Knowledge-Based Agent with Social Competence and Human Interaction Capabilities},
  booktitle = {Proceedings of the 1st International Workshop on Multimedia Analysis and Retrieval for Multimodal Interaction},
  series = {MARMI '16},
  year = {2016},
  isbn = {978-1-4503-4362-6},
  location = {New York, New York, USA},
  pages = {21--26},
  numpages = {6},
  url = {http://doi.acm.org/10.1145/2927006.2927011},
  doi = {10.1145/2927006.2927011},
  acmid = {2927011},
  publisher = {ACM},
  address = {New York, NY, USA},
  keywords = {dialogue, embodied agent, multimodal communication, retrieval},
}
% NOTE(review): the next entry describes the same paper as PubLIMSI-6326
% (same title and pages) under a second key -- confirm which key is cited.
@inproceedings{Renwick+2016,
  author={Margaret E. L. Renwick and Ioana Vasilescu and Camille Dutrey and Lori Lamel and Bianca Vieru},
  title={Marginal Contrast Among {Romanian} Vowels: Evidence from {ASR} and Functional Load},
  year=2016,
  booktitle={Interspeech 2016},
  url = {http://www-tlp.limsi.fr/public/is2016-0762-ioana.PDF},
  pages={2433--2437}
}
@inproceedings{Gorin+2016,
  author={Arseniy Gorin and Rasa Lileikyte and Guangpu Huang and Lori Lamel and Jean-Luc Gauvain and Antoine Laurent},
  title={Language Model Data Augmentation for Keyword Spotting in Low-Resourced Training Conditions},
  year=2016,
  booktitle={Interspeech 2016},
  url = {http://www-tlp.limsi.fr/public/is2016-1200-gorin.PDF},
  pages={775--779},
  address= {San Francisco - US},
  keywords= {speech recognition, text augmentation, language modeling, machine translation, low-resourced languages}
}
% NOTE(review): the next entry describes the same paper as PubLIMSI-6259
% (same title and pages) under a second key -- confirm which key is cited.
@inproceedings{AddaSAABBBGHIKL16,
  author = {Gilles Adda and Sebastian St{\"{u}}ker and Martine Adda{-}Decker and Odette Ambouroue and Laurent Besacier and David Blachon and H{\'{e}}l{\`{e}}ne Bonneau{-}Maynard and Pierre Godard and Fatima Hamlaoui and Dmitry Idiatov and Guy{-}No{\"{e}}l Kouarata and Lori Lamel and Emmanuel{-}Moselly Makasso and Annie Rialland and Mark Van de Velde and Fran{\c{c}}ois Yvon and Sabine Zerbian},
  title = {Breaking the Unwritten Language Barrier:
The {BULB} Project},
  booktitle = {SLTU-2016, 5th Workshop on Spoken Language Technologies for Under-resourced languages, 9--12 May 2016, Yogyakarta, Indonesia},
  pages = {8--14},
  year = {2016},
  url = {http://www-tlp.limsi.fr/public},
  timestamp = {Sat, 16 Sep 2017 12:07:27 +0200},
  biburl = {https://dblp.org/rec/bib/conf/sltu/AddaSAABBBGHIKL16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Page ranges use the en-dash form "--"; initials are space-separated ("J. L.").
@InProceedings{lileikyte16a,
  author = {R. Lileikyte and A. Gorin and L. Lamel and J. L. Gauvain and T. Fraga-Silva},
  title = {{Lithuanian Broadcast Speech Transcription Using Semi-Supervised Acoustic Model Training}},
  booktitle = {SLTU-2016},
  year = {2016},
  address = {Yogyakarta, Indonesia},
  pages = {107--113},
  url = {http://www-tlp.limsi.fr/public/SLTU-2016-lileikyte.pdf}
}

@InProceedings{huang16a,
  author = {G. Huang and A. Gorin and J. L. Gauvain and L. Lamel},
  title = {{Machine Translation based data augmentation for Cantonese keyword spotting}},
  booktitle = ieeeicassp,
  year = {2016},
  address = {Shanghai, China},
  pages = {6020--6024},
  url = {http://www-tlp.limsi.fr/public/0006020_icassp16lmda.pdf}
}

@InProceedings{Laurent16a,
  author = {A. Laurent and T. Fraga-Silva and L. Lamel and J. L.
Gauvain},
  title = {{Investigating techniques for low resource conversational speech recognition}},
  booktitle = ieeeicassp,
  year = {2016},
  address = {Shanghai, China},
  pages = {5975--5979},
  url = {http://www-tlp.limsi.fr/public/0005975_icassp16babel.pdf}
}

% "Le, Viet Bac" matches the spelling of this author in the other PubLIMSI entries.
@inproceedings{PubLIMSI-5887,
  author = {Gelly, Gregory and Gauvain, Jean-Luc and Le, Viet Bac and Messaoudi, Abdelkhalek},
  title = {{A Divide-and-Conquer Approach for Language Identification Based on Recurrent Neural Networks}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2016)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2016},
  month = sep,
  pages = {3231--3235},
  address = {San Francisco, CA},
  url = {http://www-tlp.limsi.fr/public/IS160180.PDF}
}

%%%%%%%%%% 2015 section

@book{PubLIMSI-5465,
  author = {Nouvel, Damien and Ehrmann, Maud and Rosset, Sophie},
  title = {{Les entit{\'e}s nomm{\'e}es pour le traitement automatique des langues}},
  year = {2015},
  pages = {167},
  publisher = {Iste Editions},
  ISBN = {978-1-78405-104-4},
  keywords = {entit{\'e}s nomm{\'e}es}
}

@inproceedings{PubLIMSI-6283,
  author = {Fraga Da Silva, Thiago and Laurent, Antoine and Gauvain, Jean-Luc and Lamel, Lori and Le, Viet Bac and Messaoudi, Abdelkhalek},
  title = {{Improving data selection for low-resource STT and KWS}},
  booktitle = {{IEEE Automatic Speech Recognition and Understanding Workshop}},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2015},
  pages = {153--159},
  address = {Scottsdale - US},
  keywords = {data selection, low-resource languages, speech recognition, keyword spotting}
}

% NOTE(review): the next entry is dated 2014 although it is filed under the 2015 section.
@inproceedings{PubLIMSI-4579,
  author = {Bluche, Th{\'e}odore and Louradour, J{\'e}r{\^o}me and Knibbe, Maxime and Moysset, Bastien and Benzeghiba, Faouzi and Kermorvant, Christopher},
  title = {{The A2iA Arabic Handwritten text recognition system at the OpenHaRT2013 Evaluation}},
  booktitle = {{International Workshop on Document Analysis Systems}},
  year = {2014},
  pages = {161--165},
  month= {2014-04-07 /
2014-04-10}, address= {Tours - FR}, keywords= {OpenHaRT, Recurrent Neural Networks, ROVER, Large vocabulary Handwriting Recognition}, }

% In the entries below, "month" holds the standard three-letter macro;
% the exact conference days are kept in "eventdate" (ISO 8601).
@inproceedings{PubLIMSI-5622,
  author = {Vasilescu, Ioana and Dutrey, Camille and Lamel, Lori},
  title = {{Large Scale Data Based Linguistic Investigations Using Speech Technology Tools: the Case of Romanian}},
  booktitle = {{International Conference on Speech Technology and Human-Computer Dialogue}},
  year = {2015},
  month = oct,
  eventdate = {2015-10-14/2015-10-17},
  pages = {6p},
  address = {Bucharest - RO},
  keywords = {ASR, phonetics, Romanian vocalic system}
}

@inproceedings{PubLIMSI-5660,
  author = {Marie, Benjamin and Apidianaki, Marianna},
  title = {{Alignment-based sense selection in METEOR and the RATATOUILLE recipe}},
  booktitle = {{Workshop on Machine Translation}},
  year = {2015},
  month = sep,
  eventdate = {2015-09-17/2015-09-18},
  pages = {385--391},
  address = {Lisbon - PT}
}

@inproceedings{PubLIMSI-5561,
  author = {Francopoulo, Gil and Mariani, Joseph-Jean and Paroubek, Patrick},
  title = {{NLP4NLP: the cobbler's children won't go unshod}},
  booktitle = {{International Workshop on Mining Scientific Publications}},
  year = {2015},
  pages = {6p},
  address = {Knoxville - US}
}

@inproceedings{PubLIMSI-5562,
  author = {Francopoulo, Gil and Mariani, Joseph-Jean and Paroubek, Patrick},
  title = {{NLP4NLP: Applying NLP to Scientific Corpora about Written and Spoken Language Processing}},
  booktitle = {{International Society of Scientometrics and Bibliometrics Conference}},
  year = {2015},
  month = jun,
  eventdate = {2015-06-29},
  pages = {5--11},
  address = {Istanbul - TR}
}

@inproceedings{PubLIMSI-5553,
  author = {Rialland, Annie and Embanga Aborobongui, Martial and Adda-Decker, Martine and Lamel, Lori},
  title = {{Dropping of the Class-Prefix Consonant, Vowel Elision and Automatic Phonological Mining in Embosi (Bantu C 25)}},
  booktitle = {{Annual Conference on African Linguistics}},
  year = {2015},
  month = mar,
  eventdate = {2015-03-07/2015-03-10},
  pages = {221--230},
  address = {Georgetown - US},
  keywords= {consonantal
dropping ; vowel lengthening ; Bantu C25 ; Embosi ; automatic phonological mining}, }

@inproceedings{PubLIMSI-5558,
  author = {Mariani, Joseph-Jean},
  title = {{Technologies de la Langue : Etat des lieux}},
  booktitle = {{Technologies pour les Langues r{\'e}gionales de France}},
  year = {2015},
  pages = {42},
  address = {Meudon - FR}
}

% The ampersand in the conference name is escaped as \& ; a bare one is a LaTeX alignment character.
@inproceedings{PubLIMSI-5560,
  author = {Mariani, Joseph-Jean and Paroubek, Patrick and Francopoulo, Gil and Vetulani, Z.},
  title = {{Rediscovering 10 to 20 years of Discoveries in Language and Technology}},
  booktitle = {{Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics}},
  year = {2015},
  pages = {19p},
  address = {Poznan - PL}
}

@inproceedings{PubLIMSI-5529,
  author = {Letard, Vincent and Rosset, Sophie and Illouz, Gabriel},
  title = {{Analogical Reasoning for Natural to Formal Language Transfer}},
  booktitle = {{IEEE International Conference on Tools with Artificial Intelligence}},
  year = {2015},
  pages = {8p},
  address = {Vietri sul Mare - IT},
  keywords = {analogical reasoning, language transfer, incremental learning, formal analogy}
}

@inproceedings{PubLIMSI-5548,
  author = {Gorin, Arseniy and Lamel, Lori and Gauvain, Jean-Luc and Fraga Da Silva, Thiago},
  title = {{On improving speech recognition and keyword spotting with automatically generated morphological units}},
  booktitle = {{Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics}},
  year = {2015},
  pages = {5p},
  address = {Poznan - PL},
  keywords = {speech recognition, keyword spotting, low resourced languages}
}

@inproceedings{PubLIMSI-5496,
  author = {B{\'e}chade, Lucile and Dubuisson Duplessis, Guillaume and Sehili, Mohamed El Amine and Devillers, Laurence},
  title = {{Behavioral and Emotional Spoken Cues Related to Mental States in Human-Robot Social Interaction}},
  booktitle = {{International Conference on Multimodal Interaction}},
  year = {2015},
pages = {347--350},
  address = {Seattle - US},
  keywords = {User/Machine Systems, Experimentation, Human Factors}
}

% "month" holds the standard macro; the conference days are kept in "eventdate" (ISO 8601).
@inproceedings{PubLIMSI-5515,
  author = {Sabouret, Nicolas and Schuller, Bjoern and Paletta, Lucas and Marchi, Erik and Jones, Haza{\"e}l and Ben Youssef, Atef},
  title = {{Intelligent User Interfaces in Digital Games for Empowerment and Inclusion}},
  booktitle = {{International Conference on Advances in Computer Entertainment Technology}},
  year = {2015},
  month = nov,
  eventdate = {2015-11-16/2015-11-19},
  pages = {8:1--8:8},
  address = {Iskandar - MY}
}

@inproceedings{PubLIMSI-5493,
  author = {Campillos, Leonardo and Bouamor, Dhouha and Ligozat, Anne-Laure and Bilinski, Eric and Zweigenbaum, Pierre and Rosset, Sophie},
  title = {{Un syst{\`e}me de dialogue pour dynamiser un patient virtuel}},
  booktitle = {{Colloque sur la gamification et les jeux s{\'e}rieux en sant{\'e}}},
  year = {2015},
  pages = {5p},
  address = {Nice - FR},
  keywords = {Natural Language Processing; Serious Games; Virtual Patient; Dialogue System; Human-Machine Dialogue}
}

@inproceedings{PubLIMSI-5495,
  author = {Devillers, Laurence and Rosset, Sophie and Dubuisson Duplessis, Guillaume and Sehili, Mohamed El Amine and B{\'e}chade, Lucile and Delaborde, Agn{\`e}s and Gossart, Cl{\'e}ment and Letard, Vincent and Yang, Fan and Yemez, Yucel and Turker, Bekir and Sezgin, Metin T.
and El Haddad, K{\'e}vin and Dupont, St{\'e}phane and Luzzati, Daniel and Est{\`e}ve, Yannick and Gilmartin, Emer and Campbell, Nick},
  title = {{Multimodal data collection of human-robot humorous interactions in the Joker project}},
  booktitle = {{International Conference on Affective Computing and Intelligent Interaction}},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2015},
  pages = {348--354},
  address = {Xi'an - CN},
  keywords = {Multimodal Data, Human-Robot Interaction, Humorous Robot}
}

@inproceedings{PubLIMSI-5418,
  author = {Yang, Fan and Sehili, Mohamed El Amine and Barras, Claude and Devillers, Laurence},
  title = {{Smile and laughter detection for elderly people-robot interaction}},
  booktitle = {{International Conference on Social Robotics}},
  year = {2015},
  pages = {694--703},
  address = {Paris - FR}
}

% Accented letters are written as brace groups, e.g. {\c{c}}, so BibTeX treats each as a single character.
@inproceedings{PubLIMSI-5477,
  author = {Burlot, Franck and Yvon, Fran{\c{c}}ois},
  title = {{Morphology-Aware Alignments for Translation to and from a Synthetic Language}},
  booktitle = {{International Workshop on Spoken Language Translation}},
  year = {2015},
  pages = {188--195},
  address = {Da Nang - VN},
  keywords = {machine translation ; phrase-based MT ; alignments ; factored alignment model ; morphology ; morphologically rich languages ; synthetic \& analytical languages}
}

@inproceedings{PubLIMSI-5417,
  author = {Knyazeva, Elena and Wisniewski, Guillaume and Bredin, Herv{\'e} and Yvon, Fran{\c{c}}ois},
  title = {{Structured Prediction for Speaker Identification in TV Series}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2015},
  pages = {195--199},
  address = {Dresden - DE},
  keywords = {speaker identification, speaker diarization, sequence labeling, structured prediction}
}

% Same title and authors as LileikyteLG15; both keys are kept so existing citations resolve.
@inproceedings{PubLIMSI-5415,
  author = {Lileikyt{\'e}, Rasa and Lamel, Lori and Gauvain, Jean-Luc},
  title = {{Conversational Telephone Speech Recognition for Lithuanian}},
  booktitle = {{International Conference on Statistical Language and Speech
Processing}},
  year = {2015},
  pages = {164--172},
  address = {Budapest - HU},
  keywords = {conversational telephone speech, Lithuanian, KWS, STT}
}

% "month" holds the standard macro; the workshop days are kept in "eventdate" (ISO 8601).
@inproceedings{PubLIMSI-5402,
  author = {Niehues, Jan and Do, Quoc Khanh and Allauzen, Alexandre and Waibel, Alex},
  title = {{ListNet-based MT Rescoring}},
  booktitle = {{Workshop on Machine Translation}},
  year = {2015},
  month = sep,
  eventdate = {2015-09-17/2015-09-18},
  pages = {248--255},
  address = {Lisbon - PT},
  keywords = {Statistical Machine Translation}
}

@inproceedings{PubLIMSI-5404,
  author = {Wisniewski, Guillaume and P{\'e}cheux, Nicolas and Yvon, Fran{\c{c}}ois},
  title = {{Why Predicting Post-Edition is so Hard? Failure Analysis of LIMSI Submission to the APE Shared Task}},
  booktitle = {{Workshop on Machine Translation}},
  year = {2015},
  month = sep,
  eventdate = {2015-09-17/2015-09-18},
  pages = {222--227},
  address = {Lisbon - PT}
}

@inproceedings{PubLIMSI-5389,
  author = {Poignant, Johann and Bredin, Herv{\'e} and Barras, Claude},
  title = {{Multimodal Person Discovery in Broadcast TV at MediaEval 2015}},
  booktitle = {{MediaEval}},
  year = {2015},
  pages = {3p},
  address = {Wurzen - DE}
}

@inproceedings{PubLIMSI-5390,
  author = {Poignant, Johann and Bredin, Herv{\'e} and Barras, Claude},
  title = {{LIMSI at MediaEval 2015: Person Discovery in Broadcast TV Task}},
  booktitle = {{MediaEval}},
  year = {2015},
  pages = {3p},
  address = {Wurzen - DE}
}

% NOTE(review): second author's surname entered as L{\"o}ser (umlaut); the source had a grave accent. Confirm against the paper.
@inproceedings{PubLIMSI-5398,
  author = {Labeau, Matthieu and L{\"o}ser, Kevin and Allauzen, Alexandre},
  title = {{Non-lexical neural architecture for fine-grained POS Tagging}},
  booktitle = {{Conference on Empirical Methods in Natural Language Processing}},
  year = {2015},
  pages = {232--237},
  address = {Lisbon - PT},
  keywords = {Neural Network, Character-based embeddings, POS-tagging}
}

@inproceedings{PubLIMSI-5386,
  author = {Thlithi, Marwa and Barras, Claude and Pinquier, Julien and Pellegrini, Thomas},
  title = {{Singer diarization : application to ethnomusicological recordings}},
  booktitle = {{International Workshop
on Folk Music Analysis}},
  year = {2015},
  pages = {124--125},
  address = {Paris - FR}
}

@inproceedings{PubLIMSI-5387,
  author = {Bruneau, Pierrick and Stefas, Micka{\"e}l and Bredin, Herv{\'e} and Poignant, Johann and Tamisier, Thomas and Barras, Claude},
  title = {{A Visual Analytics Approach to Finding Factors Improving Automatic Speaker Identifications}},
  booktitle = {{International Conference on Multimodal Interaction}},
  year = {2015},
  pages = {323--326},
  address = {Seattle - US},
  keywords = {Speaker identification; Visual Analytics}
}

% NOTE(review): first author's given name entered as "Mateusz" (source had "Matheuz"). Confirm against the paper.
@inproceedings{PubLIMSI-5388,
  author = {Budnik, Mateusz and Besacier, Laurent and Poignant, Johann and Bredin, Herv{\'e} and Barras, Claude and Stefas, Micka{\"e}l and Bruneau, Pierrick and Tamisier, Thomas},
  title = {{Collaborative Annotation for Person Identification in TV Shows}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2015},
  pages = {2607--2608},
  address = {Dresden - DE},
  keywords = {multimodal person identification, collaborative annotation, active learning, data collection}
}

% The keyword "named entity recognition" had a line-break hyphen ("recog- nition") in the two entries below.
@inproceedings{PubLIMSI-5383,
  author = {Ben Jannet, Mohamed and Galibert, Olivier and Adda-Decker, Martine and Rosset, Sophie},
  title = {{How to evaluate ASR output for Named Entity Recognition?}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2015},
  pages = {5p},
  address = {Dresden - DE},
  keywords = {speech recognition, ATENE, named entity recognition, metric}
}

@inproceedings{PubLIMSI-5384,
  author = {Ben Jannet, Mohamed and Galibert, Olivier and Adda-Decker, Martine and Rosset, Sophie},
  title = {{How to evaluate ASR errors impact on NER?}},
  booktitle = {{Errors by Humans and Machines in Multimedia, Multimodal, Multilingual Data Processing}},
  year = {2015},
  pages = {6p},
  address = {Sinaia - RO},
  keywords = {speech recognition, ATENE, named entity recognition, metric}
}

@inproceedings{PubLIMSI-5377,
  author = {Dubuisson Duplessis, Guillaume and
Devillers, Laurence},
  title = {{Towards the Consideration of Dialogue Activities in Engagement Measures for Human-Robot Social Interaction}},
  booktitle = {{International Conference on Intelligent Robots and Systems}},
  year = {2015},
  pages = {19--24},
  address = {Hamburg - DE},
  keywords = {Human-Robot Interaction, Social Dialogue, Engagement}
}

% City names use the English spelling ("Dresden", "Hamburg"), as in the other Interspeech 2015 entries.
@inproceedings{PubLIMSI-5378,
  author = {Dubuisson Duplessis, Guillaume and B{\'e}chade, Lucile and Sehili, Mohamed El Amine and Delaborde, Agn{\`e}s and Letard, Vincent and Ligozat, Anne-Laure and Del{\'e}glise, Paul and Est{\`e}ve, Yannick and Rosset, Sophie and Devillers, Laurence},
  title = {{Nao is doing humour in the CHIST-ERA JOKER project}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2015},
  pages = {1072--1073},
  address = {Dresden - DE},
  keywords = {human-robot social interaction, humour}
}

@inproceedings{PubLIMSI-5376,
  author = {de Mazancourt, Hugues and Couillault, Alain and Adda, Gilles and Recourc{\'e}, Ga{\"e}lle},
  title = {{Faire du TAL sur des donn{\'e}es personnelles : un oxymore ?}},
  booktitle = {{Ethique et TRaitemeNt Automatique des Langues}},
  year = {2015},
  month = jun,
  eventdate = {2015-06-22},
  pages = {6p},
  address = {Caen - FR},
  keywords = {Donn{\'e}es priv{\'e}es, Big Data, Ethique}
}

@inproceedings{PubLIMSI-5331,
  author = {Kolly, Marie-Jos{\'e} and Leemann, Adrian and Boula De Mareuil, Philippe and Dellwo, Volker},
  title = {{Speaker-idiosyncrasy in pausing behavior: evidence from a cross-linguistic study}},
  booktitle = {{International Congress of Phonetic Sciences}},
  year = {2015},
  pages = {5p},
  address = {Glasgow - GB},
  keywords = {linguistique}
}

% NOTE(review): first author's surname entered as "Scherrer" (source had "Cherrer"). Confirm against the paper.
@inproceedings{PubLIMSI-5332,
  author = {Scherrer, Yves and Boula De Mareuil, Philippe and Goldman, Jean-Philippe},
  title = {{Crowdsourced mapping of pronunciation variants in European French}},
  booktitle = {{International Congress of Phonetic Sciences}},
  year = {2015},
  pages = {5p},
  address = {Glasgow - GB},
  keywords=
{linguistique}, }

@inproceedings{PubLIMSI-5295,
  author = {Campillos, Leonardo and Bouamor, Dhouha and Bilinski, Eric and Ligozat, Anne-Laure and Zweigenbaum, Pierre and Rosset, Sophie},
  title = {{Description of the PatientGenesys Dialogue System}},
  booktitle = {{Annual Meeting of the Special Interest Group on Discourse and Dialogue}},
  year = {2015},
  pages = {3p},
  address = {Prague - CZ},
  keywords = {Natural Language Processing; Dialogue System; Virtual Patient; Human-Machine Communication}
}

% Accented letters are written as brace groups, e.g. {\c{c}}, so BibTeX treats each as a single character.
@inproceedings{PubLIMSI-5304,
  author = {Do, Quoc Khanh and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title = {{A Discriminative Training Procedure for Continuous Translation Models}},
  booktitle = {{Conference on Empirical Methods in Natural Language Processing}},
  year = {2015},
  pages = {1046--1052},
  address = {Lisbon - PT},
  keywords = {Discriminative training, Neural network, Continuous space model, Translation model}
}

% The title is double-braced (printed literally), so "Japanese" is capitalised in the data itself.
@inproceedings{PubLIMSI-5319,
  author = {Shochi, Takaaki and Fourer, Dominique and Rouas, Jean-Luc and Guerry, Marine and Rilliard, Albert},
  title = {{Perceptual evaluation of spoken Japanese attitudes}},
  booktitle = {{International Congress of Phonetic Sciences}},
  year = {2015},
  pages = {5p},
  address = {Glasgow - GB},
  keywords = {social affects, spoken Japanese attitude, affective prosody}
}

@inproceedings{PubLIMSI-5282,
  author = {Rosset, Sophie and Campillos, Leonardo and Bouamor, Dhouha and Bilinski, Eric and Ligozat, Anne-Laure and Zweigenbaum, Pierre},
  title = {{Un patient virtuel dialogant}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2015},
  pages = {2p},
  address = {Caen - FR},
  keywords = {Patient virtuel, syst{\`e}me de dialogue, langage sp{\'e}cialis{\'e}, langage grand public}
}

@inproceedings{PubLIMSI-5283,
  author = {Rosset, Sophie and Leleu, J{\'e}r{\^o}me and Caillat-Grenier, R{\'e}gis and Pierard, Nathalie and Rica, Philippe and Granry, Jean-Claude and Lehousse, Thierry and Pereira, Suzanne and
Bretier, Philippe and Rosec, Olivier and Bilinski, Eric and Bouamor, Dhouha and Campillos, Leonardo and Grau, Brigitte and Ligozat, Anne-Laure and Zweigenbaum, Pierre},
  title = {{Patient Genesys : Outil de cr{\'e}ation de cas cliniques de simulation m{\'e}dicale proposant des cas patients virtuels en 3D}},
  booktitle = {{Conf{\'e}rence Nationale sur les Applications Pratiques de l'Intelligence Artificielle}},
  year = {2015},
  month = jun,
  eventdate = {2015-06-29/2015-07-01},
  pages = {2p},
  address = {Rennes - FR},
  keywords = {Simulation ; Formation m{\'e}dicale ; Syst{\`e}me de dialogue ; Patient virtuel ; Synth{\`e}se de voix expressives}
}

% The quotation marks around "easy-first" were mis-encoded bytes in the source; they are
% restored as French guillemets. NOTE(review): \guillemotleft / \guillemotright assume a
% T1-encoded font -- confirm the documents using this file load fontenc with T1.
@inproceedings{PubLIMSI-5277,
  author = {Knyazeva, Elena and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title = {{Apprentissage par imitation pour l'{\'e}tiquetage de s{\'e}quences : vers une formalisation des m{\'e}thodes d'{\'e}tiquetage {\guillemotleft}~easy-first~{\guillemotright}}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2015},
  pages = {12p},
  address = {Caen - FR},
  keywords = {Apprentissage par Imitation, Apprentissage Structur{\'e}, {\'E}tiquetage de S{\'e}quences}
}

@inproceedings{PubLIMSI-5267,
  author = {Apidianaki, Marianna and Marie, Benjamin},
  title = {{METEOR-WSD: Improved Sense Matching in MT Evaluation}},
  booktitle = {{Syntax, Semantics and Structure in Statistical Translation}},
  year = {2015},
  pages = {49--51},
  address = {Denver - US},
  keywords = {Machine Translation, MT evaluation, Word Sense Disambiguation}
}

@inproceedings{PubLIMSI-5272,
  author = {Do, Quoc Khanh and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title = {{Apprentissage discriminant des mod{\`e}les continus de traduction}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2015},
  pages = {267--278},
  address = {Caen - FR},
  keywords = {Mod{\`e}le neuronal de traduction, traduction automatique par approche statistique, apprentissage discriminant}
}

% Same title and authors as IS2015_yucatec; both keys are kept so existing citations resolve.
@inproceedings{PubLIMSI-5264,
  author = {Vapnarsky,
Valentina and Barras, Claude and Becquey, C{\'e}dric and Doukhan, David and Adda-Decker, Martine and Lamel, Lori},
  title = {{Analysing rhythm in ritual discourse in Yucatec Maya using automatic speech alignment}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year = {2015},
  pages = {344--348},
  address = {Dresden - DE},
  keywords = {ethnolinguistic, Yucatec Maya, ritual discourse, automatic alignment, phonetic segmentation, tempo}
}

% Author lists use the lowercase "and" separator.
@inproceedings{PubLIMSI-5241,
  author = {Bluche, Th{\'e}odore and Kermorvant, Christopher and Louradour, J{\'e}r{\^o}me},
  title = {{Where to Apply Dropout in Recurrent Neural Networks for Handwriting Recognition?}},
  booktitle = {{International Conference on Document Analysis and Recognition}},
  year = {2015},
  pages = {5p},
  address = {Gammarth - TN}
}

@inproceedings{PubLIMSI-5242,
  author = {Bluche, Th{\'e}odore and Ney, Hermann and Louradour, J{\'e}r{\^o}me and Kermorvant, Christopher},
  title = {{Framewise and CTC Training of Neural Networks for Handwriting Recognition}},
  booktitle = {{International Conference on Document Analysis and Recognition}},
  year = {2015},
  pages = {5p},
  address = {Gammarth - TN}
}

@inproceedings{PubLIMSI-5226,
  author = {Grouin, Cyril and Moriceau, V{\'e}ronique and Rosset, Sophie and Zweigenbaum, Pierre},
  title = {{Identification de facteurs de risque pour des patients diab{\'e}tiques {\`a} partir de comptes-rendus cliniques par des approches hybrides}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  AERES = {ACTN},
  GROUP = {LIMSI,ILES,TLP},
  year = {2015},
  pages = {12p},
  address = {Caen - FR},
  keywords = {Comptes-rendus hospitaliers, extraction d'information, apprentissage statistique}
}

@inproceedings{PubLIMSI-5240,
  author = {Bluche, Th{\'e}odore and Ney, Hermann and Kermorvant, Christopher},
  title = {{The LIMSI Handwriting Recognition System for the HTRtS 2014 Contest}},
  booktitle = {{International Conference on Document Analysis and
Recognition}},
  year = {2015},
  pages = {5p},
  address = {Gammarth - TN}
}

@inproceedings{PubLIMSI-5221,
  author = {P{\'e}cheux, Nicolas and Allauzen, Alexandre and Lavergne, Thomas and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title = {{Oublier ce qu'on sait, pour mieux apprendre ce qu'on ne sait pas : une {\'e}tude sur les contraintes de type dans les mod{\`e}les CRF}},
  booktitle = {{Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles}},
  year = {2015},
  pages = {12p},
  address = {Caen - FR},
  keywords = {{\'E}tiquetage Morpho-Syntaxique; Apprentissage Statistique; Champs Markoviens Al{\'e}atoires}
}

@inproceedings{PubLIMSI-5222,
  author = {Apidianaki, Marianna and Gong, Li},
  title = {{LIMSI: Translations as source of indirect supervision for multilingual all-words sense disambiguation and entity linking}},
  booktitle = {{International Workshop on Semantic Evaluation}},
  year = {2015},
  pages = {298--302},
  address = {Denver - US},
  keywords = {multilingual Word Sense Disambiguation, translation alignment}
}

@article{PubLIMSI-6334,
  author = {Devillers, Laurence and Tahon, Marie and Sehili, Mohamed El Amine and Delaborde, Agn{\`e}s},
  title = {{Inference of Human Beings' Emotional States from Speech in Human-Robot Interactions}},
  journal = {{International Journal of Social Robotics}},
  year = {2015},
  volume = {7},
  number = {4},
  pages = {451--463},
  keywords = {social robot, affective computing}
}

% NOTE(review): volume entered as 21 (source had 12, which looks like a digit transposition for the 2015 volume). Confirm against the journal.
@article{PubLIMSI-5556,
  author = {Francopoulo, Gil and Mariani, Joseph-Jean and Paroubek, Patrick},
  title = {{NLP4NLP : the cobbler's children won't go unshod.}},
  journal = {{D-Lib Magazine}},
  year = {2015},
  volume = {21},
  number = {11/12},
  pages = {10}
}

@article{PubLIMSI-5424,
  author = {Xu, Yong and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title = {{Sentence Alignment for Literary Texts}},
  journal = {{Linguistic Issues in Language Technology}},
  year = {2015},
  volume = {12},
  number = {6},
  pages = {1--25},
  keywords= {Alignement automatique; bitextes; traduction
automatique} }

% Same title and authors as PubLIMSI-5622; both keys are kept so existing citations resolve.
@inproceedings{VasilescuDL15,
  author = {Ioana Vasilescu and Camille Dutrey and Lori Lamel},
  title = {Large scale data based linguistic investigations using speech technology tools: The case of {Romanian}},
  booktitle = {International Conference on Speech Technology and Human-Computer Dialogue, SpeD 2015, Bucharest, Romania, October 14-17, 2015},
  pages = {1--6},
  year = {2015},
  timestamp = {Thu, 15 Mar 2018 16:41:08 +0100},
  biburl = {https://dblp.org/rec/bib/conf/sped/VasilescuDL15},
  url = {http://www-tlp.limsi.fr/public/SPED_2015_Paper.pdf},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% The empty PAGES field was dropped (it only produced an "empty field" warning).
@INCOLLECTION{HCL2,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain},
  TITLE = {Speech Recognition},
  BOOKTITLE = {The Oxford Handbook of Computational Linguistics 2nd edition},
  EDITOR = {R. Mitkov},
  PUBLISHER = {Oxford University Press},
  YEAR = 2015,
  CHAPTER = 37,
  url = {http://www-tlp.limsi.fr/public}
}

% For article entries the periodical goes in "journal"; "volume" and "number" are separate fields.
@article{SEGAL2015a,
  author = {Natalia Segal and H{\'e}l{\`e}ne Maynard and Fran{\c{c}}ois Yvon},
  title = {{Traduire la parole : le cas des TED Talks}},
  journal = {TAL (Traitement Automatique des Langues)},
  GROUP = {LIMSI,TLP},
  year = {2015},
  volume = {55},
  number = {2},
  pages = {13--45}
}

% NOTE(review): the fourth author was a bare "Christophe" in the source and is completed here
% as Christophe d'Alessandro; the volume was 43 in the source and is entered as 49 to match
% the 2015 volume of the journal. Confirm both against the published article.
@article{ROSSET2015a,
  author = {David Doukhan and Sophie Rosset and Albert Rilliard and Christophe d'Alessandro and Martine Adda-Decker},
  title = {{The GV-LEX corpus of tales in French}},
  journal = {Language Resources and Evaluation},
  GROUP = {LIMSI,TLP},
  year = {2015},
  volume = {49},
  number = {3},
  pages = {521--547}
}

% The compound surname is braced so that "de" is not read as a von-particle.
@article{BOULA2015a,
  author = {{Boula de Mareuil}, Philippe and Albert Rilliard and Fanny Ivent and Varvara Kozhevina},
  title = {{A comparative prosodic study of questions in French in contact with Occitan and Catalan}},
  journal = {Journal of Speech Sciences},
  GROUP = {LIMSI,TLP},
  year = {2015},
  volume = {4},
  number = {2},
  pages = {59--72},
  keywords = {prosody}
}

@inproceedings{IS2015_AL,
  author = {Thiago Fraga-Silva and Jean-Luc Gauvain and Lori Lamel and Antoine Laurent and Viet-Bac Le and
Abdel Messaoudi},
  title = {{Active Learning based data selection for limited resource STT and KWS}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2015)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2015},
  month = sep,
  pages = {3159--3163},
  address = {Dresden},
  url = {http://www-tlp.limsi.fr/public/IS2015_AL.pdf},
  keywords = {active learning, limited resource STT, KWS}
}

% Same title and authors as PubLIMSI-5264; both keys are kept so existing citations resolve.
% The page range is taken from that entry (this one previously held only a page count).
@inproceedings{IS2015_yucatec,
  author = {Valentina Vapnarsky and Claude Barras and C{\'e}dric Becquey and David Doukhan and Martine Adda-Decker and Lori Lamel},
  title = {{Analysing rhythm in ritual discourse in Yucatec Maya using automatic speech alignment}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2015)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2015},
  month = sep,
  pages = {344--348},
  address = {Dresden},
  url = {http://www-tlp.limsi.fr/public/IS2015_yucatec.pdf}
}

@inproceedings{IS2015_vad,
  author = {Gregory Gelly and Jean-Luc Gauvain},
  title = {{Minimum Word Error Training of RNN-based Voice Activity Detection}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2015)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2015},
  month = sep,
  pages = {2650--2654},
  address = {Dresden},
  url = {http://www-tlp.limsi.fr/public/IS2015_vad.pdf},
  keywords = {speech recognition, minimum word error, voice activity detection, recurrent neural networks, long short-term memory, particle swarm optimization}
}

@article{MTAP2015,
  author = {Anindya Roy and Herv{\'e} Bredin and William Hartmann and Viet Bac Le and Claude Barras and Jean-Luc Gauvain},
  title = {Lexical speaker identification in {TV} shows},
  GROUP = {LIMSI,TLP},
  journal = {MTAP (Multimedia Tools and Applications)},
  year = {2015},
  volume = {74},
  number = {4},
  pages = {1377--1396},
  url = {http://www-tlp.limsi.fr/public/mtap2015.pdf},
  keywords = {speaker identification}
}
@inproceedings{LileikyteLG15,
  author    = {Rasa Lileikyte and Lori Lamel and Jean{-}Luc Gauvain},
  title     = {Conversational Telephone Speech Recognition for {Lithuanian}},
  booktitle = {Statistical Language and Speech Processing - Third International Conference, {SLSP} 2015, Budapest, Hungary, November 24-26, 2015, Proceedings},
  pages     = {164--172},
  year      = {2015},
  timestamp = {Wed, 24 May 2017 08:31:20 +0200},
  url       = {http://www-tlp.limsi.fr/public/slsp2015-lileikyte_paper.pdf},
  biburl    = {https://dblp.org/rec/bib/conf/slsp/LileikyteLG15},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% url = {https://doi.org/10.1007/978-3-319-25789-1_16},

%%%%%%%%%% 2014 section

% Conventions used for the entries below: "pages" holds either a range
% (with --) or a bare page count, "month" uses the standard three-letter
% macros, and "address" is written "City, Country".
@inproceedings{PubLIMSI-4821,
  author    = {Laurent, Antoine and Hartmann, William and Lamel, Lori},
  title     = {{Unsupervised acoustic model training for the Korean language}},
  booktitle = {{International Symposium on Chinese Spoken Language Processing}},
  year      = {2014},
  pages     = {5},
  address   = {Singapore},
  keywords  = {speech recognition, unsupervised training, korean, under-resourced language},
}

@inproceedings{PubLIMSI-4819,
  author    = {Bouaziz, M. and Laurent, Antoine and Est{\`e}ve, Y.},
  title     = {{D{\'e}codage hybride dans les SRAP pour l'indexation automatique de documents multim{\'e}dia}},
  booktitle = {{Journ{\'e}es d'Etude sur la Parole}},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  address   = {Le Mans, France},
  keywords  = {mots hors vocabulaire, indexation automatique, d{\'e}codage hybride},
}

@inproceedings{PubLIMSI-5262,
  author    = {Lamel, Lori},
  title     = {{Language Diversity: Speech Processing In A Multi-Lingual Context}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year      = {2014},
  pages     = {1},
  address   = {Singapore},
  keywords  = {Speech Processing},
}

@inproceedings{PubLIMSI-5220,
  author    = {Bredin, Herv{\'e} and Roy, Anindya and P{\'e}cheux, Nicolas and Allauzen, Alexandre},
  title     = {{Sheldon Speaking, Bonjour! : Leveraging Multilingual Tracks for (Weakly) Supervised Speaker Identification}},
  booktitle = {{ACM International Conference on Multimedia}},
  year      = {2014},
  pages     = {137--146},
  address   = {Orlando, USA},
  keywords  = {Speech Activity Detection; Speaker Identification; Multimedia Data; Weak Supervision; Multilingual Fusion},
}

% The month field previously held the conference date range
% (2014-09-14 / 2014-09-18); only the month is kept.
@inproceedings{PubLIMSI-4865,
  author    = {Hartmann, William and Le, Viet Bac and Messaoudi, Abdelkhalek and Lamel, Lori and Gauvain, Jean-Luc},
  title     = {{Comparing decoding strategies for subword-based keyword spotting in low-resourced languages}},
  booktitle = {{Annual Conference of the International Speech Communication Association}},
  year      = {2014},
  pages     = {2764--2768},
  month     = sep,
  address   = {Singapore},
  keywords  = {keyword search, spoken term detection, OOV, sub-word lexical units, low resource LVCSR},
}

@inproceedings{PubLIMSI-4738,
  author    = {Mariani, Joseph-Jean and Rehm, Georg and Uszkoreit, Hans},
  title     = {{The strategic impact of META-NET on the regional, national and international level}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  pages     = {8},
  address   = {Reykjavik, Iceland},
}

% Title repaired: the apostrophe and the accented "a" had been exported as
% broken escape sequences.
@inproceedings{PubLIMSI-4680,
  author    = {Laurent, Antoine and Guinaudeau, Camille and Roy, Anindya},
  title     = {{Analyse du corpus MATRICE-INA : exploration et classification automatique d'archives audiovisuelles de 1930 {\`a} 2012}},
  booktitle = {{Journ{\'e}es d'Etude sur la Parole}},
  year      = {2014},
  pages     = {8},
  address   = {Le Mans, France},
  keywords  = {Fouille de donnees, clustering, transcription automatique de la parole, document audiovisuel},
}

@inproceedings{PubLIMSI-4663,
  author    = {P{\'e}cheux, Nicolas and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {{Rule-based reordering spaces in statistical machine translation}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  pages     = {7},
  address   = {Reykjavik, Iceland},
  keywords  = {Statistical Machine Translation; Preordering},
}

@inproceedings{PubLIMSI-4601,
  author    = {Goryainova, Maria and Grouin, Cyril and Rosset, Sophie and Vasilescu, Ioana},
  title     = {{Morpho-Syntactic Study of Errors from Speech Recognition System}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  month     = may,
  pages     = {3045--3049},
  address   = {Reykjavik, Iceland},
  keywords  = {Automatic Speech Recognition; Error Analysis; Morpho-Syntactic Analysis},
}

% Title repaired: the curly quotes around "gravity" had been exported as
% raw Unicode escape codes.
@inproceedings{PubLIMSI-4602,
  author    = {Luzzati, Daniel and Grouin, Cyril and Vasilescu, Ioana and Adda-Decker, Martine and Bilinski, Eric and Camelin, Nathalie and Kahn, Juliette and Lailler, Carole and Lamel, Lori and Rosset, Sophie},
  title     = {{Human annotation of ASR error regions: Is ``gravity'' a sharable concept for human annotators?}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  pages     = {3050--3056},
  address   = {Reykjavik, Iceland},
  keywords  = {Annotation; ASR Seriousness Errors; Speech Recognition},
}

% NOTE(review): this entry had no title field at all. The title below was
% filled in from the LREC 2014 programme -- please confirm it.
@inproceedings{PubLIMSI-4585,
  author    = {Soury, Mariette and Devillers, Laurence},
  author+AN = {1=student, LIMSI (ancien) , TLP ; 2=LIMSI , TLP ; },
  title     = {{Smile and Laughter in Human-Machine Interaction: a study of engagement}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  pages     = {3633--3637},
  address   = {Reykjavik, Iceland},
  keywords  = {affect bursts, personality traits, sense of humor},
}

@inproceedings{PubLIMSI-4588,
  author    = {Wisniewski, Guillaume and Kubler, Natalie and Yvon, Fran{\c{c}}ois},
  title     = {{A Corpus of Machine Translation Errors Extracted from Translation Students Exercises}},
  booktitle = {{International Conference on Language Resources and Evaluation}},
  year      = {2014},
  month     = may,
  pages     = {4},
  address   = {Reykjavik, Iceland},
  keywords  = {Translation Error Corpus, Post-Edition, Error Analysis},
}

@article{PubLIMSI-4735,
  author  = {Mariani, Joseph-Jean},
  title   = {{Ressources et {\'e}valuation, histoire et utilisation}},
  year    = {2014},
  pages   = {8--16},
  journal =
{{L'information grammaticale}}, volume = {141} }

% A stray control space that followed the braced title was removed.
@article{PubLIMSI-6317,
  author  = {Devillers, Laurence and Tahon, Marie and Sehili, Mohamed El Amine and Delaborde, Agn{\`e}s},
  title   = {{D{\'e}tection des {\'e}tats affectifs lors d'interactions partag{\'e}es : robustesse des indices non verbaux}},
  year    = {2014},
  pages   = {123--149},
  journal = {{TAL (Traitement Automatique des Langues)}},
  volume  = {55},
  number  = {2}
}

@article{PubLIMSI-5312,
  author   = {Tomeh, Nadi and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title    = {{Maximum-entropy word alignment and posterior-based phrase extraction for machine translation}},
  year     = {2014},
  pages    = {19--56},
  journal  = {{Machine Translation}},
  volume   = {28},
  number   = {1},
  keywords = {Statistical machine translation; Discriminative word alignment; Phrase table extraction}
}

@article{PubLIMSI-4591,
  author   = {Gahbiche-Braham, Souhir and Maynard, H{\'e}l{\`e}ne and Yvon, Fran{\c{c}}ois},
  title    = {Traitement automatique des entit{\'e}s nomm{\'e}es en arabe : d{\'e}tection et traduction},
  AERES    = {ACLN},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {101--132},
  journal  = {TAL (Traitement Automatique des Langues)},
  volume   = {54},
  number   = {2},
  abstract = {La d{\'e}tection des entit{\'e}s nomm{\'e}es (EN) en langue arabe est un pr{\'e}traitement potentiellement utile pour de nombreuses applications du traitement des langues, en particulier pour la traduction automatique. Cette t{\^a}che repr{\'e}sente un s{\'e}rieux d{\'e}fi, compte tenu des sp{\'e}cificit{\'e}s de l'arabe. Dans cet article, nous pr{\'e}sentons une {\'e}tude d{\'e}taill{\'e}e des entit{\'e}s nomm{\'e}es en arabe dans le cadre d'une application de traduction automatique statistique. Nous pr{\'e}sentons notre syst{\`e}me de d{\'e}tection des EN en arabe (NERAr), dans sa configuration de base, puis dans ses diverses {\'e}volutions. Dans notre architecture, NERAr est utilis{\'e} comme un pr{\'e}traitement apportant des connaissances externes au syst{\`e}me de traduction. Plusieurs strat{\'e}gies d'int{\'e}gration de ces connaissances sont consid{\'e}r{\'e}es; dans la configuration la plus favorable, une {\'e}valuation automatique, corrobor{\'e}e par des analyses manuelles, permet d'observer une l{\'e}g{\`e}re am{\'e}lioration de la traduction des EN et une r{\'e}duction des erreurs induites par les mots inconnus.},
  keywords = {entit{\'e}s nomm{\'e}es, traduction automatique, traitement automatique de l'arabe},
}

@article{PubLIMSI-4714,
  author   = {Bredin, Herv{\'e} and Roy, Anindya and Le, Viet-Bac and Barras, Claude},
  title    = {Person instance graphs for mono-, cross- and multi-modal person recognition in multimedia data. Application to speaker identification in {TV} broadcast},
  AERES    = {ACL},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {16},
  journal  = {International Journal of Multimedia Information Retrieval},
  abstract = {This work introduces a unified framework for mono-, cross- and multi-modal person recognition in multimedia data. Dubbed Person Instance Graph, it models the person recognition task as a graph mining problem: i.e. finding the best mapping between person instance vertices and identity vertices. Practically, we describe how the approach can be applied to speaker identification in TV broadcast. Then, a solution to the above-mentioned mapping problem is proposed. It relies on Integer Linear Programming to model the problem of clustering person instances based on their identity. We provide an in-depth theoretical definition of the optimization problem. Moreover, we improve two fundamental aspects of our previous related work: the problem constraints and the optimized objective function. Finally, a thorough experimental evaluation of the proposed framework is performed on a publicly available benchmark database. Depending on the graph configuration (i.e. the choice of its vertices and edges), we show that multiple tasks can be addressed interchangeably (e.g.
speaker diarization, supervised or unsupervised speaker identification), significantly outperforming state-of-the-art mono-modal approaches.},
  keywords = {speaker identification, multimedia, multimodal fusion},
}

% The compound surname is braced so that it is kept as a single last name.
@article{PubLIMSI-4832,
  author   = {{Boula de Mareuil}, Philippe},
  title    = {Qu'est-ce qu'un (phono)style ?},
  AERES    = {ACLN},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {9--19},
  journal  = {Nouveaux cahiers de linguistique fran{\c{c}}aise},
  volume   = {31},
  keywords = {linguistique},
}

% NOTE(review): MTAP2015 (2015 section) appears to describe the same article
% with final volume/issue/pages -- consider merging the two records.
@article{PubLIMSI-4880,
  author   = {Roy, Anindya and Bredin, Herv{\'e} and Hartmann, William and Le, Viet Bac and Barras, Claude and Gauvain, Jean-Luc},
  title    = {Lexical speaker identification in {TV} shows},
  AERES    = {ACL},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {20},
  journal  = {Multimedia Tools and Applications},
  abstract = {It is possible to use lexical information extracted from speech transcripts for speaker identification (SID), either on its own or to improve the performance of standard cepstral-based SID systems upon fusion. This was established before typically using isolated speech from single speakers (NIST SRE corpora, parliamentary speeches). On the contrary, this work applies lexical approaches for SID on a different type of data. It uses the REPERE corpus consisting of unsegmented multiparty conversations, mostly debates, discussions and Q\&A sessions from TV shows. It is hypothesized that people give out clues to their identity when speaking in such settings which this work aims to exploit. The impact on SID performance of the diarization front-end required to pre-process the unsegmented data is also measured. Four lexical SID approaches are studied in this work, including TFIDF, BM25 and LDA-based topic modeling. Results are analysed in terms of TV shows and speaker roles. Lexical approaches achieve low error rates for certain speaker roles such as anchors and journalists, sometimes lower than a standard cepstral-based Gaussian Supervector - Support Vector Machine (GSV-SVM) system. Also, in certain cases, the lexical system shows modest improvement over the cepstral-based system performance using score-level sum fusion. To highlight the potential of using lexical information not just to improve upon cepstral-based SID systems but as an independent approach in its own right, initial studies on crossmedia SID is briefly reported. Instead of using speech data as all cepstral systems require, this approach uses Wikipedia texts to train lexical speaker models which are then tested on speech transcripts to identify speakers.},
  keywords = {Lexical speaker identification, Broadcast conversations, TFIDF, BM25, Speaker roles, Classifier fusion, Crossmedia learning, Wikipedia},
}

% Line-break hyphenation left over from the PDF ("short- term",
% "infor- mation", ...) was removed from the abstract and keywords.
@article{PubLIMSI-4883,
  author   = {Barras, Claude and Sarkar, Achintya Kumar and Do, Cong-Thanh and Le, Viet Bac},
  title    = {Combination of Cepstral and Phonetically Discriminative Features for Speaker Verification},
  AERES    = {ACL},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {5},
  journal  = {IEEE Signal Processing Letters},
  volume   = {21},
  number   = {9},
  abstract = {Most speaker recognition systems rely on short-term acoustic cepstral features for extracting the speaker-relevant information from the signal. But phonetic discriminant features, extracted by a bottle-neck multi-layer perceptron (MLP) on longer stretches of time, can provide a complementary information and have been adopted in speech transcription systems. We compare the speaker verification performance using cepstral features, discriminant features, and a concatenation of both followed by a dimension reduction. We consider two speaker recognition systems, one based on maximum likelihood linear regression (MLLR) super-vectors and the other on a state-of-the-art i-vector system with two session variability compensation schemes. Experiments are reported on a standard configuration of NIST SRE 2008 and 2010 databases. The results show that the phonetically discriminative MLP features retain speaker-specific information which is complementary to the short-term cepstral features. The performance improvement is obtained with both score domain and feature domain fusion and the speaker verification equal error rate (EER) is reduced up to 50\% relative, compared to the best i-vector system using only cepstral features.},
  keywords = {Speaker verification, i-vector, multi-layer perceptron, bottleneck features, PCA, LDA, PLDA},
}

% The issue date ("mars 2014") was stored in the number field; it is now
% expressed through the month macro.
@article{PubLIMSI-4916,
  author   = {Luzzati, Daniel and Adda-Decker, Martine and Rosset, Sophie},
  title    = {Traitements automatiques de l'oral et de l'{\'e}crit},
  AERES    = {ACLN},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  month    = mar,
  pages    = {4--8},
  journal  = {L'information grammaticale},
  volume   = {141},
  keywords = {Traitement automatique des langues, traitement automatique de la parole},
}

@article{PubLIMSI-4917,
  author   = {Rosset, Sophie and Luzzati, Daniel},
  title    = {Le dialogue humain-machine ou la confrontation au sens},
  AERES    = {ACLN},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  month    = mar,
  pages    = {47--53},
  journal  = {L'information grammaticale},
  volume   = {141},
  keywords = {dialogue humain-machine},
}

% NOTE(review): "50" was recorded as the issue number but is treated here as
% the volume, and the fourth author was inverted ("Jos{\'e}, Ferreira");
% both corrected on the assumption that Ferreira is the family name -- confirm.
@article{PubLIMSI-4977,
  author   = {Hua, Jiewen and Le Scanff, Christine and Larue, Jacques and Ferreira, Jos{\'e} and Martin, Jean-Claude and Devillers, Laurence and Filaire, Edith},
  title    = {Global stress response during a social stress test: Impact of alexithymia and its subfactors},
  AERES    = {ACL},
  GROUP    = {LIMSI,CPU,TLP},
  year     = {2014},
  pages    = {53--61},
  journal  = {Psychoneuroendocrinology},
  volume   = {50},
  abstract = {Alexithymia is a personality trait
characterized by difficulties in identifying, describing and communicating one's own emotions. Recent studies have associated specific effects of this trait and its subfactors with hypothalamo-pituitary-adrenal (HPA) axis markers during stress. The aim of this study was to analyze the association between alexithymia and its subfactors with HPA and sympatho-adrenal medullar (SAM) activity. Stress was induced experimentally using a public-speaking paradigm. Salivary cortisol, alpha-amylase (AA), chromogranin A (CgA) and heart rate (HR) were collected during the defined periods of baseline, stress, and recovery in 19 males and 24 female healthy university students.},
  keywords = {Emotion ; Physiology ; Stress}
}

@article{PubLIMSI-5038,
  author   = {Apidianaki, Marianna and Sagot, Beno{\^\i}t},
  title    = {Data-driven Synset Induction and Disambiguation for {Wordnet} Development},
  AERES    = {ACL},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {655--677},
  journal  = {Language Resources and Evaluation},
  volume   = {48},
  number   = {4},
  abstract = {Automatic methods for wordnet development in languages other than English generally exploit information found in Princeton WordNet (PWN) and translations extracted from parallel corpora. A common approach consists in preserving the structure of PWN and transferring its content in new languages using alignments, possibly combined with information extracted from multilingual semantic resources. Even if the role of PWN remains central in this process, these automatic methods offer an alternative to the manual elaboration of new wordnets. However, their limited coverage has a strong impact on that of the resulting resources. Following this line of research, we apply a cross-lingual word sense disambiguation method to wordnet development. Our approach exploits the output of a data-driven sense induction method that generates sense clusters in new languages, similar to wordnet synsets, by identifying word senses and relations in parallel corpora. We apply our cross-lingual word sense disambiguation method to the task of enriching a French wordnet resource, the WOLF, and show how it can be efficiently used for increasing its coverage. Although our experiments involve the English-French language pair, the proposed methodology is general enough to be applied to the development of wordnet resources in other languages for which parallel corpora are available. Finally, we show how the disambiguation output can serve to reduce the granularity of new wordnets and the degree of polysemy present in PWN.},
  keywords = {cross-lingual word sense disambiguation, word sense induction, sense clustering, parallel corpora, WordNet}
}

@article{PubLIMSI-5078,
  author   = {Ananiadou, S. and Friburger, N. and Rosset, Sophie},
  title    = {Entit{\'e}s Nomm{\'e}es, Traitement Automatique des langues},
  AERES    = {ACLN},
  GROUP    = {LIMSI,TLP},
  year     = {2014},
  pages    = {7--11},
  journal  = {TAL (Traitement Automatique des Langues)},
  volume   = {54},
  number   = {2},
  keywords = {Traitement automatique des langues, entit{\'e}s nomm{\'e}es}
}

% "pages" holds the page count (previously written "7p").
@inproceedings{PubLIMSI-4603,
  author    = {N{\'e}v{\'e}ol, Aur{\'e}lie and Grouin, Cyril and Leixa, Jeremy and Rosset, Sophie and Zweigenbaum, Pierre},
  title     = {The {Quaero} {French} medical corpus: a ressource for medical entity recognition and normalization},
  booktitle = {Bio text-mining workshop (BioTextM 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2014},
  pages     = {7},
  month     = apr,
  address   = {Reykjavik, Iceland},
  abstract  = {A vast amount of information in the biomedical domain is available as natural language free text. An increasing number of documents in the field are written in languages other than English. Therefore, it is essential to develop resources, methods and tools that address Natural Language Processing in the variety of languages used by the biomedical community.
In this paper, we report on the development of an extensive corpus of biomedical documents in French annotated at the entity and concept level. Three text genres are covered, comprising a total of 103,056 words. Ten entity categories corresponding to UMLS Semantic Groups were annotated, using automatic pre-annotations validated by trained human annotators. The pre-annotation method was found helpful for entities and achieved above 0.83 precision for all text genres. Overall, a total of 26,409 entity annotations were mapped to 5,797 unique UMLS concepts.},
  keywords = {Corpus annotation; Named Entity Recognition; Entity Normalization},
}

% Months below use the standard three-letter macros so that the
% bibliography style controls how they are printed.
@inproceedings{PubLIMSI-4692,
  author    = {Apidianaki, Marianna and Verzeni, Emilia and McCarthy, Diana},
  title     = {Semantic clustering of pivot paraphrases},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {4270--4275},
  month     = may,
  address   = {Reykjavik, Iceland},
  abstract  = {Paraphrases extracted from parallel corpora by the pivot method (Bannard and Callison-Burch, 2005) constitute a valuable resource for multilingual NLP applications. In this study, we analyse the semantics of unigram pivot paraphrases and use a graph-based sense induction approach to unveil hidden sense distinctions in the paraphrase sets. The comparison of the acquired senses to gold data from the Lexical Substitution shared task (McCarthy and Navigli, 2007) demonstrates that sense distinctions exist in the paraphrase sets and highlights the need for a disambiguation step in applications using this resource.},
  keywords  = {pivot paraphrasing, sense clustering, parallel corpora},
}

@inproceedings{PubLIMSI-4708,
  author    = {Bluche, Th{\'e}odore and Pham, Vu and Kermorvant, Christopher and Louradour, J{\'e}r{\^o}me},
  title     = {Dropout improves recurrent neural networks for handwriting recognition},
  booktitle = {International Conference on Frontiers in Handwriting Recognition (ICFHR 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {6},
  series    = {International Conference on Frontiers in Handwriting Recognition},
  month     = sep,
  address   = {Crete Island, Greece},
  abstract  = {Recurrent neural networks (RNNs) with Long Short-Term memory cells currently hold the best known results in unconstrained handwriting recognition. We show that their performance can be greatly improved using dropout - a recently proposed regularization method for deep architectures. While previous works showed that dropout gave superior performance in the context of convolutional networks, it had never been applied to RNNs. In our approach, dropout is carefully used in the network so that it does not affect the recurrent connections, hence the power of RNNs in modeling sequences is preserved. Extensive experiments on a broad range of handwritten databases confirm the effectiveness of dropout on deep architectures even when the network mainly consists of recurrent and shared connections.},
  keywords  = {Recurrent Neural Networks, Dropout, Handwriting Recognition},
}

@inproceedings{PubLIMSI-4710,
  author    = {Bluche, Th{\'e}odore and Moysset, Bastien and Kermorvant, Christopher},
  title     = {Automatic Line Segmentation and Ground-Truth Alignment of Handwritten Documents},
  booktitle = {International Conference on Frontiers in Handwriting Recognition (ICFHR 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {6},
  series    = {International Conference on Frontiers in Handwriting Recognition},
  month     = sep,
  address   = {Crete Island, Greece},
  abstract  = {In this paper, we present a method for the automatic segmentation and transcript alignment of documents, for which we only have the transcript at the document level. We consider several line segmentation hypotheses, and recognition hypotheses for each segmented line. The recognition is highly constrained with the document transcript. We formalize the problem in a weighted finite-state transducer framework. We evaluate how the constraints help achieve a reasonable result. In particular, we assess the performance of the system both in terms of segmentation quality and transcript mapping. The main contribution of this paper is that we jointly find the best segmentation and transcript mapping that allow to align the image with the whole ground-truth text. The evaluation is carried out on fully annotated public databases. Furthermore, we retrieved training material with this system for the Maurdor evaluation, where the data was only annotated at the paragraph level.
With the automatically segmented and annotated lines, we record a relative improvement in Word Error Rate of 35.6\%.},
  keywords = {Handwriting Recognition, Transcription Alignment, Finite-State Transducers},
}

% The abstract of this entry had lost its accents and apostrophes during
% export (accented letters had turned into a bare letter followed by a
% backslash). It has been restored to accented French; the wording itself
% is unchanged.
@inproceedings{PubLIMSI-4683,
  author    = {Wisniewski, Guillaume and P{\'e}cheux, Nicolas and Knyazeva, Elena and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {Apprentissage partiellement supervis{\'e} d'un {\'e}tiqueteur morpho-syntaxique par transfert cross-lingue},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {11},
  series    = {Traitement Automatique des Langues},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Les m{\'e}thodes de transfert cross-lingue permettent partiellement de pallier l'absence de corpus annot{\'e}s, en particulier dans le cas de langues peu dot{\'e}es en ressources linguistiques. Le transfert d'{\'e}tiquettes morpho-syntaxiques depuis une langue riche en ressources, compl{\'e}t{\'e} et corrig{\'e} par un dictionnaire associant {\`a} chaque mot un ensemble d'{\'e}tiquettes autoris{\'e}es, ne fournit cependant qu'une information de supervision incompl{\`e}te. Dans ce travail, nous reformulons ce probl{\`e}me dans le cadre de l'apprentissage ambigu et proposons une nouvelle m{\'e}thode pour apprendre un analyseur de mani{\`e}re faiblement supervis{\'e}e {\`a} partir d'un mod{\`e}le {\`a} base d'historique. L'{\'e}valuation de cette approche montre une am{\'e}lioration sensible des performances par rapport aux m{\'e}thodes de l'{\'e}tat de l'art pour trois langues sur quatre consid{\'e}r{\'e}es, avec des gains jusqu'{\`a} 3,9\% absolus ou 35,8\% relatifs.},
}

@inproceedings{PubLIMSI-4709,
  author    = {Moysset, Bastien and Bluche, Th{\'e}odore and Knibbe, Maxime and Benzeghiba, Mohamed Faouzi and Messina, Ronaldo and Louradour, J{\'e}r{\^o}me and Kermorvant, Christopher},
  title     = {The {A2iA} Multi-lingual Text Recognition System at the second {Maurdor} Evaluation},
  booktitle = {International Conference on Frontiers in Handwriting Recognition (ICFHR 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {6},
  series    = {International Conference on Frontiers in Handwriting Recognition},
  month     = sep,
  address   = {Crete Island, Greece},
  abstract  = {This paper describes the system submitted by A2iA to the second Maurdor evaluation for multi-lingual text recognition. A system based on recurrent neural networks and weighted finite state transducers was used both for printed and handwritten recognition, in French, English and Arabic. To cope with the difficulty of the documents, multiple text line segmentations were considered. An automatic procedure was used to prepare annotated text lines needed for the training of the neural network. Language models were used to decode sequences of characters or words for French and English and also sequences of part-of-arabic words (PAWs) in case of Arabic.
This system scored first at the second Maurdor evaluation for both printed and handwritten text recognition in French, English and Arabic.},
}

@inproceedings{PubLIMSI-4715,
  author    = {Bredin, Herv{\'e} and Laurent, Antoine and Sarkar, Achintya Kumar and Le, Viet-Bac and Rosset, Sophie and Barras, Claude},
  title     = {Person instance graphs for named speaker identification in {TV} broadcast},
  booktitle = {The Speaker and Language Recognition Workshop (Odyssey 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {8},
  month     = jul,
  address   = {Joensuu, Finland},
  abstract  = {We address the problem of named speaker identification in TV broadcast which consists in answering the question ``who speaks when?'' with the real identity of speakers, using person names automatically obtained from speech transcripts. While existing approaches rely on a first speaker diarization step followed by a local name propagation step to speaker clusters, we propose a unified framework called person instance graph where both steps are jointly modeled as a global optimization problem, then solved using integer linear programming. Moreover, when available, acoustic speaker models can be added seamlessly to the graph structure for joint named and acoustic speaker identification - leading to a 10\% error decrease (from 45\% down to 35\%) over a state-of-the-art i-vector speaker identification system on the REPERE TV broadcast corpus.},
  keywords  = {speaker identification, unsupervised, multimodal fusion},
}

@inproceedings{PubLIMSI-4716,
  author    = {Bruneau, Pierrick and Stefas, Micka{\"e}l and Bredin, Herv{\'e} and Ta, Anh-Phuong and Tamisier, Thomas and Barras, Claude},
  title     = {A Web-based Tool for the Visual Analysis of Media Annotations},
  booktitle = {International Conference Information Visualisation (iV 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {6},
  month     = jul,
  address   = {Paris, France},
  abstract  = {Multimedia annotation algorithms infer localized meta-data in multimedia content, e.g. speakers or appearing faces. There is a growing need of experts from this domain to perform advanced analyses, that go beyond medium-scale quality metrics. This paper describes a novel visual tool, that applies interactive visualization principles to the multimedia expert concerns. Multiple coordinated views, augmented by interactive inspection facilities, ease the navigation in media annotations, and the visual detection of relevant information. The effectiveness of the proposition is demonstrated by experimental scenarios on a real multimedia corpus.},
}

% NOTE(review): the three names that were stored in "publisher" look like the
% volume editors, so they are recorded in "editor" -- confirm. The month field
% previously held the conference dates ("14/10 au 16/10").
@inproceedings{PubLIMSI-4717,
  author    = {Bluche, Th{\'e}odore and Ney, Hermann and Kermorvant, Christopher},
  title     = {A comparison of sequence-trained deep neural networks and recurrent neural networks optical modeling for handwriting recognition},
  booktitle = {International Conference on Statistical Language and Speech Processing (SLSP 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {199--210},
  editor    = {Besacier, Laurent and Dediu, Adrian-Horia and Mart{\'\i}n-Vide, Carlos},
  series    = {Statistical Language and Speech Processing, Second International Conference, SLSP 2014},
  month     = oct,
  address   = {Grenoble, France},
  abstract  = {Long Short-Term Memory Recurrent Neural Networks are the current state-of-the-art in handwriting recognition. In speech recognition, Deep Multi-Layer Perceptrons (DeepMLPs) have become the standard acoustic model for Hidden Markov Models (HMMs). Although handwriting and speech recognition systems tend to include similar components and techniques, DeepMLPs are not used as optical model in unconstrained large vocabulary handwriting recognition. In this paper, we compare Bidirectional LSTM-RNNs with DeepMLPs for this task. We carried out experiments on two public databases of multi-line handwritten documents: Rimes and IAM.
We show that the proposed hybrid systems yield performance comparable to the state-of-the-art, regardless of the type of features (hand-crafted or pixel values) and the neural network optical model (DeepMLP or RNN).}, keywords= {Handwriting Recognition, Recurrent Neural Networks, Deep Neural Networks}, } @inproceedings {PubLIMSI-4729, author = {Gahbiche-Braham, Souhir AND Maynard, H{\'e}l{\`e}ne AND Yvon, Fran\c{c}ois}, title = {Adaptation th{\'e}matique pour la traduction automatique de d{\'e}p{\^e}ches de presse}, booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)}, AERES = {ACTN}, GROUP = {LIMSI,TLP}, year = {2014}, pages = {280-291}, month= {July}, address= {Marseille, France}, abstract= {L'utilisation de m{\'e}thodes statistiques en traduction automatique (TA) implique l'exploitation de gros corpus parall{\`e}les repr{\'e}sentatifs de la tâche de traduction vis{\'e}e. La relative raret{\'e} de ces ressources fait que la question de l'adaptation au domaine est une probl{\'e}matique centrale en TA. Dans cet article, une {\'e}tude portant sur l'adaptation th{\'e}matique des donn{\'e}es journalistiques issues d'une m{\^e}me source est propos{\'e}e. Dans notre approche, chaque phrase d'un document est traduite avec le syst{\`e}me de traduction appropri{\'e} (c.-{\`a}-d. sp{\'e}cifique au th{\`e}me dominant dans la phrase). Deux sc{\'e}narios de traduction sont {\'e}tudi{\'e}s~: (a) une classification manuelle, reposant sur la codification IPTC; (b) une classification automatique. Nos exp{\'e}riences montrent que le sc{\'e}nario (b) conduit {\`a} des meilleures performances ({\`a} l'aune des m{\'e}triques automatiques), que le sc{\'e}nario (a). 
L'approche la meilleure pour la m{\'e}trique BLEU semble toutefois consister {\`a} ne pas r{\'e}aliser d'adaptation; on observe toutefois qu'adapter permet de lever certaines ambiguit{\'e}s s{\'e}mantiques.},
  keywords  = {adaptation th{\'e}matique, classification automatique, traduction automatique},
}

@inproceedings{PubLIMSI-4731,
  author    = {Apidianaki, Marianna and van der Plas, Lonneke},
  title     = {Cross-lingual Word Sense Disambiguation for Predicate Labelling of {French}},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {46--55},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Nous abordons la question du transfert d'annotations s{\'e}mantiques, et plus sp{\'e}cifiquement d'{\'e}tiquettes sur les pr{\'e}dicats, d'une langue {\`a} l'autre sur la base de corpus parall{\`e}les. Des travaux ant{\'e}rieurs ont transf{\'e}r{\'e} ces annotations directement au niveau des tokens, conduisant {\`a} un faible rappel. Nous pr{\'e}sentons une approche globale de transfert qui agr{\`e}ge des informations rep{\'e}r{\'e}es dans l'ensemble du corpus parall{\`e}le.
Nous montrons que la performance de la m{\'e}thode globale est sup{\'e}rieure aux r{\'e}sultats ant{\'e}rieurs en termes de rappel sans trop affecter la pr{\'e}cision.},
  keywords  = {transfert inter-langue, annotation s{\'e}mantique automatique, pr{\'e}dicats, d{\'e}sambiguisation lexicale, corpus parall{\`e}les},
}

@inproceedings{PubLIMSI-4741,
  author    = {Mariani, Joseph-Jean},
  title     = {{LREC} 15th Anniversary},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2014)},
  AERES     = {INV},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {22},
  month     = may,
  address   = {Reykjavik, Iceland},
}

% PubLIMSI-4773: the title accents were exported as broken escapes (undefined control sequences); restored here.
@inproceedings{PubLIMSI-4773,
  author    = {Dutrey, Camille and Rosset, Sophie and Adda-Decker, Martine and Clavel, Chlo{\'e} and Vasilescu, Ioana},
  title     = {Disfluences dans la parole spontan{\'e}e conversationnelle : d{\'e}tection automatique utilisant des indices lexicaux et acoustiques},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  month     = jun,
  address   = {Le Mans, France},
  abstract  = {La parole spontan{\'e}e est caract{\'e}ris{\'e}e par la pr{\'e}sence de nombreux {\'e}l{\'e}ments disfluents qui peuvent s'av{\'e}rer tr{\`e}s informatifs quant au d{\'e}roulement et {\`a} la compr{\'e}hension du dialogue, tout en {\'e}tant un obstacle {\`a} l'application de certains traitements automatiques. Nous proposons une m{\'e}thode pour la d{\'e}tection automatique de disfluences dans des donn{\'e}es conversationnelles en fran{\c{c}}ais. Nous d{\'e}veloppons et {\'e}valuons une m{\'e}thode d'{\'e}tiquetage s{\'e}quentiel {\`a} base de CRF, reconnus pour donner de tr{\`e}s bons r{\'e}sultats sur des t{\^a}ches similaires.
Notre syst{\`e}me s'appuie sur des indices {\`a} la fois lexicaux et prosodiques : la combinaison de traits et de patrons construits {\`a} partir des transcriptions manuelles et du signal acoustique donne les meilleurs r{\'e}sultats pour de la d{\'e}tection de fronti{\`e}re, mais les indices acoustiques semblent d{\'e}grader les r{\'e}sultats pour une t{\^a}che conjointe d'identification de fronti{\`e}re et de structuration de la disfluence.},
  keywords  = {champs al{\'e}atoires conditionnels, disfluences, parole conversationnelle, parole spontan{\'e}e.},
}

@inproceedings{PubLIMSI-4775,
  author    = {Ben Jannet, Mohamed Ameur and Adda-Decker, Martine and Galibert, Olivier and Kahn, Juliette and Rosset, Sophie},
  title     = {Comment {\'e}valuer la qualit{\'e} des transcriptions automatiques pour la d{\'e}tection d'entit{\'e}s nomm{\'e}es ?},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  month     = jun,
  address   = {Le Mans, France},
  abstract  = {La m{\'e}trique standard pour l'{\'e}valuation des performances des syst{\`e}mes de reconnaissance automatique de la parole est le taux d'erreur mot (Word Error Rate). Cette m{\'e}trique est tr{\`e}s efficace quand il s'agit d'{\'e}valuer les syst{\`e}mes de transcription seules, mais de nos jours les syst{\`e}mes RAP sont tr{\`e}s souvent combin{\'e}s avec d'autres briques technologiques de Traitement Automatique des Langues telles que la traduction de la parole, le dialogue homme-machine, ou l'extraction d'informations. Cette situation exacerbe le besoin exprim{\'e} par la communaut{\'e} pour une m{\'e}trique qui permet d'estimer la qualit{\'e} des transcriptions automatiques en tenant compte du contexte applicatif. Nous proposons une m{\'e}trique alternative au WER qui permet d'{\'e}valuer la qualit{\'e} des transcriptions automatiques dans un contexte d'extraction d'entit{\'e}s nomm{\'e}es {\`a} partir de la parole.
Notre m{\'e}trique utilise des probabilit{\'e}s a posteriori pour estimer le risque d'erreur induit par les erreurs de transcription sur un syst{\`e}me de d{\'e}tection d'entit{\'e}s nomm{\'e}es appliqu{\'e} en aval.},
  keywords  = {M{\'e}trique, {\'E}valuation, WER, ETER, Reconnaissance de la Parole, Extraction d'Entit{\'e}s Nomm{\'e}es},
}

% PubLIMSI-4787: the conference days (25-29 October) are kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4787,
  author    = {Wisniewski, Guillaume and P{\'e}cheux, Nicolas and Gahbiche-Braham, Souhir and Yvon, Fran{\c{c}}ois},
  title     = {Cross-Lingual Part-of-Speech Tagging through Ambiguous Learning},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {7},
  month     = oct,
  eventdate = {2014-10-25/2014-10-29},
  address   = {Doha, Qatar},
  abstract  = {When Part-of-Speech annotated data is scarce, e.g. for under-resourced languages, one can turn to cross-lingual transfer and crawled dictionaries to collect partially supervised data. We cast this problem in the framework of ambiguous learning and show how to learn an accurate history-based model. Experiments on ten languages show significant improvements over prior state of the art performance.},
  keywords  = {weakly supervised learning, cross-lingual transfer, POS tagging},
}

@inproceedings{PubLIMSI-4791,
  author    = {Adda-Decker, Martine and Lamel, Lori and Adda, Gilles},
  title     = {Mod{\'e}lisation acoustico-phon{\'e}tique de langues peu dot{\'e}es : {\'e}tudes phon{\'e}tiques et travaux de reconnaissance automatique en luxembourgeois},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  series    = {XXXe Journ{\'e}es d'Etude sur la Parole},
  month     = jun,
  address   = {Le Mans, France},
  abstract  = {Luxembourgish, a Germanic-Franconian language, is embedded in a multilingual context on the divide between Romance and Germanic cultures and remains one of Europe's under-described languages.
This paper investigates the similarity between Luxembourgish phone segments with German, French and English via forced speech alignment techniques. Making use of monolingual acoustic seed models from these three languages, as well as multilingual models trained on pooled speech data we investigated whether Luxembourgish was globally better represented by one of the individual languages or by the multilingual model. While globally, the German models provide the best match, a phone-based analysis, shows language-specific preferences. First ASR results illustrate the accuracy of the various sets of monolingual and multilingual acoustic models and Luxembourgish acoustic models built from 1200 hours of untranscribed Luxembourgish audio data using unsupervised methods.},
  keywords  = {under-resourced languages ; acoustic modeling ; multilingual models ; large vocabulary speech recognition ; Luxembourgish ; Forced alignment},
}

@inproceedings{PubLIMSI-4793,
  author    = {Couillault, Alain and Fort, Kar{\"e}n and Adda, Gilles and de Mazancourt, Hugues},
  title     = {Evaluating corpora documentation with regards to the {Ethics and Big Data Charter}},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {5},
  series    = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  month     = may,
  address   = {Reykjavik, Iceland},
  abstract  = {The authors have written the Ethics and Big Data Charter in collaboration with various agencies, private bodies and associations. This Charter aims at describing any large or complex resources, and in particular language resources, from a legal and ethical viewpoint and ensuring the transparency of the process of creating and distributing such resources.
We propose in this article an analysis of the documentation coverage of the most frequently mentioned language resources with regards to the Charter, in order to show the benefit it offers.},
  keywords  = {language resources, ethics, documentation, big data},
}

% PubLIMSI-4777: the page range had lost its dash in the export (12791290); restored as 1279--1290.
@inproceedings{PubLIMSI-4777,
  author    = {van der Plas, Lonneke and Apidianaki, Marianna and Chen, Chenhua},
  title     = {Global methods for cross-lingual semantic role and predicate labelling},
  booktitle = {25th International Conference on Computational Linguistics (COLING 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {1279--1290},
  month     = aug,
  address   = {Dublin, Ireland},
  abstract  = {We address the problem of transferring semantic annotations to new languages using parallel corpora. Previous work has transferred these annotations on a token-to-token basis, an approach that is sensitive to alignment errors and translation shifts. We present a global approach to transfer that aggregates information across the whole parallel corpus and leads to more robust labellers. We build two global models, one for predicate labelling and one for role labelling, each tailored to the task at hand.
We show that the combination of direct and global methods outperforms previous results.},
  keywords  = {predicate labelling, semantic role labelling, parallel corpora, cross-lingual annotation transfer},
}

@inproceedings{PubLIMSI-4799,
  author    = {Allauzen, Alexandre and Do, Quoc Khanh and Yvon, Fran{\c{c}}ois},
  title     = {Mod{\`e}les de langue neuronaux : une comparaison de plusieurs strat{\'e}gies d'apprentissage},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {12},
  series    = {Traitement Automatique des Langues Naturelles},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Alors que l'importance des mod{\`e}les neuronaux dans le domaine du traitement automatique des langues ne cesse de cro{\^\i}tre, les difficult{\'e}s de leur apprentissage continuent de freiner leur diffusion au sein de la communaut{\'e}. Cet article {\'e}tudie plusieurs strat{\'e}gies, dont deux sont originales, pour estimer des mod{\`e}les de langue neuronaux, en se focalisant sur l'ajustement du pas d'apprentissage. Les r{\'e}sultats exp{\'e}rimentaux montrent, d'une part, l'importance que rev{\^e}t la conception de cette strat{\'e}gie.
D'autre part, le choix d'une strat{\'e}gie appropri{\'e}e permet d'apprendre efficacement des mod{\`e}les de langue donnant lieu {\`a} des r{\'e}sultats {\`a} l'{\'e}tat de l'art en traduction automatique, avec un temps de calcul r{\'e}duit et une faible influence des hyper-param{\`e}tres.},
  keywords  = {R{\'e}seaux de neurones, mod{\`e}les de langue n-gramme, traduction automatique statistique.},
}

@inproceedings{PubLIMSI-4818,
  author    = {Laurent, Antoine and Lamel, Lori},
  title     = {D{\'e}veloppement d'un syst{\`e}me de reconnaissance automatique de la parole en cor{\'e}en avec peu de ressources annot{\'e}es},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  series    = {Journ{\'e}es d'{\'e}tudes sur la Parole},
  month     = jun,
  address   = {Le Mans, France},
  abstract  = {Ce papier d{\'e}crit le d{\'e}veloppement d'un syst{\`e}me de reconnaissance automatique de la parole pour le cor{\'e}en. Le cor{\'e}en est une langue alpha-syllabique, parl{\'e}e par environ 78 millions de personnes dans le monde. Le d{\'e}veloppement de ce syst{\`e}me a {\'e}t{\'e} men{\'e} en utilisant tr{\`e}s peu de donn{\'e}es annot{\'e}es manuellement. Les mod{\`e}les acoustiques ont {\'e}t{\'e} adapt{\'e}s de mani{\`e}re non supervis{\'e}e en utilisant des donn{\'e}es provenant de diff{\'e}rents sites d'actualit{\'e}s cor{\'e}ens. Le corpus de d{\'e}veloppement contient des transcriptions approximatives des documents audio : il s'agit d'un corpus transcrit automatiquement et align{\'e} avec des donn{\'e}es provenant des m{\^e}mes sites Internet.
Nous comparons diff{\'e}rentes approches dans ce travail, {\`a} savoir, des mod{\`e}les de langue utilisant des unit{\'e}s diff{\'e}rentes pour l'apprentissage non supervis{\'e} et pour le d{\'e}codage (des caract{\`e}res et des mots avec des vocabulaires de diff{\'e}rentes tailles), l'utilisation de phon{\`e}mes et d'unit{\'e}s ``demi-syllabiques'' et deux approches diff{\'e}rentes d'apprentissage non supervis{\'e}.},
  keywords  = {Reconnaissance automatique de la parole, apprentissage non supervis{\'e}, langues sous dot{\'e}es},
}

@inproceedings{PubLIMSI-4842,
  author    = {Adda, Gilles and Besacier, Laurent and Couillault, Alain and Fort, Kar{\"e}n and Mariani, Joseph-Jean and de Mazancourt, Hugues},
  title     = {Where the data are coming from? {Ethics}, crowdsourcing and traceability for {Big Data} in {Human Language Technology}},
  booktitle = {Crowdsourcing and human computation multidisciplinary workshop},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {2},
  publisher = {CNRS},
  series    = {Crowdsourcing and human computation multidisciplinary workshop},
  month     = sep,
  address   = {Paris, France},
}

@inproceedings{PubLIMSI-4844,
  author    = {Boula de Mare{\"u}il, Philippe and Vitale, Marilisa and De Meo, Anna},
  title     = {An acoustic-perceptual approach to the prosody of {Chinese} and native speakers of {Italian} based on yes/no questions},
  booktitle = {International Conference on Speech Prosody (SP 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {648--652},
  month     = may,
  address   = {Dublin, Ireland},
  abstract  = {The present study investigates the prosody of yes/no questions (in comparison with statements) in Chinese learners and native speakers of Italian. Acoustic analyses and a perceptual test were performed, in order to identify the main trends in non-native productions. Results show the relevance of prosody, which differentiates elementary, intermediate and advanced Chinese learners of Italian.
Listening tests based on prosody transplantation also suggest that non-native segments with a native Italian prosody are rated as less accented than are native Italian segments with a non-native prosody. Similar trends were found, overall, in terms of question/assertion discrimination, confirming the relative importance of prosody. These findings could be helpful for teachers and learners of Italian as a foreign language.},
  keywords  = {linguistique},
}

@inproceedings{PubLIMSI-4877,
  author    = {Letard, Vincent and Rosset, Sophie and Illouz, Gabriel},
  title     = {A mapping-based approach for general formal human computer interaction using natural language},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2014},
  pages     = {34--40},
  series    = {Proceedings of the ACL 2014 Student Research Workshop},
  month     = jun,
  address   = {Baltimore, Maryland, USA},
  abstract  = {We consider the problem of mapping natural language written utterances expressing operational instructions to formal language expressions, applied to French and the R programming language. Developing a learning operational assistant requires the means to train and evaluate it, that is, a baseline system able to interact with the user. After presenting the guidelines of our work, we propose a model to represent the problem and discuss the fit of direct mapping methods to our task.
Finally, we show that, while not resulting in excellent scores, a simple approach seems to be sufficient to provide a baseline for an interactive learning system.},
  keywords  = {interactive assistant, natural language programming},
}

@inproceedings{PubLIMSI-4878,
  author    = {Letard, Vincent},
  title     = {Interaction homme-machine en domaine large {\`a} l'aide du langage naturel : une amorce par mise en correspondance},
  booktitle = {Rencontres des Etudiants Chercheurs en Informatique pour le Traitement Automatique des Langues (RECITAL 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {81--91},
  series    = {Actes de Recital 2014},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Cet article pr{\'e}sente le probl{\`e}me de l'association entre {\'e}nonc{\'e}s en langage naturel exprimant des instructions op{\'e}rationnelles et leurs expressions {\'e}quivalentes en langage formel. Nous l'appliquons au cas du fran{\c{c}}ais et du langage R. D{\'e}velopper un assistant op{\'e}rationnel apprenant, qui constitue notre objectif {\`a} long terme, requiert des moyens pour l'entra{\^\i}ner et l'{\'e}valuer, c'est-{\`a}-dire un syst{\`e}me initial capable d'interagir avec l'utilisateur. Apr{\`e}s avoir introduit la ligne directrice de ce travail, nous proposons un mod{\`e}le pour repr{\'e}senter le probl{\`e}me et discutons de l'ad{\'e}quation des m{\'e}thodes par mise en correspondance, ou mapping, {\`a} notre t{\^a}che.
Pour finir, nous montrons que, malgr{\'e} des scores modestes, une approche simple semble suffisante pour amorcer un tel syst{\`e}me interactif apprenant.},
  keywords  = {interaction homme machine, programmation en langage naturel, assistant interactif},
}

% PubLIMSI-4882: the abstract and keywords had accents exported as a letter followed by a backslash
% (undefined control sequences when typeset); they are restored as proper LaTeX accents.
@inproceedings{PubLIMSI-4882,
  author    = {Li{\'e}nard, Jean-Sylvain and Barras, Claude},
  title     = {Etude des voyelles et de la force de voix par analyse discriminante},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,CPU,TLP},
  year      = {2014},
  pages     = {9},
  month     = jun,
  address   = {Le Mans, France},
  abstract  = {L'effort vocal, repr{\'e}sent{\'e} ici par une mesure d'intensit{\'e} objective appel{\'e}e force de voix, est {\`a} la fois un facteur de variabilit{\'e} de la parole et une grandeur acoustique utilis{\'e}e par les interlocuteurs pour {\'e}changer diverses informations dans une situation donn{\'e}e. La pr{\'e}sente {\'e}tude s'int{\'e}resse aux indices acoustiques codant ces informations dans le spectre des voyelles. L'Analyse Discriminante est mise en {\oe}uvre d'une part pour identifier les voyelles et d'autre part pour estimer la force de voix en d{\'e}pit de leurs variabilit{\'e}s mutuelles.
Les r{\'e}sultats, {\'e}tablis sur deux bases de donn{\'e}es diff{\'e}rentes, montrent que la force de voix peut {\^e}tre estim{\'e}e avec pr{\'e}cision {\`a} partir du spectre des voyelles et que la connaissance pr{\'e}alable de la force de voix permet d'am{\'e}liorer la classification des voyelles.},
  keywords  = {Voix, parole, communication orale, voyelles fran{\c{c}}aises, effort vocal, analyse acoustique, analyse discriminante, interactions voix-parole.},
}

@inproceedings{PubLIMSI-4884,
  author    = {Fillon, Thomas and Simonnot, Jos{\'e}phine and Mifune, Marie-France and Khoury, St{\'e}phanie and Pellerin, Guillaume and Le Coz, Maxime and Amy de la Bret{\`e}que, Estelle and Doukhan, David and Fourer, Dominique and Rouas, Jean-Luc and Pinquier, Julien and Mauclair, Julie and Barras, Claude},
  title     = {{Telemeta}: An open-source web framework for ethnomusicological audio archives management and automatic analysis},
  booktitle = {International Digital Libraries for Musicology (DLfM 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {8},
  month     = sep,
  address   = {London, UK},
  abstract  = {The audio archives of the CNRS-Mus{\'e}e de l'Homme are among the most important collections of ethnomusicological recordings in Europe. Yet, as the number of collections increase and as new audio technologies arise, questions linked to the preservation, the archiving and the availability of these audio materials have arisen. With this in mind, since 2007, ethnomusicologists and engineers have joined their efforts to develop a scalable and collaborative web platform for managing and increasing access to digitized sound archives. This web platform is based on Telemeta, an open-source web audio framework dedicated to digital sound archives. Since 2011, the Telemeta framework has been deployed to hold the platform of the CNRS-Mus{\'e}e de l'Homme's audio archives, which are managed by the Research Center for Ethnomusicology.
This framework focuses on the enhanced and collaborative user experience in accessing audio items and their associated metadata. The architecture of Telemeta relies on TimeSide, an open-source audio processing framework written in Python and JavaScript languages, which provides decoding, encoding and streaming capabilities together with a smart embeddable HTML audio player. TimeSide can also produce various automatic annotation, segmentation and musicological analysis that have been developed in the interdisciplinary research project called DIADEMS. Furthermore it includes a set of audio analysis plug-ins and wraps several audio features extraction libraries. This paper introduces the Telemeta framework and discuss how, experimenting with this advanced database for ethnomusicology through the DIADEMS project, cutting-edge tools are being implemented to fit and encourage new ways to relate to sound libraries.},
}

@inproceedings{PubLIMSI-4904,
  author    = {Mariani, Joseph-Jean},
  title     = {Language Technologies in Support to Multilingualism},
  booktitle = {Linguistic and Cultural Diversity in Cyberspace},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {9},
  month     = jul,
  address   = {Yakutsk, Russian Federation},
}

% PubLIMSI-4900: booktitle and address are aligned with the file's other TALN 2014 entries.
@inproceedings{PubLIMSI-4900,
  author    = {Gong, Li and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title     = {Vers un d{\'e}veloppement plus efficace des syst{\`e}mes de traduction statistique : un peu de vert dans un monde de {BLEU}},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP,ILES},
  year      = {2014},
  pages     = {395--400},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Dans cet article, nous montrons comment l'utilisation conjointe d'une technique d'alignement de phrases parall{\`e}les {\`a} la demande et d'estimation de mod{\`e}les de traduction {\`a} la vol{\'e}e permet une r{\'e}duction en temps tr{\`e}s notable (jusqu'{\`a} 93\% dans nos exp{\'e}riences) par rapport {\`a} un syst{\`e}me {\`a}
l'{\'e}tat de l'art, tout en offrant un compromis en termes de qualit{\'e} tr{\`e}s int{\'e}ressant dans certaines configurations. En particulier, l'exploitation imm{\'e}diate de documents traduits permet de compenser tr{\`e}s rapidement l'absence d'un corpus de d{\'e}veloppement.},
  keywords  = {traduction automatique, d{\'e}veloppement de syst{\`e}mes},
}

@inproceedings{PubLIMSI-4901,
  author    = {Max, Aur{\'e}lien and Gong, Li and Yvon, Fran{\c{c}}ois},
  title     = {Construction (tr{\`e}s) rapide de tables de traduction {\`a} partir de grands bi-textes},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2014)},
  AERES     = {ACTN},
  GROUP     = {LIMSI,TLP,ILES},
  year      = {2014},
  pages     = {26--27},
  month     = jul,
  address   = {Marseille, France},
  abstract  = {Dans cet article de d{\'e}monstration, nous introduisons un logiciel permettant de construire des tables de traduction de mani{\`e}re beaucoup plus rapide que ne le font les techniques {\`a} l'{\'e}tat de l'art. Cette acc{\'e}l{\'e}ration notable est obtenue par le biais d'un double {\'e}chantillonnage : l'un permet la s{\'e}lection d'un nombre limit{\'e} de bi-phrases contenant les segments {\`a} traduire, l'autre r{\'e}alise un alignement {\`a} la vol{\'e}e de ces bi-phrases pour extraire des exemples de traduction.},
  keywords  = {traduction automatique, d{\'e}veloppement de syst{\`e}mes},
}

@inproceedings{PubLIMSI-4919,
  author    = {Rosset, Sophie and Dutrey, Camille and Clavel, Chlo{\'e} and Vasilescu, Ioana and Adda-Decker, Martine},
  title     = {A {CRF}-based approach to automatic disfluency detection in a {French} call-centre corpus},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {5},
  month     = sep,
  address   = {Singapore},
  abstract  = {In this paper, we present a Conditional Random Field based approach for automatic detection of edit disfluencies in a conversational telephone corpus in French.
We define disfluency patterns using both linguistic and acoustic features to perform disfluency detection. Two related tasks are considered: the first task aims at detecting the disfluent speech portion proper or reparandum, i.e. the portion to be removed if we want to improve the readability of transcribed data; in the second task, we aim at identifying also the corrected portion or repair which can be useful in follow-up discourse and dialogue analyses or in opinion mining. For these two tasks, we present comparative results as a function of the involved type of features (acoustic and/or linguistic). Generally speaking, best results are obtained by CRF models combining both acoustic and linguistic features.},
  keywords  = {disfluencies, conditional random fields, conversational speech, spontaneous speech.},
}

% PubLIMSI-4941: the proceedings editors were exported in the publisher field; they are stored as editor.
% The conference days (22-26 October) are kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4941,
  author    = {Niehues, Jan and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois and Waibel, Alexander},
  title     = {Combining techniques from different {NN}-based language models for machine translation},
  booktitle = {Conference of the Association for Machine Translation in the Americas (AMTA 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {222--233},
  editor    = {Al-Onaizan, Yaser and Simard, Michel},
  month     = oct,
  eventdate = {2014-10-22/2014-10-26},
  address   = {Vancouver, BC Canada},
  abstract  = {This paper presents two improvements of language models based on Restricted Boltzmann Machine (RBM) for large machine translation tasks. In contrast to other continuous space approach, RBM based models can easily be integrated into the decoder and are able to directly learn a hidden representation of the n-gram. Previous work on RBM-based language models do not use a shared word representation and therefore, they might suffer of a lack of generalization for larger contexts. Moreover, since the training step is very time consuming, they are only used for quite small corpora.
In this work we add a shared word representation for the RBM-based language model by factorizing the weight matrix. In addition, we propose an efficient and tailored sampling algorithm that allows us to drastically speed up the training process. Experiments are carried out on two German to English translation tasks and the results show that the training time could be reduced by a factor of 10 without any drop in performance. Furthermore, the RBM-based model can also be trained on large size corpora.},
  keywords  = {Statistical Machine Translation; Neural Networks},
}

% PubLIMSI-4968: "Fahad, Khan" had family and given name swapped in the export (TODO confirm against the paper).
@inproceedings{PubLIMSI-4968,
  author    = {Del Gratta, R. and Frontini, Francesca and Khan, Fahad and Mariani, Joseph-Jean and Soria, Claudia},
  title     = {The {LREMap} for under-resourced languages},
  booktitle = {Workshop CCURL 2014 : Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era (CCURL 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {6},
  series    = {Workshop CCURL 2014 : Collaboration and Computing for Under-Resourced Languages in the Linked Open Data Era},
  month     = may,
  address   = {Reykjavik, Iceland},
}

% PubLIMSI-5008: the conference days (4-5 December) are kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-5008,
  author    = {Do, Quoc-Khanh and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {Discriminative Adaptation of Continuous Space Translation Models},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {192--199},
  month     = dec,
  eventdate = {2014-12-04/2014-12-05},
  address   = {South Lake Tahoe, California, USA},
  abstract  = {In this paper we explore the adaptation ability of continuous space translation models (CSTMs). We consider the following practical situation: given a state-of-the-art and large scale SMT system containing a CSTM, the task is to adapt the CSTM to a new domain using a (relatively) small and unseen in-domain parallel corpus.
Our method relies on the definition of new discriminative loss functions for the CSTM that borrow from both the max-margin and pair-wise ranking approaches. In our experiments, the baseline and out-of-domain SMT system is preliminarily trained for the WMT news translation task, and CSTMs are to be adapted to the lecture translation task as defined by the Ted-talks dataset. Experimental results show that an improvement of 1.5 BLEU points can be achieved with the proposed adaptation method.},
  keywords  = {continuous space model, continuous space translation model, domain adaptation, discriminative loss function, max-margin algorithm, pair-wise ranking},
}

% PubLIMSI-5034: the conference days (4-5 December) are kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-5034,
  author    = {Gong, Li and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title     = {Incremental Development of Statistical Machine Translation Systems},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP,ILES},
  year      = {2014},
  pages     = {214--222},
  month     = dec,
  eventdate = {2014-12-04/2014-12-05},
  address   = {South Lake Tahoe, CA},
  abstract  = {Statistical Machine Translation produces results that make it a competitive option in most machine-assisted translation scenarios. However, these good results often come at a very high computational cost and correspond to training regimes which are unfit to many practical contexts, where the ability to adapt to users and domains and to continuously integrate new data (e.g. in post-edition contexts) are of primary importance. In this article, we show how these requirements can be met using a strategy for on-demand word alignment and model estimation. Most remarkably, our incremental system development framework is shown to deliver top quality translation performance even in the absence of tuning, and to surpass a strong baseline when performing online tuning.
All these results obtained with great computational savings as compared to conventional systems.},
  keywords  = {traduction automatique, post-{\'e}dition},
}

% PubLIMSI-4902: this is an edited volume; the names were exported as author with an empty editor field.
@book{PubLIMSI-4902,
  editor    = {Mariani, Joseph-Jean and Vetulani, Z.},
  title     = {Human Language Technology Challenges for Computer Science and Linguistics},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {550},
  publisher = {Springer},
}

@inproceedings{PubLIMSI-4684,
  author    = {Wisniewski, Guillaume and P{\'e}cheux, Nicolas and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {{LIMSI} Submission for {WMT14} {QE} Task},
  booktitle = {Workshop on Machine Translation (WMT 2014)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {7},
  month     = jun,
  address   = {Baltimore, USA},
  abstract  = {This paper describes LIMSI participation to the WMT14 Shared Task on Quality Estimation; we took part to the word-level quality estimation task for English to Spanish translations. Our system relies on a random forest classifier, an ensemble method that has been shown to be very competitive for this kind of task, when only a few dense and continuous features are used. Notably, only 16 features are used in our experiments. These features describe, on the one hand, the quality of the association between the source sentence and each target word and, on the other hand, the fluency of the hypothesis. Since the evaluation criterion is the f1 measure, a specific tuning strategy is proposed to select the optimal values for the hyper-parameters.
Overall, our system achieves a 0.67 f1 score on a randomly extracted test set.},
}

@inproceedings{PubLIMSI-4843,
  author    = {Adda, Gilles},
  title     = {L'apport de l'expertise scientifique au d{\'e}veloppement de ressources et {\`a} l'{\'e}valuation en traitement des donn{\'e}es multim{\'e}dias},
  booktitle = {S{\'e}minaire DGA Traitement de l'information multim{\'e}dia et fusion d'information (TIM 2014)},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  month     = jul,
  address   = {Paris, France},
}

@incollection{PubLIMSI-4380,
  author    = {Yvon, Fran{\c{c}}ois},
  title     = {Machine Translation},
  booktitle = {Oxford Bibliographies in Linguistics},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {28p},
  publisher = {Oxford University Press},
  address   = {New York},
  ISBN      = {9780199772810},
  keywords  = {Machine Translation, Bibliography},
}

@incollection{PubLIMSI-4574,
  author    = {Langlais, Philippe and Yvon, Fran{\c{c}}ois},
  title     = {Issues in analogical inference over sequences of symbols: a case study on proper name transliteration},
  booktitle = {Computational approaches to analogical reasoning: current trends},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2014},
  pages     = {59--82},
  publisher = {Springer-Verlag Berlin Heidelberg},
  ISBN      = {978-3-642-54515-3},
  abstract  = {Formal analogies, that is, proportional analogies involving relations at a formal level (e.g. 'cordially' is to 'cordial' as 'appreciatively' is to 'appreciative') have a long history in Linguistics. They can accommodate a wide variety of linguistic data without resorting to ad hoc representations and are inherently good at capturing long range dependencies between data. Unfortunately, applying analogical learning on top of formal analogy to current Natural Language Processing (NLP) tasks, which often involve massive amount of data, is quite challenging. In this chapter, we draw on our previous works and identify some issues that remain to be addressed for formal analogy to stand by itself in the landscape of NLP.
As a case study, we monitor our current implementation of analogical learning on a task of transliterating English proper names into Chinese.},
  keywords = {Analogy},
}

% The conference city had lost its accented letter; restored as a LaTeX escape.
@incollection{PubLIMSI-4788,
  author = {Adda-Decker, Martine and Lamel, Lori and Adda, Gilles and Lavergne, Thomas},
  title = {A first {LVCSR} system for {Luxembourgish}},
  booktitle = {Human Language Technology Challenges for Computer Science and Linguistics - 5th Language and Technology Conference, LTC 2011, Pozna{\'n}, Poland, November 25-27, 2011, Revised Selected Papers},
  AERES = {OS},
  GROUP = {LIMSI,TLP,ILES},
  year = {2014},
  pages = {479--490},
  publisher = {Springer International Publishing},
  ISBN = {978-3-319-08957-7},
  abstract = {Luxembourgish is embedded in a multilingual context on the divide between Romance and Germanic cultures and remains one of Europe's low-resourced languages. We describe our efforts in building a large vocabulary ASR system for such a ``minority'' language without resorting to any prior transcribed audio training data. Instead, acoustic models are derived from major European languages. Furthermore, most Luxembourgish written sources include significant parts in other languages. This poses specific challenges to Language Model estimation. Some scientific and technological issues addressed include: (i) how to build acoustic models if no labeled acoustic training data are available for the under-resourced target language? (ii) how to make use of the new system to accelerate resource production for the target language? (iii) how to build a vocabulary and a language model with multilingual written texts? (iv) how to determine the ``best'' phonemic inventory for ASR?
First ASR results illustrate the accuracy of the various sets of monolingual and multilingual acoustic models and what these suggest concerning language typology issues.},
  keywords = {Forced alignment; acoustic modeling; multilingual models; Luxembourgish; Germanic languages, Romance language},
}

% Accented author names use brace-wrapped LaTeX escapes so classic BibTeX sorts and labels them correctly.
@incollection{PubLIMSI-4789,
  author = {Adda, Gilles and Fort, Kar{\"e}n and Sagot, Beno{\^\i}t and Mariani, Joseph-Jean and Couillault, Alain},
  title = {Crowdsourcing for language resource development: criticisms about {Amazon Mechanical Turk} overpowering use},
  booktitle = {Human Language Technology Challenges for Computer Science and Linguistics - 5th Language and Technology Conference, LTC 2011, Pozna{\'n}, Poland, November 25-27, 2011, Revised Selected Papers},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2014},
  pages = {303--314},
  publisher = {Springer International Publishing},
  ISBN = {978-3-319-08957-7},
  abstract = {This article is a position paper about Amazon Mechanical Turk, the use of which has been steadily growing in language processing in the past few years. According to the mainstream opinion expressed in articles of the domain, this type of on-line working platforms allows to develop quickly all sorts of quality language resources, at a very low price, by people doing that as a hobby. We shall demonstrate here that the situation is far from being that ideal.
Our goal here is manifold: 1- to inform researchers, so that they can make their own choices, 2- to develop alternatives with the help of funding agencies and scientific associations, 3- to propose practical and organizational solutions in order to improve language resources development, while limiting the risks of ethical and legal issues without letting go price or quality, 4- to introduce an Ethics and Big Data Charter for the documentation of language resources.},
  keywords = {Amazon Mechanical Turk, language resources, ethics},
}

@incollection{PubLIMSI-4903,
  author = {Mariani, Joseph-Jean},
  title = {How Language Technologies support Multilingualism},
  booktitle = {NET.LANG : Towards the Multilingual Cyberspace, Russian Edition},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2014},
  pages = {148--168},
  publisher = {C\&F},
  ISBN = {978-2-915825-09-1},
}

@incollection{PubLIMSI-4967,
  author = {Mariani, Joseph-Jean and Francopoulo, Gil},
  title = {Language Matrices \& the Language Resource Impact Factor},
  booktitle = {Language Production, Cognition, and the Lexicon, Festschrift in honour of Michael Zock},
  AERES = {OS},
  GROUP = {LIMSI,TLP,IMMI},
  year = {2014},
  pages = {20},
  publisher = {Springer},
  ISBN = {978-3-319-08042-0},
}

@incollection{PubLIMSI-5071,
  author = {Vetulani, Z.
and Mariani, Joseph-Jean},
  title = {Preface},
  booktitle = {Human Language Technology Challenges for Computer Science and Linguistics},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2014},
  pages = {IV--X},
  publisher = {Springer},
  ISBN = {978-3-319-08957-7},
}

% Workshop paper: typed as inproceedings so that booktitle and pages are actually printed
% (the misc type ignores both). Abstract words split by the export have been rejoined.
@inproceedings{PubLIMSI-4727,
  author = {P{\'e}cheux, Nicolas and Gong, Li and Do, Quoc Khanh and Marie, Benjamin and Ivanishcheva, Yulia and Allauzen, Alexandre and Lavergne, Thomas and Niehues, Jan and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title = {{LIMSI} at {WMT'14} Medical Translation Task},
  booktitle = {9th Workshop on Statistical Machine Translation (WMT 2014)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,ILES},
  year = {2014},
  pages = {8},
  institution = {ACL},
  abstract = {This paper describes LIMSI's submission to the first medical translation task at WMT14. We report results for English-French on the subtask of sentence translation from summaries of medical articles. Our main submission uses a combination of NCODE (n-gram-based) and MOSES (phrase-based) output and continuous-space language models used in a post-processing step for each system. Other characteristics of our submission include: the use of sampling for building MOSES phrase table; the implementation of the vector space model proposed by Chen et al. (2013); adaptation of the POS-tagger used by NCODE to the medical domain; and a report of error analysis based on the typology of Vilar et al.
(2006).},
  keywords = {traduction automatique},
}

% Empty pages field removed; hyphens lost at line breaks in the abstract have been restored.
@inproceedings{IS14-babel,
  author = {Viet-Bac Le and Lori Lamel and Abdel Messaoudi and William Hartmann and Jean-Luc Gauvain and Cecile Woehrling and Julien Despres and Anindya Roy},
  title = {Developing {STT} and {KWS} systems using limited language resources},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2014)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2014},
  month = {14-19 September},
  address = {Singapore},
  url = {http://www-tlp.limsi.fr/public/IS140857.pdf},
  abstract = {This paper presents recent progress in developing speech-to-text (STT) and keyword spotting (KWS) systems for the 2014 IARPA-Babel evaluation. Systems have been developed for the limited language pack condition for four of the five development languages in this program phase: Assamese, Bengali, Haitian Creole and Zulu. The systems have several novel characteristics that support rapid development of KWS systems. On the STT side different acoustic units are explored based on phonemic or graphemic representations, and system combination is used to improve STT performance. The acoustic models are trained on only 10 hours of speech data with manual transcriptions, completed with unsupervised training on additional untranscribed data. Both word and subword units (morphologically decomposed, syllables, phonemes) are used for KWS. The KWS systems are based on the multi-hypotheses produced by a consensus network decoding or searching word lattices. The word error rates of the individual STT systems are on the order of 50-60\%, and the KWS systems obtain Maximum Term Weighted Values ranging from 30-45\% for all keywords (in-vocabulary and out-of-vocabulary (OOV)).
Sub-word units are shown to be successful at locating some of the OOV keywords, and system combination improves system performance.},
  keywords = {STT, KWS, semi-supervised training, lattice, consensus network, sub-word lexical units, Morfessor},
}

% Empty pages field removed; the broken hyphenation in the abstract has been repaired.
@inproceedings{IS14-hartmann,
  author = {William Hartmann and Viet-Bac Le and Abdel Messaoudi and Lori Lamel and Jean-Luc Gauvain},
  title = {Comparing Decoding Strategies for Subword-based Keyword Spotting in Low-Resourced Languages},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2014)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2014},
  month = {14-19 September},
  address = {Singapore},
  url = {http://www-tlp.limsi.fr/public/IS141176.pdf},
  abstract = {For languages with limited training resources, out-of-vocabulary (OOV) words are a significant problem, both for transcription and keyword spotting. This paper investigates the use of subword lexical units for keyword spotting. Three strategies for using the sub-word units are explored: 1) converting word-based lattices to subword lattices after decoding, 2) performing a separate decoding for each subword type, and 3) a single decoding using all possible subword units. In these experiments, the best performance is achieved by carrying out a separate decoding for each subword type. Further gains are attained through system combination. We also find that ignoring word boundaries improves the detection of OOV keywords without significantly impacting in-vocabulary keyword detection. Results are presented on four languages from the IARPA Babel Program (Haitian Creole, Assamese, Bengali, and Zulu).
},
  keywords = {keyword search, spoken term detection, OOV, sub-word lexical units, low resource LVCSR },
}

% Placeholder fields with empty values (pages, abstract) were removed from the two Eusipco entries.
@inproceedings{eusipco14hartmann,
  author = {William Hartmann and Lori Lamel},
  title = {{Efficient Rule Scoring for Improved Grapheme-Based Lexicons}},
  GROUP = {LIMSI,TLP},
  booktitle = {Eusipco 2014},
  month = {Sep 1-5},
  year = {2014},
  address = {Lisbon, Portugal},
  url = {http://www-tlp.limsi.fr/public/eusipco14hartmann.pdf},
}

@inproceedings{eusipco14thiago,
  author = {Fraga Da Silva, Thiago Henrique and Gauvain, Jean-Luc and Lori Lamel},
  title = {{Speech Recognition of Multiple Accented English Data Using Acoustic Model Interpolation}},
  GROUP = {LIMSI,TLP},
  booktitle = {Eusipco 2014},
  month = {Sep 1-5},
  year = {2014},
  address = {Lisbon, Portugal},
  url = {http://www-tlp.limsi.fr/public/eusipco14thiago.pdf},
}

@inproceedings{vasilescu2014larp,
  author = {Ioana Chitoran and Ioana Vasilescu and Bianca Vieru and Lori Lamel},
  title = {{Analyzing linguistic variation in a Romanian speech corpus through ASR errors}},
  GROUP = {LIMSI,TLP},
  booktitle = {LARP7 -- Laboratory Approaches to Romance Phonology VII},
  MONTH = {Sept 3-5},
  year = {2014},
  address = {Aix-en-Provence, France},
  url = {http://www-tlp.limsi.fr/public/LARP14_upload_2.pdf},
}

% The role note that followed the first editor's name was removed: BibTeX parsed it as part of the surname.
@InProceedings{LAVERGNE14.732,
  author = {Thomas Lavergne and Gilles Adda and Martine Adda-Decker and Lori Lamel},
  title = {Automatic Language Identity Tagging on Word and Sentence-Level in Multilingual Text Sources: a Case-Study on {Luxembourgish}},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association
(ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_732_Paper.pdf},
  language = {english},
}

% Surnames with a particle or several words are written in the comma form so they parse correctly.
@InProceedings{COUILLAULT14.424,
  author = {Alain Couillault and Kar{\"e}n Fort and Gilles Adda and de Mazancourt, Hugues},
  title = {Evaluating Corpora Documentation with regards to the {Ethics and Big Data Charter}},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_424_Paper.pdf},
  language = {english},
}

@InProceedings{GALIBERT14.1027,
  author = {Olivier Galibert and Jeremy Leixa and Gilles Adda and Khalid Choukri and Guillaume Gravier},
  title = {The {ETAPE} Speech Processing Evaluation},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_1027_Paper.pdf},
  language = {english},
}

@InProceedings{BENJANNET14.960,
  author = {Ben Jannet, Mohamed and Martine Adda-Decker and Olivier Galibert and Juliette Kahn and Sophie Rosset},
  title = {{ETER}: a New Metric for the Evaluation of Hierarchical Named Entity Recognition},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation
(LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_960_Paper.pdf},
  language = {english},
}

% NOTE(review): the second author's given name was aligned with the spelling used for the same person elsewhere in this file; confirm against the paper.
@InProceedings{PCHEUX14.735,
  author = {Nicolas P{\'e}cheux and Alexandre Allauzen and Fran{\c{c}}ois Yvon},
  title = {Rule-based Reordering Space in Statistical Machine Translation},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_735_Paper.pdf},
  language = {english},
}

@InProceedings{ROY14.751,
  author = {Anindya Roy and Camille Guinaudeau and Herve Bredin and Claude Barras},
  title = {{TVD}: A Reproducible and Multiply Aligned {TV} Series Dataset},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_751_Paper.pdf},
  language = {english},
}

@InProceedings{APIDIANAKI14.475,
author = {Marianna Apidianaki and Emilia Verzeni and Diana McCarthy},
  title = {Semantic Clustering of Pivot Paraphrases},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_475_Paper.pdf},
  language = {english},
}

% Trailing period removed from the title: styles add their own punctuation.
@InProceedings{MARIANI14.945,
  author = {Joseph Mariani and Christopher Cieri and Gil Francopoulo and Patrick Paroubek and Marine Delaborde},
  title = {Facing the Identification Problem in Language-Related Scientific Data Analysis},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_945_Paper.pdf},
  language = {english},
}

@InProceedings{WISNIEWSKI14.1115,
  author = {Guillaume Wisniewski and Natalie K{\"u}bler and Fran{\c{c}}ois Yvon},
  title = {A Corpus of Machine Translation Errors Extracted from Translation Students Exercises},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and
Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_1115_Paper.pdf},
  language = {english},
}

@InProceedings{SOURY14.947,
  author = {Mariette Soury and Laurence Devillers},
  title = {Smile and Laughter in Human-Machine Interaction: a Study of Engagement},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_947_Paper.pdf},
  language = {english},
}

@InProceedings{GORYAINOVA14.383,
  author = {Maria Goryainova and Cyril Grouin and Sophie Rosset and Ioana Vasilescu},
  title = {Morpho-Syntactic Study of Errors from Speech Recognition System},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_383_Paper.pdf},
  language = {english},
}

% The title carried spreadsheet-style quoting (outer quotes plus doubled inner quotes); replaced with LaTeX quotes.
@InProceedings{LUZZATI14.771,
  author = {Daniel Luzzati and Cyril Grouin and Ioana Vasilescu and Martine Adda-Decker and Eric Bilinski and Nathalie Camelin and Juliette Kahn and Carole Lailler and Lori Lamel and Sophie Rosset},
  title = {Human Annotation of {ASR} Error Regions: is ``gravity'' a Sharable Concept for Human Annotators?},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_771_Paper.pdf},
  language = {english},
}

% Author list repairs: one mis-encoded name rebuilt with LaTeX accents (NOTE(review): confirm spelling against the paper),
% particle surnames moved to the comma form, and a lowercase given name capitalised so it is not read as a von-part.
@InProceedings{REHM14.405,
  author = {Georg Rehm and Hans Uszkoreit and Sophia Ananiadou and N{\'u}ria Bel and Audron{\.e} Bielevi{\v{c}}ien{\.e} and Lars Borin and Ant{\'o}nio Branco and Gerhard Budin and Nicoletta Calzolari and Walter Daelemans and Radovan Garab{\'\i}k and Marko Grobelnik and Carmen Garcia-Mateo and van Genabith, Josef and Jan Hajic and Inma Hernaez and John Judge and Svetla Koeva and Simon Krek and Cvetana Krstev and Krister Linden and Bernardo Magnini and Joseph Mariani and John McNaught and Maite Melero and Monica Monachini and Asuncion Moreno and Jan Odijk and Maciej Ogrodniczuk and Piotr Pezik and Stelios Piperidis and Adam Przepi{\'o}rkowski and Eir{\'\i}kur R{\"o}gnvaldsson and Michael Rosner and Bolette Pedersen and Inguna Skadina and De Smedt, Koenraad and Marko Tadic and Paul Thompson and Dan Tufis and Tam{\'a}s V{\'a}radi and Andrejs Vasiljevs and Kadri Vider and Jolanta Zabarskaite},
  title = {The Strategic Impact of {META-NET} on the Regional, National and International Level},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and
Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_405_Paper.pdf},
  language = {english},
}

@InProceedings{MARIANI14.1228,
  author = {Joseph Mariani and Patrick Paroubek and Gil Francopoulo and Olivier Hamon},
  title = {Rediscovering 15 Years of Discoveries in Language Resources and Evaluation: The {LREC} Anthology Analysis},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  url = {http://www-tlp.limsi.fr/public/lrec14_1228_Paper.pdf},
  language = {english},
}

% Month given as the standard macro so the bibliography style controls its spelling and language.
@inproceedings{Laurent14j1,
  Address = {Le Mans, France},
  Author = {Laurent, A. and Camelin, N. and Raymond, C.},
  Booktitle = {JEP 2014, Journ{\'e}es d'Etudes sur la Parole},
  Month = jun,
  url = {http://www-tlp.limsi.fr/public/Laurent14j1.pdf},
  Title = {Boosting de bonza{\"\i} pour la combinaison efficace de descripteurs : application {\`a} l'identification du r{\^o}le du locuteur},
  Year = {2014},
}

@inproceedings{Laurent14j2,
  Address = {Le Mans, France},
  Author = {Laurent, A. and Guinaudeau, C. and Roy, A.},
  Booktitle = {JEP 2014, Journ{\'e}es d'Etudes sur la Parole},
  Abstract = {Cet article d\'ecrit les m\'ethodes mises en place pour permettre l'analyse d'un corpus compos\'e de documents audiovisuels diffus\'es au cours des 80 derni\`eres ann\'ees : le corpus MATRICE. Nous proposons une exploration des donn\'ees permettant de mettre en \'evidence les diff\'erents th\`emes et \'ev\`enements abord\'es dans le corpus.
Cette exploration est, dans un premier temps, effectuée sur des notices documentaires produites manuellement par les documentalistes de l'Institut National de l'Audiovisuel. Puis, nous montrons, gr\^ace \`a une \'etude qualitative et une technique de clustering automatique, que les transcriptions automatiques permettent \'egalement d'effectuer une analyse du corpus faisant \'emerger des th\`emes coh\'erents avec les donn\'ees trait\'ees.},
  Month = jun,
  Title = {Analyse du corpus {MATRICE} : exploration et classification automatique d'archives audiovisuelles de 1930 {\`a} 2012},
  url = {http://www-tlp.limsi.fr/public/Laurent14j2.pdf},
  Year = {2014},
}

@inproceedings{Laurent14j3,
  Address = {Le Mans, France},
  Author = {Laurent, A. and Lamel, L.},
  Booktitle = {JEP 2014, Journ{\'e}es d'Etudes sur la Parole},
  Month = jun,
  Abstract = {Ce papier décrit le développement d'un système de reconnaissance automatique de la parole pour le coréen. Le coréen est une langue alpha-syllabique, parlée par environ 78 millions de personnes dans le monde. Le développement de ce système a été mené en utilisant très peu de données annotées manuellement. Les modèles acoustiques ont été adaptés de manière non supervisée en utilisant des données provenant de différents sites d'actualités coréens. Le corpus de développement contient des transcriptions approximatives des documents audio : il s'agit d'un corpus transcrit automatiquement et aligné avec des données provenant des mêmes sites Internet.
Nous comparons différentes approches dans ce travail, à savoir, des modèles de langue utilisant des unités différentes pour l'apprentissage non supervisé et pour le décodage (des caractères et des mots avec des vocabulaires de différentes tailles), l'utilisation de phonèmes et d'unités ``demi-syllabiques'' et deux approches différentes d'apprentissage non supervisé.},
  Title = {D{\'e}veloppement d'un syst{\`e}me de reconnaissance automatique de la parole en cor{\'e}en avec peu de ressources annot{\'e}es},
  url = {http://www-tlp.limsi.fr/public/Laurent14j3.pdf},
  Year = {2014},
}

@inproceedings{Laurent14j4,
  Address = {Le Mans, France},
  Author = {Bonneau-Maynard, H. and Segal, N. and Bilinski, E. and Gauvain, J.-L. and Gong, L. and Lamel, L. and Laurent, A. and Yvon, F. and Despres, J. and Josse, Y. and Le, V.-B.},
  Booktitle = {JEP 2014, Journ{\'e}es d'Etudes sur la Parole},
  Month = jun,
  Title = {Traduction de la parole dans le projet {RAPMAT}},
  url = {http://www-tlp.limsi.fr/public/Laurent14j4.pdf},
  Year = {2014},
}

@inproceedings{Laurent14d,
  Author = {Laurent, Antoine and Hartmann, William and Lamel, Lori},
  Year = {2014},
  Title = {Unsupervised Acoustic Model Training for the {Korean} Language},
  Booktitle = {ISCSLP at Interspeech 2014, 15th Annual Conference of the International Speech Communication Association},
  Abstract = {This paper investigates unsupervised training strategies for the Korean language in the context of the DGA RAPID Rapmat project. As with previous studies, we begin with only a small amount of manually transcribed data to build preliminary acoustic models. Using the initial models, a larger set of untranscribed audio data is decoded to produce approximate transcripts. We compare both GMM and DNN acoustic models for both the unsupervised transcription and the final recognition system. While the DNN acoustic models produce a lower word error rate on the test set, training on the transcripts from the GMM system provides the best overall performance.
We also achieve better performance by expanding the original phone set. Finally, we examine the efficacy of automatically building a test set by comparing system performance both before and after manually correcting the test set.},
  address = {Singapore},
  url = {http://www-tlp.limsi.fr/public/Laurent14d.pdf},
  month = sep,
}

@inproceedings{Laurent14e,
  Author = {Laurent, A. and Camelin, N. and Raymond, C.},
  Year = {2014},
  Booktitle = {Interspeech 2014, 15th Annual Conference of the International Speech Communication Association},
  Abstract = {In this article, we tackle the problem of speaker role detection from broadcast news shows. In the literature, many proposed solutions are based on the combination of various features coming from acoustic, lexical and semantic information with a machine learning algorithm. Many previous studies mention the use of boosting over decision stumps to combine efficiently these features. In this work, we propose a modification of this state-of-the-art machine learning algorithm changing the weak learner (decision stumps) by small decision trees, denoted bonsai trees. Experiments show that using bonsai trees as weak learners for the boosting algorithm largely improves both system error rate and learning time.},
  Title = {Boosting bonsai trees for efficient features combination : application to speaker role identification},
  address = {Singapore},
  month = sep,
  url = {http://www-tlp.limsi.fr/public/Laurent14e.pdf},
}

@inproceedings{Laurent14b,
  Author = {Bredin, H. and Laurent, A. and Sarkar, A. and Le, V.-B. and Barras, Claude and Rosset, Sophie},
  Year = {2014},
  Booktitle = {Odyssey 2014, The Speaker and Language Recognition Workshop},
  Abstract = {We address the problem of named speaker identification in TV broadcast which consists in answering the question ``who speaks when?'' with the real identity of speakers, using person names automatically obtained from speech transcripts.
While existing approaches rely on a first speaker diarization step followed by a local name propagation step to speaker clusters, we propose a unified framework called person instance graph where both steps are jointly modeled as a global optimization problem, then solved using integer linear programming. Moreover, when available, acoustic speaker models can be added seamlessly to the graph structure for joint named and acoustic speaker identification -- leading to a 10\% error decrease (from 45\% down to 35\%) over a state-of-the-art i-vector speaker identification system on the REPERE TV broadcast corpus.},
  Title = {Person Instance Graphs for Named Speaker Identification in {TV} Broadcast},
  address = {Joensuu, Finland},
  month = jun,
  url = {http://www-tlp.limsi.fr/public/Laurent14b.pdf},
}

@inproceedings{vasilescu2014,
  author = {Ioana Vasilescu and Bianca Vieru and Lori Lamel},
  title = {{Exploring Pronunciation Variants for Romanian Speech-to-Text Transcription}},
  GROUP = {LIMSI,TLP},
  booktitle = {SLTU-2014},
  year = {2014},
  pages = {161--168},
  abstract = {Speech processing tools were applied to investigate morpho-phonetic trends in contemporary spoken Romanian, with the objective of improving the pronunciation dictionary and more generally, the acoustic models of a speech recognition system. As no manually transcribed audio data were available for training, language models were estimated on a large text corpus and used to provide indirect supervision to train acoustic models in a semi-supervised manner. Automatic transcription errors were analyzed in order to gain insights into language specific features for both improving the current performance of the system and to explore linguistic issues. Two aspects of the Romanian morpho-phonology were investigated based on this analysis: the deletion of the masculine definite article -l and the secondary palatalization of plural nouns and adjectives and of 2nd person indicative of verbs.
},
  url = {http://www-tlp.limsi.fr/public/vasilescu-sltu2014.pdf}
}

@inproceedings{sltu2014lux,
  author = {Martine Adda-Decker AND Lori Lamel AND Gilles Adda},
  title = {{Speech Alignment and Recognition Experiments for Luxembourgish}},
  GROUP = {LIMSI,TLP},
  booktitle = {SLTU-2014},
  year = {2014},
  pages = {53--60},
  abstract = {Luxembourgish, embedded in a multilingual context on the divide between Romance and Germanic cultures, remains one of Europe's under-described languages. In this paper, we propose to study acoustic similarities between Luxembourgish and major contact languages (German, French, English) with the help of automatic speech alignment and recognition systems. Experiments were run using monolingual acoustic models trained on German, French and English together with (i) "multilingual" models trained on pooled speech data from these three languages, or with (ii) native Luxembourgish acoustic models from 1200 hours of untranscribed Luxembourgish audio data using unsupervised methods. We investigated whether Luxembourgish was globally better represented by one of the individual languages, by the multilingual model or by the native (unsupervised) model. While German provides globally the best acoustic match for native Luxembourgish, detailed analyses reveal language-specific preferences, in particular English and Luxembourgish models are preferred on diphthongs. The first ASR results illustrate the accuracy of the various sets of supervised monolingual and multilingual models versus unsupervised Luxembourgish acoustic models.
The ASR word error rate is progressively reduced from 60 to 25\% on the development data set by unsupervised training of larger context-dependent models on increasing amounts of audio data.},
  url = {http://www-tlp.limsi.fr/public/sltu14lux54-61.pdf}
}

%% SLTU 2014 -- Korean speech recognition with little annotated data
@inproceedings{sltu14laurent,
  author = {Antoine Laurent AND Lori Lamel},
  title = {{Development of a Korean Speech Recognition System With Little Annotated Data}},
  GROUP = {LIMSI,TLP},
  booktitle = {SLTU-2014},
  year = {2014},
  pages = {147--153},
  abstract = {This paper investigates the development of a speech-to-text transcription system for the Korean language in the context of the DGA RAPID Rapmat project. Korean is an alphasyllabary language spoken by about 78 million people worldwide. As only a small amount of manually transcribed audio data were available, the acoustic models were trained on audio data downloaded from several Korean websites in an unsupervised manner, and the language models were trained on web texts. The reported word and character error rates are estimates, as development corpus used in these experiments was also constructed from the untranscribed audio data, the web texts and automatic transcriptions. Several variants for unsupervised acoustic model training were compared to assess the influence of the vocabulary size (200k vs 2M), the type of language model (words vs characters), the acoustic unit (phonemes vs half-syllables), as well as incremental batch vs iterative decoding of the untranscribed audio corpus. },
  url = {http://www-tlp.limsi.fr/public/sltu14laurent147-153.pdf}
}

%% SLTU 2014 -- cross-word sub-word units for low-resource keyword spotting
@inproceedings{sltu14hartmann,
  author = {William Hartmann AND Lori Lamel AND Jean-Luc Gauvain},
  title = {{Cross-Word Sub-Word Units for Low-Resource Keyword Spotting}},
  GROUP = {LIMSI,TLP},
  booktitle = {SLTU-2014},
  year = {2014},
  pages = {112--117},
  abstract = {We investigate the use of sub-word lexical units for the detection of out-of-vocabulary (OOV) keywords in the keyword spotting task.
Sub-word units based on morphological decomposition and character ngrams are compared. In particular, we examine the benefit of sub-word units that cross word boundaries. Experiments are performed on the IARPA Babel Turkish dataset. Our results demonstrate that cross-word subword units achieve similar performance on OOV keywords as other types of sub-word units, but can be combined to produce further gains. We also show that sub-word units can be used to improve detection of in-vocabulary keywords. System combination provides a 18\% relative gain in ATWV with the best two systems, and 25\% with the best three systems.},
  url = {http://www-tlp.limsi.fr/public/sltu14hartmann113-118.pdf}
}

%% SLTU 2014 -- speech-to-text development for Slovak
@inproceedings{sltu14ctdo,
  author = {Cong-Thanh Do AND Lori Lamel AND Jean-Luc Gauvain},
  title = {{Speech-to-Text Development for Slovak, a Low-Resourced Language}},
  GROUP = {LIMSI,TLP},
  booktitle = {SLTU-2014},
  year = {2014},
  pages = {176--182},
  abstract = {Development of an automatic speech recognition (ASR) system for low-resourced languages is an important research topic in ASR. This paper reports on the development of a speech-to-text (STT) system targeting broadcast news and broadcast conversation transcription for the low-resourced Slovak language. Context-dependent acoustic models are trained without any manually transcribed audio data via cross-language transfer and unsupervised training. In addition, a pronunciation dictionary for Slovak language is created using efficient rule-based pronunciation modeling. For language modeling, large N-gram language models were estimated on 63M words of texts downloaded from the Internet. The system uses MLP (multilayer perceptron) features imported from English which are concatenated with cepstral PLP (perceptual linear prediction) and F0 (pitch) features. These techniques were applied to develop a Slovak STT system with performance similar to that obtained by state-of-the-art systems for other languages.
Furthermore, we propose to reduce the dimension of the MLP+PLP+F0 features from 81 to 50, using principal component analysis (PCA), in order to reduce the redundancy between the MLP and the PLP+F0 features. This feature reduction makes it possible to reduce the word error rate (WER) and the recognition time while reducing the CMLLR adaptation time by a factor of 3.},
  url = {http://www-tlp.limsi.fr/public/sltu14ctdo177-183.pdf}
}

%% Multimedia Tools and Applications 2014 -- lexical speaker identification in TV shows
@article{roy2014lexical,
  author = {Anindya Roy AND Herv\'e Bredin AND William Hartmann AND Viet Bac Le AND Claude Barras AND Jean-Luc Gauvain},
  title = {{Lexical speaker identification in TV shows}},
  GROUP = {LIMSI,TLP},
  journal = {Multimedia Tools and Applications},
  year = {2014},
  pages = {1--20},
  publisher = {Springer US},
  url = {http://www-tlp.limsi.fr/public/roy_mta2014.pdf}
}

%%%%%%%%%% 2013 section

%% ICASSP 2013 -- feedforward vs. recurrent neural network language models
%% (month was a raw date range "2013-05-26 / 2013-05-31"; address was "Vancouver - CA")
@inproceedings{PubLIMSI-6825,
  author = {Sundermeyer, M. AND Oparin, Ilya AND Gauvain, Jean-Luc AND Freiberg, B. AND Schluter, R. AND Ney, Hermann},
  title = {{Comparison of feedforward and recurrent neural network language models}},
  booktitle = {{IEEE International Conference on Acoustics, Speech, and Signal Processing}},
  year = {2013},
  pages = {8430--8434},
  month = {May},
  address = {Vancouver, Canada},
  keywords = {Automatic speech recognition, feedforward neural networks, recurrent neural networks},
}

%% Symposium talk 2013 -- ICT and language diversity
@inproceedings{PubLIMSI-4633,
  author = {Mariani, Joseph-Jean},
  title = {{ICT Enabling Language Diversity}},
  booktitle = {{Symposium National Languages in Higher Education and Science}},
  year = {2013},
  pages = {4 pp},
  address = {Athens, Greece},
  keywords = {Multilingualism, Language Technologies},
}

%% INT 2013 workshop -- CONTACT multimodal corpus
@inproceedings{PubLIMSI-5053,
  author = {Mbengue, Cheikh AND Gomez Jauregui, David Antonio AND Martin, Jean-Claude},
  title = {{CONTACT: A Multimodal Corpus for Studying Expressive Styles and Informing the Design of Individualized Virtual Narrators}},
  booktitle = {{6th Workshop on Intelligent Narrative Technologies}},
  AERES = {ACTI},
  GROUP
= {LIMSI,CPU,TLP},
  year = {2013},
  pages = {86--88},
  address = {Boston, USA},
}

%% ICORR 2013 -- high-level functions for an assistive robot
@inproceedings{PubLIMSI-4071,
  author = {Lebec, Olivier AND Ben Ghezala, Mohamed Walid AND Leynart, Violaine AND Laffont, Isabelle AND Fattal, Charles AND Devillers, Laurence AND Chastagnol, Cl{\'e}ment AND Martin, Jean-Claude AND Mezouar, Youcef AND Korrapatti, Hermanth AND Dupourque, Vincent AND Leroux, Christophe},
  title = {{High level functions for the intuitive use of an assistive robot}},
  booktitle = {{International Conference on Rehabilitation Robotics}},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,CPU},
  year = {2013},
  pages = {6p},
  address = {Seattle, USA},
}

%% TIPA 29 (2013) -- discourse markers, disfluencies and overlaps in political interviews
@article{PubLIMSI-4508,
  author = {Boula De Mareuil, Philippe AND Adda, Gilles AND Adda-Decker, Martine AND Barras, Claude AND Habert, Benoit AND Paroubek, Patrick},
  title = {{Une {\'e}tude quantitative des marqueurs discursifs, disfluences et chevauchements de parole dans des interviews politiques}},
  year = {2013},
  pages = {18},
  journal = {{TIPA. Travaux interdisciplinaires sur la parole et le langage}},
  volume = {29},
  keywords = {fran\c{c}ais parl{\'e}, parole spontan{\'e}e, disfluences}
}

%% ACAL 44 (2013) -- phonological mining in Embosi
@InProceedings{acal44,
  author = {Annie Rialland and Martial Embanga Aborobongui and Martine Adda-Decker and Lori Lamel},
  title = {{Dropping of the Class-Prefix Consonant, Vowel Elision and Automatic Phonological Mining in Embosi (Bantu C 25)}},
  booktitle = {{Selected Proceedings of the 44th Annual Conference on African Linguistics}},
  editor = {{Ruth Kramer, Elizabeth C.
Zsiga, and One Tlale Boyer}},
  year = {2013},
  address = {Georgetown, Maryland, USA},
  publisher = {Cascadilla Proceedings Project, Somerville, MA},
  pages = {221--230}
}
%% NOTE(review): acal44 previously carried
%%   url = http://www-tlp.limsi.fr/public/asru13_0000210.pdf
%% which is the Karakos13 (ASRU) paper's PDF, not the ACAL paper. The field was
%% dropped; add the correct link here if one exists.

%% ASRU 2013 -- score normalization and system combination for keyword spotting
@InProceedings{Karakos13,
  author = {Damianos Karakos and Richard Schwartz and Stavros Tsakalidis and Le Zhang and Shivesh Ranjan and Tim Ng and Roger Hsiao and Guruprasad Saikumar and Ivan Bulyko and Long Nguyen and John Makhoul and Frantisek Grezl and Mirko Hannemann and Martin Karafi{\'a}t and Igor Szoke and Karel Vesely and Lori Lamel and Viet-Bac Le},
  title = {{Score Normalization and System Combination for Improved Keyword Spotting}},
  booktitle = {{IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)}},
  year = {2013},
  address = {Olomouc, Czech Republic},
  url = {http://www-tlp.limsi.fr/public/asru13_0000210.pdf}
}

%% ASRU 2013 -- acoustic unit discovery from a grapheme-based lexicon
@InProceedings{Hartmann13,
  author = {William Hartmann and Anindya Roy and Lori Lamel and Jean-Luc Gauvain},
  title = {{Acoustic Unit Discovery and Pronunciation Generation from a Grapheme-based Lexicon}},
  booktitle = {{IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)}},
  year = {2013},
  address = {Olomouc, Czech Republic},
  url = {http://www-tlp.limsi.fr/public/asru13_0000380.pdf}
}

%% ICASSP 2013 -- Latvian speech-to-text system
@InProceedings{Oparin13,
  author = {I. Oparin and L. Lamel and J-L.
Gauvain},
  title = {{Rapid Development of a Latvian Speech-to-Text System}},
  booktitle = ieeeicassp,
  year = {2013},
  address = {Vancouver, Canada},
  url = {http://www-tlp.limsi.fr/public/ICASSP13_Latvian.pdf}
}

%% IEEE TASLP 21(1), 2013 -- SOUL neural network language models
@article{PubLIMSI-3464,
  author = {Le, Hai Son AND Oparin, Ilya AND Allauzen, Alexandre AND Gauvain, Jean-Luc AND Yvon, Fran\c{c}ois},
  title = {Structured output layer neural network language models for speech recognition},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {197--206},
  journal = {IEEE Transactions on Audio, Speech and Language Processing},
  volume = {21},
  number = {1},
  abstract = {This paper extends a novel neural network language model (NNLM) which relies on word clustering to structure the output vocabulary: Structured OUtput Layer (SOUL) NNLM. This model is able to handle arbitrarily-sized vocabularies, hence dispensing with the need for shortlists that are commonly used in NNLMs. Several softmax layers replace the standard output layer in this model. The output structure depends on the word clustering which is based on the continuous word representation determined by the NNLM. Mandarin and Arabic data are used to evaluate the SOUL NNLM accuracy via speech-to-text experiments. Well tuned speech-to-text systems (with error rates around 10\%) serve as the baselines. The SOUL model achieves consistent improvements over a classical shortlist NNLM both in terms of perplexity and recognition accuracy for these two languages that are quite different in terms of their internal structure and recognition vocabulary size.
An enhanced training scheme is proposed that allows more data to be used at each training iteration of the neural network.},
}

%% Machine Translation 27(1), 2013 -- sampling-based multilingual alignment
@article{PubLIMSI-3778,
  author = {Lardilleux, Adrien AND Yvon, Fran\c{c}ois AND Lepage, Y.},
  title = {Generalizing sampling-based multilingual alignment},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1--23},
  journal = {Machine Translation},
  volume = {27},
  number = {1},
  abstract = {Sub-sentential alignment is the process by which multi-word translation units are extracted from sentence-aligned multilingual parallel texts. This process is required, for instance, in the course of training statistical machine translation systems. Standard approaches typically rely on the estimation of several probabilistic models of increasing complexity and on the use of various heuristics, that make it possible to align, first isolated words, then, by extension, groups of words. In this paper, we explore an alternative approach, originally proposed by (Lardilleux and Lepage,2008) (Proceedings of AMTA~2008, pp 125-132), which relies on a much simpler principle: the comparison of occurrence profiles in sub-corpora obtained by sampling. After analyzing the strengths and weaknesses of this approach, we show how to improve the detection of multi-word translation units and evaluate these improvements on machine translation tasks.},
  keywords = {machine translation, association measures, xmtalign},
  url = {http://www-tlp.limsi.fr/public/article_1359654199.pdf},
}

%% Computer Speech and Language 27(1), 2013 -- paralinguistics survey / challenge
@article{PubLIMSI-3855,
  author = {Schuller, Bjoern AND Steidl, S. AND Batliner, A.
AND Burkhardt, Felix AND Devillers, Laurence AND Muller, Christian AND Narayanan, Shrikanth},
  title = {Paralinguistics in speech and language --- State-of-the-art and the challenge},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {4--39},
  journal = csl,
  volume = {27},
  number = {1},
  url = {http://www-tlp.limsi.fr/public/CSPdevil2013_1360333825.pdf},
}

%% Language Resources and Evaluation -- opinion detection in call-centre transcripts
%% (author list previously named "Chapuis, G{\'e}raldine" twice)
@article{PubLIMSI-3939,
  author = {Clavel, Chlo{\'e} AND Adda, Gilles AND Cailliau, F. AND Garnier-Rizet, M. AND Cavet, Ariane AND Chapuis, G{\'e}raldine AND Courcinous, S. AND Danesi, Charlotte AND Daquo, A. AND Deldossi, Myrtille AND Guillemin-Lanne, S. AND Seizou, Marjorie AND Suignard, P.},
  title = {Spontaneous speech and opinion detection: mining call-centre transcripts},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1--37},
  journal = {Language Resources and Evaluation},
  volume = {48},
  number = {1},
  abstract = {Opinion mining on conversational telephone speech tackles two challenges: the robustness of speech transcriptions and the relevance of opinion models. The two challenges are critical in an industrial context such as marketing. The paper addresses jointly these two issues by analyzing the influence of speech transcription errors on the detection of opinions and business concepts. We present both modules: the speech transcription system, which consists in a successful adaptation of a conversational speech transcription system to call-centre data and the information extraction module, which is based on a semantic modeling of business concepts, opinions and sentiments with complex linguistic rules. Three models of opinions are implemented based on the discourse theory, the appraisal theory and the marketers expertise, respectively. The influence of speech recognition errors on the information extraction module is evaluated by comparing its outputs on manual versus automatic transcripts.
The F-scores obtained are 0.79 for business concepts detection, 0.74 for opinion detection and 0.67 for the extraction of relations between opinions and their target. This result and the in-depth analysis of the errors show the feasibility of opinion detection based on complex rules on call-centre transcripts.},
  keywords = {Call-centre data; Automatic speech recognition system; Opinion detection; Business concept detection; Disfluency},
  url = {http://www-tlp.limsi.fr/public/LREclavel2013_1367245424.pdf},
}

%% Machine Translation 27(2), 2013 -- oracle decoding for phrase-based MT analysis
@article{PubLIMSI-3980,
  author = {Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {Oracle decoding as a new way to analyze phrase-based machine translation},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {115--138},
  journal = {Machine Translation},
  volume = {27},
  number = {2},
  abstract = {Extant Statistical Machine Translation systems are very complex pieces of software, which embed multiple layers of heuristics and encompass very large numbers of numerical parameters. As a result, it is difficult to analyze output translations and there is a real need for tools that could help developers to better understand the various causes of errors. In this study, we make a step in that direction and present an attempt to evaluate the quality of the phrase-based translation model. In order to identify those translation errors that stem from deficiencies in the phrase table, we propose to compute the oracle BLEU-4 score, that is the best score that a system based on this phrase table can achieve on a reference corpus. By casting the computation of the oracle BLEU-1 as an Integer Linear Programming problem, we show that it is possible to efficiently compute accurate upper-bounds of this score, and report measures performed on several standard benchmarks.
Various other applications of these oracle decoding techniques are also reported and discussed.},
}

%% Interfaces numeriques 2(1), 2013 -- affective and social dimensions of human-robot interaction
@article{PubLIMSI-4067,
  author = {Devillers, Laurence},
  title = {Les dimensions affectives et sociales dans les interactions humain-robot},
  year = {2013},
  pages = {105--117},
  journal = {Interfaces num{\'e}riques},
  volume = {2},
  number = {1},
}

%% IRBM 34(2), 2013 -- ARMEN assistive robotics project
@article{PubLIMSI-4072,
  author = {Leroux, Christophe AND Lebec, Olivier AND Ben Ghezala, Mohamed Walid AND Mezouar, Youcef AND Devillers, Laurence AND Chastagnol, Cl{\'e}ment AND Martin, Jean-Claude AND Leynart, Violaine AND Fattal, Charles},
  title = {ARMEN: Assistive robotics to maintain elderly people in natural environment},
  year = {2013},
  pages = {101--107},
  journal = {IRBM},
  volume = {34},
  number = {2},
  url = {http://www-tlp.limsi.fr/public/PubLIMSI-4072.pdf},
}

%% Biomedical Informatics Insights 2013 -- combining expert-based and CRF medical entity recognizers
@article{PubLIMSI-4220,
  author = {Zweigenbaum, Pierre AND Lavergne, Thomas AND Grabar, Natalia AND Hamon, Thierry AND Rosset, Sophie AND Grouin, Cyril},
  title = {Combining an expert-based medical entity recognizer to a machine-learning system: methods and a case-study},
  year = {2013},
  pages = {13p},
  journal = {Biomedical Informatics Insights},
  abstract = {Medical entity recognition is currently generally performed by data-driven methods based on supervised machine learning. Expert-based systems, where linguistic and domain expertise are directly provided to the system, for instance in the form of lexicons and pattern-based rules, are often combined with data-driven systems. We present here a case study where an existing expert-based medical entity recognition system, Ogmios, is combined with a data-driven system, Caramba, based on a linear-chain Conditional Random Field (CRF) classifier. We examine different methods to combine two such systems and test the most relevant ones through experiments performed on the i2b2/VA 2012 challenge data. Our case study specifically highlights the risk of overfitting incurred by an expert-based system.
We observe that it prevents the combination of the two systems from obtaining improvements in precision, recall, or F-measure, and analyse the underlying mechanisms through a post-hoc feature-level analysis. We also observe that wrapping the expert-based system alone as attributes input to a CRF classifier does boost its F-measure from 0.603 to 0.710 (strict matching of types and boundaries, as per the conlleval program), bringing it on par with the data-driven system. The generality of this method remains to be further investigated.},
  keywords = {Natural Language Processing, Information Extraction, Medical records, Machine Learning, Hybrid Methods, Overfitting},
  url = {http://www-tlp.limsi.fr/public/paper_1372430042.pdf},
}

%% Language Sciences 39 (2013) -- automatic speech processing for Northern/Southern French
%% NOTE(review): in the abstract, each "/\/" is where an IPA vowel symbol was lost
%% during export (presumably open-o and open-e); restore from the published
%% abstract. The mis-encoded oe ligature has been repaired as {\oe}.
@article{PubLIMSI-4256,
  author = {Boula De Mareuil, Philippe AND Woehrling, C{\'e}cile AND Adda-Decker, Martine},
  title = {Contribution of automatic speech processing to the study of Northern/Southern French},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {75--82},
  journal = {Language Sciences},
  volume = {39},
  abstract = {This study focuses on the pronunciation of oral mid vowels and nasal vowels in Northern and Southern French. It is based on the investigation of a large corpus (30 hours, over 100 speakers) of spoken French, enabled by recent advances in the area of automatic speech processing. The realisation of oral mid vowels is investigated through two approaches using automatic phone alignment. The first approach explores formant measurements whereas the second one investigates pronunciation variants such as /\/~/{\oe}/~/o/. The latter approach, simulating a categorical approach, was also used to question the realisation/deletion of schwas and the realisation of nasal vowels as sequences including a potentially oral vowel and a nasal consonant. In this article, five features are being addressed: /\/ fronting in the North, /o/ opening within a subset of words (e.g.
spelled with au or {\^o}), /\/ closing within another subset of words (e.g. ending in -ais or ait), schwa realisation and nasal vowel denasalisation in the South. The results of the two approaches to oral vowel quality converge, showing that these variables contrast Northern and Southern French. The contrast is sharper regarding the /O/ archiphoneme than the /E/ archiphoneme. It is also sharper regarding nasal vowels than the schwa. These empirical data are valuable in affording insight into sociophonetics and corpus phonology.},
  keywords = {linguistique},
  url = {http://www-tlp.limsi.fr/public/ls13_1372682891.pdf},
}

%% BUCC 2013 workshop -- cross-lingual WSD for translation extraction.
%% Typed as inproceedings so that the booktitle is actually printed; the two
%% co-author surnames had lost their diacritics to an encoding error.
@inproceedings{PubLIMSI-4482,
  author = {Apidianaki, Marianna AND Ljube{\v{s}}i{\'c}, Nikola AND Fi{\v{s}}er, Darja},
  title = {Cross-lingual WSD for translation extraction from comparable corpora},
  booktitle = {6th Workshop on Building and Using Comparable Corpora (BUCC 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1--10},
  institution = {ACL},
  abstract = {We propose a data-driven approach to enhance translation extraction from comparable corpora. Instead of resorting to an external dictionary, we translate source vector features by using a cross-lingual Word Sense Disambiguation method. The candidate senses for a feature correspond to sense clusters of its translations in a parallel corpus and the context used for disambiguation consists of the vector that contains the feature. The translations found in the disambiguation output convey the sense of the features in the source vector, while the use of translation clusters permits to expand their translation with several variants.
As a consequence, the translated vectors are less noisy and richer, and allow for the extraction of higher quality lexicons compared to simpler methods.},
  keywords = {Word Sense Disambiguation, sense clustering, comparable corpora},
}

%% Informatica 37(2), 2013 -- vector disambiguation for translation extraction
@article{PubLIMSI-4265,
  author = {Apidianaki, Marianna AND Ljube{\v{s}}i{\'c}, Nikola AND Fi{\v{s}}er, Darja},
  title = {Vector disambiguation for translation extraction from comparable corpora},
  AERES = {ACLN},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {193--201},
  journal = {Informatica},
  volume = {37},
  number = {2},
  abstract = {We present a new data-driven approach for enhancing the extraction of translation equivalents from comparable corpora which exploits bilingual lexico-semantic knowledge harvested from a parallel corpus. First, the bilingual lexicon obtained from word-aligning the parallel corpus replaces an external seed dictionary, making the approach knowledge-light and portable. Next, instead of using simple one-to-one mappings between the source and the target language, translation equivalents are clustered into sets of synonyms by a cross-lingual Word Sense Induction method. The obtained sense clusters enable us to expand the translation of vector features with several translation variants using a cross-lingual Word Sense Disambiguation method.
Consequently, the vector features are disambiguated and translated with the translation variants included in the semantically most appropriate cluster, thus producing less noisy and richer vectors that allow for a more successful cross-lingual vector comparison than in previous methods.},
  keywords = {Word Sense Disambiguation, sense clustering, comparable corpora},
  url = {http://www-tlp.limsi.fr/public/Apidianaki_1373643438.pdf},
}

%% Machine Translation 2013 -- lessons learned on MT quality estimation
@article{PubLIMSI-4324,
  author = {Wisniewski, Guillaume AND Singh, Anil Kumar AND Yvon, Fran\c{c}ois},
  title = {Quality estimation for machine translation: some lessons learned},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1--26},
  journal = {Machine Translation},
  abstract = {The dissemination of statistical machine translation (SMT) systems in the professional translation industry is still limited by the lack of reliability of SMT outputs, the quality of which varies to a great extent. A critical piece of information would be for MT systems to automatically assess their output translations with automatically derived quality measures. Predicting quality measures was indeed the goal of a shared task at the Workshop on SMT in 2012. In this contribution, we first report our results for this shared task, detailing the features that we found to be the most predictive of quality.
In the latter part, we reexamine the shared task data and protocol and show that several factors actually contributed to the difficulty of the task, and discuss alternative evaluation designs.},
}
%% NOTE(review): PubLIMSI-4324 used to end with keywords = {1-26}, a stray copy
%% of its page range; the field was dropped. Add real keywords if available.

%% Tralogy 2013 -- the quality prediction task (in French)
@inproceedings{PubLIMSI-3910,
  author = {Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {La t{\^a}che de pr{\'e}diction de qualit{\'e}},
  booktitle = {Tralogy},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1--16},
  month = {January},
  address = {Paris, France},
  abstract = {La diffusion des syst{\`e}mes de traductions automatiques est limit{\'e}e par leur manque de fiabilit{\'e}~: la qualit{\'e} de traduction varie beaucoup, parfois de mani{\`e}re impr{\'e}visible. Une mani{\`e}re de contourner cette limite serait, pour les syst{\`e}mes de traduction automatique, de pr{\'e}dire, en m{\^e}me temps que la traduction, une mesure num{\'e}rique de sa qualit{\'e}. Cette pr{\'e}diction de qualit{\'e} permettrait de fournir une information comparable au pourcentage de correspondance dans une m{\'e}moires de traduction, qui est usuellement utilis{\'e} en traduction assist{\'e}e par ordinateur. Cette information pourrait, par exemple, {\^e}tre utilis{\'e}e par des traductions pour corriger les traductions automatiques plus efficacement en se concentrant sur les phrases les plus probl{\'e}matiques. Dans cette contribution, nous pr{\'e}senterons la mani{\`e}re dont la t{\^a}che de pr{\'e}diction de qualit{\'e} est g{\'e}n{\'e}ralement abord{\'e}e par les chercheurs en traduction automatique.
Nous d{\'e}crirons notre contribution {\`a} la premi{\`e}re campagne internationale d'{\'e}valuation de pr{\'e}diction de qualit{\'e} et, {\`a} partir de cette exp{\'e}rience, mettrons en {\'e}vidence les difficult{\'e}s de cette t{\^a}che.},
  keywords = {Traduction automatique, pr{\'e}diction de qualit{\'e}},
  url = {http://www-tlp.limsi.fr/public/tralogy-13_1364221837.pdf},
}

%% Multimedia Systems Conference 2013 -- Blip10000 social video dataset
%% NOTE(review): month holds a French day/month range ("27/01 au 1/02"), not a
%% month name; confirm the conference dates and replace it with a plain month.
@inproceedings{PubLIMSI-3929,
  author = {Schmiedeke, Sebastian AND Xu, Peng AND Ferran{\'e}, Isabelle AND Eskevich, Maria AND Kofler, Christoph AND Larson, Martha A. AND Est{\`e}ve, Yannick AND Lamel, Lori AND Jones, Gareth J.F. AND Sikora, Thomas},
  title = {Blip10000: A social video dataset containing SPUG content for tagging and retrieval},
  booktitle = {Multimedia Systems Conference},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {6p},
  month = {27/01 au 1/02},
  address = {Oslo, Norway},
  abstract = {The increasing amount of digital multimedia content available is inspiring potential new types of user interaction with video data. Users want to easily find the content by searching and browsing. For this reason, techniques are needed that allow automatic categorisation, searching the content and linking to related information. In this work, we present a dataset that contains comprehensive semi-professional user-generated (SPUG) content, including audiovisual content, user-contributed metadata, automatic speech recognition transcripts, automatic shot boundary files, and social information for multiple `social levels'. We intend this Blip10000 dataset to be a useful resource for evaluating tagging techniques as well as retrieval techniques.
We describe the principal characteristics of this dataset and present results that have been achieved on different tasks.},
  keywords = {Dataset, SPUG Content, Video Tagging, Speech Retrieval},
  url = {http://www-tlp.limsi.fr/public/MMSysLamel2013_1366035274.pdf},
}

%% CICLing 2013 -- fast large-margin learning for statistical MT
@inproceedings{PubLIMSI-4024,
  author = {Wisniewski, Guillaume AND Yvon, Fran\c{c}ois},
  title = {Fast large-margin learning for statistical machine translation},
  booktitle = {International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {12p},
  month = {March},
  address = {Samos, Greece},
  abstract = {Statistical Machine Translation (SMT) can be viewed as a generate-and-select process, where the selection of the best translation is based on multiple numerical features assessing the quality of a translation hypothesis. Training a SMT system consists in finding the right balance between these features, so as to produce the best possible output, and is usually achieved through Minimum Error Rate Training (MERT). Despite several improvements, training remains one of the most time consuming step in the development of SMT systems and is a major bottleneck for experimentations. Building on recent advances in stochastic optimization and online machine learning, this paper studies a possible alternative to MERT, based on standard and well-understood algorithms.
This approach is shown to deliver competitive solutions, at a much faster pace than the standard training machinery.},
}

%% TALN 2013 -- a corpus of translation errors (in French)
@inproceedings{PubLIMSI-4025,
  author = {Wisniewski, Guillaume AND Singh, Anil Kumar AND Segal, Natalia AND Yvon, Fran\c{c}ois},
  title = {Un corpus d'erreurs de traduction},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2013)},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {8p},
  month = {June},
  address = {Les Sables d'Olonne, France},
  abstract = {Avec le d{\'e}veloppement de la post-{\'e}dition, de plus en plus de corpus contenant des corrections de traductions sont disponibles. Ce travail pr{\'e}sente un corpus de corrections d'erreurs de traduction collect{\'e} dans le cadre du projet ANR/TRACE et illustre les diff{\'e}rents types d'analyses auxquels il peut servir. Nous nous int{\'e}resserons notamment {\`a} la d{\'e}tection des erreurs fr{\'e}quentes et {\`a} l'analyse de la variabilit{\'e} des post-{\'e}ditions.},
}

%% TALN 2013 -- web page pre-segmentation for question answering (in French)
@inproceedings{PubLIMSI-4037,
  author = {Foucault, Nicolas AND Rosset, Sophie AND Adda, Gilles},
  title = {Pr{\'e}-segmentation de pages web et s{\'e}lection de documents pertinents en Questions-R{\'e}ponses},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2013)},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {14p},
  month = {June},
  address = {Les Sables d'Olonne, France},
  abstract = {Dans cet article, nous pr{\'e}sentons une m{\'e}thode de segmentation de pages web en blocs de texte pour la s{\'e}lection de documents pertinents en questions-r{\'e}ponses. La segmentation des documents se fait pr{\'e}alablement {\`a} leur indexation en plus du d{\'e}coupage des segments obtenus en passages au moment de l'extraction des r{\'e}ponses. L'extraction du contenu textuel des pages est faite {\`a} l'aide d'un extracteur maison. Nous avons test{\'e} deux m{\'e}thodes de segmentation.
L'une segmente les textes extraits des pages web uniform{\'e}ment en blocs de taille fixe, l'autre les segmente par TextTiling (Hearst, 1997) en blocs th{\'e}matiques de taille variable. Les exp{\'e}riences men{\'e}es sur un corpus de 500K pages web et un jeu de 309 questions factuelles en fran\c{c}ais, issus du projet Quaero (Quintard et al., 2010), montrent que la m{\'e}thode employ{\'e}e tend {\`a} am{\'e}liorer la pr{\'e}cision globale (top-10) du syst{\`e}me RITELQR (Rosset et al., 2008) dans sa t{\^a}che.},
  keywords = {pages web, TextTiling, s{\'e}lection de documents, questions-r{\'e}ponses, Quaero, Ritel, segmentation textuelle, segmentation th{\'e}matique.},
}

%% INTERSPEECH 2013 -- ILP for speaker diarization and cross-modal identification
@inproceedings{PubLIMSI-4039,
  author = {Bredin, Herv{\'e} AND Poignant, Johann},
  title = {Integer linear programming for speaker diarization and cross-modal identification in TV broadcast},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1467--1471},
  month = {August},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130099.PDF},
  abstract = {Most state-of-the-art approaches address speaker diarization as a hierarchical agglomerative clustering problem in the audio domain. In this paper, we propose to revisit one of them: speech turns clustering based on the Bayesian Information Criterion (a.k.a. BIC clustering). First, we show how to model it as an integer linear programming (ILP) problem. Its resolution leads to the same overall diarization error rate as standard BIC clustering but generates significantly purer speaker clusters. Then, we describe how this approach can easily be extended to the audiovisual domain and TV broadcast in particular. The straightforward integration of detected overlaid names (used to introduce guests or journalists, and obtained via video OCR) into a multimodal ILP problem yields significantly better speaker diarization results.
Finally, we explain how this novel paradigm can incidentally be used for unsupervised speaker identification (i.e. not relying on any prior acoustic speaker models). Experiments on the REPERE TV broadcast corpus show that it achieves performance close to that of an oracle capable of identifying any speaker as long as their name appears on screen at least once in the video.},
  keywords = {speaker diarization, integer linear programming, speaker identification, multimodal fusion, optical character recognition},
}

%% INTERSPEECH 2013 -- naming speakers from written vs. pronounced names.
%% NOTE(review): pages previously read 1467-1471, identical to PubLIMSI-4039
%% (a distinct paper); 1462--1466 should be confirmed against the ISCA archive.
@inproceedings{IS13JPLB,
  author = {Poignant, Johann and Besacier, Laurent and Le, Viet-Bac and Rosset, Sophie and Qu{\'e}not, Georges},
  title = {Unsupervised naming of speakers in broadcast {TV}: using written names, pronounced names or both?},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {1462--1466},
  month = aug,
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130097.PDF},
  abstract = {Persons identification in video from TV broadcast is a valuable tool for indexing them. However, the use of biometric models is not a very sustainable option without a priori knowledge of people present in the videos. The pronounced names (PN) or written names (WN) on the screen can provide hypotheses names for speakers. We propose an experimental comparison of the potential of these two modalities (names pronounced or written) to extract the true names of the speakers. The names pronounced offer many instances of citation but transcription and named-entity detection errors halved the potential of this modality. On the contrary, the written names detection benefits of the video quality improvement and is nowadays rather robust and efficient to name speakers.
Oracle experiments presented for the mapping between written names and speakers also show the complementarity of both PN and WN modalities.},
  keywords = {Speaker identification, OCR, ASR},
}

%% INTERSPEECH 2013 -- word-final schwa in northern vs. southern French (PFC corpus).
@inproceedings{IS13-RNMAD,
  author = {Nemoto, Rena and Adda-Decker, Martine},
  title = {How Are Word-final Schwas Different in the {North} and {South} of {France}?},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {305--309},
  month = aug,
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130806.PDF},
  abstract = {The aim of this paper is twofold: (i) give a large-scale description in realized word-final schwas of French lexical words for different regions (North vs. South) and different speaking styles (read vs. spontaneous speech); (ii) highlight differences in prosodic features and test these differences via automatic classification techniques. The proposed study relies on a subset of 12.5 hours of the French PFC corpus. Manually transcribed speech was segmented and labeled using automatic speech alignment and a pronunciation dictionary including optional word-final schwas for all words ending in a consonant. f0 and intensity values were extracted and averaged over segments. Our study revealed that, for both speaking styles, word-final schwas of southern French tended to keep relatively high f0 values and longer durations in comparison with northern French where f0 tends to drop on a word-final schwa. On average, spontaneous speech featured smaller f0 drops between final full vowel and subsequent word-final schwa vowel as well as longer durations. The automatic North/South classification of word-final schwas achieved better results for spontaneous speech.
As for distinguishing between speaking styles, southern French obtained slightly better scores than the northern varieties.},
  keywords = {French, word-final schwa, prosody},
}

%% EUSIPCO 2013 -- multi-class MLLR m-vector speaker verification.
@inproceedings{PubLIMSI-4476,
  author = {Sarkar, Achintya Kumar and Barras, Claude},
  title = {Multi-class {UBM}-Based {MLLR} m-Vector system for speaker verification},
  booktitle = {European Signal Processing Conference (EUSIPCO 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {6p},
  month = sep,
  address = {Marrakech, Morocco},
  abstract = {In this paper, we extend the recently introduced Maximum Likelihood Linear Regression (MLLR) super-vector based m-vector speaker verification system to a multi-class MLLR m-vector system. In the conventional case, global class MLLR transformation is estimated with respect to Universal Background Model (UBM) for a given speech data, which is then used in the form of super-vector for m-vector system. In the proposed system, Gaussian mean vectors of the UBM are first clustered into several classes. Then, MLLR transformations are estimated (of a speech data) for each class, and are used in the form of super-vectors for speaker characterization using the m-vector technique. We consider two clustering approaches: one is based on the conventional K-means and the other is proposed based on Expectation Maximization (EM) and Maximum Likelihood (ML). Both systems yield better performance than the conventional m-vector system and allow for multiple MLLR transforms without additional temporal alignment of the data with respect to UBM. Furthermore, we show that, contrary to conventional K-means, the proposed clustering is not affected by the random initialization, and also provides equal or comparable system performance.
The system performances are shown on NIST 2008 SRE core condition over various tasks.},
  keywords = {Multi-class m-vector, Statistical clustering algorithm, MLLR super-vector, UBM, Speaker verification},
}

%% INTERSPEECH 2013 -- anchor- and UBM-based multi-class MLLR m-vectors.
@inproceedings{PubLIMSI-4474,
  author = {Sarkar, Achintya Kumar and Barras, Claude},
  title = {Anchor and {UBM}-based multi-class {MLLR} M-Vector system for speaker verification},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {2450--2454},
  month = aug,
  address = {Lyon, France},
  abstract = {In this paper, we propose two techniques to extend the recently introduced global Maximum Likelihood Linear Regression (MLLR) transformation (i.e. super-vector) based m-vector system for speaker verification into a multi-class MLLR m-vector system in the Universal Background Model (UBM) framework. In the first method, Gaussian mean vectors of the UBM are first grouped into several classes using conventional K-means and a proposed clustering algorithm based on Expectation Maximization (EM) and Maximum Likelihood (ML) concepts. Then, MLLR transformations are calculated for a given speech data with respect to each class, which are used in the form of super-vector for speaker representation by their m-vectors. In the second approach, several MLLR transformations are estimated with respect to pre-defined models called anchors. The proposed systems show better performance than the conventional system. Furthermore, the proposed UBM-based system does not require additional alignment of speech data with respect to the UBM for estimation of multiple MLLR transformations. We also further show that the proposed EM \& ML clustering algorithm is robust to random initialization and provides equal or comparable system performance compared to K-means.
The experimental results are shown on NIST 2008 SRE core condition over various tasks.},
  keywords = {m-Vector, Multi-Class MLLR, Anchor Model, EM Clustering, Speaker Verification},
}

%% INTERSPEECH 2013 -- cepstral + MLP features for speaker verification.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{IS13-CTDO,
  author = {Do, Cong-Thanh and Barras, Claude and Le, Viet-Bac and Sarkar, Achintya},
  title = {Augmenting Short-term Cepstral Features with Long-term Discriminative Features for Speaker Verification of Telephone Data},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {2484--2489},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130783.PDF},
  abstract = {Short-term cepstral features have long been chosen as standard features for speaker recognition thanks to their relevance and effectiveness. In contrast, discriminative features, calculated by a multi-layer perceptron (MLP) from much longer stretches of time, have been gradually adopted in automatic speech recognition (ASR). It has been shown that augmenting short-term cepstral features with long-term MLP (multi-layer perceptron) features makes it possible to improve significantly the performance of ASR. In this work, we investigate the possibility of augmenting short-term cepstral features with MLP features in order to improve the performance of text-independent speaker verification. We show, that, even though augmenting cepstral features with MLP features does not directly improve speaker verification performance, reducing the dimension of the augmented features, using principal component analysis (PCA), makes it possible to reduce, relatively, around 12\% of the equal error rate (EER).
Experiments are performed on telephone data of the 2008 NIST SRE (speaker recognition evaluation) database.},
  keywords = {Speaker verification, multi-layer perceptron (MLP), principal component analysis (PCA), NIST SRE 2008, GMM-UBM},
}

%% INTERSPEECH 2013 -- estimating voice strength from vowel spectra.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{is13JSL-CB,
  author = {Li{\'e}nard, Jean-Sylvain and Barras, Claude},
  title = {Fine-grain voice strength estimation from vowel spectral cues},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {128--132},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS131076.PDF},
  abstract = {This study investigates the possibility to recover the voice strength, i.e. the sound level produced by the speaker, from the signal recorded. The dataset consists of a set of isolated vowels (720 tokens) recorded in a situation where two interlocutors interacted orally at a distance comprised between 0.40 and 6 meters, in a furnished room. For each token, voice strength is measured at the intensity peak, and several sets of acoustic cues are extracted from the signal spectrum, after frequency weighting and intensity normalization. In the first phase, the tokens are grouped into increasing voice strength categories. Discriminant Analysis produces a classifier which takes into account all the signal dimensions implicitly coded in the set of cues. In the second phase, the cues of a new token are given to the classifier, which in turn produces its distances to the groups, providing the basis for estimating the unknown voice strength. The quality of the process is evaluated either in self-consistency mode or by cross-validation, i.e. by comparing the estimate with the value initially measured on the same token.
The statistical margin of error is quite low, of the order of 3 dB, depending on the sets of cues used.},
  keywords = {vocal effort, vocal intensity, voice quality, discriminant analysis},
}

%% TALN 2013 -- fully discriminative training for statistical MT.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4040,
  author = {Lavergne, Thomas and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title = {Un cadre d'apprentissage int{\'e}gralement discriminant pour la traduction statistique},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2013)},
  year = {2013},
  pages = {14p},
  month = jun,
  eventdate = {2013-06-17/2013-06-21},
  address = {Les Sables d'Olonne, France},
  abstract = {Une faiblesse des syst{\`e}mes de traduction statistiques est le caract{\`e}re ad hoc du processus d'apprentissage, qui repose sur un empilement d'heuristiques et conduit {\`a} apprendre des param{\`e}tres dont la valeur est sous-optimale. Dans ce travail, nous reformulons la traduction automatique sous la forme famili{\`e}re de l'apprentissage d'un mod{\`e}le probabiliste structur{\'e} utilisant une param{\'e}trisation log-lin{\'e}aire. Cette entreprise est rendue possible par le d{\'e}veloppement d'une implantation efficace qui permet en particulier de prendre en compte la pr{\'e}sence de variables latentes dans le mod{\`e}le.
Notre approche est compar{\'e}e, avec succ{\`e}s, avec une approche de l'{\'e}tat de l'art sur la t{\^a}che de traduction de donn{\'e}es du BTEC pour le couple Fran\c{c}ais-Anglais.},
  keywords = {Traduction Automatique, Apprentissage Discriminant},
}

%% ICASSP 2013 -- overlapping speech detection for diarization (ETAPE).
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4050,
  author = {Charlet, Delphine and Barras, Claude and Li{\'e}nard, Jean-Sylvain},
  title = {Impact of overlapping speech detection on speaker diarization for broadcast news and debates},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,CPU},
  year = {2013},
  pages = {5p},
  month = may,
  eventdate = {2013-05-26/2013-05-31},
  address = {Vancouver, Canada},
  abstract = {The overlapping speech detection systems developed by Orange and LIMSI for the ETAPE evaluation campaign on French broadcast news and debates are described. Using either cepstral features or a multi-pitch analysis, a F1-measure for overlapping speech detection up to 59.2\% is reported on the TV data of the ETAPE evaluation set, where 6.7\% of the speech was measured as overlapping, ranging from 1.2\% in the news to 10.4\% in the debates. Overlapping speech segments were excluded during the speaker diarization stage, and these segments were further labelled with the two nearest speaker labels, taking into account the temporal distance.
We describe the effects of this strategy for various overlapping speech systems and we show that it improves the diarization error rate in all situations and up to 26.1\% relative in our best configuration.},
  keywords = {speaker diarization, overlapping speech},
}

%% ICASSP 2013 -- ASR/lattice-based MLLR m-vectors for speaker verification.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4051,
  author = {Sarkar, Achintya Kumar and Barras, Claude and Le, Viet Bac},
  title = {Lattice {MLLR} based m-vector system for speaker verification},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {5p},
  month = may,
  eventdate = {2013-05-26/2013-05-31},
  address = {Vancouver, Canada},
  abstract = {The recently introduced m-vector approach uses Maximum Likelihood Linear Regression (MLLR) super-vectors for speaker verification, where MLLR super-vectors are estimated with respect to a Universal Background Model (UBM) without any transcription of speech segments and speaker m-vectors are obtained by uniform segmentation of their MLLR super-vectors. Hence, this approach does not exploit the phonetic content of the speech segments. In this paper, we propose the integration of an Automatic Speech Recognition (ASR) based multi-class MLLR transformation into the m-vector system. We consider two variants, with MLLR transformations computed either on the 1-best (hypothesis) or on the lattice word transcriptions. The former case is able to account for the risk of ASR transcription errors.
We show that the proposed systems outperform the conventional method over various tasks of the NIST SRE 2008 core condition.},
  keywords = {m-Vector, Lattice MLLR, MLLR Super-Vector, Session Variability Compensation, Speaker Verification},
}

%% Workshop paper on humor in human-robot interaction with the Nao robot.
%% Typed as inproceedings so that booktitle is rendered (misc ignores it);
%% the original institution field is kept for the LIMSI tooling.
@inproceedings{PubLIMSI-4377,
  author = {Soury, Mariette and Devillers, Laurence},
  title = {{Nao} makes me laugh: the impact of humor in human-robot interactions},
  booktitle = {Towards social humanoid robots: what makes interaction human-like?},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  institution = {IROS2013},
  note = {IROS 2013 workshop},
  abstract = {Which are the aspects of human-likeness that are more relevant for human-robot interaction? We present an experiment using humor in interactions with Aldebaran robot Nao to make users laugh. Our aim is to evaluate the correlations between the user's profile (gender, cultural background, sense of humor, personality traits) and his propensity to laugh with a humanoid robot. We analyze audio data collected during the Interspeech 2013 conference. Some correlations appear, suggesting the interest of a priori knowledge of the user to make him laugh. This work will provide new insight on the building of human-robot relationships in the French project ROMEO2.},
  keywords = {human-robot interactions, laugh, nao, user profile},
}

%% ICMI 2013 (demonstrations) -- social interaction system with the robot NAO.
@inproceedings{PubLIMSI-4586,
  author = {Devillers, Laurence and Soury, Mariette},
  title = {A social interaction system for studying humor with the robot {NAO}},
  booktitle = {International Conference on Multimodal Interaction (ICMI 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {313--314},
  publisher = {ACM},
  series = {ICMI Demonstrations and Exhibits},
  month = dec,
  address = {Sydney, Australia},
  abstract = {The video of our demonstrator presents a social interaction system for studying humor with the Aldebaran robot NAO. Our application records and analyzes audio and video stream to provide real-time feedback.
Using this dialog system during show \& tell sessions at Interspeech 2013, we have collected different kind of laughter (positive and negative) from 45 subjects. The participants were involved in a verbal exchange with NAO, including tongue-twisters games and jokes, as well as witty remarks and laughs from the robot. The conversation data captured is used here to study subject behaviors from various personalities and cultural backgrounds.},
  keywords = {Social interaction, Laughter, valence, prosody, data collection, multicultural analysis},
}

%% ACII 2013 -- multimodal corpus of stress in a public speaking task.
@inproceedings{PubLIMSI-4501,
  author = {Giraud, Tom and Soury, Mariette and Hua, Jiewen and Delaborde, Agn{\`e}s and Tahon, Marie and Gomez Jauregui, David Antonio and Eyharabide, Maria Victoria and Filaire, Edith and Le Scanff, Christine and Devillers, Laurence and Isableu, Brice and Martin, Jean-Claude},
  title = {Multimodal expressions of stress during a public speaking task},
  booktitle = {International Conference on Affective Computing and Intelligent Interaction (ACII 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,CPU,TLP,AMI},
  year = {2013},
  pages = {417--422},
  month = sep,
  address = {Geneva, Switzerland},
  abstract = {Databases of spontaneous multimodal expressions of affective states occurring during a task are few. This paper presents a protocol for eliciting stress in a public speaking task. Behaviors of 19 participants were recorded via a multimodal setup including speech, video of the facial expressions and body movements, balance via a force plate, and physiological measures. Questionnaires were used to assert emotional states, personality profiles and relevant coping behaviors to study how participants cope with stressful situations. Several subjective and objective performances were also evaluated. Results show a significant impact of the overall task and conditions on the participants' emotional activation.
The possible future use of this new multimodal emotional corpus is described.},
  keywords = {databases, stress, emotion, multimodality, individual differences},
}

%% ACII 2013 -- frame-wise stress detection from audio.
@inproceedings{PubLIMSI-4127,
  author = {Soury, Mariette and Devillers, Laurence},
  title = {Stress detection from audio on multiple window analysis size in a public speaking task},
  booktitle = {International Conference on Affective Computing and Intelligent Interaction (ACII 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {529--533},
  publisher = {IEEE Computer Society},
  month = sep,
  address = {Geneva, Switzerland},
  abstract = {Speech production modifications are one of the many indications of stress in humans. A job interview simulation task permitted the collection of a multimodal corpus, including physiological data. Physiological cues of stress are reliable on long periods, and require invasive sensors. Human voice variations have been proved to be a non-invasive stress cue. In this paper, we focus on a frame-wise detection of stress on several window analysis sizes and analyze the behavior of different audio features classes. We trained our system on 19 subjects, and test it on 10 other subjects.
Our best system obtains a 71.9 percent Unweighted Average Recall on 5s windows.},
  keywords = {stress, prosodic cues, public speaking task},
}

%% INTERSPEECH 2013 -- discriminatively trained phoneme confusion model.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4224,
  author = {Karanasou, Penny and Yvon, Fran{\c{c}}ois and Lavergne, Thomas and Lamel, Lori},
  title = {Discriminative training of a phoneme confusion model for a dynamic lexicon in {ASR}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,ILES},
  year = {2013},
  pages = {1966--1970},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS131264.PDF},
  abstract = {To enhance the recognition lexicon, it is important to be able to add pronunciation variants while keeping the confusability introduced by the extra phonemic variation low. However, this confusability is not easily correlated with the ASR performance, as it is an inherent phenomenon of speech. This paper proposes a method to construct a multiple pronunciation lexicon with a high discriminability. To do so, a phoneme confusion model is used to expand the phonemic search space of pronunciation variants during ASR decoding and a discriminative framework is adopted for the training of the weights of the phoneme confusions. For the parameter estimation, two training algorithms are implemented, the perceptron and the CRF model, using finite state transducers.
Experiments on English data were conducted using a large state-of-the-art ASR system of continuous speech.},
  keywords = {FST-based ASR decoding, dynamic recognition lexicon, phoneme confusion model, discriminative training},
}

%% INTERSPEECH 2013 -- hidden CRFs for grapheme-to-phoneme conversion.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4222,
  author = {Lehnen, Patrick and Allauzen, Alexandre and Lavergne, Thomas and Yvon, Fran{\c{c}}ois and Hahn, Stefan and Ney, Hermann},
  title = {Structure learning in hidden conditional random fields for grapheme-to-phoneme conversion},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2013},
  pages = {2326--2330},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130754.PDF},
  abstract = {Accurate grapheme-to-phoneme (g2p) conversion is needed for several speech processing applications, such as automatic speech synthesis and recognition. For some languages, notably English, improvements of g2p systems are very slow, due to the intricacy of the associations between letter and sounds. In recent years, several improvements have been obtained either by using variable-length associations in generative models (joint-n-grams), or by recasting the problem as a conventional sequence labeling task, enabling to integrate rich dependencies in discriminative models. In this paper, we consider several ways to reconciliate these two approaches.
Introducing hidden variable-length alignments through latent variables, our Hidden Conditional Random Field (HCRF) models are able to produce comparative performance compared to strong generative and discriminative models on the CELEX database.},
  keywords = {grapheme-to-phoneme conversion, G2P, HCRF, discriminative models, hidden conditional random fields},
}

%% LAW 2013 -- named entity pre-annotation for out-of-domain annotation.
@inproceedings{PubLIMSI-4227,
  author = {Rosset, Sophie and Grouin, Cyril and Lavergne, Thomas and Ben Jannet, Mohamed and Leixa, Jeremy and Galibert, Olivier and Zweigenbaum, Pierre},
  title = {Automatic named entity pre-annotation for out-of-domain human annotation},
  booktitle = {Linguistic Annotation Workshop (LAW 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2013},
  pages = {10p},
  publisher = {ACL},
  month = aug,
  address = {Sofia, Bulgaria},
  abstract = {Automatic pre-annotation is often used to improve human annotation speed and accuracy. We address here out-of-domain named entity annotation, and examine whether automatic pre-annotation is still beneficial in this setting. Our study design includes two different corpora, three pre-annotation schemes linked to two annotation levels, both expert and novice annotators, a questionnaire-based subjective assessment and a corpus-based quantitative assessment.
We observe that pre-annotation helps in all cases, both for speed and for accuracy, and that the subjective assessment of the annotators does not always match the actual benefits measured in the annotation outcome.},
  keywords = {corpus annotation}
}

%% ELRA 18th Anniversary Workshop -- language matrices and LR impact factor.
%% Only the acronym is brace-protected; the whole title is no longer double-braced.
@inproceedings{PubLIMSI-4634,
  author = {Mariani, Joseph-Jean and Francopoulo, Gil},
  title = {Language matrices \& the language resource impact factor: a journey through the {LR} Landscape},
  booktitle = {ELRA 18th Anniversary Workshop},
  AERES = {COM},
  GROUP = {LIMSI,TLP,IMMI},
  year = {2013},
  month = nov,
  address = {Paris},
  keywords = {Language Resources}
}

%% Magazine article in La Recherche; typed as article so that the
%% magazine name and page range are rendered.
@article{PubLIMSI-4637,
  author = {Mariani, Joseph-Jean},
  title = {Le {Web} sera-t-il polyglotte ?},
  journal = {La Recherche},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {62--64},
  keywords = {Multilingualism, Language Technologies},
}

%% INTERSPEECH 2013 -- analysis of 25 years of the ISCA archive.
@inproceedings{PubLIMSI-4241,
  author = {Mariani, Joseph-Jean and Paroubek, Patrick and Francopoulo, Gil and Deleborde, Marine},
  title = {Rediscovering 25 years of discoveries in spoken language processing: a preliminary analysis of the {ISCA} archive},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,IMMI,ILES,TLP},
  year = {2013},
  month = aug,
  address = {Lyon, France},
}

%% FEL 2013 -- language technologies for regional and minority languages.
@inproceedings{PubLIMSI-4240,
  author = {Soria, Claudia and Mariani, Joseph-Jean and Zoli, Carlo},
  title = {Dwarfs sitting on the giants' shoulders - how {LTs} for regional and minority languages can benefit from piggybacking major languages},
  booktitle = {Conference of the Foundation for Endangered Languages (FEL 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  month = oct,
  address = {Ottawa, Canada},
}

%% ICASSP 2013 -- tandem/hybrid HMM + CNN handwritten word recognition.
@inproceedings{PubLIMSI-4317,
  author = {Bluche, Th{\'e}odore and Ney, Hermann and Kermorvant, Christopher},
  title = {Tandem {HMM} with convolutional neural network for handwritten word recognition},
  booktitle = {IEEE International Conference on Acoustics,
Speech and Signal Processing (ICASSP 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {5p},
  month = may,
  eventdate = {2013-05-26/2013-05-31},
  address = {Vancouver, Canada},
  abstract = {In this paper, we investigate the combination of hidden Markov models and convolutional neural networks for handwritten word recognition. The convolutional neural networks have been successfully applied to various computer vision tasks, including handwritten character recognition. In this work, we show that they can replace Gaussian mixtures to compute emission probabilities in hidden Markov models (hybrid combination), or serve as feature extractor for a standard Gaussian HMM system (tandem combination). The proposed systems outperform a basic HMM based on either decorrelated pixels or handcrafted features. We validated the approach on two publicly available databases, and we report up to 60\% (Rimes) and 35\% (IAM) relative improvement compared to a Gaussian HMM based on pixel values. The final systems give comparable results to recurrent neural networks, which are the best systems since 2009.},
  keywords = {Handwriting recognition, Hidden Markov Model, Convolutional Neural Network},
}

%% ICDAR 2013 -- CNN feature extraction for handwritten word recognition.
@inproceedings{PubLIMSI-4318,
  author = {Bluche, Th{\'e}odore and Ney, Hermann and Kermorvant, Christopher},
  title = {Feature extraction with convolutional neural networks for handwritten word recognition},
  booktitle = {International Conference on Document Analysis and Recognition (ICDAR 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {5p},
  month = aug,
  address = {Washington DC, USA},
}

%% MT Summit 2013 -- corpus of post-edited translations.
@inproceedings{PubLIMSI-4353,
  author = {Wisniewski, Guillaume and Singh, Anil Kumar and Segal, Natalia and Yvon, Fran{\c{c}}ois},
  title = {Design and analysis of a large corpus of post-edited translations: quality estimation, failure analysis and the variability of post-edition},
  booktitle = {Machine Translation Summit (MT Summit 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages =
{117--124},
  month = sep,
  address = {Nice, France},
  abstract = {Machine Translation (MT) is now often used to produce approximate translations that are then corrected by trained professional post-editors. As a result, more and more datasets of post-edited translations are being collected. These datasets are very useful for training, adapting or testing existing MT systems. In this work, we present the design and content of one such corpus of post-edited translations, and consider less studied possible uses of these data, notably the development of an automatic Quality Estimation (QE) system and the detection of frequent errors in automatic translations. Both applications require a careful assessment of the variability in post-editions, that we study here.},
  keywords = {MT, MT {\'E}valuation},
}

%% ACL 2013 -- matrix completion for predicting human assessment scores.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4026,
  author = {Wisniewski, Guillaume},
  title = {On the predictability of human assessment: when matrix completion meets {NLP} evaluation},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {137--142},
  publisher = {Association for Computational Linguistics},
  series = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics},
  month = aug,
  eventdate = {2013-08-04/2013-08-09},
  address = {Sofia, Bulgaria},
  abstract = {This paper tackles the problem of collecting reliable human assessments. We show that knowing multiple scores for each example instead of a single score results in a more reliable estimation of a system quality. To reduce the cost of collecting these multiple ratings, we propose to use matrix completion techniques to predict some scores knowing only scores of other judges and some common ratings.
Even if prediction performance is pretty low, decisions made using the predicted score proved to be more reliable than decision based on a single rating of each example.},
}

%% INTERSPEECH 2013 -- interpolating acoustic models trained on several sources.
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4327,
  author = {Fraga Da Silva, Thiago Henrique and Gauvain, Jean-Luc and Lamel, Lori},
  title = {Interpolation of acoustic models for speech recognition},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {3347--3351},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130847.PDF},
  abstract = {Acoustic models for speech recognition are often trained on data coming from a variety of sources. The usual approach is to pool together all of the available training data, considering them all to be part of a unique training set. In this work, assuming that each source may have a different degree of relevance for a given target task, two techniques are proposed to weigh subsets of the training data. The first one is based on the interpolation of the model probability densities, while the other on data weighting. A method to automatically select the mixture coefficients is also proposed.
The best technique presented here outperformed unsupervised MAP adaptation and led to improvements in word accuracy (up to 6\% relative) over the pooled model.},
  keywords = {Acoustic modeling, model interpolation, adaptation},
}

%% INTERSPEECH 2013 -- unsupervised STT system for Hungarian broadcast audio (Quaero 2012).
%% The conference day range is kept in eventdate; month uses the standard macro.
@inproceedings{PubLIMSI-4328,
  author = {Roy, Anindya and Lamel, Lori and Fraga Da Silva, Thiago Henrique and Gauvain, Jean-Luc and Oparin, Ilya},
  title = {Some issues affecting the transcription of {Hungarian} broadcast audio},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2013},
  pages = {3102--3106},
  month = aug,
  eventdate = {2013-08-25/2013-08-29},
  address = {Lyon, France},
  url = {http://www-tlp.limsi.fr/public/IS130299.PDF},
  abstract = {This paper reports on a speech-to-text (STT) transcription system for Hungarian broadcast audio developed for the 2012 Quaero evaluations. For this evaluation, no manually transcribed audio data were provided for model training, however a small amount of development data were provided to assess system performance. As a consequence, the acoustic models were developed in an unsupervised manner, with the only supervision provided indirectly by the language model. The language models were trained on texts downloaded from various websites, also without any speech transcripts. This contrasts with other STT systems for Hungarian broadcast audio which use at least 10 to 50 hours of manually transcribed data for acoustic training, and typically include speech transcripts in the language models. Based on mixed results previously reported applying morph-based approaches to agglutinative languages such as Hungarian, word-based language models were used. The initial Word Error Rate (WER) of the system using context-independent seed models from other languages of 59.8\% on the 3h development corpus was reduced to 25.0\% after successive training iterations and system refinement.
The same system obtained a WER of 23.3\% on the independent Quaero 2012 evaluation corpus (a mix of broadcast news and broadcast conversation data). These results compare well with previously reported systems on similar data. Various issues affecting system performance are discussed, such as amount of training data, the acoustic features and choice of text sources for language model training.},
  keywords     = {Large vocabulary continuous speech recognition (LVCSR), broadcast news transcription, Hungarian language, unsupervised training, agglutinative languages, Bottleneck MLP features},
}

% INTERSPEECH 2013 paper on elderly vs. young European Portuguese speech.
@inproceedings{PubLIMSI-4357,
  author       = {Pellegrini, Thomas and Hamalainen, Anika and Boula De Mareuil, Philippe and Tjalve, Michael and Trancoso, Isabel and Candeias, Sara and Sales Dias, Miguel and Braga, Daniela},
  title        = {A corpus-based study of elderly and young speakers of {European} {Portuguese}: acoustic correlates and their impact on speech recognition performance},
  booktitle    = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {852--856},
  month        = aug,
  eventdate    = {2013-08-25/2013-08-29},
  address      = {Lyon, France},
  abstract     = {This paper presents a study of European Portuguese elderly speech, in which the acoustic characteristics of two groups of elderly speakers (aged 60-75 and over 75) are compared with those of young adult speakers (aged 19-30). The correlation between age and a set of 14 acoustic features was investigated, and decision trees were used to establish the relative importance of the features. A greater use of pauses characterized speakers aged 60 and over. For female speakers, speech rate also appeared to correlate with age. For male speakers, jitter distinguished between speakers aged 60-75 and older. The correlation between the features and speech recognition performance was also investigated.
Word error rate correlated mostly with the use of pauses, speech rate, and the ratio of long phone realizations. Finally, by comparing the phone sequences used by the recognizer on the most frequent words, we observed that the young adult speakers reduced schwas more than the elderly speakers. This result seems to confirm the common idea that young speakers reduce articulation more than older speakers. Further investigation is needed to confirm this result by determining whether this is due to ageing or to the generation gap.},
  keywords     = {analyses acoustiques, reconnaissance de la parole, voix {\^a}g{\'e}es, portugais},
}

% INTERSPEECH 2013 paper. The proceedings editors are listed in the editor
% field; the publisher field names the publishing body only.
@inproceedings{PubLIMSI-4415,
  author       = {Candea, Maria and Adda-Decker, Martine and Lamel, Lori},
  title        = {Recent Evolution of Non Standard Consonantal Variants in {French} Broadcast News},
  booktitle    = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {412--416},
  editor       = {Bimbot, F. and Cerisara, C. and Fougeron, C. and Gravier, G. and Lamel, L. and Pellegrino, F. and Perrier, P.},
  publisher    = {International Speech Communication Association},
  month        = aug,
  eventdate    = {2013-08-25/2013-08-29},
  address      = {Lyon, France},
  url          = {http://www-tlp.limsi.fr/public/IS130037.PDF},
  abstract     = {This paper investigates sociophonetic questions about global tendencies in contemporaneous European spoken French. The authors argue that automatic alignment allowing targeted variants can provide evidence for current hypotheses about possible ongoing sound changes or about destandardization even in formal contexts as broadcast news. This study focused on the evolution over a decade, in radio or TV news, of three non-standard consonantal variants: consonant cluster reduction, affrication/palatalization of dental stops and voiceless fricative epithesis. Measures obtained by this method showed that the first variant remains almost absent in journalists' speech, exactly as affrication of /d/.
In contrast, affrication of /t/ is increasing and the fricative epithesis, partially unpredictable, becomes longer. Our findings support the use of automatic alignment as an aid to validate sociolinguistic hypotheses and to develop pattern-driven studies, gathering more variables.},
  keywords     = {sociophonetics, affrication, voiceless fricative epithesis, consonantic cluster reduction},
}

% Workshop paper: it carries a booktitle, so it is typed inproceedings.
% pagetotal holds the page count (4 pages), not a page range.
@inproceedings{PubLIMSI-4537,
  author       = {N{\'e}v{\'e}ol, Aur{\'e}lie and Max, Aur{\'e}lien and Ivanishcheva, Yulia and Ravaud, Philippe and Zweigenbaum, Pierre and Yvon, Fran{\c{c}}ois},
  title        = {Statistical machine translation of systematic reviews into {French}},
  booktitle    = {Workshop on Optimizing understanding in multilingual hospital encounters},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP,ILES},
  year         = {2013},
  pagetotal    = {4},
  organization = {Universit{\'e} Paris 13},
}

% IWSLT 2013 paper; pagetotal holds the page count (8 pages).
@inproceedings{PubLIMSI-4462,
  author       = {Gong, Li and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title        = {Improving bilingual sub-sentential alignment by sampling-based transpotting},
  booktitle    = {International Workshop on Spoken Language Translation (IWSLT 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pagetotal    = {8},
  month        = dec,
  eventdate    = {2013-12-05/2013-12-06},
  address      = {Heidelberg, Germany},
  abstract     = {In this article, we present a sampling-based approach to improve bilingual sub-sentential alignment in parallel corpora. This approach can be used to align parallel sentences on an as needed basis, and is able to accurately align newly available sentences.
We evaluate the resulting alignments on several Machine Translation tasks. Results show that for the tasks considered here, our approach performs on par with the state-of-the-art statistical alignment pipeline Giza++/Moses, and obtains superior results in a number of configurations, notably when aligning additional parallel sentence pairs carefully selected to match the test input.},
  keywords     = {Machine translation, association measures, word alignment},
}

% Whole proceedings volume (no booktitle), typed as proceedings.
% NOTE(review): AERES = DO presumably marks an edited volume, so the listed
% people are recorded as editors -- confirm. pagetotal is the page count.
@proceedings{PubLIMSI-4237,
  editor       = {Froeliger, Nicolas and Mariani, Joseph-Jean and Nomine, Jean-Fran{\c{c}}ois and Wallon, Alain},
  title        = {Translation Careers and Technologies: Convergence Points for the Future - Proceedings Tralogy II},
  AERES        = {DO},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pagetotal    = {500},
  publisher    = {INIST},
}

% Edited book. The former edition value was the publisher imprint, so it is
% merged into publisher.
% NOTE(review): AERES = DO presumably marks an edited volume, so the listed
% people are recorded as editors -- confirm.
@book{PubLIMSI-4278,
  editor       = {Nguyen, N. and Adda-Decker, Martine},
  title        = {M{\'e}thodes et outils pour l'analyse phon{\'e}tique des grands corpus oraux},
  AERES        = {DO},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pagetotal    = {320},
  series       = {Trait{\'e} IC2, s{\'e}rie Cognition et traitement de l'information},
  publisher    = {Herm{\`e}s Science},
}

% Interspeech 2013 proceedings volume, typed as proceedings.
% NOTE(review): the listed people are recorded as editors on the AERES = DO
% assumption -- confirm.
@proceedings{PubLIMSI-4463,
  editor       = {Bimbot, Fr{\'e}d{\'e}ric and Cerisara, Christophe and Fougeron, C{\'e}cile and Gravier, Guillaume and Lamel, Lori and Pellegrino, Fran{\c{c}}ois and Perrier, Pascal},
  title        = {Proceedings of the 14th Annual Conference of the International Speech Communication Association (Interspeech 2013)},
  AERES        = {DO},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  publisher    = {International Speech Communication Association},
  abstract     = {Interspeech 2013 14th edition Lyon, France 25-29 August 2013},
}

% CENRob 2013 contribution.
@inproceedings{PubLIMSI-4073,
  author       = {Chastagnol, Cl{\'e}ment and Devillers, Laurence},
  title        = {Emotion detection system for human-robot interaction},
  booktitle    = {Colloque du Centre Expertise National en Robotique (CENRob 2013)},
  AERES        = {AFF},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  month        = apr,
  eventdate    = {2013-04-04/2013-04-05},
  address      = {Evry, France},
}

% BANTU 2013 contribution.
@inproceedings{PubLIMSI-4286,
  author       = {Adda-Decker, Martine and Embanga Aborobongui, Martial and Lamel, Lori and Rialland, Annie},
  title        = {Embosi: automatic alignment with segments and words and phonological mining},
  booktitle    = {International Conference on Bantu Languages (BANTU 2013)},
  AERES        = {COM},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  month        = jun,
  eventdate    = {2013-06-12/2013-06-15},
  address      = {Paris, France},
}

% Book chapter (Springer).
@incollection{PubLIMSI-3451,
  author       = {Buendia, Axel and Devillers, Laurence},
  title        = {From informative cooperative dialogues to long-term social relation with a robot},
  booktitle    = {Towards a Natural Interaction with Robots, Knowbots and Smartphones, Putting Spoken Dialog Systems into Practice},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  publisher    = {Springer},
  isbn         = {978-1-4614-8279-6},
  abstract     = {A lot of progress has been made in the domain of human-machine dialogue, but it is still a real challenge and, most often, only informative cooperative kind of dialogues are explored. This paper tries to explore the ability of a robot to create and maintain a long term social relationship through more advanced dialogue techniques. We expose the social (Goffman), psychological (Scherer) and neural (Mountcastle) theories used to accomplish such kind of complex social interactions. From these theories, we build a consistent model, computationally efficient to create a robot that can understand the concept of lying, and have compassion: a robotic social companion.},
}

% Document attributed to LIMSI via the institution field.
@misc{PubLIMSI-3708,
  author       = {Adda, Gilles and Mariani, Joseph-Jean},
  title        = {Economic, Legal and Ethical analysis of Crowdsourcing for Speech Processing},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {30},
  institution  = {LIMSI},
  abstract     = {We describe the major ethical, legal and economic issues raised by representative crowdsourcing and microworking services, with a focus on Amazon Mechanical Turk, the main crowdsourcing, microworking service used nowadays by researchers in speech.
Crowdsourcing is a neologism designed to summarize a complex process within a single word. To examine how ethics and economy are intertwined in crowdsourcing, the concept will be dissected and a short review of the different crowdsourcing services will be presented. In the context of this article, Microworking refers to the division of tasks into multiple parts and Crowdsourcing refers to the fact that the job is outsourced via the web and done by many people (paid or not). In particular, the issue of compensation (monetary or otherwise) for the completed tasks will be addressed, as will be the ethical and legal problems raised when considering this work as labor in the legal sense. The proposed debate has to be considered in relation to both the economic models of the various crowdsourcing services. Finally, this contribution aims to propose some specific solutions for researchers who wish to use crowdsourcing in an ethical way. Some general solutions to the problem of ethical crowdsourced linguistic resources will be outlined.},
  keywords     = {Amazon Mechanical Turk, ressources linguistiques, {\'e}thique},
}

% The 978-prefixed 13-digit value is an ISBN, not a volume number, and the
% list of personal names is an editor list, not an institution.
@misc{PubLIMSI-3876,
  author       = {Adda, Gilles and Mariani, Joseph-Jean and Besacier, Laurent and Gelas, Hadrien},
  title        = {Crowdsourcing for speech: economic, legal and ethical analysis},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {30},
  isbn         = {9781118358696},
  editor       = {Eskenazi, M. and Levow, G. A. and Meng, H. and Parent, G. and Suendermann, D.},
  abstract     = {Cet article d{\'e}crit les principaux enjeux {\'e}thiques, l{\'e}gaux et {\'e}conomiques soulev{\'e}s par le crowdsourcing, en mettant l'accent sur Amazon Mechanical Turk, le principal service de microworking utilis{\'e} de nos jours par les chercheurs en parole. Le Crowdsourcing est un n{\'e}ologisme qui r{\'e}sume un processus complexe que nous pouvons d{\'e}nommer plus utilement myriadisation du travail parcellis{\'e}.
Nous pr{\'e}senterons tout d'abord une br{\`e}ve revue des diff{\'e}rents syst{\`e}mes de crowdsourcing. Dans le contexte de cet article, Microworking fait r{\'e}f{\'e}rence au fait que le travail est segment{\'e} en petites t{\^a}ches, et Crowdsourcing au fait que le travail est d{\'e}localis{\'e} (outsourced) et est effectu{\'e} par un grand nombre de personnes (crowd), pay{\'e}es ou non. En particulier, nous aborderons la question de l'indemnisation (p{\'e}cuniaire ou autre), ainsi que les probl{\`e}mes {\'e}thiques et juridiques soulev{\'e}s lorsque l'on consid{\`e}re les t{\^a}ches effectu{\'e}es comme du travail au sens l{\'e}gal du terme. L'analyse que nous proposons prend {\'e}galement en compte les mod{\`e}les {\'e}conomiques des services de crowdsourcing. Enfin, cet article vise {\`a} proposer des solutions sp{\'e}cifiques pour les chercheurs qui souhaitent utiliser le crowdsourcing de mani{\`e}re {\'e}thique. A cette fin, nous pr{\'e}senterons des solutions au probl{\`e}me de la constitution {\'e}thique de ressources linguistiques.},
  keywords     = {Amazon Mechanical Turk, ressources linguistiques, {\'e}thique},
}

% Book chapter (Wiley), pages 303--334.
@incollection{PubLIMSI-3940,
  author       = {Adda, Gilles and Mariani, Joseph-Jean and Besacier, Laurent and Gelas, Hadrien},
  title        = {Economic and ethical background of crowdsourcing for speech},
  booktitle    = {Crowdsourcing for Speech Processing: Applications to Data Collection, Transcription and Assessment},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {303--334},
  publisher    = {Wiley},
  isbn         = {9781118358696},
  abstract     = {Cet article d{\'e}crit les principaux enjeux {\'e}thiques, l{\'e}gaux et {\'e}conomiques soulev{\'e}s par le crowdsourcing, en mettant l'accent sur Amazon Mechanical Turk, le principal service de microworking utilis{\'e} de nos jours par les chercheurs en parole. Le Crowdsourcing est un n{\'e}ologisme qui r{\'e}sume un processus complexe que nous pouvons d{\'e}nommer plus utilement myriadisation du travail parcellis{\'e}.
Nous pr{\'e}senterons tout d'abord une br{\`e}ve revue des diff{\'e}rents syst{\`e}mes de crowdsourcing. Dans le contexte de cet article, Microworking fait r{\'e}f{\'e}rence au fait que le travail est segment{\'e} en petites t{\^a}ches, et Crowdsourcing au fait que le travail est d{\'e}localis{\'e} (outsourced) et est effectu{\'e} par un grand nombre de personnes (crowd), pay{\'e}es ou non. En particulier, nous aborderons la question de l'indemnisation (p{\'e}cuniaire ou autre), ainsi que les probl{\`e}mes {\'e}thiques et juridiques soulev{\'e}s lorsque l'on consid{\`e}re les t{\^a}ches effectu{\'e}es comme du travail au sens l{\'e}gal du terme. L'analyse que nous proposons prend {\'e}galement en compte les mod{\`e}les {\'e}conomiques des services de crowdsourcing. Enfin, cet article vise {\`a} proposer des solutions sp{\'e}cifiques pour les chercheurs qui souhaitent utiliser le crowdsourcing de mani{\`e}re {\'e}thique. A cette fin, nous pr{\'e}senterons des solutions au probl{\`e}me de la constitution {\'e}thique de ressources linguistiques.},
  keywords     = {Amazon Mechanical Turk, ressources linguistiques, {\'e}thique},
}

% Book chapter (Springer).
@incollection{PubLIMSI-4069,
  author       = {Chastagnol, Cl{\'e}ment and Clavel, C{\'e}line and Courgeon, Matthieu and Devillers, Laurence},
  title        = {Designing an emotion detection system for a socially-intelligent human-robot interaction},
  booktitle    = {Towards a Natural Interaction with Robots, Knowbots and Smartphones, Putting Spoken Dialog Systems into Practice},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP,CPU},
  year         = {2013},
  publisher    = {Springer},
  isbn         = {978-1-4614-8279-6},
  abstract     = {The long-term goal of this work is to build an assistive robot for elderly and disabled people. It is part of the French ANR ARMEN project. The subjects will interact with a mobile robot controlled by a virtual character.
In order to build this system, we collected interactions between patients from different medical centers and a Wizard-of-Oz operated virtual character in the frame of scenarii written with physicians and functional therapists. The human-robot spoken interaction consisted mainly of small-talk with patients, with no real task to perform. For precise tasks such as Finding a remote-control, keywords recognition is performed. The main focus of the article is to build an emotion detection system that will be used to control the dialog and the answer strategy of the virtual character. This article presents the Wizard-of-Oz system for the audio corpus collection which is used for training the emotion detection module. We analyze the audio data at the segmental level on annotated measures of acoustically perceived emotion but also at the interaction level with global objective measures such as amount of speech and emotion. We also report on the results of a questionnaire qualifying the interaction and the agent and compare between objective and subjective measures.},
}

% Book chapter (John Benjamins), pages 219--231. The ISBN keeps its 13 digits
% and is regrouped as prefix-group-registrant-publication-check.
@incollection{PubLIMSI-4070,
  author       = {Devillers, Laurence},
  title        = {Automatic detection of emotion from real-life data},
  booktitle    = {Prosody and Iconicity},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {219--231},
  publisher    = {John Benjamins Publishing Company},
  isbn         = {978-90-272-7219-5},
  abstract     = {Recognition of emotion in speech has recently matured to one of the key disciplines in speech analysis serving next generation human-machine communication. This paper provides the best practices in the automatic detection of real-life emotion from vocal expression. Real-life emotion is hard to collect, ambiguous to annotate, and tricky to distribute due to privacy preservation. Acting of emotions was often seen as a solution to the desperate need for data.
In contrast with most previous studies, conducted on artificial data with archetypal emotions, this paper addresses some of the challenges faced when studying real-life non-basic emotions. What needs to be done in this field to improve emotion detection is also discussed.},
  keywords     = {emotion detection, real-life data},
}

% Book chapter (Hermes Science), pages 159--202.
@incollection{PubLIMSI-4187,
  author       = {Adda-Decker, Martine and Adda, Gilles and Lamel, Lori},
  title        = {Syst{\`e}mes de transcription comme instruments},
  booktitle    = {M{\'e}thodes et outils pour l'analyse phon{\'e}tique des grands corpus oraux},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {159--202},
  publisher    = {Herm{\`e}s Science},
  isbn         = {978-2-7462-4530-3},
  abstract     = {Nous montrons de quelle mani{\`e}re ces syst{\`e}mes nous offrent aujourd'hui la possibilit{\'e} d'explorer des corpus oraux dont la taille est virtuellement illimit{\'e}e, en ouvrant un champ d'investigation hors de notre port{\'e}e auparavant. Nous pr{\'e}sentons en premier lieu les principes de mod{\'e}lisation statistique de la parole permettant de r{\'e}aliser des {\'e}tiquetages et segmentations temporelles des donn{\'e}es (« annotations »). La qualit{\'e} et la pr{\'e}cision des annotations sont discut{\'e}es en fonction de la configuration du syst{\`e}me, et notamment du type des mod{\`e}les acoustiques et des dictionnaires de prononciation. Selon la configuration choisie, les annotations produites peuvent repr{\'e}senter des prononciations canoniques (segmentation phon{\'e}mique) ou bien refl{\'e}ter les variantes de prononciation (segmentation plut{\^o}t phon{\'e}tique). La coh{\'e}rence et la pr{\'e}cision de la segmentation temporelle sont examin{\'e}es, dans la mesure o{\`u} de nombreuses {\'e}tudes linguistiques prennent appui sur cette segmentation. Les capacit{\'e}s d'analyse de ces instruments sont mises en lumi{\`e}re {\`a} travers le cas des variantes de prononciation.
Les capacit{\'e}s de mesure sont illustr{\'e}es par quelques grandeurs simples, comme les comptes d'occurrence et les dur{\'e}es de phon{\`e}mes, et par des grandeurs plus complexes concernant la r{\'e}alisation des variantes de prononciation.},
  keywords     = {Phon{\'e}tique, reconnaissance de la parole, alignement},
}

% Book chapter, pages 61--80. The 4+4 digit identifier has the ISSN shape,
% so it is stored in issn rather than isbn.
@incollection{PubLIMSI-4275,
  author       = {Boula De Mareuil, Philippe and Boutin, B{\'e}atrice Akissi},
  title        = {Perception et caract{\'e}risation d'accents ouest-africains en fran\c{c}ais},
  booktitle    = {La perception des accents du fran\c{c}ais hors de France},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {61--80},
  publisher    = {{\'E}ditions CIPA},
  issn         = {2295-0222},
  abstract     = {Le but de cette {\'e}tude est double : examiner dans quelle mesure divers accents ouest-africains en fran\c{c}ais peuvent {\^e}tre distingu{\'e}s et trouver des indices phon{\'e}tiques discriminant des vari{\'e}t{\'e}s de fran\c{c}ais parl{\'e}es en Afrique de l'Ouest. Une exp{\'e}rience perceptive a dans un premier temps {\'e}t{\'e} men{\'e}e, dont la t{\^a}che consistait (entre autres choses) {\`a} identifier le pays de r{\'e}sidence de 20 locuteurs mossi, akan, bambara, s{\'e}noufo et wolof, enregistr{\'e}s au Burkina Faso, en C{\^o}te d'Ivoire, au Mali et au S{\'e}n{\'e}gal. Elle a montr{\'e} que des accents ouest-africains (notamment s{\'e}n{\'e}galais et ivoirien) peuvent {\^e}tre identifi{\'e}s par des auditeurs ouest-africains sans que le style (lu ou spontan{\'e}) ni le niveau d'{\'e}tudes des locuteurs ne semble affecter les r{\'e}sultats. Des indices perceptivement saillants, diff{\'e}renciant notamment les accents s{\'e}n{\'e}galais et ivoirien, ont ensuite {\'e}t{\'e} analys{\'e}s sur le corpus exp{\'e}rimental.
Des traits suprasegmentaux (diff{\'e}rences de fr{\'e}quence fondamentale sur les polysyllabes) et segmentaux (diff{\'e}rentes r{\'e}alisations du /R/) ont corrobor{\'e} certaines impressions des auditeurs et/ou connaissances linguistiques sur les syst{\`e}mes des langues en pr{\'e}sence, tandis que le trait subsegmental de VOT ne s'est pas montr{\'e} discriminant dans le travail rapport{\'e} ici. Les diff{\'e}rences les plus importantes qui ont {\'e}t{\'e} d{\'e}gag{\'e}es concernaient le S{\'e}n{\'e}gal (avec une propension {\`a} l'accentuation initiale suivie de mouvements m{\'e}lodiques descendants) et la C{\^o}te d'Ivoire (avec une tendance {\`a} l'{\'e}lision ou vocalisation du /R/). L'{\'e}tape suivante a consist{\'e} {\`a} v{\'e}rifier si les r{\'e}sultats li{\'e}s aux deux premiers traits (ceux qui semblaient pertinents) pouvaient {\^e}tre {\'e}tendus {\`a} un plus grand corpus. Nous avons continu{\'e} {\`a} chercher les indices susceptibles d'{\^e}tre mobilis{\'e}s pour distinguer entre accents ouest-africains, en termes de pays, ce qui nous a permis d'{\'e}largir l'ensemble de locuteurs. En utilisant l'alignement automatique en phon{\`e}mes, les textes lus par 52 locuteurs du Burkina Faso, de C{\^o}te d'Ivoire, du Mali et du S{\'e}n{\'e}gal ont {\'e}t{\'e} analys{\'e}s et compar{\'e}s aux lectures de 21 locuteurs de France.
Les mesures ont pour une large part confirm{\'e} les premi{\`e}res tendances : patrons m{\'e}lodiques descendants (HL) sur les polysyllabes et davantage de /R/ dorsaux au S{\'e}n{\'e}gal, patrons LH sur les polysyllabes et davantage de /R/ vocalis{\'e}s ou {\'e}lid{\'e}s en C{\^o}te d'Ivoire.},
  keywords     = {linguistique},
}

% Book chapter, pages 21--40. The 4+4 digit identifier has the ISSN shape,
% so it is stored in issn rather than isbn.
@incollection{PubLIMSI-4276,
  author       = {Bardiaux, Alice and Boula De Mareuil, Philippe},
  title        = {Allongements vocaliques en fran\c{c}ais de {Belgique} : une approche perceptive},
  booktitle    = {La perception des accents du fran\c{c}ais hors de France},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {21--40},
  publisher    = {{\'E}ditions CIPA},
  issn         = {2295-0222},
  abstract     = {Dans cette {\'e}tude exp{\'e}rimentale et perceptive de l'accent belge, nous nous sommes concentr{\'e}s sur l'allongement vocalique et l'influence de ce dernier en perception. S'appuyant sur des enregistrements de locuteurs belges en lecture, deux exp{\'e}riences perceptives ont {\'e}t{\'e} conduites, chacune aupr{\`e}s d'auditeurs belges et fran\c{c}ais. La premi{\`e}re a permis d'identifier de fa\c{c}on robuste des voyelles allong{\'e}es per\c{c}ues comme r{\'e}gionalement marqu{\'e}es, de d{\'e}terminer un seuil d'allongement perceptivement pertinent et de caract{\'e}riser les voyelles les plus susceptibles d'{\^e}tre allong{\'e}es. La deuxi{\`e}me exp{\'e}rience perceptive, {\`a} base de manipulation de parole, a permis de tester l'impact de l'allongement vocalique dans la perception de l'accent belge. En mati{\`e}re d'accents, il peut y avoir un d{\'e}calage entre production et perception. L'allongement vocalique est ainsi un ph{\'e}nom{\`e}ne parmi d'autres, difficile {\`a} saisir.
La premi{\`e}re exp{\'e}rience a mis en {\'e}vidence le fait qu'en moyenne une occurrence d'allongement par phrase peut {\^e}tre per\c{c}ue : la grande majorit{\'e} des voyelles per\c{c}ues comme allong{\'e}es est en syllabe p{\'e}nulti{\`e}me de mot ou appartient {\`a} des monosyllabes, et il s'agit g{\'e}n{\'e}ralement de voyelles nasales ou semi-ferm{\'e}es. Les r{\'e}sultats de cette exp{\'e}rience ont {\'e}t{\'e} utilis{\'e}s pour s{\'e}lectionner les stimuli d'une deuxi{\`e}me exp{\'e}rience, dans laquelle des {\'e}chantillons de parole marqu{\'e}s prosodiquement ont {\'e}t{\'e} rendus non-marqu{\'e}s par synth{\`e}se et vice versa. Cette derni{\`e}re exp{\'e}rience, sans diff{\'e}rences significatives entre auditeurs belges et fran\c{c}ais, sugg{\`e}re que les stimuli pr{\'e}sentant des allongements vocaliques sont {\'e}valu{\'e}s avec un degr{\'e} d'accent plus {\'e}lev{\'e} que leurs contreparties sans allongement. Ce r{\'e}sultat a {\'e}t{\'e} confirm{\'e} en termes de mots saillants point{\'e}s par les auditeurs (plus souvent quand ces mots montraient que quand ils ne montraient pas d'allongements vocaliques). Certaines repr{\'e}sentations linguistiques pr{\'e}sentes dans l'imaginaire des auditeurs, autour de l'accent belge, sont {\'e}galement discut{\'e}es.},
  keywords     = {linguistique},
}

% Book chapter (Hermes Science), pages 145--165.
@incollection{PubLIMSI-4282,
  author       = {Boula De Mareuil, Philippe and Woehrling, C{\'e}cile and Adda-Decker, Martine},
  title        = {Apports du traitement automatique {\`a} une approche linguistique de la variation r{\'e}gionale dans la parole},
  booktitle    = {M{\'e}thodes et outils pour l'analyse phon{\'e}tique des grands corpus oraux},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {145--165},
  publisher    = {Herm{\`e}s Science},
  isbn         = {978-2-7462-4530-3},
}

% Book chapter (Hermes Science).
@incollection{PubLIMSI-4283,
  author       = {Adda-Decker, Martine and Gendrot, C.
and Snoeren, Natalie and Nguyen, N.},
  title        = {Apport du traitement automatique {\`a} l'{\'e}tude des voyelles},
  booktitle    = {M{\'e}thodes et outils pour l'analyse phon{\'e}tique des grands corpus oraux},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {187--214},
  publisher    = {Herm{\`e}s Science},
  isbn         = {978-2-7462-4530-3},
}

% Book chapter (Hermes Science), pages 203--230.
@incollection{PubLIMSI-4290,
  author       = {Boula De Mareuil, Philippe and Woehrling, C{\'e}cile and Adda-Decker, Martine},
  title        = {Traitement de la variation r{\'e}gionale},
  booktitle    = {M{\'e}thodes et outils pour l'analyse phon{\'e}tique des grands corpus oraux},
  AERES        = {OS},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {203--230},
  publisher    = {Herm{\`e}s Science},
  isbn         = {978-2-7462-4530-3},
  abstract     = {L'objet de recherche en linguistique qu'est la variation impose de manipuler d'importantes quantit{\'e}s de donn{\'e}es. Les instruments de mesure d{\'e}riv{\'e}s du traitement automatique de la parole sont donc particuli{\`e}rement appropri{\'e}s pour quantifier des tendances connues et moins connues en phon{\'e}tique/phonologie. Le but, d{\`e}s lors, est {\`a} la fois d'augmenter nos connaissances et d'am{\'e}liorer les syst{\`e}mes de traitement automatique, de relever un d{\'e}fi scientifique et de lever un verrou technologique. Outre le changement diachronique, la litt{\'e}rature sociolinguistique distingue trois dimensions dans lesquelles peut se d{\'e}ployer la variation : diaphasique ("stylistique", intra-locuteur), diatopique (r{\'e}gionale) et diastratique (socio-culturelle). Depuis quelques ann{\'e}es, le terme "sociophon{\'e}tique" a fait son apparition. Les facteurs {\'e}tudi{\'e}s (notamment l'{\^a}ge et la classe sociale, substitut de pratiques sociales), li{\'e}s {\`a} la variation diastratique, ont souvent {\'e}t{\'e} ignor{\'e}s par la phon{\'e}tique exp{\'e}rimentale et la phonologie traditionnelles.
Il va de soi que, dans ce que le traitement automatique de la parole peut apporter {\`a} une approche linguistique de la variation, nous ne saurions {\^e}tre exhaustifs. Ce chapitre qui n'est qu'une {\'e}bauche de panorama pr{\'e}sentant diff{\'e}rents instruments de mesure (analyse de donn{\'e}es, alignement et extraction de formants) se focalise sur la variation diatopique entre le nord de la Loire et le sud de la France, en commen\c{c}ant par des exp{\'e}riences d'identification perceptive, en poursuivant par des mesures de formants et en finissant par des analyses par alignement automatique.},
  keywords     = {linguistique},
}

% No booktitle, only howpublished, so the entry is typed misc (the type that
% uses howpublished).
% NOTE(review): presumably a talk or public event -- confirm.
@misc{PubLIMSI-4238,
  author       = {Mariani, Joseph-Jean},
  title        = {La langue fran\c{c}aise {\`a} l'heure du num{\'e}rique},
  howpublished = {Expolangues},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  institution  = {DGLFLF},
}

% WMT 2013 system description, pages 62--69.
@inproceedings{PubLIMSI-4457,
  author       = {Allauzen, Alexandre and P{\'e}cheux, Nicolas and Do, Quoc Khanh and Dinarelli, Marco and Lavergne, Thomas and Max, Aur{\'e}lien and Le, Hai Son and Yvon, Fran{\c{c}}ois},
  title        = {{LIMSI} at {WMT13}},
  booktitle    = {8th Workshop on Statistical Machine Translation (WMT 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP,ILES},
  year         = {2013},
  pages        = {62--69},
  organization = {ACL},
  abstract     = {This paper describes LIMSI's submissions to the shared WMT13 translation task. We report results for French-English, German-English and Spanish-English in both directions. Our submissions use n-code, an open source system based on bilingual n-grams, and continuous space models in a post-processing step.
The main novelties of this year's participation are the following: our first participation to the Spanish-English task; experiments with source pre-ordering; a tighter integration of continuous space language models using artificial text generation (for German); and the use of different tuning sets according to the original language of the text to be translated.},
  keywords     = {traduction automatique statistique},
}

% SemEval 2013 paper; pagetotal holds the page count (5 pages).
@inproceedings{PubLIMSI-4250,
  author       = {Apidianaki, Marianna},
  title        = {Cross-lingual word sense disambiguation using translation sense clustering},
  booktitle    = {7th International Workshop on Semantic Evaluation (SemEval 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pagetotal    = {5},
  organization = {ACL},
}

% TALaRE 2013 paper; pagetotal holds the page count (13 pages).
@inproceedings{PubLIMSI-4251,
  author       = {Soria, Claudia and Mariani, Joseph-Jean},
  title        = {Searching {LTs} for minority languages},
  booktitle    = {Traitement Automatique des Langues R{\'e}gionales de France et d'Europe (TALaRE 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pagetotal    = {13},
  organization = {ATALA},
}

% SLAM 2013 paper, pages 84--89.
@inproceedings{PubLIMSI-4358,
  author       = {Poignant, Johann and Bredin, Herv{\'e} and Besacier, Laurent and Qu{\'e}not, Georges and Barras, Claude},
  title        = {Towards a better integration of written names for unsupervised speakers identification in videos},
  booktitle    = {First Workshop on Speech, Language and Audio in Multimedia (SLAM 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {84--89},
  organization = {ISCA / IEEE},
  abstract     = {Existing methods for unsupervised identification of speakers in TV broadcast usually rely on the output of a speaker diarization module and try to name each cluster using names provided by another source of information: we call it late naming. Hence, written names extracted from title blocks tend to lead to high precision identification, although they cannot correct errors made during the clustering step.
In this paper, we extend our previous late naming approach in two ways: integrated naming and early naming. While late naming relies on a speaker diarization module optimized for speaker diarization, integrated naming jointly optimizes speaker diarization and name propagation in terms of identification errors. Early naming modifies the speaker diarization module by adding constraints preventing two clusters with different written names to be merged together. While integrated naming yields similar identification performance as late naming (with better precision), early naming improves over this baseline both in terms of identification error rate and stability of the clustering stopping criterion.},
  keywords     = {speaker identification, speaker diarization, written names, multimodal fusion, TV broadcast},
}

% SLAM 2013 paper, pages 49--54. Only the acronyms in the title are braced,
% so styles can still apply their own casing to the rest.
@inproceedings{PubLIMSI-4359,
  author       = {Bredin, Herv{\'e} and Poignant, Johann and Fortier, Guillaume and Tapaswi, Makarand and Le, Viet Bac and Roy, Anindya and Barras, Claude and Rosset, Sophie and Sarkar, Achintya Kumar and Yang, Qian and Gao, Hua and Mignon, Alexis and Verbeek, Jakob and Besacier, Laurent and Qu{\'e}not, Georges and Ekenel, Hazim Kemal and Stiefelhagen, Rainer},
  title        = {{QCompere} at {REPERE} 2013},
  booktitle    = {First Workshop on Speech, Language and Audio in Multimedia (SLAM 2013)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {49--54},
  organization = {ISCA / IEEE},
  abstract     = {We describe QCompere consortium submissions to the REPERE 2013 evaluation campaign. The REPERE challenge aims at gathering four communities (face recognition, speaker identification, optical character recognition and named entity detection) towards the same goal: multimodal person recognition in TV broadcast. First, four mono-modal components are introduced (one for each foregoing community) constituting the elementary building blocks of our various submissions.
Then, depending on the target modality (speaker or face recognition) and on the task (supervised or unsupervised recognition), four different fusion techniques are introduced: they can be summarized as propagation-, classifier-, rule- or graph-based approaches. Finally, their performance is evaluated on REPERE 2013 test set and their advantages and limitations are discussed.}, keywords= {speaker identification, face recognition, named entity detection, video optical character recognition, multi-modal fusion}, url = {http://www-tlp.limsi.fr/public/Bredin2013a.pdf}, } @inproceedings {PubLIMSI-4361, author = {Boula De Mareuil, Philippe AND Rilliard, Albert AND Lehka-Lemarchand, Iryna AND Ivent, Fanny}, title = {Regional accents and languages in France: a contrastive prosodic analysis of Romance varieties}, booktitle = {Workshop on Phonetics, Phonology and Languages in Contact (PPLC 2013)}, AERES = {ACTI}, GROUP = {LIMSI,AA,TLP}, year = {2013}, pages = {72-75}, institution = {ISCA}, abstract= {Regional accents find multiple origins, including language contact. What characterises them at the prosodic level is still poorly understood. 
This study investigates to what extent some intonational particularities in French varieties spoken in Corsica and the south of France may be attributed to prosodic transfers from Corsican and Occitan, respectively.}, keywords= {phonetics, prosody, languages in contact}, }

@inproceedings{PubLIMSI-4367,
  author      = {Singh, Anil Kumar and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title       = {{LIMSI} submission for the {WMT} 13 quality estimation task: an experiment with n-gram posteriors},
  booktitle   = {8th Workshop on Statistical Machine Translation (WMT 2013)},
  AERES       = {ACTI},
  GROUP       = {LIMSI,TLP},
  year        = {2013},
  pages       = {398--404},
  institution = {ACL},
  keywords    = {Linguistique},
}

% The next three items give only a `howpublished` medium and no proceedings title,
% so they are typed as misc (the entry type whose styles actually print `howpublished`).
@misc{PubLIMSI-3887,
  author       = {Boula De Mareuil, Philippe},
  title        = {En France, pourquoi a-t-on des accents diff{\'e}rents ?},
  howpublished = {Mon quotidien},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
}

@misc{PubLIMSI-3901,
  author       = {Boula De Mareuil, Philippe},
  title        = {Histoire du paysage sonore : les accents},
  howpublished = {La fabrique de l'histoire},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
}

@misc{PubLIMSI-3911,
  author       = {Boula De Mareuil, Philippe},
  title        = {Parlez-vous r{\'e}mois ?},
  howpublished = {L'Union de Reims},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  pages        = {5},
  abstract     = {Chaque r{\'e}gion a sa fa\c{c}on de parler et Reims n'y {\'e}chappe pas. Pour autant, il n'existe sans doute pas d'accent r{\'e}mois. M{\^e}me si l'expression est parfois employ{\'e}e dans la presse, il s'agit plut{\^o}t d'un mythe servant {\`a} affirmer et {\`a} d{\'e}fendre l'identit{\'e} de la ville. La proximit{\'e} par rapport {\`a} Paris (qui incarne en g{\'e}n{\'e}ral la norme) et l'image bourgeoise qu'a Reims ({\`a} tort ou {\`a} raison, associ{\'e}e au Champagne) rendent suspecte l'existence de particularit{\'e}s de prononciation. 
Le « ui » pour « oui », le « t » prononc{\'e} dans « vingt » et le « a » post{\'e}rieur, dans des mots comme « l{\`a} » ou « pas », sont des traits de prononciation que l'on trouve dans de nombreuses r{\'e}gions de France, surtout dans le Nord-Est (voire en Suisse et en Belgique), avec diverses connotations.}, keywords= {linguistique}, }

% The next two items give only a `howpublished` medium and no proceedings title,
% so they are typed as misc (the entry type whose styles actually print `howpublished`).
@misc{PubLIMSI-4192,
  author       = {Boula De Mareuil, Philippe},
  title        = {Vocabulaire de cit{\'e}},
  howpublished = {VSD},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
  abstract     = {De nouvelles expressions font r{\'e}guli{\`e}rement leur apparition. Le parler des cit{\'e}s est d{\'e}sormais l'objet d'{\'e}tudes pour les sociolinguistes. Et le vocabulaire des banlieues se propage dans toute la soci{\'e}t{\'e} grâce aux rappeurs et au Web. Le site Internet dictionnairedelazone.fr met en ligne les derni{\`e}res expressions de l'argot urbain. Avec plus de deux mille entr{\'e}es, ce dictionnaire tente de r{\'e}pertorier les mots les plus courants.},
  keywords     = {linguistique},
}

@misc{PubLIMSI-4245,
  author       = {Mariani, Joseph-Jean},
  title        = {21 langues menac{\'e}es d'extinction num{\'e}rique},
  howpublished = {Journal du CNRS n°270},
  GROUP        = {LIMSI,TLP},
  year         = {2013},
}

% NOTE(review): no booktitle is recorded for this item; the abstract mentions the
% 14th Interspeech conference, so the venue is presumably Interspeech 2013 - confirm before adding.
@inproceedings{PubLIMSI-4321,
  author   = {Soury, Mariette and Gossart, Cl{\'e}ment and Adda-Decker, Martine and Devillers, Laurence},
  title    = {A Tool to elicit and collect multicultural and multimodal laughter},
  GROUP    = {LIMSI,TLP},
  year     = {2013},
  pages    = {773--774},
  abstract = {We present the implementation of a data collection tool of multicultural and multi-modal laughter for the 14th Interspeech conference. The application will automatically record and analyze audio and video stream to provide real-time feedback. Using this tool, we expect to collect multimodal cues of different kind of laughers elicited in participants with funny videos, as well as jokes and tongue-twisters games with the Nao robot. 
The collected corpus will be used for paralinguistic challenges.}, keywords= {laughter, smile, data collection, feedback}, } @incollection {PubLIMSI-3646, author = {Despres, J. AND Lamel, Lori AND Gauvain, Jean-Luc AND Dimulescu-Vieru, Bianca AND Woehrling, C{\'e}cile AND Le, Viet Bac AND Oparin, Ilya}, title = {{The Vocapia Research ASR systems for Evalita 2011}}, booktitle = {Evaluation of Natural Language and Speech Tool for Italian (Proceedings of EVALITA 2011, Rome, Italy, ISSN 2240-5186) Lecture Notes in Computer Science, Vol. 7689}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2013}, pages = {286-294}, publisher = {Springer}, ISBN= {978-3-642-35827-2}, keywords= {automatic speech recognition, speech-to-text transcription, Italian, Evalita, unsupervised adaptation, MLP features, NN language models, PLP, MMIE, SAT, MAP}, url = {http://www-tlp.limsi.fr/public/EVALITAlamel2013_1356702371.pdf}, } %%%%%%%%% 2012 section @article {PubLIMSI-1573, author = {Ramona, M. AND Fenet, S. AND Blouet, R. AND Bredin, Herv{\'e} AND Fillon, T. AND Peeters, G.}, title = {A public audio identification evaluation framework for broadcast monitoring}, AERES = {ACL}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {119-136}, journal = {Applied Artificial Intelligence}, volume = {26}, number = {1-2}, abstract= {This paper presents the first public framework for the evaluation of audio fingerprinting techniques. Although the domain of audio identification is very active, both in the industry and the academic world, there is at present no common basis to compare the proposed techniques. This is because corpuses and evaluation protocols differ among the authors. The framework we present here corresponds to a use-case in which audio excerpts have to be detected in a radio broadcast stream. This scenario, indeed, naturally provides a large variety of audio distortions that makes this task a real challenge for fingerprinting systems. 
Scoring metrics are discussed with regard to this particular scenario. We then describe a whole evaluation framework including an audio corpus, together with the related groundtruth annotation, and a toolkit for the computation of the score metrics. An example of an application of this framework is finally detailed, that took place during the evaluation campaign of the Quaero project. This evaluation framework is publicly available for download and constitutes a simple, yet thorough, platform that can be used by the community in the field of audio identification to encourage reproducible results.}, } @article {PubLIMSI-3000, author = {Boula De Mareuil, Philippe AND Rilliard, Albert AND Allauzen, Alexandre}, title = {Variation diachronique dans la prosodie du style journalistique : le cas de laccent initial}, AERES = {ACL}, GROUP = {LIMSI,TLP,AA}, year = {2012}, pages = {97-111}, journal = {Revue Fran\c{c}aise de Linguistique Appliqu{\'e}e}, volume = {17}, number = {1}, abstract= {Cette {\'e}tude traite de l{\'e}volution de la prosodie dans le style journalistique fran\c{c}ais, {\`a} partir de lanalyse acoustique darchives audiovisuelles remontant aux ann{\'e}es 1940. Un corpus dune dizaine dheures de parole a {\'e}t{\'e} examin{\'e} automatiquement, et nous nous sommes concentr{\'e}s sur laccent initial, qui peut donner une impression de style emphatique. Des mesures objectives sugg{\`e}rent quen un demi-si{\`e}cle les traits suivants ont diminu{\'e} : la hauteur de voix des journalistes, la mont{\'e}e m{\'e}lodique associ{\'e}e {\`a} laccent initial et la dur{\'e}e vocalique caract{\'e}risant un accent initial emphatique. Les attaques de syllabes initiales accentu{\'e}es, quant {\`a} elles, se sont allong{\'e}es. 
Ce r{\'e}sultat sugg{\'e}re que les corr{\'e}lats de dur{\'e}e de laccent initial ont chang{\'e} au cours du temps, dans le style journalistique fran\c{c}ais.}, keywords= {corpus diachronique, traitement automatique, prosodie}, } @article {PubLIMSI-3300, author = {Ercolessi, P. AND S{\'e}nac, C. AND Bredin, Herv{\'e} AND Mouysset, Sandrine}, title = {Vers un R{\'e}sum{\'e} Automatique de S{\'e}ries T{\'e}l{\'e}vis{\'e}es bas{\'e} sur une Recherche Multimodale d'Histoires}, AERES = {ACLN}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {9-34}, journal = {Document Num{\'e}rique}, volume = {15}, number = {2}, abstract= {Modern TV series have complex plots made of several intertwined stories following numerous characters. In this paper, we propose an approach for automatically detecting these stories in order to generate video summaries and we propose a visualization tool to have a quick and easy look at TV series. Based on automatic scene segmentation of each TV series episode (a scene is defined as temporally and spatially continuous and semantically coherent), scenes are clustered into stories, made of (non necessarily adjacent) semantically similar scenes. Visual, audio and text modalities are combined to achieve better scene segmentation and story detection performance. An extraction of salient scenes from stories is performed to create the summary. Experimentations are conducted on two TV series with different formats.}, } @article {PubLIMSI-3609, author = {Clavel, C{\'e}line AND Devillers, Laurence AND Plessier, J. AND Ach, L. AND Morel, B. AND Martin, Jean-Claude}, title = {Combinaisons d'expressions vocales, faciales et posturales des {\'e}motions chez un agent anim{\'e}. 
Perception par les utilisateurs}, AERES = {ACL}, GROUP = {LIMSI,CPU,TLP}, year = {2012}, pages = {533--564}, journal = {Technique et Science Informatiques}, volume = {31}, number = {4}, abstract= {Le contr{\^o}le {\'e}motionnel temps-r{\'e}el d'avatars soul{\`e}ve plusieurs questions en termes de d{\'e}tection des {\'e}motions dans le comportement de l'utilisateur et en termes de coh{\'e}rence expressive des avatars {\`a} travers les diff{\'e}rentes modalit{\'e}s. L'objectif de cet article est de pr{\'e}senter l'{\'e}valuation d'un syst{\`e}me de communication {\`a} distance, m{\'e}diatis{\'e} par un avatar capable d'exprimer de mani{\`e}re non verbale les {\'e}motions d{\'e}tect{\'e}es dans la voix de l'utilisateur. Un module de reconnaissance audio des {\'e}motions exprim{\'e}es dans la voix et une librairie d'expressions faciales et posturales d'{\'e}motions ont {\'e}t{\'e} d{\'e}velopp{\'e}s pour pouvoir animer l'avatar. Cet article d{\'e}crit une {\'e}tude {\'e}valuant la perception qu'ont des sujets de l'expressivit{\'e} multimodale de l'avatar.}, keywords= {agent anim{\'e}, avatar, {\'e}motion, d{\'e}tection, comportement non verbal, multimodalit{\'e}, expression {\'e}motionnelle}, }

% Same journal issue as PubLIMSI-3000 (volume 17, number 1, pages 97-111); the original
% record stored the volume in `number` and had no `volume` field.
% NOTE(review): issue number 1 is taken from that sibling entry - confirm against the publisher record.
@article{PubLIMSI-3755,
  author   = {Adda-Decker, Martine and Fougeron, C{\'e}cile and Gendrot, C{\'e}dric and Delais-Roussarie, Elisabeth and Lamel, Lori},
  title    = {La liaison dans la parole spontan{\'e}e famili{\`e}re : une {\'e}tude sur grand corpus},
  AERES    = {ACL},
  GROUP    = {LIMSI,TLP},
  year     = {2012},
  pages    = {113--128},
  journal  = {Revue Fran{\c{c}}aise de Linguistique Appliqu{\'e}e},
  volume   = {17},
  number   = {1},
  abstract = {Cet article porte sur la liaison dans un parler spontan{\'e} familier. Partant du constat que, dans la parole famili{\`e}re, les prononciations s'{\'e}cartent souvent de leur forme canonique du fait d'un taux de r{\'e}duction temporelle {\'e}lev{\'e}, nous faisons l'hypoth{\`e}se que le nombre de liaisons r{\'e}alis{\'e}es se trouve diminu{\'e} dans ce type de parole. 
Notre {\'e}tude repose sur une exploration du corpus NCCFr (Nijmegen Corpus of Casual French) {\`a} partir de techniques automatiques comme lalignement automatique. Les taux de r{\'e}alisation sont mesur{\'e}s pour les consonnes de liaison les plus fr{\'e}quentes (/z/, /n/ et /t/) dans des sites de liaison potentielle class{\'e}s selon que la liaison y est obligatoire, facultative ou interdite. Nous proposons {\'e}galement une {\'e}tude sur les relations entre le d{\'e}bit de parole et la r{\'e}alisation des liaisons.}, keywords= {liaison, parler spontan{\'e} familier, alignement automatique, d{\'e}bit}, } @article {PubLIMSI-4171, author = {Dale, Robert AND Yvon, Fran\c{c}ois}, title = {Editorial of the special issue on error handling in natural language processing}, AERES = {DO}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {1-4}, journal = {TAL (Traitement Automatique des Langues)}, volume = {53}, number = {3}, keywords= {Error; Natural Language Processing}, } @article {PubLIMSI-4218, author = {Bellot, P. AND Chappell, T. AND Doucet, A. AND Geva, S. AND Gurajada, S. AND Kamps, J. AND Kazai, G. AND Koolen, M. AND Landoni, M. AND Marx, M. AND Mishra, A. AND Moriceau, V{\'e}ronique AND Mothe, Josiane AND Preminger, M. AND Wang, Jianchu AND Ramírez, G. AND Wang, Yun AND Sanderson, M. AND SanJuan, E. AND Wang, W. AND Scholer, F. AND Wang, Yushan AND Schuh, A. AND Tannier, Xavier AND Wang, Xin AND Theobald, M. AND Trappett, M. AND Trotman, A. AND Wang, Q.}, title = {Report on INEX 2012}, AERES = {ASCL}, GROUP = {LIMSI,ILES,LIR,AMI,TLP}, year = {2012}, pages = {50-59}, journal = {SIGIR Forum}, volume = {46}, number = {2}, } @inproceedings {PubLIMSI-2062, author = {Zweigenbaum, Pierre AND Wisniewski, Guillaume AND Dinarelli, Marco AND Grouin, Cyril AND Rosset, Sophie}, title = {R{\'e}solution des cor{\'e}f{\'e}rences dans des comptes rendus cliniques. 
Une exp{\'e}rimentation issue du d{\'e}fi i2b2/VA 2011}, booktitle = {Congr{\`e}s Francophone sur la Reconnaissance des Formes et l'Intelligence Artificielle}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2012}, pages = {8p}, month= {24 au 27/01}, address= {Lyon, France}, keywords = {medical information extraction} } @inproceedings {PubLIMSI-2359, author = {Bredin, Herv{\'e}}, title = {Segmentation of TV shows into scenes using speaker diarization and speech recognition}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {2377-2380}, month= {25/03 au 30/03}, address= {Kyoto, Japan}, abstract= {We investigate the use of speaker diarization (SD) and automatic speech recognition (ASR) for the segmentation of audiovisual documents into scenes. We introduce multiple monomodal and multimodal approaches based on a state-of-the-art algorithm called generalized scene transition graph (GSTG). First, we extend the latter with the use of semantic information derived from both SD and ASR. Then, multimodal fusion of color histograms, SD and ASR is investigated at various point of the GSTG pipeline (early, late or intermediate fusion). 
Experiments driven on a few episodes of a popular TV show indicate that SD and ASR can be successfully combined with visual information and bring an additional +11\% relative increase in terms of F-Measure for scene boundary detection over the state-of-the-art baseline.}, } @inproceedings {PubLIMSI-2360, author = {Bredin, Herv{\'e}}, title = {Community-driven hierarchical fusion of numerous classifiers: application to video semantic indexing}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {2329-2332}, month= {25/03 au 30/03}, address= {Kyoto, Japan}, abstract= {We deal with the issue of combining dozens of classifiers into a better one. Our first contribution is the introduction of the notion of communities of classifiers. We build a complete graph with one node per classifier and edges weighted by a measure of similarity between connected classifiers. The resulting community structure is uncovered from this graph using the state-of-the-art Louvain algorithm. Our second contribution is a hierarchical fusion approach driven by these communities. First, intra-community fusion results in one classifier per community. Then, inter-community fusion takes advantage of their complementarity to achieve much better classification performance. 
Application to the combination of 90 classifiers in the framework of TRECVid 2010 Semantic Indexing task shows a 30\% increase in performance relative to a baseline flat fusion.}, } @inproceedings {PubLIMSI-2829, author = {Grappy, Arnaud AND Grau, Brigitte AND Rosset, Sophie}, title = {Fusion des r{\'e}ponses de syst{\`e}mes de questions-r{\'e}ponses}, booktitle = {Conf{\'e}rence en Recherche d'Information et Applications (CORIA 2012)}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2012}, pages = {99-110}, month= {21/03 au 23/03}, address= {Bordeaux, France}, abstract= {Les r{\'e}ponses donn{\'e}es par plusieurs syst{\`e}mes de questions-r{\'e}ponses proviennent de lapplication de strat{\'e}gies diff{\'e}rentes, et de ce fait permettent de r{\'e}pondre {\`a} des questions diff{\'e}rentes. La combinaison de ces syst{\`e}mes vise alors {\`a} accroître le nombre total de questions r{\'e}solues. Cet article pr{\'e}sente la combinaison de trois syst{\`e}mes : QAVAL, qui sappuie sur un module de validation de r{\'e}ponses et deux versions du syst{\`e}mes RITEL qui sappuie sur une analyse multi-niveaux appliqu{\'e}e aux questions et aux documents. 
La fusion des r{\'e}sultats est effectu{\'e}e de diff{\'e}rentes mani{\`e}res : en fusionnant les passages, {\`a} la sortie des syst{\`e}mes par vote ou fusion en tenant compte du poids ou du rang des r{\'e}ponses propos{\'e}es et par un m{\'e}canisme d'apprentissage sur les caract{\'e}ristiques des r{\'e}ponses.}, keywords= {fusion de r{\'e}ponses, syst{\`e}mes de questions-r{\'e}ponses, r{\'e}ordonnancent de r{\'e}ponses}, }

% Quaero extended named entities on OCRed French newspapers of December 1890 (LREC 2012).
@inproceedings{PubLIMSI-2870,
  author    = {Galibert, Olivier and Rosset, Sophie and Grouin, Cyril and Zweigenbaum, Pierre and Quintard, Ludovic},
  title     = {Extended named entities annotation on {OCRed} documents: from corpus constitution to evaluation campaign},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2012},
  pages     = {1--6},
  month     = {21/05 au 27/05},
  address   = {Istanbul, Turkey},
  abstract  = {Within the framework of the Quaero project, we proposed a new definition of named entities, based upon an extension of the coverage of named entities as well as the structure of those named entities. In this new definition, the extended named entities we proposed are both hierarchical and compositional. In this paper, we focused on the annotation of a corpus composed of press archives, OCRed from French newspapers of December 1890. We present the methodology we used to produce the corpus and the characteristics of the corpus in terms of named entities annotation. This annotated corpus has been used in an evaluation campaign. 
We present this evaluation, the metrics we used and the results obtained by the participants.}, keywords= {Named Entity, Press Archives Annotation, Evaluation, corpus annotation}, } @inproceedings {PubLIMSI-2872, author = {Apidianaki, Marianna AND Sagot, B.}, title = {Applying cross-lingual WSD to wordnet development}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {833-840}, number = {0}, month= {21/05 au 27/05}, address= {Istanbul, Turkey}, } @inproceedings {PubLIMSI-2958, author = {Calzolari, N. AND Del Gratta, R. AND Francopoulo, Gil AND Mariani, Joseph-Jean AND Rubino, Francesco AND Russo, I. AND Soria, Julio}, title = {The LRE map. Harmonising community descriptions of resources}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,IMMI}, year = {2012}, pages = {1084-1089}, month= {21/05 au 27/05}, address= {Istanbul, Turkey}, abstract= {Accurate and reliable documentation of Language Resources is an undisputable need: documentation is the gateway to discovery of Language Resources, a necessary step towards promoting the data economy. Language resources that are not documented virtually do not exist: for this reason every initiative able to collect and harmonise metadata about resources represents a valuable opportunity for the NLP community. In this paper we describe the LRE Map, reporting statistics on resources associated with LREC2012 papers and providing comparisons with LREC2010 data. The LRE Map, jointly launched by FLaReNet and ELRA in conjunction with the LREC 2010 conference, is an instrument for enhancing availability of information about resources, either new or already existing ones, reinforcing and facilitating the use of standards in the community. 
The LRE Map web interface provides the possibility of searching according to a fixed set of metadata and to view the details of extracted resources. The LRE Map is continuing to collect bottom-up input about resources from authors of other conferences through standard submission process. This will help broadening the notion of language resources and attract to the field neighboring disciplines that so far have been only marginally involved by the standard notion of language resources.}, keywords= {language resources, metadata, documentation}, }

% Comparison of structured named-entity reference annotations across two corpora (LAW VI).
% Every field must sit inside the single pair of entry braces: a closing brace right after
% `booktitle` would end the entry there and drop all the fields that follow it.
@inproceedings{PubLIMSI-2881,
  author    = {Rosset, Sophie and Grouin, Cyril and Fort, Kar{\"e}n and Galibert, Olivier and Kahn, Juliette and Zweigenbaum, Pierre},
  title     = {Structured named entities in two distinct press corpora: contemporary broadcast news and old newspaper},
  booktitle = {Linguistic Annotation Workshop (LAW VI)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2012},
  pages     = {1--8},
  volume    = {247},
  month     = {12/07 au 13/07},
  address   = {Jeju Do, South Korea},
  abstract  = {This paper compares the reference annotation of structured named entities in two corpora with different origins and properties. It addresses two questions linked to such a comparison. On the one hand, what specific issues were raised by reusing the same annotation scheme on a corpus that differs from the 1st in terms of media and that predates it by more than a century? 
On the other hand, what contrasts were observed in the resulting annotations across the two corpora?}, keywords= {Extended Names Entities, corpus annotation, cultural press, audio press}, } @inproceedings {PubLIMSI-2908, author = {Bardiaux, Alice AND Boula De Mareuil, Philippe}, title = {Allongements vocaliques en fran\c{c}ais de Belgique : approche exp{\'e}rimentale et perceptive}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {625-632}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Le pr{\'e}sent article {\'e}tudie lallongement de certaines voyelles en fran\c{c}ais de Belgique ainsi que son influence sur la perception de laccent belge. À partir denregistrements effectu{\'e}s en Belgique, deux exp{\'e}riences perceptives ont {\'e}t{\'e} men{\'e}es, aupr{\`e}s dauditeurs belges et fran\c{c}ais : lune a permis didentifier de fa\c{c}on robuste des voyelles allong{\'e}es per\c{c}ues comme r{\'e}gionalement marqu{\'e}es par des experts ; lautre, utilisant la modification/resynth{\`e}se de prosodie, a permis de tester limpact de lallongement vocalique dans la perception de laccent belge chez des auditeurs naifs. La premi{\`e}re exp{\'e}rience a montr{\'e} que la grande majorit{\'e} des voyelles per\c{c}ues comme allong{\'e}es est en syllabe p{\'e}nulti{\`e}me de mot ou appartient {\`a} des monosyllabes et que ces voyelles sont g{\'e}n{\'e}ralement nasales ou semi-ferm{\'e}es. 
La deuxi{\`e}me exp{\'e}rience sugg{\`e}re que, toutes choses {\'e}gales par ailleurs, les {\'e}chantillons de parole pr{\'e}sentant des allongements vocaliques sont {\'e}valu{\'e}s avec un degr{\'e} d'accent plus {\'e}lev{\'e} que leurs contreparties sans allongement.}, keywords= {variation r{\'e}gionale, accent belge, perception, (re)synth{\`e}se de prosodie}, } @inproceedings {PubLIMSI-3064, author = {Ligozat, Anne-Laure AND Grau, Brigitte AND Tribout, Delphine}, title = {Morphological resources for precise information retrieval}, booktitle = {Text, Speech and Dialog (TSD 2012)}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2012}, pages = {689-696}, publisher = {Springer}, series = {Text, Speech and Dialogue}, month= {03/09 au 07/09}, address= {Brno, Czech Republic}, abstract= {Question answering (QA) systems aim at providing a precise answer to a given user question. Their major difficulty lies in the lexical gap problem between question and answering passages. We present here the different types of morphological phenomena in question answering, the resources available for French, and in particular a resource that we built containing deverbal agent nouns. 
Then, we evaluate the results of a particular QA system, according to the morphological knowledge used.}, keywords= {morphological resources, question answering}, } @inproceedings {PubLIMSI-3092, author = {Boula De Mareuil, Philippe AND Rilliard, Albert AND Mairano, Paolo AND Lai, J.}, title = {Questions corses : peut-on mettre en {\'e}vidence un transfert prosodique du corse vers le fran\c{c}ais ?}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,AA}, year = {2012}, pages = {609-616}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Cet article aborde la question suivante : peut-on mettre en {\'e}vidence un transfert prosodique du corse (une langue italo-romane) vers le fran\c{c}ais parl{\'e} en Corse, o{\`u} le fran\c{c}ais est maintenant la langue dominante ? Un corpus de phrases transparentes en corse et en fran\c{c}ais telles que a turista trova a caserna (« la touriste trouve la caserne ») a {\'e}t{\'e} mis au point, et les productions de locuteurs bilingues enregistr{\'e}s en Corse ont {\'e}t{\'e} compar{\'e}es avec les contreparties fran\c{c}aises de locuteurs parisiens de r{\'e}f{\'e}rence. Il apparaît que la m{\'e}lodie des questions totales diff{\'e}rencie dun c{\^o}t{\'e} le corse et le fran\c{c}ais de Corse (avec tous deux des tons hauts suivis de descentes m{\'e}lodiques finales), de lautre le fran\c{c}ais standard (avec des tons hauts en fin de question). 
Ce premier patron peut {\^e}tre interpr{\'e}t{\'e} comme un transfert prosodique du corse vers le fran\c{c}ais.}, keywords= {prosodie en contact, questions, accent corse en fran\c{c}ais, langues en danger.}, } @inproceedings {PubLIMSI-3093, author = {Boula De Mareuil, Philippe AND Mairano, Paolo AND Rilliard, Albert AND Lai, J.}, title = {Corsican French questions: is there a prosodic transfer from Corsican to French and how to highlight it?}, booktitle = {International Conference on Speech Prosody (SP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,AA}, year = {2012}, pages = {418-421}, month= {22/05 au 25/05}, address= {Shanghai, China}, abstract= {This study investigates whether a prosodic transfer can be highlighted from Corsican (an Italo-Romance language) to French spoken in Corsica, where French is now the dominant language. A corpus of transparent sentences such as la touriste trouve la caserne (French) or a turista trova a caserna (Corsican) was designed and the productions of bilingual speakers, recorded in Corsica, were compared with the French counterparts of Parisian reference speakers. The melody of yes/no questions turns out to contrast Corsican and Corsican French (both with high tones followed by final pitch falls) and standard French (with utterance-final high tones). The former pattern can be interpreted as a prosodic transfer from Corsican to French. 
Various methods are considered to validate this hypothesis and an experimental paradigm is proposed.}, } @inproceedings {PubLIMSI-3194, author = {Dinarelli, Marco AND Rosset, Sophie}, title = {Tree representations in probabilistic models for extended named entities detection}, booktitle = {Conference of the European Chapter of the ACL (EACL 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {174-184}, publisher = {Association for Computational Linguistics}, series = {Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics}, month= {23/04 au 27/04}, address= {Avignon, France}, abstract= {In this paper we deal with Named Entity Recognition (NER) on transcriptions of French broadcast data. There are two aspects making the task more difficult with respect to previous NER tasks: i) named entities annotated used in this work have a tree structure, thus the task cannot be tackled as a sequence labelling task; ii) the data used are more noisy than data used for previous NER tasks. We approach the task in two steps, involving Conditional Random Fields and Probabilistic Context-Free Grammars, integrated in a single parsing algorithm. We analyse the effect of using several tree representations for the task, providing results comparable with those obtained in the official evaluation campaign. 
Our system outperforms the best system of the evaluation campaign by a significant margin.}, keywords = {named entity detection}, } @inproceedings {PubLIMSI-3195, author = {Doukhan, David AND Rilliard, Albert AND Rosset, Sophie AND d'Alessandro, Christophe}, title = {Modelling pause duration as a function of contextual length}, booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2012)}, AERES = {ACTI}, GROUP = {LIMSI,AA,TLP}, year = {2012}, pages = {1-4}, series = {InterSpeech'12}, month= {09/09 au 13/09}, address= {Portland, USA}, abstract= {Effects of contextual length are known to affect pause durations in neutral speech. The present study investi- gates these effects on an expressive corpus of read tales in French. Computational models of intra-sentence, and inter-sentence pause durations, as functions of contextual lengths are proposed. These models are aimed at improv- ing Text-To-Speech synthesis systems, and provide clues for synthesizing prosodic instructions above the level of the sentence. They are also aimed to help in the prosodic analysis of pause durations, which may be biased by con- textual length effects. We find the phoneme to be the best unit for measuring contextual length. Inter-sentence pause durations were more influenced by the length of the preceding sentences. Intra-sentence pause durations were more influenced by the length of the following pseudo- clauses.}, keywords= {pause duration, prosodic analysis, Text- To-Speech Synthesis}, } @inproceedings {PubLIMSI-3200, author = {Gábor, Kata AND Apidianaki, Marianna AND Sagot, B. 
AND Villemonte De La Clergerie, E.}, title = {Boosting the coverage of a semantic lexicon by automatically extracted event nominalizations}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {7p}, month= {21/05 au 25/05}, address= {Istanbul, Turkey}, abstract= {An important trend in recent works on lexical semantics has been the development of learning methods capable of extracting semantic information from text corpora. The majority of these methods are based on the distributional hypothesis of meaning and acquire semantic information by identifying distributional patterns in texts. In this article, we present a distributional analysis method for extracting nominalization relations from monolingual corpora. The acquisition method makes use of distributional and morphological information to select nominalization candidates. We explain how the learning is performed on a dependency annotated corpus and describe the nominalization results. Furthermore, we show how these results served to enrich an existing lexical resource, the WOLF (Wordnet Libre du Fran\c{c}ais). We present the techniques that we developed in order to integrate the new information into WOLF, based on both its structure and content. Finally, we evaluate the validity of the automatically obtained information and the correctness of its integration into the semantic resource. The method proved to be useful for boosting the coverage of WOLF and presents the advantage of filling verbal synsets, which are particularly difficult to handle due to the high level of verbal polysemy.}, } @inproceedings {PubLIMSI-3301, author = {Ercolessi, P. 
AND Bredin, Herv{\'e} AND S{\'e}nac, C.}, title = {StoViz: Story Visualization of TV Series}, booktitle = {ACM International Conference on MultiMedia (ACM Multimedia 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {1-2}, month= {29/10 au 02/11}, address= {Nara, Japan}, abstract= {Recent TV series tend to have more and more complex plot. They follow the lives of numerous characters and are made of multiple intertwined stories. In this paper, we introduce StoViz, a web-based interface allowing a fast overview of this kind of episode structure, based on our plot de-interlacing system. StoViz has two main goals. First, it provides the user with a useful overview of the episode by displaying each story separately and a short abstract extracted from them. Then, it allows an efficient visual comparison of the output of any automatic plot de-interlacing algorithm with the manual annotation in terms of stories and is therefore very helpful for evaluation purposes. StoViz is available online at http://stoviz.niderb.fr.}, } @inproceedings {PubLIMSI-3336, author = {Tribout, Delphine AND Ligozat, Anne-Laure AND Bernhard, Delphine}, title = {Constitution automatique d'une ressource morphologique : VerbAgent}, booktitle = {Congr{\`e}s Mondial de Linguistique Fran\c{c}aise (CMLF 2012)}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2012}, pages = {13p}, month= {04/07 au 07/07}, address= {Lyon, France}, } @inproceedings {PubLIMSI-3440, author = {Delaborde, Agn{\`e}s AND Devillers, Laurence}, title = {Impact du comportement social d'un robot sur les {\'e}motions de l'utilisateur : une exp{\'e}rience perceptive}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {281-288}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, } @inproceedings {PubLIMSI-3445, author = {Tahon, Marie AND Degottex, Gilles AND Devillers, Laurence},
title = {Usual voice quality features for emotional valence detection.}, booktitle = {International Conference on Speech Prosody (SP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {4p}, month= {22/05 au 25/05}, address= {Shanghai, China}, } @inproceedings {PubLIMSI-3446, author = {Vaudable, Christophe AND Devillers, Laurence}, title = {Negative emotions detection as an indicator of dialogs quality in call centers}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {5109-5112}, month= {25/03 au 30/03}, address= {Kyoto, Japan}, abstract= {Negative emotions such as anger recognition in particular can deliver useful information to both the customer and the agent of Interactive Voice Response platforms. The state-of-the-art of emotion detection is characterized as not taking into account real-life emotion behavior but realistic induced emotion. This study is part of the French project Voxfactory (Cap Digital). The aim is to analyze the quality of the interactions collected in call centers by using the topics of the dialogs, but also informations on opinions and emotions. A corpus of 18 hours of real dialogs between agent and customer collected in a service of complaints of French company EDF (power supply) has been annotated with emotional labels. We describe experiments on detection of three emotional states during calls. Full speaker independent test set has been used in order to be closer to a real life situation. The novelty of this paper is the analysis of full conversations (including turns with low confidence in emotion annotation and noisy turns) and the impact on the detection score.
The idea is to see how far we are from a system adapted to a real life situation.}, keywords= {anger detection, call center data, Voxfactory project}, } @inproceedings {PubLIMSI-3456, author = {Chastagnol, Cl{\'e}ment AND Devillers, Laurence}, title = {Detection d'{\'e}motions dans la voix de patients en interaction avec un agent conversationnel anim{\'e}}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {8p}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Le projet fran\c{c}ais ANR ARMEN a pour objectif de construire un robot assistant pour les personnes âg{\'e}es et handicap{\'e}es. L'interaction avec le robot est r{\'e}alis{\'e}e avec un agent conversationnel anim{\'e} (ACA), le robot est une plateforme mobile. Ce travail se concentre sur la construction du module de d{\'e}tection d'{\'e}motions du syst{\`e}me robotique. A cette fin, des donn{\'e}es ont {\'e}t{\'e} collect{\'e}es aupr{\`e}s de 77 patients de plusieurs centres m{\'e}dicaux. L'interaction avec les sujets {\'e}tait presque enti{\`e}rement conduite de mani{\`e}re naturelle en parlant avec l'agent virtuel. La difficult{\'e} sp{\'e}cifique de ce projet r{\'e}side dans la grande vari{\'e}t{\'e} de voix (âg{\'e}es, pathologiques) et de comportement affectif des utilisateurs. Nos premiers r{\'e}sultats montrent un score de 46\% de bonne d{\'e}tection sur quatre classes {\'e}motionnelles (Col{\`e}re, Joie, Tristesse, Neutre).
Nous analysons ces scores selon l'âge et la qualit{\'e} vocale.}, keywords= {robotique d'assitance, voix âg{\'e}es, reconnaissance des {\'e}motions}, } @inproceedings {PubLIMSI-3461, author = {Lardilleux, Adrien AND Yvon, Fran\c{c}ois AND Lepage, Y.}, title = {Hierarchical sub-sentential alignment with Anymalign}, booktitle = {Annual Conference of the European Association for Machine Translation (EAMT 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {279-286}, month= {28/05 au 30/05}, address= {Trento, Italy}, abstract= {We present a sub-sentential alignment algorithm that relies on association scores between words or phrases. This algorithm is inspired by previous work on alignment by recursive binary segmentation and on document clustering. We evaluate the resulting alignments on machine translation tasks and show that we can obtain state-of-the-art results, with gains up to more than 4 BLEU points compared to previous work, with a method that is simple, independent of the size of the corpus to be aligned, and directly computes symmetric alignments. This work also provides new insights regarding the use of ``heuristic'' alignment scores in statistical machine translation}, keywords = { xmtalign }, } @article {PubLIMSI-4512, author = {Sokolov, Artem AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {Lattice BLEU oracles in machine translation}, AERES = {ACL}, GROUP = {LIMSI,TLP}, year = {2013}, pages = {18\_1-18\_29}, journal = {ACM Transactions on Speech and Language Processing}, volume = {10}, number = {4}, abstract= {The search space of Phrase-Based Statistical Machine Translation (PBSMT) systems can be represented as a directed acyclic graph (lattice). By exploring this search space, it is possible to analyze and understand the failures of PBSMT systems. Indeed, useful diagnoses can be obtained by computing the so-called oracle hypotheses, which are hypotheses in the search space that have the highest quality score. 
For standard SMT metrics, this problem is, however, NP-hard and can only be solved approximately. In this work, we present two new methods for efficiently computing oracles on lattices: the first one is based on a linear approximation of the corpus bleu score and is solved using generic shortest distance algorithms; the second one relies on an Integer Linear Programming (ILP) formulation of the oracle decoding that incorporates count clipping constraints. It can either be solved directly using a standard ILP solver or using Lagrangian relaxation techniques. These new decoders are evaluated and compared with several alternatives from the literature for three language pairs, using lattices produced by two PBSMT systems.}, keywords= {BLEU, Integer linear programming, lattices, machine translation, oracle decoding}, } @inproceedings {PubLIMSI-3476, author = {Sokolov, Artem AND Wisniewski, Guillaume AND Yvon, Fran\c{c}ois}, title = {Non-linear n-best list reranking with few features}, booktitle = {Conference of the Association for Machine Translation in the Americas (AMTA 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {1-10}, month= {28/10 au 30/10}, address= {San Diego, USA}, } @inproceedings {PubLIMSI-3517, author = {Tahon, Marie AND Delaborde, Agn{\`e}s AND Devillers, Laurence}, title = {Corpus of children voices for mid-level markers and affect bursts analysis}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {2366-2369}, month= {21/05 au 27/05}, address= {Istanbul, Turkey}, abstract= {This article presents a corpus featuring children playing games in interaction with the humanoid robot Nao: children have to express emotions in the course of a storytelling by the robot. This corpus was collected to design an affective interactive system driven by an interactional and emotional representation of the user. 
We evaluate here some mid-level markers used in our system: reaction time, speech duration and intensity level. We also question the presence of affect bursts, which are quite numerous in our corpus, probably because of the young age of the children and the absence of predefined lexical content.}, keywords= {emotion detection, interaction with the humanoid robot Nao}, } @inproceedings {PubLIMSI-3549, author = {Le, Hai Son AND Allauzen, Alexandre AND Yvon, Fran\c{c}ois}, title = {Continuous space translation models with neural networks}, booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {39-48}, series = {http://aclweb.org/anthology-new/N/N12/}, month= {03/06 au 08/06}, address= {Montr{\'e}al, Qu{\'e}bec}, abstract= {The use of conventional maximum likelihood estimates hinders the performance of existing phrase-based translation models. For lack of sufficient training data, most models only consider a small amount of context. As a partial remedy, we explore here several continuous space translation models, where translation probabilities are estimated using a continuous representation of translation units in lieu of standard discrete representations. In order to handle a large set of translation units, these representations and the associated estimates are jointly computed using a multi-layer neural network with a SOUL architecture. 
In small scale and large scale English to French experiments, we show that the resulting models can effectively be trained and used on top of a n-gram translation system, delivering significant improvements in performance.}, } @inproceedings {PubLIMSI-3633, author = {Mathet, Yann AND Widlöcher, Antoine AND Fort, Karën AND Fran\c{c}ois, Claire AND Galibert, Olivier AND Grouin, Cyril AND Kahn, Juliette AND Rosset, Sophie AND Zweigenbaum, Pierre}, title = {Manual corpus annotation: giving meaning to the evaluation metrics}, booktitle = {International Conference on Computational Linguistics (COLING 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2012}, pages = {1-9}, month= {08/12 au 15/12}, address= {Mumbai, India}, abstract= {Computing inter-annotator agreement measures on a manually annotated corpus is necessary to evaluate the reliability of its annotation. However, the interpretation of the obtained results is recognized as highly arbitrary. We describe in this article a method and a tool that we developed which shuffles a reference annotation according to different error paradigms, thereby creating artificial annotations with controlled errors. 
Agreement measures are computed on these corpora, and the obtained results are used to model the behavior of these measures and understand their actual meaning.}, keywords= {accord inter-annotateur, annotation manuelle de corpus, {\'e}valuation, corpus annotation}, } @inproceedings {PubLIMSI-3642, author = {Adda-Decker, Martine AND Candea, Maria AND Lamel, Lori}, title = {Recent evolution of some non standard variants in French broadcast news}, booktitle = {Sociolinguistics Symposium 19}, AERES = {AFF}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {2p}, month= {21/08 au 24/08}, address= {Berlin, Germany}, } @inproceedings {PubLIMSI-3658, author = {Gong, Li AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois}, title = {Towards contextual adaptation for any-text translation}, booktitle = {International Workshop on Spoken Language Translation (IWSLT 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2012}, pages = {292-299}, month= {06/12 au 07/12}, address= {Hong Kong, People's Republic of China}, keywords= {traduction automatique}, } @inproceedings {PubLIMSI-3678, author = {Adda, Gilles}, title = {Un cadre exp{\'e}rimental pour les sciences de la parole}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {185-192}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Cet article est une prise de position pour la mise en place d'un cadre th{\'e}orique et pratique permettant de faire {\'e}merger une science empirique de la parole. Cette science doit se fonder sur l'apport de toutes les sciences, du traitement automatique ou de la linguistique, dont l'objet d'{\'e}tude est la parole.
Au coeur de ce rapprochement se trouve l'id{\'e}e que les syst{\`e}mes automatiques peuvent {\^e}tre utilis{\'e}s comme des instruments afin d'explorer les tr{\`e}s grandes quantit{\'e}s de donn{\'e}es {\`a} notre disposition et d'en tirer des connaissances nouvelles qui, en retour, permettront d'am{\'e}liorer les mod{\'e}lisations utilis{\'e}es en traitement automatique. Quelques points cruciaux sont abord{\'e}s ici, comme la d{\'e}finition de l'observable, l'{\'e}tude du r{\'e}siduel en tant que diagnostic de l'{\'e}cart entre la mod{\'e}lisation et la r{\'e}alit{\'e}, et la mise en place de centres instrumentaux permettant la mutualisation du d{\'e}veloppement et de la maintenance de ces instruments complexes que sont les syst{\`e}mes de traitement automatique de la parole.}, keywords= {analyse d'erreurs, structuration de la recherche en parole.}, } @inproceedings {PubLIMSI-3505, author = {Apidianaki, Marianna AND Ljubesic, Nikola AND Fiser, Darja}, title = {Disambiguating vectors for bilingual lexicon extraction from comparable corpora}, booktitle = {Language Technologies Conference}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {10-15}, month= {08/10 au 09/10}, address= {Ljubljana, Slovenia}, abstract= {This paper presents an approach to enhance the extraction of translation equivalents from comparable corpora by plugging in bilingual lexico-semantic knowledge harvested from a parallel corpus. First, the bilingual lexicon obtained from word-aligning the parallel corpus replaces an external seed dictionary, making the approach knowledge-light and portable. Next, instead of using simple 1:1 mappings between the source and the target language, translation equivalents are clustered into sets of synonyms based on contextual similarities, enabling us to expand the translation of vector features with several translation variants.
And last but not least, the vector features are disambiguated and translated only with the translation variants from the most appropriate cluster, thus producing less noisy vectors that allow for a more successful cross-lingual comparison of the vectors compared to simpler methods.}, } @inproceedings {PubLIMSI-3506, author = {Apidianaki, Marianna}, title = {Measuring the adequacy of cross-lingual paraphrases in a Machine Translation setting}, booktitle = {International Conference on Computational Linguistics (COLING 2012)}, internal-note = {FIXME: this record was damaged in the source database - the end of booktitle, the fields that followed it and the beginning of the abstract were lost. The booktitle was restored from its surviving prefix (International Conference on Computatio...) and must be checked against the original publication record; the lost fields and the head of the abstract remain to be restored.}, abstract= {[...] findings highlight the importance of complementing the current evaluation schemes with translation information to allow a more accurate estimation of the systems impact on end-to-end applications.}, year = {2012}, keywords= {Cross-Lingual Word Sense Disambiguation, Cross-Lingual Lexical Substitution, paraphrasing, Machine Translation}, } @inproceedings {PubLIMSI-3719, author = {Fort, Karën AND Nazarenko, Adeline AND Rosset, Sophie}, title = {Modeling the Complexity of Manual Annotation Tasks: a Grid of Analysis}, booktitle = {International Conference on Computational Linguistics (COLING 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {1-16}, month= {08/12 au 15/12}, address= {Mumbai, India}, abstract= {Manual corpus annotation is getting widely used in Natural Language Processing (NLP). While being recognized as a difficult task, no in-depth analysis of its complexity has been performed yet. We provide in this article a grid of analysis of the different complexity dimensions of an annotation task, which helps estimating beforehand the difficulties and cost of annotation campaigns.
We observe the applicability of this grid on existing annotation campaigns and detail its application on a real-world example.}, keywords= {manual corpus annotation, annotation campaign management, annotation campaign cost estimate}, } @inproceedings {PubLIMSI-3777, author = {Lardilleux, Adrien AND Yvon, Fran\c{c}ois AND Lepage, Y.}, title = {Alignement sous-phrastique hi{\'e}rarchique avec Anymalign}, booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2012)}, AERES = {ACTN}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {113-126}, series = {Paru dans Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL 2012}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Nous pr{\'e}sentons un algorithme d'alignement sous-phrastique permettant d'aligner tr{\`e}s facilement un couple de phrases {\`a} partir d'une matrice d'alignement pr{\'e}-remplie. Cet algorithme s'inspire de travaux ant{\'e}rieurs sur l'alignement par segmentation binaire r{\'e}cursive ainsi que de travaux sur le clustering de documents. Nous {\'e}valuons les alignements produits sur des tâches de traduction automatique et montrons qu'il est possible d'atteindre des r{\'e}sultats du niveau de l'{\'e}tat de l'art, affichant des gains tr{\`e}s cons{\'e}quents allant jusqu'{\`a} plus de 4~points BLEU par rapport {\`a} nos travaux ant{\'e}rieurs, {\`a} l'aide une m{\'e}thode tr{\`e}s simple, ind{\'e}pendante de la taille du corpus {\`a} traiter, et produisant directement des alignements sym{\'e}triques. 
En utilisant cette m{\'e}thode en tant qu'extension {\`a} l'outil d'extraction de traductions Anymalign, nos exp{\'e}riences nous permettent de cerner certaines limitations de ce dernier et de d{\'e}finir des pistes pour son am{\'e}lioration.}, keywords= {traduction automatique, xmtalign}, } @inproceedings {PubLIMSI-3760, author = {Nemoto, Rena AND Adda-Decker, Martine AND Durand, J.}, title = {The influence of speaking style on lexical f0 profiles in French}, booktitle = {International Conference on Speech Prosody (SP 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {1-4}, month= {22/05 au 25/05}, address= {Shanghai, China}, internal-note = {The abstract was de-interleaved from a two-column PDF extraction. Stray text from the introduction of the paper that had been mixed into it: "plays a role in f0 profile patterns and whether the previous findings hold across the newly added conditions. In the following, section 2 presents the different style speech corpora and recalls the methodology to extract and organize the measurements. Section 3 compares and discusses f0 profiles across conditions. Conclusions and future perspectives are given in section 4."}, abstract= {This study presents a comparison of French lexical fundamental frequency (f0) profiles for different speaking styles using phonemic, syllabic and lexical transcriptions as well as part-of-speech annotations. Three speaking styles (broadcast news, broadcast conferences and conversations) with over 20 hours of speech were used. Syllabic word length and POS were considered as influential factors. Results confirm word final syllable accentuation as common tendency in French. The study highlights noun word-initial accentuation after determiner for BN style speech. Journalistic prepared speech features lexical words with more dynamic f0 profiles on average versus more stable flat profiles for our spontaneous data.
Future works include localization of named-entity and/or focus of speech within the framework of discriminative classifiers.}, keywords= {fundamental frequency, lexical f0 profiles, French, word-final accentuation, POS annotation, corpus-based study, automatic processing}, } @inproceedings {PubLIMSI-3761, author = {Adda-Decker, Martine AND Nemoto, Rena}, title = {Prosodic patterns of Estonian words: a corpus-based description using spontaneous speech}, booktitle = {International Conference on Human Language Technologies - The Baltic Perspective (HLT 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {286 - 293}, volume = {247}, publisher = {IOS Press}, series = {Frontiers in Artificial Intelligence and Applications}, month= {04/10 au 05/10}, address= {Tartu, Estonia}, abstract= {This paper deals with average prosodic characteristics of Estonian as observed in 25 hours of manually transcribed spontaneous speech. The profiles show very similar profiles across word categories, duration profiles reveal less regular patters. For the frequent word category, a rising duration can be observed from the first to final vowels, whilst the other words show longer duration in monosyllabic words and in final vowels of 4-syllabic words. 
Overall, our results suggest that prosodic cues could contribute to word boundary location in continuous speech.}, keywords= {Estonian, prosodic lexical profiles, spontaneous speech, large corpus}, } @inproceedings {PubLIMSI-3790, author = {Dutrey, Camille AND Clavel, Chlo{\'e} AND Rosset, Sophie AND Vasilescu, Ioana AND Adda-Decker, Martine}, title = {Quel est l'apport de la d{\'e}tection d'entit{\'e}s nomm{\'e}es pour l'extraction d'information en domaine restreint ?}, booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2012)}, AERES = {ACTN}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {359-366}, series = {Actes de la conf{\'e}rence conjointe JEP-TALN-RECITAL}, month= {04/06 au 08/06}, address= {Grenoble, France}, abstract= {Les travaux li{\'e}s {\`a} la d{\'e}finition et {\`a} la reconnaissance des entit{\'e}s nomm{\'e}es sont g{\'e}n{\'e}ralement envisag{\'e}s en domaine ouvert, {\`a} travers la conception de cat{\'e}gories g{\'e}n{\'e}riques (noms de personnes, etc.) et leur application {\`a} des donn{\'e}es textuelles issues de la presse (orale comme {\'e}crite). Par ailleurs, la fouille des donn{\'e}es issues de centres d'appel est strat{\'e}gique pour une entreprise comme EDF, compte tenu du r{\^o}le crucial jou{\'e} par l'opinion pour les applications marketing, ce qui passe par la d{\'e}finition d'entit{\'e}s d'int{\'e}r{\^e}t propres au domaine. Nous comparons les deux types de mod{\`e}les d'entit{\'e}s g{\'e}n{\'e}riques et sp{\'e}cifiques {\`a} un domaine pr{\'e}cis afin d'observer leurs points de recouvrement, via l'annotation manuelle d'un corpus de conversations en centres d'appel.
Nous souhaitons ainsi {\'e}tudier l'apport d'une d{\'e}tection en entit{\'e}s nomm{\'e}es g{\'e}n{\'e}riques pour l'extraction d'information m{\'e}tier en domaine restreint.}, keywords= {entit{\'e}s nomm{\'e}es, concepts m{\'e}tier, extraction d'information, donn{\'e}es conversationnelles, annotation}, } @inproceedings {PubLIMSI-3859, author = {Gesmundo, Andrea AND Tomeh, Nadi}, title = {{HadoopPerceptron: a toolkit for distributed perceptron training and prediction with MapReduce}}, booktitle = {Conference of the European Chapter of the ACL (EACL 2012)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {97-101}, publisher = {EACL}, month= {23/04 au 27/04}, address= {Avignon, France}, internal-note = {FIXME: the abstract of this record is two records spliced together in the source database. It breaks off mid-word (at: interact with th) and continues with the abstract of a different paper on relative-entropy and significance pruning of phrase tables. The end of this abstract and the header fields of the other record are lost and must be restored from the original publication records.}, abstract= {We propose a set of open-source software modules to perform structured Perceptron Training, Prediction and Evaluation within the Hadoop framework. Apache Hadoop is a freely available environment for running distributed applications on a computer cluster. The software is designed within the Map-Reduce paradigm. Thanks to distributed computing, the proposed software reduces substantially execution times while handling huge data-sets. The distributed Perceptron training algorithm preserves convergence properties, thus guaranties same accuracy performances as the serial Perceptron. The presented modules can be executed as stand-alone software or easily extended or integrated in complex systems. The execution of the modules applied to specific NLP tasks can be demonstrated and tested via an interactive web interface that allows the user to inspect the status and structure of the cluster and interact with th has been shown to be efficient for pruning language models for more than a decade ago. Recently, this method has been applied to Phrase-based Machine Translation, and results suggest that this method is comparable the state-of-art pruning method based on significance tests.
In this work, we show that these 2 methods are effective in pruning different types of phrase pairs. On one hand, relative entropy pruning searches for phrase pairs that can be composed using smaller constituents with a small or no loss in probability. On the other hand, significance pruning removes phrase pairs that are likely to be spurious. Then, we show that these methods can be combined in order to produce better results, over both metrics when used individually.}, } @inproceedings {PubLIMSI-3419, author = {Soria, Julio AND Bel, Nuria AND Choukri, Khalid AND Mariani, Joseph-Jean AND Monachini, Monica AND Odijk, J. AND Piperidis, S. AND Quochi, V. AND Calzolari, N.}, title = {The FLaReNet strategic language resource agenda}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2012)}, AERES = {ACTI}, GROUP = {LIMSI,IMMI,TLP}, year = {2012}, pages = {1379-1386}, month= {21/05 au 27/05}, address= {Istanbul, Turkey}, } @book {PubLIMSI-3430, author = {Mariani, Joseph-Jean AND Paroubek, Patrick AND Francopoulo, Gil AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois AND Zweigenbaum, Pierre}, title = {The French language in the digital age / La Langue fran\c{c}aise {\`a} l'{\`e}re du num{\'e}rique}, AERES = {OS}, GROUP = {LIMSI,ILES,IMMI,TLP}, year = {2012}, pages = {102}, series = {European Languages Whitepaper Series}, publisher = {Springer} } @book {PubLIMSI-3462, editor = {Gaussier, E.
AND Yvon, Fran\c{c}ois}, title = {Textual information access}, AERES = {DO}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {429}, publisher = {ISTE/Wiley, Paris}, } @inproceedings {PubLIMSI-3528, author = {Mariani, Joseph-Jean AND Paroubek, Patrick AND Francopoulo, Gil AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois AND Zweigenbaum, Pierre}, title = {The White Paper on the French Language in the Digital Age}, booktitle = {International Symposium on Multilingualism in Cyberspace (SIMC 2012)}, AERES = {INV}, GROUP = {LIMSI,IMMI,ILES,TLP}, year = {2012}, month= {21/11 au 23/11}, address= {Paris, France}, } @incollection {PubLIMSI-2418, author = {Mariani, Joseph-Jean}, title = {Les technologies de la langue en soutien au multilinguisme.}, booktitle = {NET.LANG R{\'e}ussir le cyberespace multilingue}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {149-170}, publisher = {C\&F}, ISBN= {978-2-915825-23-7}, } @incollection {PubLIMSI-2419, author = {Mariani, Joseph-Jean}, title = {How Language Technologies support Multilingualism}, booktitle = {Towards the Multilingual Cyberspace.}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {141-160}, publisher = {C\&F}, ISBN= {978-2-915825-24-4}, } @incollection {PubLIMSI-3452, author = {Devillers, Laurence AND Mariani, Joseph-Jean}, title = {Des robots et des hommes}, booktitle = {Et l'Homme...
cr{\'e}a le Robot - Catalogue d'exposition, Mus{\'e}e des arts et m{\'e}tiers, 30 octobre 2012 au 3 mars 2013}, AERES = {OV}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {30-37}, publisher = {Co{\'e}dition Somogy/CNAM}, ISBN= {9782757205907}, } @incollection {PubLIMSI-3459, author = {Allauzen, Alexandre AND Yvon, Fran\c{c}ois}, title = {Statistical methods for machine translation}, booktitle = {Textual information access}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {223--304}, publisher = {ISTE/Wiley, Paris}, ISBN= {978-1-84821-322-7}, } @incollection {PubLIMSI-3460, author = {Yvon, Fran\c{c}ois}, title = {Probabilistic models: an introduction}, booktitle = {Textual information access}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {369-420}, publisher = {ISTE/Wiley, Paris}, ISBN= {978-1-84821-322-7}, } @incollection {PubLIMSI-3616, author = {Boula De Mareuil, Philippe AND Woehrling, C{\'e}cile AND Adda-Decker, Martine AND Bardiaux, Alice AND Simon, Anne Catherine}, title = {Une {\'e}tude par traitement automatique de la prosodie du fran\c{c}ais {\`a} la fronti{\`e}re des domaines roman et germanique}, booktitle = {La variation prosodique r{\'e}gionale en fran\c{c}ais}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {121-138}, publisher = {De Boeck}, ISBN= {978-2-8011-1695-1}, abstract= {Ce chapitre est consacr{\'e} {\`a} la variation prosodique dans le fran\c{c}ais parl{\'e} en Alsace, en Belgique et en Suisse romande, en comparaison {\'e}galement avec le fran\c{c}ais « standard ».}, keywords= {linguistique}, } @incollection {PubLIMSI-3617, author = {Adda-Decker, Martine AND Nemoto, Rena AND Boula De Mareuil, Philippe}, title = {Une approche automatis{\'e}e de la diversit{\'e} prosodique en fran\c{c}ais}, booktitle = {La variation prosodique r{\'e}gionale en fran\c{c}ais}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {41--64}, publisher = {De Boeck}, ISBN= {978-2-8011-1695-1}, abstract= {Ce chapitre transversal
pr{\'e}sente la m{\'e}thodologie s'appuyant sur des alignements et des annotations automatiques, ainsi que des r{\'e}sultats comparatifs sur des aspects prosodiques pour une quinzaine de points d'enqu{\^e}te PFC. L'ambition de ce chapitre est de pr{\'e}senter ce que l'on peut et ce que l'on ne peut pas (encore) faire par traitement automatique pour d{\'e}crire la variation prosodique r{\'e}gionale. Il ne pr{\'e}tend pas {\`a} l'exhaustivit{\'e} mais, centr{\'e} sur le fran\c{c}ais, il pr{\'e}sente quelques approches qui ont {\'e}t{\'e} {\'e}prouv{\'e}es pour divers accents du fran\c{c}ais.}, keywords= {linguistique}, } @incollection {PubLIMSI-3618, author = {Simon, Anne Catherine AND Hambye, Philippe AND Bardiaux, Alice AND Boula De Mareuil, Philippe}, title = {Caract{\'e}ristiques des accents r{\'e}gionaux en fran\c{c}ais : que nous apprennent les approches perceptives ?}, booktitle = {La variation prosodique r{\'e}gionale en fran\c{c}ais}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {27-40}, publisher = {De Boeck}, ISBN= {978-2-8011-1695-1}, abstract= {Dans ce chapitre, nous passerons en revue diff{\'e}rentes {\'e}tudes sur la perception de la prosodie r{\'e}gionale en nous focalisant sur la mani{\`e}re dont elles rel{\`e}vent ces d{\'e}fis m{\'e}thodologiques et sur les r{\'e}sultats auxquels elles ont d{\`e}s lors pu aboutir. Nous examinerons tout d'abord la question g{\'e}n{\'e}rale de la s{\'e}lection des enregistrements qui servent de base aux {\'e}tudes perceptives, avant de nous pencher sur les m{\'e}thodes {\'e}labor{\'e}es pour pouvoir isoler les variables linguistiques soumises au jugements des informateurs, ainsi que sur les travaux visant {\`a} identifier les corr{\'e}lats acoustiques des variantes per\c{c}ues comme marqu{\'e}es.
Nous terminons le chapitre en mettant en {\'e}vidence les questions laiss{\'e}es en suspens et en soulignant les avanc{\'e}es rendues possibles par le recours aux enqu{\^e}tes sur la perception.}, keywords= {linguistique}, } @incollection {PubLIMSI-3651, author = {Romano, A. AND Boula De Mareuil, Philippe AND Lai, J. AND Mairano, Paolo}, title = {Mistral et Paoli : sur la m{\^e}me longueur d'onde ? Quelques patrons m{\'e}lodiques de l'occitan et du corse}, booktitle = {La le\c{c}on des dialectes. Hommages {\`a} Jean-Philippe Dalbera}, AERES = {OS}, GROUP = {LIMSI,TLP}, year = {2012}, pages = {219-232}, publisher = {Edizione Dell'Orso}, ISBN= {978-88-6274-416-4}, abstract= {Dans cet article, nous nous proposons de comparer certaines propri{\'e}t{\'e}s phon{\'e}tiques de l'intonation du corse et de l'occitan dans le cadre de l'Atlas Multim{\'e}dia Prosodique de l'Espace Roman (AMPER). Nous avons trouv{\'e} des similitudes frappantes, ainsi que des ph{\'e}nom{\`e}nes de divergence assez importants entre des locuteurs de vari{\'e}t{\'e}s pourtant proches. Des similarit{\'e}s sont notables, en particulier, entre les patrons intonatifs des questions totales observ{\'e}es {\`a} Corte et {\`a} Gap : des courbes m{\'e}lodiques descendantes ont {\'e}t{\'e} relev{\'e}es, alors que ces deux points d'enqu{\^e}te appartiennent {\`a} des aires dialectales distinctes. Alors que les langues de Paoli et de Mistral se pr{\'e}sentent comme fragment{\'e}es dialectalement ainsi que sur le plan prosodique, des convergences peuvent apparaître entre les deux espaces linguistiques, d{\'e}finissant des patrons qui peuvent se retrouver localement « sur la m{\^e}me longueur d'onde ». Cette tendance demande {\`a} {\^e}tre confirm{\'e}e aupr{\`e}s d'autres locuteurs, car l'intonation d{\'e}crite par exemple pour les donn{\'e}es de Gap n'a pas {\'e}t{\'e} mise en {\'e}vidence dans les autres localit{\'e}s du domaine occitan que nous avons examin{\'e}es (autour de Nice, Avignon, Montpellier). 
Le fait que des formes prosodiques puissent se retrouver dans des zones microg{\'e}ographiques discontinues, selon nous, ouvre des pistes int{\'e}ressantes pour de futurs travaux.},
  keywords = {linguistique},
}

% Book chapter: document dating system built for the DEFT 2011 campaign.
@incollection{PubLIMSI-3819,
  author = {Garcia-Fernandez, Anne and Ligozat, Anne-Laure and Dinarelli, Marco and Bernhard, Delphine},
  title = {M{\'e}thodes pour l'arch{\'e}ologie linguistique: datation par combinaison d'indices temporels},
  booktitle = {Exp{\'e}rimentations et {\'e}valuations en fouille de textes, un panorama des campagnes DEFT},
  AERES = {OS},
  GROUP = {LIMSI,ILES,TLP},
  year = {2012},
  pages = {231--246},
  publisher = {Hermes Lavoisier},
  isbn = {9782746238367},
  abstract = {Dans ce chapitre, nous pr{\'e}sentons le syst{\`e}me de datation d'un document que nous avons d{\'e}velopp{\'e} {\`a} l'occasion de DEFT2011. Notre approche est fond{\'e}e sur une combinaison de plusieurs sous-syst{\`e}mes et utilise plusieurs ressources externes comme Wikip{\'e}dia, les Google Books n-grams ainsi que des connaissances sur les r{\'e}formes orthographiques du fran\c{c}ais. Notre meilleur syst{\`e}me obtient un score de 0,435 sur les portions de 300 mots et de 0,512 sur les portions de 500 mots, ce qui repr{\'e}sente 42\% de d{\'e}cennies correctes et 7\% d'ann{\'e}es correctes au premier rang sur les portions de 300 mots, et 50\% de d{\'e}cennies correctes et 11\% d'ann{\'e}es correctes au premier rang sur les portions de 500 mots.},
  keywords = {datation de textes, cat{\'e}gorisation de textes},
}

% PhD thesis. The school field is required for thesis entries and was missing.
% NOTE(review): school value supplied from memory -- confirm against the thesis record.
@phdthesis{PubLIMSI-3706,
  author = {Le, Hai Son},
  title = {Continuous space models with neural networks in natural language processing},
  school = {Universit{\'e} Paris-Sud},
  AERES = {TH},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {178},
  abstract = {The purpose of language models is in general to capture and to model regularities of language, thereby capturing morphological, syntactical and distributional properties of word sequences in a given language.
They play an important role in many successful applications of Natural Language Processing, such as Automatic Speech Recognition, Machine Translation and Information Extraction. The most successful approaches to date are based on n-gram assumption and the adjustment of statistics from the training data by applying smoothing and back-off techniques, notably Kneser-Ney technique, introduced twenty years ago. In this way, language models predict a word based on its n-1 previous words. In spite of their prevalence, conventional n-gram based language models still suffer from several limitations that could be intuitively overcome by consulting human expert knowledge. One critical limitation is that, ignoring all linguistic properties, they treat each word as one discrete symbol with no relation with the others. Another point is that, even with a huge amount of data, the data sparsity issue always has an important impact, so the optimal value of n in the n-gram assumption is often 4 or 5 which is insufficient in practice. This kind of model is constructed based on the count of n-grams in training data. Therefore, the pertinence of these models is conditioned only on the characteristics of the training text (its quantity, its representation of the content in terms of theme, date). Recently, one of the most successful attempts that tries to directly learn word similarities is to use distributed word representations in language modeling, where distributionally words, which have semantic and syntactic similarities, are expected to be represented as neighbors in a continuous space. These representations and the associated objective function (the likelihood of the training data) are jointly learned using a multi-layer neural network architecture. In this way, word similarities are learned automatically. This approach has shown significant and consistent improvements when applied to automatic speech recognition and statistical machine translation tasks. 
A major difficulty with the continuous space neural network based approach remains the computational burden, which does not scale well to the massive corpora that are nowadays available. For this reason, the first contribution of this dissertation is the definition of a neural architecture based on a tree representation of the output vocabulary, namely Structured OUtput Layer (SOUL), which makes them well suited for large scale frameworks. The SOUL model combines the neural network approach with the class-based approach. It achieves significant improvements on both state-of-the-art large scale automatic speech recognition and statistical machine translations tasks. The second contribution is to provide several insightful analyses on their performances, their pros and cons, their induced word space representation. Finally, the third contribution is the successful adoption of the continuous space neural network into a machine translation framework. New translation models are proposed and reported to achieve significant improvements over state-of-the-art baseline systems.},
  keywords = {Continuous Space model, Neural Network, Statistical Language Model, Statistical Machine Translation, Natural Language Processing, Automatic Speech Recognition},
}

% Workshop paper: combining the QAVAL and RITEL question-answering systems.
@inproceedings{PubLIMSI-2830,
  author = {Grappy, Arnaud and Grau, Brigitte and Rosset, Sophie},
  title = {Methods combination and {ML}-based re-ranking of multiple hypothesis for question-answering systems},
  booktitle = {Workshop on Innovative Hybrid Approaches to the Processing of Textual Data (Hybrid2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2012},
  pages = {87--96},
  institution = {ACL},
  abstract = {Question answering systems answer correctly to different questions because they are based on different strategies. In order to increase the number of questions which can be answered by a single process, we propose solutions to combine two question answering systems, QAVAL and RITEL.
QAVAL proceeds by selecting short passages, annotates them by question terms, and then extracts from them answers which are ordered by a machine learning validation process. RITEL develops a multi-level analysis of questions and documents. Answers are extracted and ordered according to two strategies: by exploiting the redundancy of candidates and a Bayesian model. In order to merge the system results, we developed different methods either by merging passages before answer ordering, or by merging end-results. The fusion of end-results is realized by voting, merging, and by a machine learning process on answer characteristics, which lead to an improvement of the best system results of 19\%.},
  keywords = {system output fusion, question-answering system, answer reranking},
}

% SemEval 2012 paper: semantic similarity via Random Indexing and boosting.
@inproceedings{PubLIMSI-3026,
  author = {Sokolov, Artem},
  title = {Learning semantic similarity by selecting random word subsets},
  booktitle = {6th International Workshop on Semantic Evaluation (SemEval 2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {543--546},
  institution = {ACL},
  abstract = {We propose a semantic similarity learning method based on Random Indexing (RI) and ranking with boosting. Unlike classical RI, we use only those context vector features that are informative for the semantics modeled.
Despite ignoring text preprocessing and dispensing with semantic resources, the approach was ranked as high as 22nd among 89 participants in the SemEval-2012 Task6: Semantic Textual Similarity.},
  keywords = {semantics, random indexing, textual similarity, boosting, learning-to-rank},
}

% SSST-6 paper: word sense disambiguation integrated into an SMT system.
@inproceedings{PubLIMSI-3053,
  author = {Apidianaki, Marianna and Wisniewski, Guillaume and Sokolov, Artem and Max, Aur{\'e}lien and Yvon, Fran\c{c}ois},
  title = {{WSD} for n-best reranking and local language modeling in {SMT}},
  booktitle = {Sixth Workshop on Syntax, Semantics and Structure in Statistical Translation (SSST-6)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2012},
  pages = {9p},
  institution = {Quaero},
  abstract = {We integrate semantic information at two stages of the translation process of a state-of-the-art SMT system. A Word Sense Disambiguation (WSD) classifier produces a probability distribution over the translation candidates of source words which is exploited in two ways. First, the probabilities serve to rerank a list of n-best translations produced by the system. Second, the WSD predictions are used to build a supplementary language model for each sentence, aimed to favor translations that seem more adequate in this specific sentential context. Both approaches lead to significant improvements in translation performance, highlighting the usefulness of source side disambiguation for SMT.},
  keywords = {WSD, SMT},
}

% SAPA-SCALE 2012 paper: cochlear implant-like processing for speaker verification.
@inproceedings{PubLIMSI-3252,
  author = {Do, Cong Thanh and Barras, Claude},
  title = {Cochlear implant-like processing of speech signal for speaker verification},
  booktitle = {SAPA - SCALE Conference 2012},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {5},
  institution = {ISCA},
  abstract = {In this paper, we investigate the cochlear implant-like processing of speech signal in speaker verification.
This processing was applied on each speech utterance, in the temporal domain, to reduce spectral information in the original speech signal and synthesize a new one, called cochlear implant-like spectrally reduced speech (SRS), only from low-bandwidth subband temporal envelopes of the original speech. Spectral analyses, performed on voiced speech frames, showed that despite of the spectral and perceptual reduction induced by the cochlear implant-like signal processing, the global shape of the short-term spectral envelopes of the SRS signal is rather similar to that of the original speech signal. Although the SRS is synthesized only from low-bandwidth subband temporal envelopes of original speech signal, its use in a baseline GMM-UBM speaker verification system, with cellular telephone conversational speech of the Switchboard corpus (used in NIST SRE 2002), did not alter substantially the minimal DCF (detection cost function) of the system. Furthermore, using appropriate SRS signals made it possible to reduce the minimal DCF (5.7\% relative reduction) of the system. The linear combination at the score level, with equal weights, of the baseline and the SRS-based systems could also help in reducing the minimal DCF.},
  keywords = {reconnaissance du locuteur},
}

% ACM workshop paper: plot de-interlacing of TV series.
@inproceedings{PubLIMSI-3325,
  author = {Ercolessi, P. and S{\'e}nac, C. and Mouysset, Sandrine and Bredin, Herv{\'e}},
  title = {Hierarchical framework for plot de-interlacing of {TV} series based on speakers, Dialogues and images},
  booktitle = {1st ACM International Workshop on Audio and Multimedia Methods for Large-Scale Video Analysis},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {1--6},
  institution = {ACM},
  abstract = {Since the 90s, TV series tend to introduce more and more main characters and they are often composed of multiple intertwined stories.
In this paper, we propose a hierarchical framework of plot de-interlacing which permits to cluster semantic scenes into stories: a story is a group of scenes not necessarily contiguous but showing a strong semantic relation. Each scene is described using three different modalities (based on color histograms, speaker diarization or automatic speech recognition outputs) as well as their multimodal combination. We introduce the notion of character-driven episodes as episodes where stories are emphasized by the presence or absence of characters, and we propose an automatic method, based on a social graph, to detect these episodes. Depending on whether an episode is character-driven or not, the plot-de-interlacing -which is a scene clustering- is made either through a traditional average-link agglomerative clustering with speaker modality only, either through a spectral clustering with the fusion of all modalities. Experiments, conducted on twenty three episodes from three quite different TV series (different lengths and formats), show that the hierarchical framework brings an improvement for all the series.},
}

% Workshop paper: hierarchical late fusion for video concept detection.
@inproceedings{PubLIMSI-3330,
  author = {Strat, Tiberius and Benoit, Alexandre and Bredin, Herv{\'e} and Qu{\'e}not, G. and Lambert, Patrick},
  title = {Hierarchical late fusion for concept detection in videos},
  booktitle = {Workshop on Information Fusion in Computer Vision for Concept Recognition},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {335--344},
}

% Workshop paper: multimodal person identification in TV broadcast.
@inproceedings{PubLIMSI-3331,
  author = {Bredin, Herv{\'e} and Poignant, Johann and Tapaswi, Makarand and Fortier, Guillaume and Le, Viet Bac and Napoleon, Thibault and Gao, Hua and Barras, Claude and Rosset, Sophie and Besacier, L. and Verbeek, Jakob and Qu{\'e}not, G.
and Jurie, Fr{\'e}d{\'e}ric and Ekenel, H.},
  title = {Fusion of speech, faces and text for person identification in {TV} broadcast},
  booktitle = {Workshop on Information Fusion in Computer Vision for Concept Recognition},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {385--394},
  keywords = {named entity, multimodal fusion, person identification},
}

% i2b2/VA 2012 challenge paper: temporal relations in clinical text.
@inproceedings{PubLIMSI-3362,
  author = {Grouin, Cyril and Grabar, Natalia and Rosset, Sophie and Tannier, Xavier and Zweigenbaum, Pierre and Hamon, Thierry},
  title = {A tale of temporal relations between clinical concepts and temporal expressions: towards a representation of the clinical patient's timeline},
  booktitle = {i2b2/VA Challenge},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2012},
  pages = {9},
  institution = {i2b2/VA},
  abstract = {This paper presents the experiments we made to process the temporal relations between clinical concepts and temporal expressions as part of our participation in the 2012 i2b2/VA challenge. In order to detect the clinical concepts, we reused and adapted the platforms we developed during the 2010 and 2011 i2b2/VA editions; those platforms integrate rules and machine-learning process. Moreover, we built models based upon Random Forest to identify the modality and the polarity of each concept. In order to identify the temporal expressions, we used the HeidelTime algorithm and made a few adaptations to deal with the specificities of the clinical documents. Finally, we split the set of temporal relations according to a series of distinct situations and built a series of models based upon decision trees with two strategies: the first one to give priority to precision, and the second one to balance recall and precision. After the official runs, we added more features, exhaustive enumeration of positive and negative instances at training time, and voting combination of five classifiers.
On the first task (Event/Timex3 identification), our best submission achieved a 0.8307 global F-measure on the Event identification and a 0.8385 global F-measure on the Timex3. On the end-to-End task, we also achieved our best results on the third task, with a 0.4932 global F-measure on the Tlink identification; results on the Event/Timex3 are the same than the previous ones. Finally, on the Tlink task based upon the Event/Timex3 ground truth corpus, we achieved a 0.5471 global F-measure. The additional experiments performed after the official runs increased this F-measure to 0.5968.},
  keywords = {traitement automatique des langues, extraction d'information, extraction d'entit{\'e}s, extraction d'expressions temporelles, extraction de relations temporelles, domaine m{\'e}dical, medical information extraction},
}

% WACAI 2012 paper: data collection for stress detection in social interaction.
@inproceedings{PubLIMSI-3437,
  author = {Soury, Mariette and Devillers, Laurence},
  title = {Collecte de donn{\'e}es pour la d{\'e}tection du stress dans les interactions sociales},
  booktitle = {Workshop Affects, Compagnons Artificiels et Interactions (WACAI 2012)},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2012},
  institution = {Universit{\'e} Paris Sud Orsay},
  abstract = {Ce papier pr{\'e}sente une collecte de donn{\'e}es multimodales relative au stress dans la prise de parole lors d'interactions sociales. Cette exp{\'e}rimentation a {\'e}t{\'e} faite afin de constituer un corpus de stress pour la d{\'e}tection automatique dans le traitement des phobies sociales, et afin de valider une application d'animations dynamiques pour la collecte de donn{\'e}es.
Ces travaux sont r{\'e}alis{\'e}s dans le cadre du projet FEDER E-Therapie.},
  keywords = {phobie sociale, stress, logiciel de rem{\'e}diation, corpus multimodal},
}

% NOTE(review): "users emotions" / "subjects age" in the title below look like lost apostrophes -- confirm against the paper.
@inproceedings{PubLIMSI-3442,
  author = {Delaborde, Agn{\`e}s and Devillers, Laurence},
  title = {Impact of the social behaviours of the robot on the users emotions: importance of the task and the subjects age},
  booktitle = {Workshop Affects, Compagnons Artificiels et Interactions (WACAI 2012)},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2012},
  institution = {Universit{\'e} Paris Sud Orsay},
}

@inproceedings{PubLIMSI-3455,
  author = {Chastagnol, Cl{\'e}ment and Devillers, Laurence},
  title = {Collecting spontaneous emotional data for a social assistive robot},
  booktitle = {4th International Workshop on Corpora for Research on EMOTION SENTIMENT \& SOCIAL SIGNALS (ES3 2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {5p},
  institution = {ELRA},
}

% CLFL 2012 paper: sentence alignment of bilingual literary works.
@inproceedings{PubLIMSI-3463,
  author = {Yu, Qian and Yvon, Fran\c{c}ois and Max, Aur{\'e}lien},
  title = {Aligning bilingual literary works: a pilot study},
  booktitle = {Workshop on Computational Linguistics for Literature (CLFL 2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2012},
  pages = {36--44},
  institution = {Google Gift},
  abstract = {Electronic versions of literary works abound on the Internet and the rapid dissemination of electronic readers will make electronic books more and more common. It is often the case that literary works exist in more than one language, suggesting that, if properly aligned, they could be turned into useful resources for many practical applications, such as writing and language learning aids, translation studies, or data-based machine translation. To be of any use, these bilingual works need to be aligned as precisely as possible, a notoriously difficult task.
In this paper, we revisit the problem of sentence alignment for literary works and explore the performance of a new, multi-pass, approach based on a combination of systems. Experiments conducted on excerpts of ten masterpieces of the French and English literature show that our approach significantly outperforms two open source tools.},
}

% SLT 2012 paper: cepstral normalization plus cochlear implant-like processing.
@inproceedings{PubLIMSI-3391,
  author = {Do, Cong Thanh and Taghizadeh, Mohammad J. and Garner, Philip N.},
  title = {Combining cepstral normalization and cochlear implant-like speech processing for microphone array-based speech recognition},
  booktitle = {2012 IEEE Workshop on Spoken Language Technology (SLT 2012)},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2012},
  institution = {IEEE Signal Processing Society, ACL},
  abstract = {This paper investigates the combination of cepstral normalization and cochlear implant-like speech processing for microphone array-based speech recognition. Testing speech signals are recorded by a circular microphone array and are subsequently processed with superdirective beamforming and McCowan post-filtering. Training speech signals, from the multichannel overlapping Number corpus (MONC), are clean and not overlapping. Cochlear implant-like speech processing, which is inspired from the speech processing strategy in cochlear implants, is applied on the training and testing speech signals. Cepstral normalization, including cepstral mean and variance normalization (CMN and CVN), are applied on the training and testing cepstra. Experiments show that implementing either cepstral normalization or cochlear implant-like speech processing helps in reducing the WERs of microphone array-based speech recognition. Combining cepstral normalization and cochlear implant-like speech processing reduces further the WERs, when there is overlapping speech.
Train/test mismatches are measured using the Kullback-Leibler divergence (KLD), between the global probability density functions (PDFs) of training and testing cepstral vectors. This measure reveals a train/test mismatch reduction when either cepstral normalization or cochlear implant-like speech processing is used. It reveals also that combining these two processing reduces further the train/test mismatches as well as the WERs.},
  keywords = {Cepstral normalization, Cochlear implant-like speech processing, Kullback-Leibler divergence, Microphone array speech recognition, Overlapping speech},
}

@inproceedings{PubLIMSI-3551,
  author = {Le, Hai Son and Allauzen, Alexandre and Yvon, Fran\c{c}ois},
  title = {Measuring the Influence of Long Range Dependencies with Neural Network Language Models},
  booktitle = {Will We Ever Really Replace the N-gram Model? On the Future of Language Modeling for HLT},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {1--10},
  institution = {ACL},
}

% NIST SRE 2012 system description (LIMSI and Vocapia Research).
@inproceedings{Sarkar12,
  author = {Sarkar, A. K. and Le, V. B. and Do, C.-T. and Roy, A. and Barras, C. and Lamel, L. and Gauvain, J.-L.},
  title = {{LIMSI/Vocapia speaker verification system for NIST SRE 2012}},
  booktitle = {The 2012 NIST Speaker Recognition Evaluation (SRE12)},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {4p},
  institution = {NIST},
  abstract = {LIMSI and Vocapia Research developed two main speaker verification systems which were combined for submission to the NIST SRE 2012 core condition: a GSV-PCA system and a Lattice MLLR-based m-vector system.
Both are super-vector based.},
  keywords = {speaker verification},
  url = {http://www-tlp.limsi.fr/public/sre12-limsivr-v4_1365428920.pdf},
}

@inproceedings{PubLIMSI-4157,
  author = {Le, Hai Son and Lavergne, Thomas and Allauzen, Alexandre and Apidianaki, Marianna and Gong, Li and Max, Aur{\'e}lien and Sokolov, Artem and Wisniewski, Guillaume and Yvon, Fran\c{c}ois},
  title = {{LIMSI} @ {WMT12}},
  booktitle = {7th Workshop on Statistical Machine Translation (WMT 2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,ILES},
  year = {2012},
  pages = {330--337},
  institution = {Association for Computational Linguistics},
}

@inproceedings{PubLIMSI-4368,
  author = {Zhuang, Yong and Wisniewski, Guillaume and Yvon, Fran\c{c}ois},
  title = {Non-linear models for confidence estimation},
  booktitle = {7th Workshop on Statistical Machine Translation (WMT 2012)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2012},
  pages = {157--162},
  institution = {ACL},
}

% Baltic HLT 2012 paper: overview of speech processing work in the Quaero program.
@inproceedings{hlt12lamel,
  author = {L. Lamel},
  title = {{Multilingual Speech Processing Activities in Quaero: Application to Multimedia Search in Unstructured Data}},
  booktitle = {The Fifth International Conference: Human Language Technologies - The Baltic Perspective},
  month = oct # "~4--5",
  year = {2012},
  address = {Tartu, Estonia},
  pages = {1--8},
  abstract = {Spoken language processing technologies are principle components in most of the applications being developed as part of the Quaero program. Quaero is a large research and industrial innovation program focusing on the development of technologies for automatic analysis and classification of multimedia and multilingual documents.
Concerning speech processing, research aims to substantially improve the state-of-the-art in speech-to-text transcription, speaker diarization and recognition, language recognition, and speech translation.},
  keywords = {multilingualasr},
  url = {http://www-tlp.limsi.fr/public/hlt12_lamel.pdf}
}

% Interspeech 2012 paper: Speaker Trait Challenge, Personality Sub-Challenge.
% NOTE(review): the abstract appears to start mid-sentence -- check against the paper.
@inproceedings{is12ccld,
  author = {Cl{\'e}ment Chastagnol and Laurence Devillers},
  title = {{Personality traits detection using a parallelized modified SFFS algorithm}},
  booktitle = {InterSpeech 2012: 13th Annual Conference of the International Speech Communication Association},
  month = sep # "~9--13",
  year = {2012},
  address = {Portland, Oregon},
  abstract = {2012 Speaker Trait Challenge. We participated in the Personality Sub-Challenge, where the main characteristics of speakers according to the five OCEAN dimensions had to be determined based on short audio recordings solely. We considered the task as a general optimization problem and applied a modified version of the SFFS algorithm, wrapped around a SVM classifier, along with parallelized parameter tuning. Our system has yielded higher than baseline scores on all five dimensions for the development set, adding almost 14 percentage points to the recognition score of the Openness dimension.},
  url = {http://www-tlp.limsi.fr/public/is12_1243devil.pdf}
}

% Interspeech 2012 paper: speaker identification from overlaid names in TV broadcast.
@inproceedings{is12hb,
  author = {Johann Poignant and Herv{\'e} Bredin and Viet-Bac Le and Laurent Besacier and Claude Barras and Georges Qu{\'e}not},
  title = {{Unsupervised Speaker Identification using Overlaid Texts in TV Broadcast}},
  booktitle = {InterSpeech 2012: 13th Annual Conference of the International Speech Communication Association},
  month = sep # "~9--13",
  year = {2012},
  address = {Portland, Oregon},
  abstract = {We propose an approach for unsupervised speaker identification in TV broadcast videos, by combining acoustic speaker diarization with person names obtained via video OCR from overlaid texts.
Three methods for the propagation of the overlaid names to the speech turns are compared, taking into account the co-occurrence duration between the speaker clusters and the names provided by the video OCR and using a task-adapted variant of the TF-IDF information retrieval coefficient. These methods were tested on the REPERE dry-run evaluation corpus, containing 3 hours of annotated videos. Our best unsupervised system reaches a F-measure of 70.2\% when considering all the speakers, and 81.7\% if anchor speakers are left out. By comparison, a mono-modal, supervised speaker identification system with 535 speaker models trained on matching development data and additional TV and radio data only provided a 57.5\% F-measure when considering all the speakers and 45.7\% without anchor.},
  url = {http://www-tlp.limsi.fr/public/is12_473_hb.pdf}
}

% Interspeech 2012 paper: phoneme confusion model for keyword spotting.
@inproceedings{is12penny,
  author = {Panagiota Karanasou and Lukas Burget and Dimitra Vergyri and Murat Akbacak and Arindam Mandal},
  title = {{Discriminatively trained phoneme confusion model for keyword spotting}},
  booktitle = {InterSpeech 2012: 13th Annual Conference of the International Speech Communication Association},
  month = sep # "~9--13",
  year = {2012},
  address = {Portland, Oregon},
  abstract = {Keyword Spotting (KWS) aims at detecting speech segments that contain a given query within large amounts of audio data. Typically, a speech recognizer is involved in a first indexing step. One of the challenges of KWS is how to handle recognition errors and out-of-vocabulary (OOV) terms. This work proposes the use of discriminative training to construct a phoneme confusion model, which expands the phonemic index of a KWS system by adding phonemic variation to handle the abovementioned problems. The objective function that is optimized is the Figure of Merit (FOM), which is directly related to the KWS performance.
The experiments conducted on English data sets show some improvement on the FOM and are promising for the use of such technique.},
  url = {http://www-tlp.limsi.fr/public/is12_1044_penny.pdf}
}

% Interspeech 2012 paper: PPRLM language recognition with MLP features.
@inproceedings{is12mfb,
  author = {Mohamed Faouzi BenZeghiba and Jean-Luc Gauvain and Lori Lamel},
  title = {{Phonotactic Language Recognition Using MLP Features}},
  booktitle = {InterSpeech 2012: 13th Annual Conference of the International Speech Communication Association},
  month = sep # "~9--13",
  year = {2012},
  address = {Portland, Oregon},
  abstract = {This paper describes a very efficient Parallel Phone Recognizers followed by Language Modeling (PPRLM) system in terms of both performance and processing speed. The system uses context-independent phone recognizers trained on MLP features concatenated with the conventional PLP and pitch features. MLP features have several interesting properties that make them suitable for speech processing, in particular the temporal context provided to the MLP inputs and the discriminative criterion used to learn the MLP parameters. Results of preliminary experiments conducted on the NIST LRE 2005 for the closed-set task show significant improvements obtained by the proposed system compared with a PPRLM system using context-independent phone models trained on PLP features.
Moreover, the proposed system performs as well as a PPRLM system using context-dependent phone models, while running 6 times faster.},
  url = {http://www-tlp.limsi.fr/public/is12_1063_mfb.pdf},
  keywords = {mlpfeat, lid},
}

% Interspeech 2012 paper: automatic punctuation for French and English.
@inproceedings{is12jk,
  author = {Jachym Kolar and Lori Lamel},
  title = {{Development and Evaluation of Automatic Punctuation for French and English Speech-to-Text}},
  booktitle = {InterSpeech 2012: 13th Annual Conference of the International Speech Communication Association},
  month = sep # "~9--13",
  year = {2012},
  address = {Portland, Oregon},
  abstract = {Automatic punctuation of speech is important to make speech-to-text output more readable and to facilitate downstream language processing. This paper describes the development of an automatic punctuation system for French and English. The punctuation model uses both textual information and acoustic (prosodic) information and is based on adaptive boosting. The system is evaluated on a challenging speech corpus under real-application conditions using output from a state-of-the-art speech-to-text system and automatic audio segmentation and speaker diarization. Unlike previous work, automatic punctuation is scored on two independent manual references. Comparisons are made for the two languages and the performance of the automatic system is compared with inter-annotator agreement.},
  url = {http://www-tlp.limsi.fr/public/is12_1398_jk.pdf}
}

% Odyssey 2012 paper; the booktitle uses the Odyssey string macro defined at the top of the file.
@inproceedings{odyssey12mfb,
  author = {Mohamed Faouzi BenZeghiba and Jean-Luc Gauvain and Lori Lamel},
  title = {{Fusing Language Information from Diverse Data Sources for Phonotactic Language Recognition}},
  booktitle = Odyssey,
  year = 2012,
  month = "25--28~" # jun,
  address = {Singapore},
  abstract = {The baseline approach in building phonotactic language recognition systems is to characterize each language by a single phonotactic model generated from all the available language-specific training data.
When several data sources are available for a given target language, system performance can be improved using language source-dependent phonotactic models. In this case, the common practice is to fuse language source information (i.e., the phonotactic scores for each language/source) early (at the input) to the backend. This paper proposes to postpone the fusion to the end (at the output) of the backend. In this case, the language recognition score can be estimated from well-calibrated language source scores. Experiments were conducted using the NIST LRE 2007 and the NIST LRE 2009 evaluation data sets with the 30s condition. On the NIST LRE 2007 eval data, a Cavg of 0.9\% is obtained for the closed-set task and 2.5\% for the open-set task. Compared to the common practice of early fusion, these results represent relative improvements of 18\% and 11\%, for the closed-set and open-set tasks, respectively. Initial tests on the NIST LRE 2009 eval data gave no improvement on the closed-set task. Moreover, the Cllr measure indicates that language recognition scores estimated by the proposed approach are better calibrated than the common practice (early fusion).},
  keywords = {lid},
  url = {http://www-tlp.limsi.fr/public/odyssey12_mfb.pdf}
}

% JEP 2012 paper; the booktitle uses the jep string macro defined at the top of the file.
% NOTE(review): accents restored and "Delais-Roussarie" spelling corrected -- confirm against the paper.
@inproceedings{jep12liaison,
  author = {Martine Adda-Decker and C{\'e}cile Fougeron and C{\'e}dric Gendrot and Elisabeth Delais-Roussarie and Lori Lamel},
  title = {{La liaison dans la parole spontan{\'e}e famili{\`e}re : explorations semi-automatiques de grands corpus}},
  booktitle = jep,
  year = 2012,
  month = jun # "~4--8",
  address = {Grenoble},
  abstract = {The realisation of the French Liaison is investigated in a large corpus of casual speech. Considering that casual speech gives rise to a large range of pronunciation variants and that overall temporal reduction increases (word and phone duration measurements) as compared to read and prepared speech, one may hypothesize that French liaison tends to be less productive in this kind of speaking style.
We made use of automatic speech alignments to evaluate optional liaison realisations in potential liaison sites (word ending in a liaison consonant followed by a word-initial (semi)-vowel). Speech comes from the NCCFr corpus including 46 mostly young speakers with a total of more than 30 hours of speech. Realized liaisons were examined and measured for the most frequent liaison consonants (/z/, /n/ and /t/) as a function of a classification of the sites as mandatory, optional or forbidden with respect to liaison realization. An original contribution investigates liaison realization as a function of a speaker-dependent speech rate measure.},
  URL = {http://www-tlp.limsi.fr/public/jep2012liaisonFinalv0.pdf}
}

%% NOTE(review): the title contained a duplicated fragment ("journalistique et
%% de parole" appeared twice); the duplicate was removed -- confirm the wording
%% against the JEP 2012 proceedings. The url is only the public directory, not
%% a document.
@InProceedings{jep12Gendrot,
  author = {Cedric Gendrot and Martine Adda-Decker and Carolin Schmid},
  title = {{Comparaison de parole journalistique et de parole spontanee : analyse de sequences entre pause}},
  booktitle = jep,
  year = 2012,
  month = {June 4-8},
  address = {Grenoble},
  url = {http://www-tlp.limsi.fr/public/}
}

%% NOTE(review): "declinasion" corrected to "declinaison"; the url is only the
%% public directory, not a document.
@InProceedings{jep12schmid,
  author = {Carolin Schmid and Cedric Gendrot and Martine Adda-Decker},
  title = {{F0 declinaison: une comparaison entre le francais et l'allemand journalistique}},
  booktitle = jep,
  year = 2012,
  month = {June 4-8},
  address = {Grenoble},
  url = {http://www-tlp.limsi.fr/public/}
}

%% LREC 2012 paper on perceptual evaluation of ASR errors.
@InProceedings{VASILESCU12.300,
  author = {Ioana Vasilescu and Martine Adda-Decker and Lori Lamel},
  title = {Cross-lingual studies of {ASR} errors: paradigms for perceptual evaluations},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  year = {2012},
  month = may,
  date = {23-25},
  address = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-7-7},
  abstract = {It is well-known that human listeners significantly outperform machines when it comes to transcribing speech. 
This paper presents a progress report of the joint research in the automatic vs human speech transcription and of the perceptual experiments developed at LIMSI that aims to increase our understanding of automatic speech recognition errors. Two paradigms are described here in which human listeners are asked to transcribe speech segments containing words that are frequently misrecognized by the system. In particular, we sought to gain information about the impact of increased context to help humans disambiguate problematic lexical items, typically homophone or near-homophone words. The long-term aim of this research is to improve the modeling of ambiguous contexts so as to reduce automatic transcription errors. },
  url = {http://www-tlp.limsi.fr/public/lrec12_300_iv.pdf}
}

%% LREC 2012 paper describing the ETAPE evaluation corpus.
@InProceedings{GRAVIER12.495,
  author = {Guillaume Gravier and Gilles Adda and Niklas Paulsson and Matthieu Carre and Aude Giraudel and Olivier Galibert},
  title = {The {ETAPE} corpus for the evaluation of speech-based {TV} content processing in the {French} language},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  year = {2012},
  month = may,
  date = {23-25},
  address = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-7-7},
  abstract = { The paper presents a comprehensive overview of existing data for the evaluation of spoken content processing in a multimedia framework for the French language. We focus on the ETAPE corpus which will be made publicly available by ELDA mid 2012, after completion of the evaluation campaign, and recall existing resources resulting from previous evaluation campaigns. 
The ETAPE corpus consists of 30 hours of TV and radio broadcasts, selected to cover a wide variety of topics and speaking styles, emphasizing spontaneous speech and multiple speaker areas.},
  language = {english}
}

%% LREC 2012 paper on French tale corpora for expressive TTS.
@InProceedings{DOUKHAN12.876,
  author = {David Doukhan and Sophie Rosset and Albert Rilliard and Christophe d'Alessandro and Martine Adda-Decker},
  title = {Designing {French} Tale Corpora for Entertaining Text To Speech Synthesis},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  year = {2012},
  month = may,
  date = {23-25},
  address = {Istanbul, Turkey},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-7-7},
  language = {english},
  keywords = {corpus annotation, gvlex},
}

%% SLTU 2012 paper on MLP features in unsupervised acoustic model training.
@InProceedings{sltu12fraga,
  author = {T. Fraga da Silva and V.-B. Le and L. Lamel and J-L. Gauvain},
  title = {{Incorporating MLP features in the unsupervised training process}},
  booktitle = {{SLTU 2012 Third International Workshop on Spoken Languages Technologies for Under-resourced Languages}},
  year = {2012},
  address = {Cape Town, South Africa},
  pages = {30--34},
  abstract = {The combined use of multi layer perceptron (MLP) and perceptual linear prediction (PLP) features has been reported to improve the performance of automatic speech recognition systems for many different languages and domains. However, MLP features have not yet been used on unsupervised acoustic model training. This approach is introduced in this paper with encouraging results. In addition, unsupervised language model training was also investigated for a Portuguese broadcast speech recognition task, leading to a slight improvement of performance. The joint use of the unsupervised techniques presented here leads to an absolute WER reduction up to 3.2\% over a baseline unsupervised system.},
  keywords = { mlpfeat },
  url = {http://www-tlp.limsi.fr/public/sltu12_fraga.pdf}
}

%% SLTU 2012 paper on Russian conversational speech transcription.
@InProceedings{sltu12lamel,
  author = {L. Lamel and S. 
Courcinous and J-L. Gauvain and Y. Josse and V.-B. Le},
  title = {{Transcription of Russian conversational speech}},
  booktitle = {{SLTU 2012 Third International Workshop on Spoken Languages Technologies for Under-resourced Languages}},
  year = {2012},
  address = {Cape Town, South Africa},
  pages = {162--167},
  abstract = {This paper presents initial work in transcribing conversational telephone speech in Russian. Acoustic seed models were derived from other languages. The initial studies are carried out with 9 hours of transcribed data, and explore the choice of the phone set and use of other data types to improve transcription performance. Discriminant features produced by a Multi Layer Perceptron trained on a few hours of Russian conversational data are contrasted with those derived from well-trained networks for English telephone speech and from Russian broadcast data. Acoustic models trained on broadcast data filtered to match the telephone band achieve results comparable to those obtained with models trained on the small conversation telephone speech corpus.},
  url = {http://www-tlp.limsi.fr/public/sltu12_lamel.pdf}
}

%% ICASSP 2012 paper analysing neural network language models.
@InProceedings{Oparin12,
  author = {I. Oparin and M. Sundermeyer and H. Ney and J-L. Gauvain},
  title = {{Performance Analysis of Neural Networks in Combination with N-Gram Language Models}},
  booktitle = ieeeicassp,
  year = {2012},
  address = {Kyoto, Japan},
  pages = {5005--5008},
  abstract = {Neural Network language models (NNLMs) have recently become an important complement to conventional n-gram language models (LMs) in speech-to-text systems. However, little is known about the behavior of NNLMs. The analysis presented in this paper aims to understand which types of events are better modeled by NNLMs as compared to n-gram LMs, in what cases improvements are most substantial and why this is the case. Such an analysis is important to take further benefit from NNLMs used in combination with conventional n-gram models. 
The analysis is carried out for different types of neural network (feed-forward and recurrent) LMs. The results showing for which type of events NNLMs provide better probability estimates are validated on two setups that are different in their size and the degree of data homogeneity.},
  url = {http://www-tlp.limsi.fr/public/Oparin_ICASSP_2012.pdf}
}

%%%%%%%%%% 2011 section

%% NOTE(review): the keywords were run together without separators; commas were
%% inserted at the capitalised phrase boundaries -- confirm the intended split.
@article{PubLIMSI-1478,
  author = {Lavergne, Thomas and Urvoy, T. and Yvon, Fran\c{c}ois},
  title = {Filtering artificial texts with statistical machine learning techniques},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {25--43},
  journal = {Language Resources and Evaluation},
  volume = {45},
  number = {1},
  abstract = {Fake content is flourishing on the Internet, ranging from basic random word salads to web scraping. Most of this fake content is generated for the purpose of nourishing fake web sites aimed at biasing search engine indexes: at the scale of a search engine, using automatically generated texts render such sites harder to detect than using copies of existing pages. In this paper, we present three methods aimed at distinguishing natural texts from artificially generated ones: the first method uses basic lexicometric features, the second one uses standard language models and the third one is based on a relative entropy measure which captures short range dependencies between words. Our experiments show that lexicometric features and language models are efficient to detect most generated texts, but fail to detect texts that are generated with high order Markov models. By comparison our relative entropy scoring algorithm, especially when trained on a large corpus, allows us to detect these hard text generators with a high degree of accuracy.},
  keywords = {Web spam filtering, Statistical language models, Artificial languages},
  url = {http://www-tlp.limsi.fr/public/2011_JLRE_Yvon_1359578993.pdf},
}

%% Speech Communication article on non-native French accents.
@article{PubLIMSI-1507,
  author = {Vieru-Dimulescu, B. 
and Boula De Mareuil, Philippe and Adda-Decker, Martine},
  title = {Identification and characterisation of non-native {French} accents},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {292--310},
  journal = {Speech Communication},
  volume = {53},
  number = {3},
  url = {http://www-tlp.limsi.fr/public/SCvieru2011_1375090272.pdf},
}

%% Computational Linguistics article on Amazon Mechanical Turk.
@article{PubLIMSI-1517,
  author = {Fort, Karën and Adda, Gilles and Bretonnel-Cohen, K.},
  title = {{Amazon Mechanical Turk}: gold mine or coal mine?},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {413--420},
  journal = {Computational Linguistics},
  volume = {37},
  number = {2},
  url = {http://www-tlp.limsi.fr/public/CLadda2011_1375090340.pdf},
}

%% Information Processing and Management article on text segmentation.
@article{PubLIMSI-1537,
  author = {Misra, H. and Yvon, Fran\c{c}ois and Jose, J. and Capp{\'e}, Olivier},
  title = {Text segmentation: a topic modeling perspective},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {528--544},
  journal = {Information Processing \& Management},
  volume = {47},
  number = {4},
  url = {http://www-tlp.limsi.fr/public/IPMyvon2011_1326379734.pdf},
}

%% NOTE(review): "(emphtuples)" in the abstract was a stripped emphasis macro
%% and has been restored as \emph{tuples}.
@article{PubLIMSI-1680,
  author = {Crego, Josep-Maria and Yvon, Fran\c{c}ois and Mariño, Jos{\'e} B.},
  title = {{N-code}: an open-source Bilingual N-gram {SMT} Toolkit},
  AERES = {ACLN},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {49--58},
  journal = {Prague Bulletin of Mathematical Linguistics},
  volume = {96},
  abstract = {This paper describes Ncode, an open source statistical machine translation (SMT) toolkit for translation models estimated as $n$-gram language models of bilingual units (\emph{tuples}). This toolkit includes tools for extracting tuples, estimating models and performing translation. It can be easily coupled to several other open source toolkits to yield a complete SMT pipeline. In this article, we review the main features of the toolkit and explain how to build a translation engine with Ncode. We also report a short comparison with the widely known Moses system. 
Results show that Ncode outperforms Moses in terms of memory requirements and translation speed. Ncode also achieves slightly higher accuracy results.},
}

%% NOTE(review): journal name corrected from "Medical Information Association"
%% to "Medical Informatics Association" -- confirm against the publisher.
@article{PubLIMSI-1688,
  author = {Minard, Anne-Lyse and Ligozat, Anne-Laure and Ben Abacha, Asma and Bernhard, Delphine and Cartoni, Bruno and Del{\'e}ger, Louise and Grau, Brigitte and Rosset, Sophie and Zweigenbaum, Pierre and Grouin, Cyril},
  title = {Hybrid methods for improving information access in clinical documents: concept, assertion, and relation identification},
  AERES = {ACL},
  GROUP = {LIMSI,ILES,TLP},
  year = {2011},
  pages = {588--593},
  journal = {Journal of the American Medical Informatics Association (JAMIA)},
  volume = {18},
  number = {5},
  abstract = {OBJECTIVE: This paper describes the approaches the authors developed while participating in the i2b2/VA 2010 challenge to automatically extract medical concepts and annotate assertions on concepts and relations between concepts. DESIGN: The authors' approaches rely on both rule-based and machine-learning methods. Natural language processing is used to extract features from the input texts; these features are then used in the authors' machine-learning approaches. The authors used Conditional Random Fields for concept extraction, and Support Vector Machines for assertion and relation annotation. Depending on the task, the authors tested various combinations of rule-based and machine-learning methods. RESULTS: The authors' assertion annotation system obtained an F-measure of 0.931, ranking fifth out of 21 participants at the i2b2/VA 2010 challenge. The authors' relation annotation system ranked third out of 16 participants with a 0.709 F-measure. The 0.773 F-measure the authors obtained on concept extraction did not make it to the top 10. CONCLUSION: On the one hand, the authors confirm that the use of only machine-learning methods is highly dependent on the annotated training data, and thus obtained better results for well-represented classes. 
On the other hand, the use of only a rule-based method was not sufficient to deal with new types of data. Finally, the use of hybrid approaches combining machine-learning and rule-based approaches yielded higher scores.},
  url = {http://www-tlp.limsi.fr/public/JAMIAligozat2011_1375092403.pdf},
  keywords = {medical information extraction},
}

%% NOTE(review): the abstract contained mangled "fi"/"ff" ligature codes
%% (numeric entity residue); they were restored as "first" and "differential".
@article{PubLIMSI-1725,
  author = {Lardilleux, Adrien and Lepage, Y. and Yvon, Fran\c{c}ois},
  title = {The contribution of low frequencies to multilingual sub-sentential alignment: a differential associative approach},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {189--217},
  journal = {International Journal of Advanced Intelligence},
  volume = {3},
  number = {2},
  abstract = {The goal of this paper is to show that, contrary to preconceived ideas, one can efficiently take advantage of low frequency words in natural language processing. We put them to use in sub-sentential alignment, which constitutes the first step of most data-driven machine translation systems (statistical or example-based machine translation). We show that rare words can be used as a foundation in the design of a multilingual sub-sentential alignment method, using differential techniques similar to those found in example-based machine translation. This method is truly multilingual, in that it allows the simultaneous processing of any number of languages. Moreover, it is very simple, anytime, and scales up naturally. We compare our implementation, Anymalign, with two statistical tools proven in the domain. Although its current results are on average slightly behind those of state of the art methods in phrase-based statistical machine translation, we show that the intrinsic quality of our lexicons is actually superior to that of lexicons produced by state of the art methods.},
  keywords = {Machine Translation},
  url = {http://www-tlp.limsi.fr/public/IJAIlardilleux2011_1375091048.pdf},
}

%% Computer Speech and Language article on emotion-related feature types.
@article{PubLIMSI-2335,
  author = {Batliner, A. 
and Steidl, S. and Schuller, Bjoern and Seppi, Dino and Vogt, T. and Wagner, J. and Devillers, Laurence and Vidrascu, Laurence and Aharonson, V. and Kessous, Loic and Amir, N.},
  title = {Whodunnit -- searching for the most important feature types signalling emotion-related user states in speech},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {4--18},
  journal = {Computer Speech and Language},
  volume = {25},
  number = {1},
}

%% Computer Speech and Language article on fear-type emotions in fiction.
@article{PubLIMSI-2336,
  author = {Clavel, C{\'e}line and Vasilescu, Ioana and Devillers, Laurence},
  title = {Fiction supports for realistic portrayals of fear-type emotional manifestations},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {63--83},
  journal = {Computer Speech and Language},
  volume = {25},
  number = {1},
}

%% NOTE(review): apostrophes had been stripped from the title and abstract
%% ("daccents", "lAfrique", ...); they were restored from French elision rules.
@article{PubLIMSI-1946,
  author = {Boula De Mareuil, Philippe and Boutin, B{\'e}atrice Akissi},
  title = {Évaluation et identification perceptives d'accents ouest-africains en fran\c{c}ais},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {361--379},
  journal = {Journal of French Language Studies},
  volume = {21},
  number = {3},
  abstract = {Cette {\'e}tude examine avec quel degr{\'e} de granularit{\'e} divers accents ouest-africains en fran\c{c}ais peuvent {\^e}tre distingu{\'e}s. Elle vise avant tout {\`a} d{\'e}terminer si, en perception, des auditeurs de l'Afrique de l'Ouest sont capables d'identifier l'appartenance ethnolinguistique, le pays de r{\'e}sidence et le niveau d'{\'e}tudes de locuteurs akan, bambara, s{\'e}noufo, mossi (de langue moor{\'e}) et wolof, enregistr{\'e}s en C{\^o}te d'Ivoire, au Mali, au Burkina Faso et au S{\'e}n{\'e}gal. Une exp{\'e}rience perceptive a {\'e}t{\'e} men{\'e}e, d'o{\`u} il ressort que les dimensions {\'e}tudi{\'e}es sont bien identifi{\'e}es par les sujets. Pour les jeunes locuteurs comme pour les locuteurs plus âg{\'e}s, en lecture comme en parole spontan{\'e}e, les r{\'e}sultats sont tr{\`e}s robustes. 
Ils sont de plus assez fid{\`e}les {\`a} la conscience linguistique auto-{\'e}valu{\'e}e par les auditeurs : quand ceux-ci se d{\'e}claraient confiants pour reconnaître les accents en pr{\'e}sence, tel a effectivement {\'e}t{\'e} le cas. Quelques indices acoustiques diff{\'e}renciant notamment les accents wolof (S{\'e}n{\'e}gal) et akan (C{\^o}te d'Ivoire) ont finalement {\'e}t{\'e} d{\'e}gag{\'e}s.},
  keywords = {fran\c{c}ais en Afrique, identification d'accents, conscience linguistique, test perceptif, analyse phon{\'e}tique},
}

%% Journal of Phonetics article on temporal speech reduction in French.
@article{PubLIMSI-2565,
  author = {Adda-Decker, Martine and Snoeren, Natalie},
  title = {Quantifying temporal speech reduction in {French} using forced speech alignment},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {261--270},
  journal = {Journal of Phonetics},
  volume = {39},
  number = {3},
}

%% NOTE(review): the abstract contained "super\cial" and "\rst" (lost "fi"
%% ligatures that read as undefined TeX control sequences); they were restored
%% as "superficial" and "first".
@article{PubLIMSI-3723,
  author = {Bernhard, Delphine and Cartoni, Bruno and Tribout, Delphine},
  title = {A task-based evaluation of {French} morphological resources and tools: a case study for question-answer pairs},
  AERES = {ACLN},
  GROUP = {LIMSI,ILES,TLP},
  year = {2011},
  pages = {1--41},
  journal = {LILT (Linguistic Issues in Language Technology)},
  volume = {5},
  number = {2},
  abstract = {Morphology is a key component for many Language Technology applications. However, morphological relations, especially those relying on the derivation and compounding processes, are often addressed in a superficial manner. In this article, we focus on assessing the relevance of deep and motivated morphological knowledge in Natural Language Processing applications. We first describe an annotation experiment whose goal is to evaluate the role of morphology for one task, namely Question Answering (QA). We then highlight the kind of linguistic knowledge that is necessary for this particular task and propose a qualitative analysis of morphological phenomena in order to identify the morphological processes that are most relevant. 
Based on this study, we perform an intrinsic evaluation of existing tools and resources for French morphology, in order to quantify their coverage. Our conclusions provide helpful insights for using and building appropriate morphological resources and tools that could have a significant impact on the application performance.},
  keywords = {linguistique, traitement automatique des langues, morphologie, questions-r{\'e}ponses},
}

%% Book review published in TAL.
@article{PubLIMSI-4189,
  author = {Yvon, Fran\c{c}ois},
  title = {Note de lecture: Bitext Alignment, by Jörg Tiedemann},
  AERES = {AP},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {292--294},
  journal = {TAL (Traitement Automatique des Langues)},
  volume = {52},
  number = {3},
  keywords = {Alignement, traduction statistique},
}

%% NOTE(review): "lors dune" in the abstract had lost its apostrophe and was
%% restored as "lors d'une".
@article{PubLIMSI-4054,
  author = {Romano, Antonio and Boula De Mareuil, Philippe and Lai, Jean-Pierre and Mairano, Paolo},
  title = {Quelques patrons intonatifs du corse dans le cadre de l'AMPER},
  AERES = {ACLN},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {25--42},
  journal = {Bollettino dell'Atlante Linguistico Italiano},
  volume = {35},
  abstract = {Cet article pr{\'e}sente certaines propri{\'e}t{\'e}s phon{\'e}tiques de l'intonation du corse (une langue italo-romane du groupe toscan) dans le cadre de l'Atlas Multim{\'e}dia Prosodique de l'Espace Roman (AMPER). La m{\'e}thodologie AMPER est fond{\'e}e sur une m{\^e}me strat{\'e}gie de r{\'e}colte et d'analyse de donn{\'e}es en vue de d{\'e}crire la prosodie d'{\'e}nonc{\'e}s simples aux modalit{\'e}s d{\'e}clarative et interrogative totale. Cet article pr{\'e}sente les premi{\`e}res analyses de donn{\'e}es recueillies en corse, lors d'une enqu{\^e}te de terrain men{\'e}e {\`a} Corte. Les structures prosodiques des {\'e}nonc{\'e}s d{\'e}claratifs et des questions totales de trois locuteurs, en particulier, sont analys{\'e}es. 
Un ton haut en d{\'e}but de question et une descente m{\'e}lodique {\`a} la fin ont {\'e}t{\'e} observ{\'e}s; et les conditions de variation entrant en jeu sont d{\'e}taill{\'e}es. Ces contours m{\'e}lodiques descendants (ou montants-descendants) sur les accents de groupe ou de phrase semblent sp{\'e}cifiques du parler corse {\'e}tudi{\'e}.},
  keywords = {linguistique},
}

%% FIXME(review): this record is corrupted. Text was lost between the author
%% "Adda" and "...lding and Using Comparable Corpora", so the original author
%% field ran straight into the venue name and there is no title. Repairs made
%% here, all to be confirmed against the LIMSI publication database:
%%   - the author list is cut after "Adda" (given name "Gilles" taken from
%%     the sibling records that list the same co-authors; further authors may
%%     be missing);
%%   - the venue text is moved to booktitle with "lding" completed to
%%     "Building" (presumed);
%%   - the title is still missing and must be supplied;
%%   - pages/month/address may belong to a different paper than these authors
%%     (two records appear to have been merged).
@inproceedings{PubLIMSI-1516,
  author = {Sagot, B. and Fort, Karën and Adda, Gilles},
  booktitle = {Building and Using Comparable Corpora: Comparable Corpora and the Web - Co-located with ACL-HLT},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {44--51},
  month = {June 24, 2011},
  address = {Portland, USA},
}

%% CICLing 2011 paper on discriminative word alignment.
@inproceedings{PubLIMSI-1534,
  author = {Tomeh, Nadi and Allauzen, Alexandre and Lavergne, Thomas and Yvon, Fran\c{c}ois},
  title = {Designing an improved discriminative word aligner},
  booktitle = {International Conference on Intelligent Text Processing and Computational Linguistics (CICLing 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {12p},
  month = {February 20-26, 2011},
  address = {Tokyo, Japan},
  keywords = {mtalign},
}

%% INTERSPEECH 2011 paper on genre modeling for broadcast transcription.
@inproceedings{PubLIMSI-1578,
  author = {Zhang, Qing Qing and Lamel, Lori and Gauvain, Jean-Luc},
  title = {Genre categorization and modeling for broadcast speech transcription},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {1707--1712},
  month = {27/08 au 31/08},
  address = {Florence, Italy},
}

%% WIAMIS 2011 paper on scene segmentation of TV series.
@inproceedings{PubLIMSI-1586,
  author = {Ercolessi, P. and Bredin, Herv{\'e} and S{\'e}nac, C. 
and Joly, P.},
  title = {Segmenting {TV} series into scenes using speaker diarization},
  booktitle = {International Workshop on Image Analysis for Multimedia Interactive Services (WIAMIS 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {4p},
  month = {13/4 au 15/4},
  address = {Delft, The Netherlands},
}

%% TALN 2011 paper; title typo "classsification" corrected.
@inproceedings{PubLIMSI-1735,
  author = {El Maarouf, Ismail and Villaneau, J. and Rosset, Sophie},
  title = {Extraction de patrons s{\'e}mantiques appliqu{\'e}e {\`a} la classification d'entit{\'e}s nomm{\'e}es},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2011)},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {173--184},
  month = {27/06 au 01/07},
  address = {Montpellier, France},
  keywords = {named entity},
}

%% SPIRE 2011 paper on automatic dating of documents.
@inproceedings{PubLIMSI-1689,
  author = {Garcia-Fernandez, Anne and Ligozat, Anne-Laure and Dinarelli, Marco and Bernhard, Delphine},
  title = {When was it written? Automatically determining publication dates},
  booktitle = {International Symposium on String Processing and Information Retrieval (SPIRE 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2011},
  pages = {221--236},
  month = {17/10 au 21/10},
  address = {Pise, Italie},
  abstract = {Automatically determining the publication date of a document is a complex task, since a document may contain only few intra-textual hints about its publication date. Yet, it has many important applications. Indeed, the amount of digitized historical documents is constantly increasing, but their publication dates are not always properly identified via OCR acquisition. Accurate knowledge about publication dates is crucial for many applications, e.g. studying the evolution of documents topics over a certain period of time. In this article, we present a method for automatically determining the publication dates of documents, which was evaluated on a French newspaper corpus in the context of the DEFT 2011 evaluation campaign. 
Our system is based on a combination of different individual systems, relying both on supervised and unsupervised learning, and uses several external resources, e.g. Wikipedia, Google Books Ngrams, and etymological background knowledge about the French language. Our system detects the correct year of publication in 10\% of the cases for 300-word excerpts and in 14\% of the cases for 500-word excerpts, which is very promising given the complexity of the task.},
}

%% IJCNLP 2011 paper on named entity evaluation in speech transcripts.
@inproceedings{PubLIMSI-1733,
  author = {Galibert, Olivier and Rosset, Sophie and Grouin, Cyril and Zweigenbaum, Pierre and Quintard, Ludovic},
  title = {Structured and extended named entity evaluation in automatic speech transcriptions},
  booktitle = {International Joint Conference on Natural Language Processing (IJCNLP 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,ILES,TLP},
  year = {2011},
  pages = {518--526},
  month = {08/11 au 13/11},
  address = {Chiang Mai, Thailand},
  keywords = {evaluation, metrics, named entity}
}

%% IJCNLP 2011 paper; the misspelled field name "keywwords" (silently ignored
%% by BibTeX) was corrected to "keywords".
@inproceedings{PubLIMSI-1731,
  author = {Dinarelli, Marco and Rosset, Sophie},
  title = {Models cascade for tree-structured named entity detection},
  booktitle = {International Joint Conference on Natural Language Processing (IJCNLP 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {1269--1278},
  month = {08/11 au 13/11},
  address = {Chiang Mai, Thailand},
  keywords = {named entity},
}

%% EMNLP 2011 paper on reranking for spoken language understanding.
@inproceedings{PubLIMSI-1729,
  author = {Dinarelli, Marco and Rosset, Sophie},
  title = {Hypotheses selection criteria in a reranking framework for spoken language understanding},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {1104--1115},
  month = {27/07 au 29/07},
  address = {Edinburgh, UK},
  keywords = {spoken language understanding}
}

%% ICPhS 2011 paper on multivoiced segments in mixed speech.
@inproceedings{PubLIMSI-1711,
  author = {Li{\'e}nard, Jean-Sylvain and Barras, Claude and Signol, Fran\c{c}ois},
  title = {Time structure and detection of the multivoiced segments in 
mixed speech},
  booktitle = {International Congress of Phonetic Sciences (ICPhS 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,CPU},
  year = {2011},
  pages = {1234--1237},
  url = {http://www.icphs2011.hk/ICPHS_CongressProceedings.htm},
  month = {17/08 au 21/08},
  address = {Hong Kong, China},
  abstract = {When two speech signals are mixed in a single channel the voiced parts of any of them remain mostly unaltered during the voicing interruptions of the other, i.e. pauses and voiceless consonants. The mixture is made of 3 types of multivoiced segments noted 0V (unvoiced), 1V (one voicing) and 2V (two voicings). A statistical study of read-aloud texts reveals that total time spent in the 1V state is twice as long as the time spent in any of the other states. The HSC multipitch algorithm, based on a specific mechanism that eliminates the f0 halving and doubling errors, is used to locate the 3 segments types in the signal. This feature is illustrated by the task of spotting a short utterance repeatedly mixed with a long text.},
  keywords = {voicing, pitch, multipitch, speech separation},
}

%% RANLP 2011 paper on document filtering for question answering.
@inproceedings{PubLIMSI-1773,
  author = {Foucault, Nicolas and Adda, Gilles and Rosset, Sophie},
  title = {Language modeling for document selection in question answering},
  booktitle = {Recent Advances in Natural Language Processing (RANLP 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {716--720},
  publisher = {RANLP 2011 Organising Committee},
  month = {12/09 au 14/09},
  address = {Hissar, Bulgaria},
  abstract = {Usually, in the Question Answering domain, for a question in natural language, precise answers to this question are extracted from documents according only to the context of the question. In this work, we complemented the classical Question Answering model by adding a filtering process on top of the document retrieval. This way, the system re-evaluates the documents it has originally selected during the information retrieval step before the answer extraction and scoring. 
Such re-evaluation consists in filtering out documents considered unusable for the search. Based on statistical language modeling, the filtering process firstly determines the intrinsic relevancy of a document and then decides whether this document is a priori relevant for finding answers. Evaluation on factoid questions and a collection of 500k web documents has shown that the proposed filtering method properly supports a Question Answering system in its task.},
  keywords = {mod{\`e}le de langage, syst{\`e}me question-r{\'e}ponse},
}

%% L&TC 2011 position paper on Amazon Mechanical Turk; the product name is
%% capitalised and brace-protected so styles cannot lower-case it.
@inproceedings{PubLIMSI-1815,
  author = {Adda, Gilles and Sagot, B. and Fort, Karën and Mariani, Joseph-Jean},
  title = {Crowdsourcing for language resource development: critical analysis of {Amazon Mechanical Turk} overpowering use},
  booktitle = {Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics (L\&TC 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {304--308},
  month = {25/11 au 27/11},
  address = {Poznan, Poland},
  abstract = {This article is a position paper about crowdsourced microworking systems and especially Amazon Mechanical Turk, the use of which has been steadily growing in language processing in the past few years. According to the mainstream opinion expressed in the articles of the domain, this type of on-line working platforms allows to develop very quickly all sorts of quality language resources, for a very low price, by people doing that as a hobby or wanting some extra cash. We shall demonstrate here that the situation is far from being that ideal, be it from the point of view of quality, price, workers' status or ethics and bring back to mind already existing or proposed alternatives. 
Our goal here is threefold: 1 - to inform researchers, so that they can make their own choices with all the elements of the reflection in mind, 2- to ask for help from funding agencies and scientific associations, and develop alternatives, 3- to propose practical and organizational solutions in order to improve new language resources development, while limiting the risks of ethical and legal issues without letting go price or quality.},
  keywords = {Amazon Mechanical Turk, language resources},
}

%% NOTE(review): possessive apostrophes had been stripped from the abstract
%% ("speakers age bracket", "listeners region", "speakers degree"); restored.
@inproceedings{PubLIMSI-1947,
  author = {Boula De Mareuil, Philippe and Bardiaux, Alice},
  title = {Perception of {French}, {Belgian} and {Swiss} accents by {French} and {Belgian} listeners},
  booktitle = {ISCA Tutorial and Research Workshop on Experimental Linguistics (ExLing 2011)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2011},
  pages = {47--50},
  month = {25/05 au 27/05},
  address = {Paris, France},
  abstract = {This article addresses the perceptual identification of French regional accents by listeners from the Paris region and Belgium. It is based on the geographical localisation of about thirty speakers from seven French-speaking areas: Vendee (West of France), Languedoc (South of France), Alsace (East of France), Romand Switzerland, East, Centre and West of Belgium. Contrary to the speakers' age bracket (older or younger than 60) and speaking style (read or spontaneous speech), listeners' region of origin and speakers' degree of accentedness (also rated by the listeners) have a major effect. Confusions are frequent among the Belgian areas, but taken as a whole, the Belgian accent is remarkably well identified, especially by Belgian listeners. 
The Southern accent remains identified best.},
  keywords  = {dialectologie perceptive, variation r{\'e}gionale, accents en fran\c{c}ais},
}

@inproceedings{PubLIMSI-1950,
  author    = {Boula De Mareuil, Philippe AND Rouas, Jean-Luc AND Yapomo Domkem, Manuela},
  title     = {In search of cues discriminating {West-African} accents in {French}},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {725--728},
  month     = {27/08 au 31/08},
  address   = {Florence, Italy},
  abstract  = {This study investigates to what extent West-African French accents can be distinguished, based on recordings made in Burkina Faso, Ivory Coast, Mali and Senegal. First, a perceptual experiment was conducted, suggesting that these accents are well identified by West-African listeners (especially the Senegal and Ivory Coast accents). Second, prosodic and segmental cues were studied by using speech processing methods such as automatic phoneme alignment.
Results show that the Senegal accent (with a tendency toward word-initial stress followed by a falling pitch movement) and the Ivory Coast accent (with a tendency to delete/vocalise the /R/ consonant) are most distinct from standard French and among the West-African accents under investigation.},
  keywords  = {identification d'accents, perception, prosodie, alignement automatique, sociophon{\'e}tique du fran\c{c}ais},
}

@inproceedings{PubLIMSI-1983,
  author    = {Rilliard, Albert AND Allauzen, Alexandre AND Boula De Mareuil, Philippe},
  title     = {Using dynamic time warping to compute prosodic similarity measures},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP,AA},
  year      = {2011},
  pages     = {2021--2024},
  month     = {27/08 au 31/08},
  address   = {Florence, Italy},
  abstract  = {This paper presents the use of Dynamic Time Warping (DTW) for measuring prosodic differences between variable-sized sentences. This methodological study may apply to various prosodic functions, accented or expressive speech. Both the structuring and attitudinal functions of prosody are investigated here. We evaluated the relevance of three prosodic (dis)similarity measures to account for perceived variations. The importance of constraints on the DTW alignment process is highlighted, together with the possibility to use prosodic features beyond pitch. Results show the effectiveness of DTW-based measurements to capture different syntactic-prosodic structures and to cluster prosodically similar attitudinal expressions, irrespective of the utterance length.},
  keywords  = {prosodie, alignement, distance objective},
}

@inproceedings{PubLIMSI-2197,
  author    = {Tomeh, Nadi AND Turchi, Marco AND Wisniewski, Guillaume AND Allauzen, Alexandre AND Yvon, Fran{\c{c}}ois},
  title     = {How Good Are Your Phrases?
Assessing Phrase Quality with Single Class Classification},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {8p},
  editor    = {Hwang, Mei-Yuh and St{\"u}ker, Sebastian},
  month     = {08/12 au 09/12},
  address   = {San Francisco, USA},
  abstract  = {We present a novel translation quality informed procedure for both extraction and scoring of phrase pairs in PBSMT systems. We reformulate the extraction problem in the supervised learning framework. Our goal is twofold. First, we attempt to take the translation quality into account; and second we incorporating arbitrary features in order to circumvent alignment errors. One-Class SVMs and the Mapping Convergence algorithm permit training a single-class classifier to discriminate between useful and useless phrase pairs. Such classifier can be learned from a training corpus that comprises only useful instances. The confidence score, produced by the classifier for each phrase pairs, is employed as a selection criteria. The smoothness of these scores allow a fine control over the size of the resulting translation model. Finally, confidence scores provide a new accuracy-based feature to score phrase pairs. Experimental evaluation of the method shows accurate assessments of phrase pairs quality even for regions in the space of possible phrase pairs that are ignored by other approaches.
This enhanced evaluation of phrase pairs leads to improvements in the translation performance as measured by BLEU.},
  keywords  = {traduction automatique},
}

@inproceedings{PubLIMSI-2250,
  author    = {Tahon, Marie AND Delaborde, Agn{\`e}s AND Devillers, Laurence},
  title     = {Real-life emotion detection from speech in human-robot interaction: experiments across diverse corpora with child and adult voices},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {3121--3124},
  month     = {27/08 au 31/08},
  address   = {Florence, Italy},
  keywords  = {emotion detection, human-robot interaction, cross-corpus, realistic corpus},
}

@inproceedings{PubLIMSI-1948,
  author    = {Boula De Mareuil, Philippe AND Lehka-Lemarchand, Iryna},
  title     = {Can a prosodic pattern induce/reduce the perception of a lower-class suburban accent in {French}?},
  booktitle = {International Congress of Phonetic Sciences (ICPhS 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {348--351},
  month     = {17/08 au 21/08},
  address   = {Hong Kong, China},
  abstract  = {Previous sociophonetic work suggested that an atypical prosodic pattern, namely a word-final sharp pitch fall, could characterise the French vernacular of youth living in working-class suburbs (the so-called suburban accent). A question we investigate in this study is whether the presence/absence of these prosodic patterns increases/decreases the perception of the suburban accent. Using prosody modification and re-synthesis, perceptual experiments were conducted. Results involving listeners from two French regions are rather robust. They show that utterances with (respectively without) high-low pitch falls are perceived as presenting a higher (resp.
lower) degree of accentedness.},
  keywords  = {prosodie, perception, sociophon{\'e}tique, accents en fran\c{c}ais},
}

@inproceedings{PubLIMSI-2334,
  author    = {Chastagnol, Cl{\'e}ment AND Devillers, Laurence},
  title     = {Analysis of Anger across several agent-customer interactions in {French} call centers},
  booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {4960--4963},
  month     = {22/05 au 27/05},
  address   = {Prague, Czech Republic},
}

@inproceedings{PubLIMSI-2514,
  author    = {Le, Hai Son AND Oparin, Ilya AND Messaoudi, Abdelkhalek AND Allauzen, Alexandre AND Gauvain, Jean-Luc AND Yvon, Fran{\c{c}}ois},
  title     = {Large vocabulary {SOUL} neural network language models},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {1469--1472},
  month     = {27/08 au 31/08},
  address   = {Florence, Italy},
}

@inproceedings{PubLIMSI-3483,
  author    = {Juan, Luo AND Lardilleux, Adrien AND Lepage, Y.},
  title     = {Improving sampling-based alignment by investigating the distribution of {N-grams} in phrase translation tables},
  booktitle = {Pacific Asia Conference on Language, Information and Computation (PACLIC 2011)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {150--159},
  month     = {16/12 au 18/12},
  address   = {Singapore, Republic of Singapore},
  keywords  = { xmtalign },
}

% NOTE(review): the conference dates were stored in "address"; moved to "month" like every sibling entry. The venue city is not recorded.
@inproceedings{PubLIMSI-3725,
  author    = {Bernhard, Delphine AND Cartoni, Bruno AND Tribout, Delphine},
  title     = {{\'E}valuer la pertinence de la morphologie constructionnelle dans les syst{\`e}mes de Question-R{\'e}ponse},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2011)},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2011},
  pages     = {1--12},
  month     = {27/06 au 01/07},
  abstract  = {Les connaissances morphologiques sont fr{\'e}quemment utilis{\'e}es en Question-R{\'e}ponse afin de faciliter
l'appariement entre mots de la question et mots du passage contenant la r{\'e}ponse. Il n'existe toutefois pas d'{\'e}tude qualitative et quantitative sur les ph{\'e}nom{\`e}nes morphologiques les plus pertinents pour ce cadre applicatif. Dans cet article, nous pr{\'e}sentons une analyse d{\'e}taill{\'e}e des ph{\'e}nom{\`e}nes de morphologie constructionnelle permettant de faire le lien entre question et r{\'e}ponse. Pour ce faire, nous avons constitu{\'e} et annot{\'e} un corpus de paires de questions-r{\'e}ponses, qui nous a permis de construire une ressource de r{\'e}f{\'e}rence, utile pour l'{\'e}valuation de la couverture de ressources et d'outils d'analyse morphologique. Nous d{\'e}taillons en particulier les ph{\'e}nom{\`e}nes de d{\'e}rivation et de composition et montrons qu'il reste un nombre important de relations morphologiques d{\'e}rivationnelles pour lesquelles il n'existe pas encore de ressource exploitable pour le fran\c{c}ais.},
  keywords  = {linguistique, traitement automatique des langues, morphologie, questions-r{\'e}ponses},
}

@inproceedings{PubLIMSI-3762,
  author    = {Hume, Elizabeth AND Currie-Hall, Kathleen AND Wedel, Andrew AND Ussishkin, Adam AND Adda-Decker, Martine AND Gendrot, C{\'e}dric},
  title     = {Anti-markedness patterns in {French} epenthesis: An information theoretic approach},
  booktitle = {Annual Meeting of the Berkeley Linguistics Society},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {104--123},
  editor    = {Cathcart, Chundra and Chen, I-Hsuan and Finley, Greg and Kang, Shinae and Sandy, Clare S. and Stickles, Elise},
  series    = {Proceedings of the Thirty-Seventh Annual Meeting of the Berkeley Linguistics Society (BLS37)},
  month     = {12/02 au 13/02},
  address   = {Berkeley, USA},
}

% Edited volume: the people below are editors, so they go in "editor" rather than in "author" with an "(editors)" suffix.
@book{PubLIMSI-1481,
  editor    = {Minker, W. AND Lee, G. AND Nakamura, S. AND Mariani, Joseph-Jean},
  title     = {Spoken dialogue systems technology and design},
  AERES     = {DO},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {277p},
  publisher = {Springer},
}

% Edited volume, same treatment as PubLIMSI-1481.
@book{PubLIMSI-1535,
  editor    = {Gaussier, E. AND Yvon, Fran{\c{c}}ois},
  title     = {Mod{\`e}les statistiques pour l'acc{\`e}s {\`a} l'information textuelle},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {482p},
  publisher = {Herm{\`e}s},
}

@inproceedings{PubLIMSI-2151,
  author    = {Mariani, Joseph-Jean},
  title     = {Charting the field to identify the gaps in {META-VISION}},
  booktitle = {Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics (L\&TC 2011)},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  month     = {25/11 au 27/11},
  address   = {Poznan, Poland},
}

@inproceedings{PubLIMSI-2148,
  author    = {Mariani, Joseph-Jean},
  title     = {Language resources, technologies and standards in the sharing paradigm},
  booktitle = {International Joint Conference on Natural Language Processing (IJCNLP 2011)},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  month     = {9/11 au 13/11},
  address   = {Chiang Mai, Thailand},
}

@inproceedings{PubLIMSI-2146,
  author    = {Mariani, Joseph-Jean},
  title     = {Language Technologies in Support to Multilingualism},
  booktitle = {International Symposium on Multilingualism in Cyberspace (SIMC 2011)},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  month     = {07/11 au 09/11},
  address   = {Brasilia, Brazil},
}

@inproceedings{PubLIMSI-2144,
  author    = {Mariani, Joseph-Jean},
  title     = {Ethical Dimension of Crowdsourcing},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2011)},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  month     = {28/08 au 31/08},
  address   = {Florence, Italy},
}

@incollection{PubLIMSI-1518,
  author    = {Adda, Gilles AND Chollet, G. AND Essid, S. AND Fillon, T. AND Garnier-Rizet, M. AND Hory, C.
AND Beltaifa-Zouari, L.},
  title     = {Traitement des modalit{\'e}s},
  booktitle = {S{\'e}mantique et multimodalit{\'e} en analyse de l'information},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {143--188},
  publisher = {Herm{\`e}s},
  isbn      = {978-2-7462-3139-9},
}

@incollection{PubLIMSI-1519,
  author    = {Adda, Gilles AND Cailliau, F. AND Daquo, A. AND Garnier-Rizet, M. AND Guillemin-Lanne, S. AND Suignard, P. AND Waast-Richard, C.},
  title     = {La transcription automatique et la fouille de donn{\'e}es conversationnelles pour l'analyse de la relation client},
  booktitle = {S{\'e}mantique et multimodalit{\'e} en analyse de l'information},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {215--250},
  publisher = {Herm{\`e}s},
  isbn      = {978-2-7462-3139-9},
}

@incollection{PubLIMSI-1536,
  author    = {Allauzen, Alexandre AND Yvon, Fran{\c{c}}ois},
  title     = {M{\'e}thodes statistiques pour la traduction automatique},
  booktitle = {Mod{\`e}les statistiques pour l'acc{\`e}s {\`a} l'information textuelle},
  chapter   = {7},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {271--356},
  publisher = {Herm{\`e}s},
}

% NOTE(review): ISBN re-hyphenated from "978-3-642-2009-46"; the digits are unchanged and the ISBN-13 check digit validates.
@incollection{PubLIMSI-2126,
  author    = {Vetulani, Z. AND Mariani, Joseph-Jean},
  title     = {Preface},
  booktitle = {Human Language Technology for Computer Sciences and Linguistics},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {5--11},
  publisher = {Springer Verlag},
  isbn      = {978-3-642-20094-6},
}

@incollection{PubLIMSI-2337,
  author    = {Douglas-Cowie, E. AND Cox, C. AND Martin, Jean-Claude AND Devillers, Laurence AND Cowie, R. AND Sneddon, I. AND Mcrorie, M. AND Pelachaud, C. AND Peters, C. AND Lowry, O. AND Batliner, A. AND H{\"o}nig, Florian},
  title     = {The {HUMAINE} database},
  booktitle = {Emotion-oriented systems: the humaine handbook},
  AERES     = {OS},
  GROUP     = {LIMSI,CPU,TLP},
  year      = {2011},
  pages     = {243--286},
  publisher = {Springer},
  isbn      = {978-3642151835},
}

@incollection{PubLIMSI-2338,
  author    = {Batliner, A. AND Schuller, Bjoern AND Seppi, Dino AND Steidl, S.
AND Devillers, Laurence AND Vidrascu, Laurence AND Vogt, T. AND Aharonson, V. AND Amir, N.},
  title     = {The automatic recognition of emotions in speech},
  booktitle = {Emotion-oriented systems: the humaine handbook},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {71--99},
  publisher = {Springer},
  isbn      = {978-3642151835},
}

@incollection{PubLIMSI-2340,
  author    = {Martin, Jean-Claude AND Devillers, Laurence AND Raouzaiou, Amaryllis AND Caridakis, G. AND Ruttkay, Zs{\'o}fia AND Pelachaud, C. AND Mancini, Maurizio AND Niewiadomski, Radoslaw AND Pirker, H. AND Krenn, Brigitte AND Poggi, Isabella AND Caldognetto, E. AND Cavicchio, Federica AND Merola, Giorgio AND Garcia Rojas, Alejandra AND Vexo, Fr{\'e}d{\'e}ric},
  title     = {Coordinating the generation of signs in multiple modalities in an affective agent},
  booktitle = {Emotion-oriented systems: the humaine handbook},
  AERES     = {OS},
  GROUP     = {LIMSI,CPU,TLP},
  year      = {2011},
  pages     = {349--368},
  publisher = {Springer},
  isbn      = {978-3642151835},
}

@incollection{PubLIMSI-2347,
  author    = {Lamel, Lori AND Messaoudi, Abdelkhalek AND Gauvain, Jean-Luc},
  title     = {Explicit and implicit modeling of short vowels for {Arabic} {STT}},
  booktitle = {Handbook of natural language processing and machine translation: DARPA global autonomous language exploitation},
  chapter   = {3.6.2},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {523--528},
  publisher = {Springer},
  isbn      = {978-1441977120},
}

@incollection{PubLIMSI-2348,
  author    = {Fousek, Petr AND Lamel, Lori AND Gauvain, Jean-Luc},
  title     = {Combining {MLP} and {PLP} features for speech transcription},
  booktitle = {Handbook of natural language processing and machine translation: DARPA global autonomous language exploitation},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {408--416},
  publisher = {Springer},
  isbn      = {978-1441977120},
  keywords  = { mlpfeat },
}

@incollection{PubLIMSI-2349,
  author    = {Matsoukas, S. AND Zhang, Bing AND Ma, J. AND Ng, T. AND Nguyen, L. AND Diehl, Franck AND Gales, M. AND Liu, X.
AND Park, Junho AND Tomalin, Marcus AND Woodland, P. AND Fousek, Petr AND Gauvain, Jean-Luc AND Lamel, Lori AND Luo, Jun AND Messaoudi, Abdelkhalek},
  title     = {Optimizing Speech-to-Text System Combination for Machine Translation},
  booktitle = {Handbook of natural language processing and machine translation: DARPA global autonomous language exploitation},
  chapter   = {3.7.4},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {590--597},
  publisher = {Springer},
  isbn      = {978-1441977120},
}

% The original record stored "Chap 6" in "pages"; it is a chapter number, so it lives in "chapter".
@incollection{PubLIMSI-2357,
  author    = {Rosset, Sophie AND Galibert, Olivier AND Lamel, Lori},
  title     = {Spoken question answering},
  booktitle = {Spoken language understanding: systems for extracting semantic information from speech},
  chapter   = {6},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  publisher = {Wiley},
  isbn      = {978-0-470-68824-3},
  keywords  = {question-answering},
}

@incollection{PubLIMSI-2745,
  author    = {Schwenk, Holger AND Messaoudi, Abdelkhalek AND Luo, Jun AND Gauvain, Jean-Luc},
  title     = {On the use of Continuous Space Neural Network Language Models},
  booktitle = {Handbook of natural language processing and machine translation: DARPA global autonomous language exploitation},
  chapter   = {3.4.2},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {460--467},
  publisher = {Springer},
  isbn      = {978-1441977120},
  keywords  = {Continuous Space Neural Network Language Models},
}

@incollection{PubLIMSI-2510,
  author    = {Vasilescu, Ioana},
  title     = {Emotion perception and recognition},
  booktitle = {Emotion-oriented systems},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {191--209},
  publisher = {J. Wiley},
  isbn      = {9781848212589},
}

@incollection{PubLIMSI-2517,
  author    = {Hall{\'e}, P.
AND Adda-Decker, Martine},
  title     = {Voice assimilation in {French} obstruents: A gradient or a categorical process?},
  booktitle = {Tones and features: A festschrift for Nick Clements},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {149--175},
  publisher = {De Gruyter},
  isbn      = {978-3-11-024622-3},
}

@incollection{PubLIMSI-2531,
  author    = {Yvon, Fran{\c{c}}ois},
  title     = {Introduction aux mod{\`e}les probabilistes pour la fouille de textes},
  booktitle = {Mod{\`e}les statistiques pour l'acc{\`e}s {\`a} l'information textuelle},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {423--477},
  publisher = {Herm{\`e}s},
}

@incollection{PubLIMSI-2743,
  author    = {Galibert, Olivier AND Rosset, Sophie AND Lamel, Lori},
  title     = {Questions-r{\'e}ponses sur des documents audio},
  booktitle = {Recherche d'information contextuelle, assist{\'e}e et personnalis{\'e}e},
  chapter   = {4},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {99--124},
  publisher = {Herm{\`e}s--Lavoisier},
  isbn      = {978-2-7462-2583-1},
  keywords  = {question-answering},
}

@incollection{PubLIMSI-3749,
  author    = {Adda-Decker, Martine AND Gendrot, C.},
  title     = {Influence du contexte consonantique et de la dur{\'e}e des voyelles sur la centralisation des voyelles orales en fran\c{c}ais},
  booktitle = {La coarticulation : des indices {\`a} la repr{\'e}sentation},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {133--142},
  publisher = {L'Harmattan},
  isbn      = {978-2-296-55503-7},
  abstract  = {L'effet du contexte consonantique (C1VC2) est {\'e}tudi{\'e}, combin{\'e} {\`a} l'effet de la dur{\'e}e des voyelles, sur la centralisation acoustique des formants des voyelles orales du fran\c{c}ais. Sur la base d'un corpus de 25 heures de parole journalistique radio-diffus{\'e}e, align{\'e} automatiquement avec le syst{\`e}me de reconnaissance de la parole du LIMSI [3], les valeurs de formants de 180000 voyelles ont {\'e}t{\'e} mesur{\'e}es automatiquement {\`a} l'aide du logiciel PRAAT.
Les mots grammaticaux ainsi que les mots lexicaux les plus fr{\'e}quents (39\%) ont {\'e}t{\'e} retir{\'e}s au pr{\'e}alable et les voyelles ayant des valeurs formantiques acoustiquement improbables ont {\'e}t{\'e} rejet{\'e}es (5\%). L'effet de centralisation acoustique des deux premiers formants (F1 et F2) pour les voyelles de courte dur{\'e}e peut s'expliquer en partie par l'assimilation contextuelle seulement. Les consonnes adjacentes avec diff{\'e}rents loci ont tendance {\`a} favoriser la centralisation F1/F2 dans 40\% des contextes seulement. Nous montrons que la dur{\'e}e joue un r{\^o}le important dans la centralisation des voyelles, ind{\'e}pendamment du contexte consonantique.},
  keywords  = {centralisation, voyelles orales, fran\c{c}ais, alignement automatique, corpus ESTER, parole journalistique},
}

@inproceedings{PubLIMSI-1491,
  author    = {Grouin, Cyril AND Galibert, Olivier AND Rosset, Sophie AND Quintard, Ludovic AND Zweigenbaum, Pierre},
  title     = {Mesures d'{\'e}valuation pour entit{\'e}s nomm{\'e}es structur{\'e}es},
  booktitle = {Ateliers joints QDC'2011 - EvalECD'2011. {\'E}valuation des m{\'e}thodes d'Extraction de Connaissances dans les Donn{\'e}es},
  AERES     = {COM},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2011},
  pages     = {13p},
  keywords  = {evaluation, metrics, named entity},
}

% "institution" is not a field of inproceedings entries; the sponsoring body goes in "organization".
@inproceedings{PubLIMSI-1691,
  author       = {Garcia-Fernandez, Anne AND Ligozat, Anne-Laure AND Dinarelli, Marco AND Bernhard, Delphine},
  title        = {M{\'e}thodes pour l'arch{\'e}ologie linguistique : datation par combinaison d'indices temporels},
  booktitle    = {Atelier DEfi Fouilles de Textes (DEFT)},
  AERES        = {COM},
  GROUP        = {LIMSI,ILES,TLP},
  year         = {2011},
  pages        = {12p},
  organization = {TALN},
}

@inproceedings{PubLIMSI-1849,
  author    = {Sokolov, Artem AND Urvoy, T.
AND Le, Hai Son},
  title        = {Low-dimensional feature learning with Kernel construction},
  booktitle    = {NIPS 2011 Workshop on Deep Learning and Unsupervised Feature Learning},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2011},
  pages        = {1--9},
  organization = {NIPS},
  abstract     = {We propose a practical method of semi-supervised feature learning with constructed kernels from combinations of non-linear weak rankers for classification applications. While in kernel methods one usually avoids working in the implied implicit feature space, we use the outputs of weak rankers as new features, and define the kernel as scalar product in the new feature space. The kernel is then used to map high-dimensional data into a low-dimensional space keeping the mapping informative enough to be used as training data for learning algorithms. We evaluate and compare the proposed method with other approaches on a public dataset released during the recent Semi-Supervised Feature Learning Challenge.},
  keywords     = {informatique, noyaux, apprentissage automatique},
}

@inproceedings{PubLIMSI-2053,
  author       = {Grouin, Cyril AND Dinarelli, Marco AND Rosset, Sophie AND Wisniewski, Guillaume AND Zweigenbaum, Pierre},
  title        = {Coreference resolution in clinical reports. The {LIMSI} participation in the {i2b2/VA} 2011 Challenge},
  booktitle    = {i2b2/VA Challenge},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP,ILES},
  year         = {2011},
  pages        = {10p},
  organization = {i2b2 / VA},
  keywords     = {medical information extraction},
}

@inproceedings{PubLIMSI-2911,
  author    = {Allauzen, Alexandre AND Adda, Gilles AND Bonneau-Maynard, H.
AND Crego, Josep-Maria AND Le, Hai Son AND Lavergne, Thomas AND Max, Aur{\'e}lien AND Wisniewski, Guillaume AND Yvon, Fran{\c{c}}ois AND Sokolov, Artem AND Lardilleux, Adrien},
  title        = {{LIMSI} @ {WMT11}},
  booktitle    = {6th Workshop on Statistical Machine Translation},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP,ILES},
  year         = {2011},
  pages        = {309--315},
  organization = {University of Edinburgh / Association for Computational Linguistics},
  abstract     = {This paper describes LIMSI's submissions to the Sixth Workshop on Statistical Machine Translation. We report results for the French-English and German-English shared translation tasks in both directions. Our systems use n-code, an open source Statistical Machine Translation system based on bilingual n-grams. For the French-English task, we focussed on finding efficient ways to take advantage of the large and heterogeneous training parallel data. In particular, using a simple filtering strategy helped to improve both processing time and translation quality. To translate from English to French and German, we also investigated the use of the SOUL language model in Machine Translation and showed significant improvements with a 10-gram SOUL model. We also briefly report experiments with several alternatives to the standard n-best MERT procedure, leading to a significant speed-up.},
}

@inproceedings{PubLIMSI-2932,
  author       = {Freitag, Markus AND Leusch, G. AND Wuebker, Joern AND Peitz, Stephan AND Ney, Hermann AND Herrmann, Teresa AND Niehues, Jan AND Waibel, Alex AND Allauzen, Alexandre AND Adda, Gilles AND Crego, Josep-Maria AND Buschbeck, Bianka AND Wandmacher, Tonio AND Senellart, Jean},
  title        = {Joint {WMT} Submission of the {QUAERO} Project},
  booktitle    = {6th Workshop on Statistical Machine Translation},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2011},
  pages        = {358--364},
  organization = {Association for Computational Linguistics},
  abstract     = {This paper describes the joint QUAERO submission to the WMT 2011 machine translation evaluation.
Four groups (RWTH Aachen University, Karlsruhe Institute of Technology, LIMSI-CNRS, and SYSTRAN) of the QUAERO project submitted a joint translation for the WMT German$\rightarrow$English task. Each group translated the data sets with their own systems. Then RWTH system combination combines these translations to a better one. In this paper, we describe the single systems of each group. Before we present the results of the system combination, we give a short description of the RWTH Aachen system combination approach.},
  keywords  = {traduction statistique},
}

@inproceedings{PubLIMSI-4155,
  author       = {Lavergne, Thomas AND Le, Hai Son AND Allauzen, Alexandre AND Yvon, Fran{\c{c}}ois},
  title        = {{LIMSI's} experiments in domain adaptation for {IWSLT11}},
  booktitle    = {8th International Workshop on Spoken Language Translation (IWSLT 2011)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,ILES,TLP},
  year         = {2011},
  pages        = {8p},
  organization = {Carnegie Mellon University / Karlsruhe Institute of Technology},
  abstract     = {LIMSI took part in the IWSLT 2011 TED task in the MT track for English to French using the in-house Ncode system, which implements the n-gram based approach to Machine Translation. This framework not only allows to achieve state-of-the-art results for this language pair, but is also appealing due to its conceptual simplicity and its use of well understood statistical language models.
Using this approach, we compare several ways to adapt our existing systems and resources to the TED task with mixture of language models and try to provide an analysis of the modest gains obtained by training a log linear combination of in- and out-of-domain models},
  keywords  = {traduction automatique},
}

@inproceedings{PubLIMSI-4369,
  author       = {Lavergne, Thomas AND Allauzen, Alexandre AND Crego, Josep-Maria AND Yvon, Fran{\c{c}}ois},
  title        = {From n-gram-based to {CRF-based} translation models},
  booktitle    = {6th Workshop on Statistical Machine Translation (WMT 2011)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP,ILES},
  year         = {2011},
  pages        = {542--553},
  organization = {ACL},
  abstract     = {A major weakness of extant statistical machine translation (SMT) systems is their lack of a proper training procedure. Phrase extraction and scoring processes rely on a chain of crude heuristics, a situation judged problematic by many. In this paper, we recast the machine translation problem in the familiar terms of a sequence labeling task, thereby enabling the use of enriched feature sets and exact training and inference procedures. The tractability of the whole enterprise is achieved through an efficient implementation of the conditional random fields (CRFs) model using a weighted finite-state transducers library. This approach is experimentally contrasted with several conventional phrase-based systems.},
}

% Web article, not a conference paper: typed as misc, and the link that was stored in "pages" is moved to "url".
% NOTE(review): the URL looks truncated in the source record -- verify against the publisher site.
@misc{PubLIMSI-2032,
  author       = {Boula De Mareuil, Philippe},
  title        = {L'accent parisien existe-t-il ?},
  howpublished = {Paris.fr},
  GROUP        = {LIMSI,TLP},
  year         = {2011},
  url          = {http://www.paris.fr/accue},
  abstract     = {Dans l'imaginaire collectif il existe bel et bien. Souvent assimil{\'e} {\`a} l'accent « titi parisien » des ann{\'e}es 30 et 40, l'id{\'e}e d'un accent parisien {\`a} la vie dure. Pourtant, de l'eau a coul{\'e} sous les ponts de la Seine depuis le « Atmosph{\`e}re, atmosph{\`e}re » d'Arletty et rares sont les Parisiens qui aujourd'hui parlent de cette fa\c{c}on.
Alors l'accent parisien existe-t-il ?},
  keywords  = {accent},
}

% Newspaper piece: typed as article with the newspaper as journal; the duplicated GROUP field was dropped.
% NOTE(review): the abstract is truncated in the source record ("g{\'e}ogra") and ends with a stray "Joseph-Jean"; left untouched, restore from the original article.
@article{PubLIMSI-2028,
  author    = {Boula De Mareuil, Philippe},
  title     = {L'accent d'{Eva} {Joly}},
  journal   = {Le Parisien},
  GROUP     = {LIMSI,TLP},
  year      = {2011},
  pages     = {7},
  abstract  = {Difficile de ne pas le remarquer : Eva Joly, d'origine norv{\'e}gienne, a un accent. Mais, pour la candidate {\'e}colo {\`a} la pr{\'e}sidentielle, est-ce un atout ou une faiblesse ? Philippe Boula de Mareuil, chercheur au CNRS, nous explique comment nous pouvons projeter des pr{\'e}jug{\'e}s sur les accents. Concernant celui d'Eva Joly, explique-t-il, le r{\'e}sultat n'est pas le m{\^e}me selon l'origine g{\'e}ogra Joseph-Jean},
}

@InProceedings{Lawto11a,
  author    = {J. Law-To and G. Grefenstette and J-L. Gauvain and L. Lamel and G. Gravier and C. Guinaudeau and P. Sebillot and J. Despres},
  title     = {{A Scalable Video Search Engine Based on Audio Content Indexing and Topic Segmentation}},
  booktitle = {NEM SUMMIT 2011},
  year      = {2011},
  address   = {Torino, Italy},
  url       = {http://www-tlp.limsi.fr/public/law-to_a_scalable_video_nem_summit2011.pdf}
}

@InProceedings{iwslt11lamel,
  author    = {Lori Lamel and Sandrine Courcinous and Julien Despres and Jean-Luc Gauvain and Yvan Josse and Kevin Kilgour and Florian Kraft and Viet Bac Le and Hermann Ney and Markus Nu{\ss}baum-Thom and Ilya Oparin and Tim Schlippe and Ralf Schl{\"u}ter and Tanja Schultz and Thiago Fraga da Silva and Sebastian St{\"u}ker and Martin Sundermeyer and Bianca Vieru and Ngoc Thang Vu and Alexander Waibel and C{\'e}cile Woehrling},
  title     = {{Speech Recognition for Machine Translation in Quaero}},
  booktitle = {IWSLT},
  year      = {2011},
  address   = {San Francisco, CA, USA},
  url       = {http://www-tlp.limsi.fr/public/final_proceedings_quaeroASR.pdf}
}

@InProceedings{Madda11b,
  author    = {M. Adda-Decker and L. Lamel and G.
Adda},
  title     = {{A first LVCSR system for Luxembourgish, an under-resourced European language}},
  booktitle = {LTC'11 LTC-LRL workshop, 5th Language \& Technology Conference},
  year      = {2011},
  address   = {Poznan, Poland},
  url       = {http://www-tlp.limsi.fr/public/addadecker_a_first_lvcsr_ltc2011.pdf}
}

% NOTE(review): the page range is copied from the duplicate record PubLIMSI-1815 (same authors, title and conference) -- confirm.
@InProceedings{Adda11a,
  author    = {G. Adda and B. Sagot and K. Fort and J. Mariani},
  title     = {{Crowdsourcing for Language Resource Development: Critical Analysis of Amazon Mechanical Turk Overpowering Use}},
  booktitle = {LTC'11, 5th Language \& Technology Conference},
  year      = {2011},
  address   = {Poznan, Poland},
  pages     = {304--308}
}

@InProceedings{Zhang11a,
  author    = {Q. Zhang and L. Lamel and J-L. Gauvain},
  title     = {{Genre Categorization and Modeling for Broadcast Speech Transcription}},
  booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association},
  year      = {2011},
  address   = {Florence, Italy},
  pages     = {1709--1712},
  url       = {http://www-tlp.limsi.fr/public/zhang_genre_categorization_interspeech2011.pdf}
}

@InProceedings{is11Doukhan,
  author    = {David Doukhan and Albert Rilliard and Sophie Rosset and Martine Adda-Decker and Christophe d'Alessandro},
  title     = {{Prosodic Analysis of a Corpus of Tales}},
  booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association},
  year      = {2011},
  address   = {Florence, Italy},
  pages     = {3129--3132},
  url       = {http://www-tlp.limsi.fr/public/IS110477.PDF},
  keywords  = {corpus analysis, prosody, gvlex},
}

@InProceedings{Madda11a,
  author    = {M. Adda-Decker and N. Snoeren and L.
Lamel},
  title     = {{Studying Luxembourgish phonetics via multilingual forced alignments}},
  booktitle = {ICPhS'11, 17th International Congress of Phonetic Sciences},
  year      = {2011},
  address   = {Hong Kong, China},
  keywords  = { multilingualasr },
  url       = {http://www-tlp.limsi.fr/public/adda_decker_studying_icphs2011.pdf}
}

@InProceedings{Kolar11a,
  author    = {J. Kolar and L. Lamel},
  title     = {{On Development of Consistently Punctuated Speech Corpora}},
  booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association},
  year      = {2011},
  address   = {Florence, Italy},
  url       = {http://www-tlp.limsi.fr/public/kolar_on_development_interspeech11.pdf}
}

@InProceedings{Le11b,
  author    = {H-S. Le and I. Oparin and A. Messaoudi and A. Allauzen and J-L. Gauvain and F. Yvon},
  title     = {{Large Vocabulary SOUL Neural Network Language Models}},
  booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association},
  year      = {2011},
  address   = {Florence, Italy},
  pages     = {1469--1472},
  url       = {http://www-tlp.limsi.fr/public/hai_son_le_large_vocabulary_interspeech11.pdf}
}

@InProceedings{Vasilescu11a_672,
  author    = {I. Vasilescu and D. Yahia and N. Snoeren and M. Adda-Decker and L. Lamel},
  title     = {{Cross-lingual study of ASR errors: on the role of the context in human perception of near homophones}},
  booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association},
  year      = {2011},
  address   = {Florence, Italy},
  pages     = {1949--1952},
  abstract  = {It is widely acknowledged that human listeners significantly outperform machines when it comes to transcribing speech. This paper presents a paradigm for perceptual experiments that aims to increase our understanding of human and automatic speech recognition errors.
The role of the context length is investigated through perceptual recovery of small homophonic words or near-homophones yielding frequent automatic transcription errors. The same experimental protocol of varied size speech stimuli transcription is applied to both French and English. Our hypothesis is that ambiguity due to homophonic words reduces with context size for both languages, which in turn should entail reduced perception and transcription errors. The results show that context plays a central role as the human word error rate decreases significantly with increasing context. The long-term aim is to improve the modelling of such ambiguous items to reduce automatic errors.}, url = {http://www-tlp.limsi.fr/public/vasilescuetal_cross-lingual_interspeech11.pdf} } @InProceedings{Tran11a, author = {V.A. Tran and V.B. Le and C. Barras and L. Lamel}, title = {{Comparing Multi-Stage Approaches for Cross-Show Speaker Diarization}}, booktitle = {InterSpeech'11, 12th Annual Conference of the International Speech Communication Association}, year = {2011}, address = {Florence, Italy}, pages = {}, abstract = {}, url = {http://www-tlp.limsi.fr/public/viet-anh-tran_comparing_interspeech11-2.pdf} } @InProceedings{Karanasou11a, author = {Karanasou, Panagiota AND Lamel, Lori}, title = {{Pronunciation Variants Generation Using SMT-Inspired Approaches}}, booktitle = {ICASSP'11, 36th International Conference on Acoustics, Speech and Signal Processing}, year = {2011}, address = {Prague, Czech Republic}, pages = {4908-4911}, abstract = {}, url = {http://www-tlp.limsi.fr/public/icassp11pk_pronunciation.pdf} } @InProceedings{Karanasou11b, author = {P. Karanasou and F. Yvon and L. 
Lamel}, title = {{Measuring the confusability of pronunciations in speech recognition}}, booktitle = {FSMNLP'11, 9th International Workshop on Finite State Methods and Natural Language Processing}, year = {2011}, address = {Blois, France}, pages = {}, abstract = {}, url = {http://www-tlp.limsi.fr/public/karanasou_measuring_fsmnlp2011.pdf} }

% Conventions used for the entries below:
%  - one field per line; empty placeholder fields (pages/abstract/url) are
%    omitted, since BibTeX treats an empty field the same as a missing one;
%  - page ranges use the en-dash form (--);
%  - accented letters are written as LaTeX escapes, as in the 2010 section
%    of this file, so the entries do not depend on the file encoding.

% Karanasou11c: CICLing 2011 paper.
@InProceedings{Karanasou11c,
  author    = {Karanasou, Panagiota and Lamel, Lori},
  title     = {{Automatic Generation of a Pronunciation Dictionary with Rich Variation Coverage Using SMT Methods}},
  booktitle = {Computational Linguistics and Intelligent Text Processing - 12th International Conference, CICLing},
  year      = {2011},
  address   = {Tokyo, Japan},
  pages     = {506--517},
}

% Rosset11a: ATALA one-day workshop on named entity recognition.
@InProceedings{Rosset11a,
  author    = {S. Rosset and C. Grouin and O. Galibert and P. Zweigenbaum and K. Fort and L. Quintard},
  title     = {{Les entit{\'e}s nomm{\'e}es dans le programme Quaero : Proposition pour une extension de la d{\'e}finition des EN, de la d{\'e}finition {\`a} l'{\'e}valuation}},
  booktitle = {Journ{\'e}e ATALA, Reconnaissance d'Entit{\'e}s Nomm{\'e}es, nouvelles Fronti{\`e}res et Nouvelles Approches},
  year      = {2011},
  address   = {Paris, France},
  keywords  = {named entity},
}

% Sokolov11a: EAMT 2011 paper.
@InProceedings{Sokolov11a,
  author    = {A. Sokolov and F. Yvon},
  title     = {{Minimum Error Rate Training Semi-Ring}},
  booktitle = {EAMT'11, 15th Annual Conference of the European Association for Machine Translation},
  year      = {2011},
  address   = {Leuven, Belgium},
}

% Allauzen11a: the author list is truncated in this database. BibTeX only
% recognises the literal token "and others" for that; a hand-written "et al."
% is parsed as part of the person's name (surname "al.").
@InProceedings{Allauzen11a,
  author    = {A. Allauzen and others},
  title     = {{LIMSI at WMT'11}},
  booktitle = {EMNLP'11, 6th workshop on statistical machine translation (WMT'11)},
  year      = {2011},
  address   = {Edinburgh, UK},
  url       = {http://www-tlp.limsi.fr/public/allauzen_limsiatwmt11.pdf},
}

@InProceedings{Lavergne11a, author = {T. Lavergne and A. Allauzen and F.
Yvon}, title = {{From n-gram based to CRF-based translation models}}, booktitle = {EMNLP'11, 6th workshop on statistical machine translation (WMT'11)}, year = {2011}, address = {Edinburgh, UK}, pages = {}, abstract = {}, url = {http://www-tlp.limsi.fr/public/lavergne_fromn-gram_emnlp2011.pdf} } @InProceedings{Le11a, author = {H-S. Le and I. Oparin and A. Allauzen and J-L. Gauvain}, title = {{Structured output layer neural network language model}}, booktitle = {ICASSP'11, 36th International Conference on Acoustics, Speech and Signal Processing}, year = {2011}, address = {Prague, Czech Republic}, pages = {}, abstract = {This paper introduces a new neural network language model (NNLM) based on word clustering to structure the output vocabulary: Structured Output Layer NNLM. This model is able to handle vocabularies of arbitrary size, hence dispensing with the design of short-lists that are commonly used in NNLMs. Several softmax layers replace the standard output layer in this model. The output structure depends on the word clustering which uses the continuous word representation induced by a NNLM. The GALE Mandarin data was used to carry out the speech-to-text experiments and evaluate the NNLMs. On this data the well tuned baseline system has a character error rate under 10\%. Our model achieves consistent improvements over the combination of an n-gram model and classical short-list NNLMs both in terms of perplexity and recognition accuracy.}, url = {http://www-tlp.limsi.fr/public/le_structured_output_icassp2011.pdf} } @InProceedings{Grouin11b, author = {C. Grouin and L. Deléger and B. Cartoni and S. Rosset and P. 
Zweigenbaum}, title = {{Accès au contenu sémantique en langue de spécialité: extraction des prescriptions et concepts médicaux}}, booktitle = {TALN'11, Traitement Automatique des Langues Naturelles}, year = {2011}, address = {Montpellier, France}, pages = {}, abstract = {}, url = {http://www-tlp.limsi.fr/public/grouin_taln2011_acces_au_contenu.pdf}, keywords = {medical information extraction}, }

% Tomeh11a: TALN 2011 paper. Accents are written as LaTeX escapes and the
% empty pages/abstract placeholders are dropped (empty equals missing in BibTeX).
@InProceedings{Tomeh11a,
  author    = {N. Tomeh and A. Allauzen and F. Yvon},
  title     = {{Estimation d'un mod{\`e}le de traduction {\`a} partir d'alignements mot-{\`a}-mot non-d{\'e}terministes}},
  booktitle = {TALN'11, Traitement Automatique des Langues Naturelles},
  year      = {2011},
  address   = {Montpellier, France},
  url       = {http://www-tlp.limsi.fr/public/tomeh_taln11_estimation_modele.pdf},
  keywords  = {mtalign},
}

% Lamel11a: ICASSP 2011 paper. "Mandarin" is capitalised in the title to match
% the spelling used throughout the abstract; the title is double-braced, so
% styles print it exactly as typed. Page range uses the en-dash form.
@InProceedings{Lamel11a,
  author    = {L. Lamel and J.-L. Gauvain and V.-B. Le and I. Oparin and S. Meng},
  title     = {{Improved Models for Mandarin Speech-to-Text Transcription}},
  booktitle = {ICASSP'11, 36th International Conference on Acoustics, Speech and Signal Processing},
  year      = {2011},
  address   = {Prague, Czech Republic},
  pages     = {4660--4663},
  abstract  = {This paper describes recent advances at LIMSI in Mandarin Chinese speech-to-text transcription. A number of novel approaches were introduced in the different system components. The acoustic models are trained on over 1600 hours of audio data from a range of sources, and include pitch and MLP features. N-gram and neural network language models are trained on very large corpora, over 3 billion words of texts; and LM adaptation was explored at different adaptation levels: per show, per snippet, or per speaker cluster. Character-based consensus decoding was found to outperform word-based consensus decoding for Mandarin. The improved system reduces the relative character error rate (CER) by about 10\% on previous GALE development and evaluation data sets, obtaining a CER of 9.2\% on the P4 broadcast news and broadcast conversation evaluation data.},
  url       = {http://www-tlp.limsi.fr/public/icassp11man_lamel_improvedmodels.pdf},
}

% Fraga11a: ICASSP 2011 paper. The abstract had copy-paste damage from a PDF:
% words split at line-end hyphens ("tran- scribed", "acous- tic"), a lost
% fi-ligature ("benet" for "benefit") and a dropped "to"; these are repaired.
@InProceedings{Fraga11a,
  author    = {Thiago Fraga-Silva and Jean-Luc Gauvain and Lori Lamel},
  title     = {{Lattice-based Unsupervised Acoustic Model Training}},
  booktitle = {ICASSP'11, 36th International Conference on Acoustics, Speech and Signal Processing},
  year      = {2011},
  address   = {Prague, Czech Republic},
  pages     = {4656--4659},
  abstract  = {Unsupervised acoustic model training has been successfully used to improve the performance of automatic speech recognition systems when only a small amount of manually transcribed data is available for the target domain. The most common approach is to use automatic transcriptions to guide acoustic model estimation. However, since the best recognition hypotheses are known to contain errors, we propose to consider multiple transcription hypotheses during training. The idea is that the EM process can benefit from the estimated posterior probabilities of the hypotheses to converge to a better solution. The proposed unsupervised training method is based on lattices. Lattice-based training gives a relative improvement of 2.2\% over 1-best training on a Broadcast News transcription task and converges faster with the iterative incremental training.},
  url       = {http://www-tlp.limsi.fr/public/fraga_lattice_based_unsupervised_icassp.pdf},
}

@InProceedings{AddaDecker11a, author = {M. Adda-Decker and I. Vasilescu and N. Snoeren and D. Yahia and L.
Lamel}, title = {{Towards Exploring Linguistic Variation in ASR Errors: Paradigm and Tool for Perceptual experiments}}, booktitle = {VLSP'11, Very Large Scale Phonetics Research}, year = {2011}, address = {Philadelphia, USA}, pages = {}, abstract = {}, url = {http://www-tlp.limsi.fr/public/} }

% Greco11a: the venue is a journal issue (Prague Bulletin of Mathematical
% Linguistics, number 96), so the entry is typed as an article, with the
% journal name and issue number in their own fields instead of a booktitle.
% The citation key is unchanged so existing citations keep resolving.
% NOTE(review): the key presumably misspells "Crego" - confirm before renaming.
@Article{Greco11a,
  author  = {J. M. Crego and F. Yvon and J. Marino},
  title   = {{Ncode : an Open Source Bilingual N-gram SMT Toolkit}},
  journal = {The Prague Bulletin of Mathematical Linguistics},
  number  = {96},
  year    = {2011},
  url     = {http://www-tlp.limsi.fr/public/art-crego-yvon-marino.pdf},
}

% Grouin11a: TALN 2011 demo paper. "Ben Abacha" is given in "Last, First" form
% so that BibTeX does not take "Ben" as part of the first name.
@InProceedings{Grouin11a,
  author    = {C. Grouin and L. Del{\'e}ger and A.-L. Minard and A.-L. Ligozat and Ben Abacha, A. and D. Bernhard and B. Cartoni and B. Grau and S. Rosset and P. Zweigenbaum},
  title     = {{Extraction d'informations m{\'e}dicales au LIMSI}},
  booktitle = {TALN'11, Traitement Automatique des Langues Naturelles},
  year      = {2011},
  address   = {Montpellier, France},
  url       = {http://www-tlp.limsi.fr/public/grouin_taln2011demo.pdf},
  keywords  = {medical information extraction},
}

%%%%%%%%%%% 2010 section

% Journal articles. AERES and GROUP are local fields that standard styles
% ignore. Proper nouns and acronyms in titles are brace-protected so that
% sentence-casing styles keep their capitals.
@article{PubLIMSI-1064,
  author  = {Torreira, F. and Adda-Decker, Martine and Ernestus, M.},
  title   = {The {Nijmegen} corpus of casual {French}},
  AERES   = {ACL},
  GROUP   = {LIMSI,TLP},
  year    = {2010},
  pages   = {201--212},
  journal = {Speech Communication},
  volume  = {52},
  number  = {3},
  url     = {http://www-tlp.limsi.fr/public/SCaddadecker2010_1375099629.pdf},
}

@article{PubLIMSI-1128,
  author  = {Yvon, Fran{\c{c}}ois},
  title   = {Rewriting the orthography of {SMS} messages},
  AERES   = {ACL},
  GROUP   = {LIMSI,TLP},
  year    = {2010},
  pages   = {133--159},
  journal = {Natural Language Engineering},
  volume  = {16},
  number  = {2},
  url     = {http://www-tlp.limsi.fr/public/NLEyvon2010_1375099794.pdf},
}

@article {PubLIMSI-1199, author = {Qu{\'e}not, G. AND Tan, T. AND Le, Viet Bac AND Ayache, S. AND Besacier, L.
AND Mulhem, P.}, title = {Content-based search in multilingual audiovisual documents using the international phonetic alphabet}, AERES = {ACL}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {123-140}, journal = {Multimedia Tools and Applications}, volume = {48}, number = {3}, url = {http://www-tlp.limsi.fr/public/MTAquenot2010_1375100202.pdf} } @article {PubLIMSI-1230, author = {Crego, Josep-Maria AND Leusch, G. AND Max, Aur{\'e}lien AND Ney, Hermann AND Yvon, Fran\c{c}ois}, title = {Micro-adaptation lexicale en traduction automatique statistique}, AERES = {ACLN}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {65-93}, journal = {TAL (Traitement Automatique des Langues)}, volume = {51}, number = {2}, url = {http://www-tlp.limsi.fr/public/TALcrego2010_1375100450.pdf} } @article {PubLIMSI-1232, author = {Crego, Josep-Maria AND Yvon, Fran\c{c}ois}, title = {Factored bilingual n-gram language models for statistical machine translation}, AERES = {ACL}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {159-175}, journal = {Machine Translation}, volume = {24}, number = {2}, url = {http://www-tlp.limsi.fr/public/MTyvon2010_1375100485.pdf} } @article {PubLIMSI-1297, author = {Sokolovska, N. AND Lavergne, Thomas AND Capp{\'e}, Olivier AND Yvon, Fran\c{c}ois}, title = {Efficient learning of sparse conditional random fields for supervised sequence labeling}, AERES = {ACL}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {953-964}, journal = {IEEE Journal of Selected Topics in Signal Processing - Special Issue on Statistical Learning Methods for Speech and Language Processing}, volume = {4}, number = {6}, url = {http://www-tlp.limsi.fr/public/IEEEJSTSPyvon2010_1375106989.pdf} } @inproceedings {PubLIMSI-1076, author = {Quintard, Ludovic AND Galibert, Olivier AND Laurent, D. 
AND Rosset, Sophie AND Adda, Gilles AND Moriceau, V{\'e}ronique AND Tannier, Xavier AND Grau, Brigitte AND Vilnat, Anne}, title = {Question answering on web data : the QA evaluation in Quaero}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {7p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, keywords = {question-answering}, } @inproceedings {PubLIMSI-1078, author = {Galibert, Olivier AND Rosset, Sophie AND Tannier, Xavier AND Grandry, Fanny}, title = {Hybrid citation extraction from patents}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {5p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, keywords = {information extraction}, } @inproceedings {PubLIMSI-1079, author = {Galibert, Olivier AND Quintard, Ludovic AND Rosset, Sophie AND Zweigenbaum, Pierre AND Nedellec, C. AND Aubin, S. AND Gillard, L. AND Raysz, J. AND Pois, D. 
AND Tannier, Xavier AND Del{\'e}ger, Louise AND Laurent, D.}, title = {Named and specific entity detection in varied data : the Quaero named entity baseline evaluation}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {6p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, keywords = {named entity}, } @inproceedings {PubLIMSI-1084, author = {Garcia-Fernandez, Anne AND Rosset, Sophie AND Vilnat, Anne}, title = {MACAQ : A multi annotated corpus to study how we adapt answers to various questions}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2010}, pages = {7p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, keywords={dialog system, question-answering, corpus annotation}, } @inproceedings {PubLIMSI-1085, author = {Garcia-Fernandez, Anne AND Rosset, Sophie AND Vilnat, Anne}, title = {Comment formule-t-on une r{\'e}ponse en langue naturelle?}, booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2010)}, AERES = {ACTN}, GROUP = {LIMSI,ILES,TLP}, year = {2010}, pages = {10p}, month= {19-23 juillet 2010}, address= {Montr{\'e}al, Canada}, keywords={dialog system, question-answering, human study}, } @inproceedings {PubLIMSI-1108, author = {Tahon, Marie AND Devillers, Laurence}, title = {Acoustic measures characterizing anger across corpora collected in artificial or natural context}, booktitle = {International Conference on Speech Prosody (SP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {May 11-14, 2010}, address= {Chicago, USA}, } @inproceedings {PubLIMSI-1109, author = {Schuller, Bjoern AND Zaccarelli, Riccardo AND Rollet, Nicolas AND Devillers, Laurence}, title = {CINEMO - A French spoken language resource for complex emotions : facts and baselines}, booktitle = {International Conference on 
Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1110, author = {Brendel, Matthias AND Zaccarelli, Riccardo AND Devillers, Laurence}, title = {Building a system for emotions detection from speech to control an affective avatar}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1111, author = {Tahon, Marie AND Delaborde, Agn{\`e}s AND Barras, Claude AND Devillers, Laurence}, title = {A corpus for identification of speakers and their emotions}, booktitle = {International Workshop on EMOTION : Corpora for Research on Emotion Affect}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {23/05}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1112, author = {Delaborde, Agn{\`e}s AND Tahon, Marie AND Devillers, Laurence}, title = {Affective links in a child-robot interaction}, booktitle = {International Workshop on EMOTION : Corpora for Research on Emotion Affect}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1113, author = {Vaudable, Christophe AND Rollet, Nicolas AND Devillers, Laurence}, title = {Annotation of affective interaction in real-life dialogs collected in a call-center}, booktitle = {International Workshop on EMOTION : Corpora for Research on Emotion Affect}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1114, author = {Brendel, Matthias AND Zaccarelli, Riccardo AND Schuller, Bjoern AND Devillers, Laurence}, title = {Towards measuring similarity between emotional corpora}, booktitle = {International Workshop on 
EMOTION : Corpora for Research on Emotion Affect}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1117, author = {Wisniewski, Guillaume AND Max, Aur{\'e}lien AND Yvon, Fran\c{c}ois}, title = {Recueil et analyse d'un corpus {\'e}cologique de corrections orthographiques extrait des r{\'e}visions de Wikip{\'e}dia}, booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2010)}, AERES = {ACTN}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {10p}, month= {19-23 juillet 2010}, address= {Montr{\'e}al, Canada}, } @inproceedings {PubLIMSI-1118, author = {Max, Aur{\'e}lien AND Wisniewski, Guillaume}, title = {Mining naturally-occurring corrections and paraphrases from Wikipedia's revision history}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,ILES,TLP}, year = {2010}, pages = {6p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1122, author = {Rouas, Jean-Luc AND Beppu, Mayumi AND Adda-Decker, Martine}, title = {Comparison of spectral properties of read, prepared and casual speech in French}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {6p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1123, author = {Rouas, Jean-Luc AND Beppu, Mayumi AND Adda-Decker, Martine}, title = {Comparaison des propri{\'e}t{\'e}s acoustiques de la parole lue, pr{\'e}par{\'e}e et conversationnelle en fran\c{c}ais}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {25-28 mai 2010}, address= {Mons, Belgique}, } @inproceedings {PubLIMSI-1129, author = {Max, Aur{\'e}lien AND Crego, Josep-Maria AND Yvon, Fran\c{c}ois}, title = {Contrastive lexical 
evaluation of machine translation}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP,ILES}, year = {2010}, pages = {5p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1130, author = {Misra, H. AND Yvon, Fran\c{c}ois}, title = {Mod{\`e}les th{\'e}matiques pour la segmentation de documents}, booktitle = {International Conference Journ{\'e}es d'Analyse statistique des Donn{\'e}es Textuelles (JADT 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {203-213}, month= {9-11 juin 2010}, address= {Rome, Italie}, } @inproceedings {PubLIMSI-1132, author = {Kaglik, A. AND Boula De Mareuil, Philippe}, title = {Polish-accented French prosody in perception and production : transfer or universal acquisition process?}, booktitle = {International Conference on Speech Prosody (SP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {May 11-14, 2010}, address= {Chicago, USA}, } @inproceedings {PubLIMSI-1133, author = {Boula De Mareuil, Philippe AND Adda-Decker, Martine AND Woehrling, C{\'e}cile}, title = {Ant{\'e}riorisation / aperture des voyelles //~/o/ en fran\c{c}ais du nord et du sud}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {25-28 mai 2010}, address= {Mons, Belgique}, } @inproceedings {PubLIMSI-1139, author = {Nemoto, Rena AND Adda-Decker, Martine AND Durand, J.}, title = {Investigation of lexical f0 and duration patterns in French using large broadcast news speech corpora}, booktitle = {International Conference on Speech Prosody (SP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {May 11-14, 2010}, address= {Chicago, USA}, } @inproceedings {PubLIMSI-1140, author = {Kolar, Jachym AND Liu, Y.}, title = {Comparing and combining modeling techniques for sentence segmentation of spoken Czech using textual 
and prosodic information}, booktitle = {International Conference on Speech Prosody (SP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {4p}, month= {May 11-14, 2010}, address= {Chicago, USA}, } @inproceedings {PubLIMSI-1142, author = {Adda-Decker, Martine AND Lamel, Lori AND Snoeren, Natalie}, title = {Initializing acoustic phone models of under-resourced languages : a case-study of Luxembourgish}, booktitle = {International Workshop on Spoken Languages Technologies for Under-resourced languages (SLTU 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {74-80}, month= {May 3-5, 2010}, address= {Penang, Malaysia}, } @inproceedings {PubLIMSI-1169, author = {Signol, Fran\c{c}ois AND Li{\'e}nard, Jean-Sylvain AND Barras, Claude}, title = {Influence de la d{\'e}cision vois{\'e}/non-vois{\'e} dans l'{\'e}valuation comparative d'algorithmes d'estimation de F0}, booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2010)}, AERES = {ACTI}, GROUP = {LIMSI,CPU,TLP}, year = {2010}, pages = {4p}, month= {25-28 mai 2010}, address= {Mons, Belgique}, } @inproceedings {PubLIMSI-1179, author = {Calzolari, N. AND Soria, Julio AND Del Gratta, R. AND Goggi, S. AND Quochi, V. AND Russo, I. 
AND Choukri, Khalid AND Mariani, Joseph-Jean AND Piperidis, S.}, title = {The LREC 2010 resource map}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {6p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1248, author = {Le, Viet Bac AND Barras, Claude AND Ferras, M.}, title = {On the use of GSV-SVM for speaker diarization and tracking}, booktitle = {The Speaker and Language Recognition Workshop (Odyssey 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {146-150}, month= {June 28-July 1, 2010}, address= {Brno, Czech Republic}, } @inproceedings {PubLIMSI-1292, author = {Snoeren, Natalie AND Adda-Decker, Martine AND Adda, Gilles}, title = {The study of writing variants in an under-resourced language: some evidence from Mobile N-Deletion in Luxembourgish}, booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {5p}, month= {May 17-23, 2010}, address= {Valetta, Malta}, } @inproceedings {PubLIMSI-1300, author = {Crego, Josep-Maria AND Yvon, Fran\c{c}ois}, title = {Improving reordering with linguistically informed bilingual n-grams}, booktitle = {International Conference on Computational Linguistics (COLING 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {197-205}, month= {August 23-27, 2010}, address= {Beijing, China}, } @inproceedings {PubLIMSI-1301, author = {El-Kahlout, Ilknur Durgar AND Yvon, Fran\c{c}ois}, title = {The pay-offs of preprocessing for German-English statistical machine translation}, booktitle = {International Workshop on Spoken Language Translation (IWSLT 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {251-258}, month= {2/12 au 3/12}, address= {Paris, France}, } @inproceedings {PubLIMSI-1302, author = {Lavergne, Thomas AND Capp{\'e}, Olivier AND Yvon, Fran\c{c}ois}, 
title = {Practical very large scale CRFs}, booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {504-513}, month= {July 11-16, 2010}, address= {Uppsala, Sweden}, }

% 2010 conference papers. AERES and GROUP are local fields that standard
% styles ignore. Page ranges use the en-dash form (--); values such as "10p"
% are page counts and are left as they were.

@inproceedings{PubLIMSI-1303,
  author    = {Le, Hai Son and Allauzen, Alexandre and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title     = {Training continuous space language models: some practical issues},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {778--788},
  month     = {October 9-11, 2010},
  address   = {Cambridge, USA},
}

@inproceedings{PubLIMSI-1304,
  author    = {Tomeh, Nadi and Allauzen, Alexandre and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title     = {Refining word alignment with discriminative training},
  booktitle = {Conference of the Association for Machine Translation in the Americas (AMTA 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {10p},
  month     = {31/10 au 04/11},
  address   = {Denver, USA},
  keywords  = {mtalign},
}

@inproceedings{PubLIMSI-1305,
  author    = {Wisniewski, Guillaume and Allauzen, Alexandre and Yvon, Fran{\c{c}}ois},
  title     = {Assessing phrase-based translation models with Oracle decoding},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {933--943},
  month     = {October 9-11, 2010},
  address   = {Cambridge, USA},
}

% PubLIMSI-1314 and PubLIMSI-1315: the month field ended in a truncated year
% ("201"); both entries carry year 2010 and name the workshop "DiSS-LPSS 2010".
@inproceedings{PubLIMSI-1314,
  author    = {Garcia-Fernandez, Anne and Vasilescu, Ioana and Rosset, Sophie},
  title     = {Euh as cue for speaker confidence and word searching in human spoken answers in {French}},
  booktitle = {Workshop on Disfluency in Spontaneous Speech - The 2nd International Symposium on Linguistic Patterns in Spontaneous Speech (DiSS-LPSS 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2010},
  pages     = {2p},
  month     = {September 25-26, 2010},
  address   = {Tokyo, Japan},
  keywords  = {question-answering, human production analysis, disfluences},
}

@inproceedings{PubLIMSI-1315,
  author    = {Vasilescu, Ioana and Rosset, Sophie and Adda-Decker, Martine},
  title     = {On the functions of the vocalic hesitation euh in interactive man-machine question answering dialogs in {French}},
  booktitle = {Workshop on Disfluency in Spontaneous Speech - The 2nd International Symposium on Linguistic Patterns in Spontaneous Speech (DiSS-LPSS 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,ILES,TLP},
  year      = {2010},
  pages     = {4p},
  month     = {September 25-26, 2010},
  address   = {Tokyo, Japan},
  keywords  = {question-answering, human machine dialog, disfluences},
}

% PubLIMSI-1317: the conference dates read "27/09 au 31/09", which is not a
% valid date (September has 30 days). They now match the dates this file gives
% for other INTERSPEECH 2010 papers (26/09 au 30/09).
@inproceedings{PubLIMSI-1317,
  author    = {Zidouni, A. and Rosset, Sophie and Glotin, H.},
  title     = {Efficient combined approach for named entity recognition in spoken language},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {1293--1296},
  month     = {26/09 au 30/09},
  address   = {Makuhari, Japan},
  keywords  = {named entity},
}

@inproceedings{PubLIMSI-1320,
  author    = {Oparin, Ilya and Lamel, Lori and Gauvain, Jean-Luc},
  title     = {Improving {Mandarin} {Chinese} {STT} system with random forests language models},
  booktitle = {International Symposium on Chinese Spoken Language Processing (ISCSLP 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {242--245},
  month     = {29/11 au 03/12},
  address   = {Tainan, Taiwan},
  url       = {http://www-tlp.limsi.fr/public/ISCSLPoparin2010.pdf},
}

@inproceedings {PubLIMSI-2332, author = {Schuller, Bjoern AND Steidl, S. AND Batliner, A.
AND Burkhardt, Felix AND Devillers, Laurence AND Muller, Christian AND Narayanan, Shrikanth}, title = {The INTERSPEECH 2010 paralinguistic challenge}, booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {2794-2797}, month= {26/09 au 30/09}, address= {Makuhari, Japan}, }

% The entry closed just above (INTERSPEECH 2010 paralinguistic challenge) had
% month "27/09 au 30/10"; it now carries the same conference dates as the
% other INTERSPEECH 2010 entries below (26/09 au 30/09).

% Page ranges below use the en-dash form (--). AERES and GROUP are local
% fields that standard styles ignore.

@inproceedings{PubLIMSI-2526,
  author    = {Nemoto, Rena and Adda-Decker, Martine and Durand, J.},
  title     = {Word boundaries in {French}: Evidence from large speech corpora},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {3272--3277},
  month     = {19/05 au 21/05},
  address   = {Valetta, Malta},
}

@inproceedings{PubLIMSI-2525,
  author    = {Nemoto, Rena and Adda-Decker, Martine and Durand, J.},
  title     = {D{\'e}marcation lexicale en fran{\c{c}}ais : profils prosodiques sur grand corpus},
  booktitle = {Journ{\'e}es d'Etude sur la Parole (JEP 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {237--240},
  month     = {25/05 au 28/05},
  address   = {Mons, Belgique},
}

% PubLIMSI-2820: in the abstract, a bare tilde before "80\%" is a LaTeX
% non-breaking space and prints no tilde; it is written as $\sim$ so that the
% "approximately" sign appears when the abstract is typeset.
@inproceedings{PubLIMSI-2820,
  author    = {Brendel, Matthias and Zaccarelli, Riccardo and Devillers, Laurence},
  title     = {A Quick Sequential Forward Floating Feature Selection Algorithm for Emotion Detection from Speech},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {1157--1160},
  month     = {26/09 au 30/09},
  address   = {Makuhari, Chiba, Japan},
  abstract  = {In this paper we present an improved Sequential Forward Floating Search algorithm. Subsequently, extensive tests are carried out on a selection of French emotional language resources well suited for a first impression on general applicability. A detailed analysis is presented to test the various modifications suggested one-by-one. Our conclusion is that the modification in the forward step result in a considerable improvement in speed ($\sim$80\%) while no considerable and systematic loss in quality is experienced. The modifications in the backward step seem to have only significance when a higher number of features is achieved. The final clarification of this issue remains the task of future work. As a result we may suggest a quick feature selection algorithm, which is practically more suitable for the state of the art, larger corpora and wider feature-banks. Our quick SFFS is general: it can also be used in any other field of application.},
}

@inproceedings {PubLIMSI-2821, author = {Devillers, Laurence AND Vaudable, Christophe AND Chastagnol, Cl{\'e}ment}, title = {Real-Life Emotion-Related States Detection in Call Centers: A Cross-Corpora Study}, booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {2350-2353}, month= {26/09 au 30/09}, address= {Makuhari, Chiba, Japan}, abstract= {In this article, we describe experiments on the detection of three emotional states (Anger, Positive and Neutral) for two French corpora collected in call centers in different contexts (service complaints and medical emergency). These corpora have a high level of privacy. In order to be comparable with results obtained in the community we used the openEAR acoustic features extraction platform instead of our own library. One of our aims being the comparison of anger and positive emotions across corpora, we train models on one corpus and test it on the other to compare their similarities, then conversely.
We will discuss the possible gain in generalization power.}, } @inproceedings {PubLIMSI-2822, author = {Schuller, Bjoern AND Devillers, Laurence}, title = {Incremental Acoustic Valence Recognition: An Inter-Corpus Perspective on Features, Matching, and Performance in a Gating Paradigm}, booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2010)}, AERES = {ACTI}, GROUP = {LIMSI,TLP}, year = {2010}, pages = {801-804}, month= {26/09 au 30/09}, address= {Makuhari, Chiba, Japan}, abstract= {It is not fully known how long it takes a human to reliably recognize emotion in speech from the beginning of a phrase. However, many technical applications demand for very quick system responses, e.g. to prepare different feedback alternatives before the end of a speaker turn in a dialog system. We therefore investigate this \gating paradigm\ employing two spoken language resources in a cross- and combined manner with a focus on valence: we determine how quick a reliable estimate is obtainable and whether matching by models trained on the same length of speech prevails. In addition we analyze how individual feature groups by type and derived functionals respond and find considerably different behavior. The language resources have been chosen to cover for manually segmented and automatically segmented speech at the same time. 
In the result one second of speech is sufficient on the datasets considered.},
}

@book{PubLIMSI-1091,
  author    = {Boula De Mareuil, Philippe},
  title     = {D'o{\`u} viennent les accents r{\'e}gionaux?},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {63p},
  publisher = {Le Pommier},
}

% Edited volumes: the people listed are editors, so they go in `editor`.
% The old "Name, (editors)" text inside `author` was parsed as part of a name.
% The table-of-contents link was stored in `volume`; it is a URL.
@book{PubLIMSI-1127,
  editor = {Yvon, Fran{\c{c}}ois and Hansen, V.},
  title  = {Proceedings of the 14th annual conference of the {European Association for Machine Translation}},
  AERES  = {DO},
  GROUP  = {LIMSI,TLP},
  year   = {2010},
  url    = {http://www.mt-archive.info/EAMT-2010-TOC.htm},
}

@book{PubLIMSI-2450,
  editor    = {Lee, G. and Mariani, Joseph-Jean and Minker, W. and Nakamura, S.},
  title     = {Spoken dialogue systems for ambient environments},
  AERES     = {DO},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {199},
  publisher = {Springer},
}

% "Paris, France" is a place, not a publisher, hence `address`.
@book{PubLIMSI-2540,
  editor  = {Federico, Marcello and Lane, Ian and Paul, Michael and Yvon, Fran{\c{c}}ois and Mariani, Joseph-Jean},
  title   = {Proceedings of the {International Workshop on Spoken Language Translation} ({IWSLT}'2010)},
  AERES   = {DO},
  GROUP   = {LIMSI,TLP},
  year    = {2010},
  pages   = {100},
  address = {Paris, France},
}

@incollection{PubLIMSI-1131,
  author    = {Marotta, G. and Boula De Mareuil, Philippe},
  title     = {Persistenza dell'accento straniero. {Uno} studio percettivo sull'italiano {L2}},
  booktitle = {La dimensione temporale del parlato},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {475--494},
  publisher = {EDK Editore},
}

@incollection{PubLIMSI-1196,
  author    = {Devillers, Laurence AND Martin, Jean-Claude},
  title     = {Corpus {\'e}motionnels : de l'acquisition {\`a} la mod{\'e}lisation},
  booktitle = {Syst{\`e}mes d'interaction {\'e}motionnelle (Trait{\'e} signal et image, IC2), chap.
3},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP,AMI},
  year      = {2010},
  publisher = {Herm{\`e}s Science},
}

@incollection{PubLIMSI-1197,
  author    = {Vasilescu, Ioana},
  title     = {Perception et reconnaissance des {\'e}motions},
  booktitle = {Syst{\`e}mes d'interaction {\'e}motionnelle (Trait{\'e} signal et image, IC2)},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {223--245},
  publisher = {Herm{\`e}s Science},
}

% The next two chapters come from the same OUP volume (see the ISBN on the
% second one); the book title is now spelled identically in both.
@incollection{PubLIMSI-1325,
  author    = {Cowie, R. and Douglas-Cowie, E. and Martin, Jean-Claude and Devillers, Laurence},
  title     = {The essential role of human databases for learning in and validation of affectively competent agents},
  booktitle = {A Blueprint for Affective Computing: A Sourcebook and Manual},
  AERES     = {OS},
  GROUP     = {LIMSI,AMI,TLP},
  year      = {2010},
  pages     = {151--165},
  publisher = {Oxford University Press},
}

@incollection{PubLIMSI-2331,
  author    = {Devillers, Laurence and Vidrascu, Laurence and Layachi, Omar},
  title     = {Automatic detection of emotion from vocal expression},
  booktitle = {A Blueprint for Affective Computing: A Sourcebook and Manual},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {232--244},
  publisher = {Oxford University Press},
  isbn      = {978-0199566709},
}

@inproceedings{PubLIMSI-1185,
  author    = {Mariani, Joseph-Jean and Choukri, Khalid and Vetulani, Z.},
  title     = {Getting less-resourced languages on-board!},
  booktitle = {Cocosda-Write-FLaReNet Workshop},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
}

@inproceedings{PubLIMSI-1186,
  author    = {Adda, Gilles and Mariani, Joseph-Jean},
  title     = {Language Resources and {Amazon Mechanical Turk}: ethical, legal, and other issues},
  booktitle = {Workshop on Legal Issues for Sharing Language Resources (LISLR): Constraints and Best Practices in conjunction with LREC 2010},
  AERES     = {COM},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
}

@inproceedings{PubLIMSI-1329,
  author    = {d'Alessandro, Christophe AND Quok, A. AND Deroo, O. AND Doukhan, David AND Gelin, R. AND Martin, Jean-Claude AND Pelachaud, C.
AND Rilliard, Albert AND Rosset, Sophie},
  title     = {Towards a storytelling humanoid robot},
  booktitle = {AAAI 2010 Fall Symposium on Dialog With Robots},
  AERES     = {ACTI},
  GROUP     = {LIMSI,AA,AMI,TLP},
  year      = {2010},
  pages     = {4p},
  keywords  = {gvlex},
}

% For conference papers the sponsoring body belongs in `organization`;
% `institution` is only read for technical reports.
@inproceedings{PubLIMSI-2333,
  author       = {Delaborde, Agn{\`e}s and Devillers, Laurence},
  title        = {Use of nonverbal speech cues in social interaction between human and robot: emotional and interactional markers},
  booktitle    = {3rd ACM Workshop on Affective Interaction in Natural Environments (AFFINE 2010)},
  AERES        = {ACTI},
  GROUP        = {LIMSI,TLP},
  year         = {2010},
  organization = {ACM},
}

@inproceedings{PubLIMSI-2489,
  author       = {Grouin, Cyril and Ben Abacha, Asma and Bernhard, Delphine and Cartoni, Bruno and Del{\'e}ger, Louise and Grau, Brigitte and Ligozat, Anne-Laure and Minard, Anne-Lyse and Rosset, Sophie and Zweigenbaum, Pierre},
  title        = {{CARAMBA}: Concept, Assertion, and Relation Annotation using machine-learning-based approaches},
  booktitle    = {Fourth i2b2/VA Shared-Task and Workshop Challenges in Natural Language Processing for Clinical Data},
  AERES        = {COM},
  GROUP        = {LIMSI,ILES,TLP},
  year         = {2010},
  pages        = {1},
  organization = {i2b2/VA},
  keywords     = {medical information extraction},
}

% The French abstract had lost every apostrophe (typographic quotes dropped
% during export); they are restored here.
@inproceedings{PubLIMSI-2735,
  author       = {Clavel, C{\'e}line and Devillers, Laurence and Martin, Jean-Claude},
  title        = {Combinaisons d'expressions vocales, faciales et posturales des {\'e}motions chez un agent anim{\'e} : ce que per{\c{c}}oivent les utilisateurs},
  booktitle    = {4{\`e}me Workshop sur les Agents Conversationnels Anim{\'e}s (WACA 2010)},
  AERES        = {ACTN},
  GROUP        = {LIMSI,CPU,TLP},
  year         = {2010},
  pages        = {8p},
  organization = {Universit{\'e} Lille I},
  abstract     = {L'objectif de ce papier est de pr{\'e}senter l'{\'e}valuation d'un syst{\`e}me de communication {\`a} distance m{\'e}diatis{\'e}e par un avatar capable d'exprimer les {\'e}motions d{\'e}tect{\'e}es dans la voix de l'utilisateur.
Un module de reconnaissance audio des {\'e}motions exprim{\'e}es dans la voix et une librairie d'expressions non verbales {\'e}motionnelles ont ainsi {\'e}t{\'e} d{\'e}velopp{\'e}s. Le focus de ce papier porte plus particuli{\`e}rement sur l'impact de l'int{\'e}gration de ces deux composants sur la perception {\'e}motionnelle et sur la qualit{\'e} per{\c{c}}ue de la synchronisation audio/vid{\'e}o.},
  keywords = {Agent conversationnel, {\'e}motion, d{\'e}tection, voix, comportement non verbal},
}

@inproceedings{PubLIMSI-4137,
  author    = {Allauzen, Alexandre and Crego, Josep-Maria and Le, Hai Son and Wisniewski, Guillaume and Yvon, Fran{\c{c}}ois},
  title     = {{LIMSI} @ {IWSLT} 2010},
  booktitle = {7th International Workshop on Spoken Language Translation (IWSLT 2010)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2010},
  pages     = {105--112},
}

% Empty `pages` and `abstract` placeholders removed in the entries below
% (an empty field is treated as missing). Titles are no longer wrapped in a
% second pair of braces; only words that must keep their case are protected.
@inproceedings{Lawto10a,
  author    = {J. Law-To and G. Grefenstette and J.-L. Gauvain and G. Gravier and L. Lamel and J. Despres},
  title     = {Introducing topic segmentation and segmented-based browsing tools into a content based video retrieval system},
  booktitle = {ACM Multimedia 2010},
  year      = {2010},
  address   = {Florence, Italy},
  url       = {http://www-tlp.limsi.fr/public/law-to_introducing_challengeacm.pdf}
}

@inproceedings{BenZeghiba10a,
  author    = {M. F. BenZeghiba and J.-L. Gauvain and L. Lamel},
  title     = {Improved {N-Gram} Phonotactic Models for Language Recognition},
  booktitle = {InterSpeech'10, 11th Annual Conference of the International Speech Communication Association},
  year      = {2010},
  address   = {Makuhari, Japan},
  keywords  = {lid},
  url       = {http://www-tlp.limsi.fr/public/mfb_interspeech10.pdf}
}

@inproceedings{Oparin10a,
  author    = {I. Oparin and L.
Lamel and J.-L. Gauvain},
  title     = {Large-Scale Language Modeling with Random Forests for {Mandarin Chinese} Speech-to-Text},
  booktitle = {7th International Conference on Natural Language Processing (IceTAL)},
  year      = {2010},
  address   = {Reykjavik, Iceland},
  url       = {http://www-tlp.limsi.fr/public/oparin_iscslp2010_paper56.pdf}
}

% Empty `pages` and `abstract` placeholders removed; language names are
% brace-protected so sentence-casing styles keep their capitals.
@inproceedings{Vergyri10a,
  author    = {D. Vergyri and L. Lamel and J.-L. Gauvain},
  title     = {Automatic Speech Recognition of Multiple Accented {English} Data},
  booktitle = {InterSpeech'10, 11th Annual Conference of the International Speech Communication Association},
  year      = {2010},
  address   = {Makuhari, Japan},
  url       = {http://www-tlp.limsi.fr/public/automatic_speech_recognition_of_multiple_accented_english_data_vergyri.pdf}
}

@inproceedings{Madda10a,
  author    = {M. Adda-Decker and L. Lamel and N. D. Snoeren},
  title     = {Comparing mono- \& multilingual acoustic seed models for a low e-resourced language: a case-study of {Luxembourgish}},
  booktitle = {InterSpeech'10, 11th Annual Conference of the International Speech Communication Association},
  year      = {2010},
  address   = {Makuhari, Japan},
  keywords  = {multilingualasr},
  url       = {http://www-tlp.limsi.fr/public/interspeech_adda_comparing.pdf}
}

@inproceedings{Karanasou10a,
  author    = {P. Karanasou and L.
Lamel},
  title     = {Comparing {SMT} Methods for automatic generation of pronunciation variants},
  booktitle = {7th International Conference on Natural Language Processing (IceTAL)},
  year      = {2010},
  address   = {Reykjavik, Iceland},
  url       = {http://www-tlp.limsi.fr/public/pkaranasou_icetal_camera_ready.pdf}
}

% Accented names are written with LaTeX escapes (as in the PubLIMSI entries)
% so that classic 8-bit BibTeX sorts and labels them correctly.
@inproceedings{Crego10local,
  author    = {Crego, Josep Maria and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title     = {Local lexical adaptation in {Machine Translation} through triangulation: {SMT} helping {SMT}},
  booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics (Coling 2010)},
  year      = {2010},
  address   = {Beijing, China},
  publisher = {Coling 2010 Organizing Committee},
  pages     = {232--240},
  abstract  = {We present a framework where auxiliary MT systems are used to provide lexical predictions to a main SMT system. In this work, predictions are obtained by means of pivoting via auxiliary languages, and introduced into the main SMT system in the form of a low order language model, which is estimated on a sentence-by-sentence basis. The linear combination of models implemented by the decoder is thus extended with the additional language model. Experimental work is carried out over three different translation tasks using the European Parliament corpus. For each task, nine additional languages are used as auxiliary languages to obtain the triangulated predictions. Translation accuracy results show that improvements in translation quality are obtained, even for large data conditions.},
  url       = {http://www.aclweb.org/anthology/C10-1027}
}

@inproceedings{Allauzen10wmt,
  author    = {Alexandre Allauzen and Josep M.
Crego and Durgar El-Kahlout, Ilknur and Fran{\c{c}}ois Yvon},
  title     = {{LIMSI}'s Statistical Translation Systems for {WMT}'10},
  booktitle = {Proceedings of the Joint Fifth Workshop on Statistical Machine Translation and MetricsMATR},
  year      = {2010},
  address   = {Uppsala, Sweden},
  pages     = {54--59},
  abstract  = {This paper describes our Statistical Machine Translation systems for the WMT10 evaluation, where LIMSI participated for two language pairs (French-English and German-English, in both directions). For German-English, we concentrated on normalizing the German side through a proper preprocessing, aimed at reducing the lexical redundancy and at splitting complex compounds. For French-English, we studied two extensions of our in-house N-code decoder: firstly, the effect of integrating a new bilingual reordering model; second, the use of adaptation techniques for the translation model. For both set of experiments, we report the improvements obtained on the development and test data.},
  url       = {http://www.aclweb.org/anthology/W10-1704}
}

% A whole proceedings volume (editors only, no author/booktitle) is a
% proceedings entry, not a paper in one. NOTE(review): same volume as
% PubLIMSI-1127 -- consider keeping a single key.
@proceedings{Yvon2010eamt,
  title   = {Proceedings of the 14th Annual Conference of the {European Association for Machine Translation}},
  year    = 2010,
  editor  = {Fran{\c{c}}ois Yvon and Viggo Hansen},
  address = {Saint-Rapha{\"e}l, France},
  url     = {http://www.mt-archive.info/EAMT-2010-TOC.htm}
}

% NOTE(review): same paper as MAX10.823. Author order, venue name and city are
% aligned with that LREC-exported record; the empty `pages` field is dropped.
@inproceedings{Crego10contrastive,
  author    = {Aur{\'e}lien Max and Josep Maria Crego and Fran{\c{c}}ois Yvon},
  title     = {Contrastive Lexical Evaluation of Machine Translation},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = 2010,
  address   = {Valletta, Malta},
  abstract  = {This paper advocates a complementary measure of translation performance that focuses on the constrastive ability of two or more systems or system versions to adequately translate source words.
This is motivated by three main reasons : 1) existing automatic metrics sometimes do not show significant differences that can be revealed by fine-grained focussed human evaluation, 2) these metrics are based on direct comparisons between system hypotheses with the corresponding reference translations, thus ignoring the input words that were actually translated, and 3) as these metrics do not take input hypotheses from several systems at once, fine-grained contrastive evaluation can only be done indirectly. This proposal is illustrated on a multi-source Machine Translation scenario, where significant gains (e.g. +1.3 BLEU point) are achieved.}
}

% The last author had been fused with the first word of the title
% ("N. Ami. Whodunnit"); both are restored. Empty placeholder fields are
% removed and the journal uses the file's `csl` string macro.
@article{Devil10i,
  author  = {A. Batliner and S. Steidl and B. Schuller and D. Seppi and T. Vogt and J. Wagner and L. Devillers and L. Vidrascu and V. Aharonson and L. Kessous and N. Amir},
  title   = {Whodunnit -- Searching for the Most Important Feature Types Signalling Emotion-Related User States in Speech},
  journal = csl,
  year    = 2010
}

@article{Devil10h,
  author  = {C. Clavel and I. Vasilescu and L. Devillers},
  title   = {Fiction supports for realistic portrayals of fear-type emotional manifestations},
  journal = csl,
  year    = 2010
}

@incollection{Devil10g,
  author    = {L. Devillers and J.-C. Martin},
  title     = {Emotional corpora},
  booktitle = {Emotions},
  editor    = {C. Pelachaud},
  publisher = {John Wiley},
  year      = 2010
}

% NOTE(review): same chapter as PubLIMSI-1196; book title and publisher are
% aligned with that record (the previous booktitle "Emotions" was carried
% over from the English-language entry above).
@incollection{Devil10f,
  author    = {L. Devillers and J.-C. Martin},
  title     = {Corpus {\'e}motionnels : de l'acquisition {\`a} la mod{\'e}lisation},
  booktitle = {Syst{\`e}mes d'interaction {\'e}motionnelle},
  editor    = {C. Pelachaud},
  publisher = {Herm{\`e}s Science},
  year      = 2010
}

@inproceedings{Devil10e,
  author    = {B. Schuller and R. Zaccarelli and N.
Rollet and L. Devillers},
  title     = {{CINEMO} -- A {French} Spoken Language Resource for Complex Emotions: Facts and Baselines},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = 2010,
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta}
}

% Workshop papers (LREC'10 emotion workshop, 23 May 2010). Month uses the
% standard macro, the bare day number moved to an ISO `eventdate`, and empty
% URL placeholders are removed.
% NOTE(review): Devil10d previously carried the same title as Devil10a
% (copy/paste); the title below should be confirmed against the paper.
@inproceedings{Devil10d,
  author    = {M. Tahon and A. Delaborde and C. Barras and L. Devillers},
  title     = {A corpus for identification of speakers and their emotions},
  booktitle = {Proc. 3rd Intern. Workshop on Emotion - LREC'10},
  year      = 2010,
  month     = may,
  eventdate = {2010-05-23},
  address   = {Valletta, Malta}
}

@inproceedings{Devil10c,
  author    = {A. Delaborde and M. Tahon and L. Devillers},
  title     = {Affective Links in a Child-Robot Interaction},
  booktitle = {Proc. 3rd Intern. Workshop on Emotion - LREC'10},
  year      = 2010,
  month     = may,
  eventdate = {2010-05-23},
  address   = {Valletta, Malta}
}

@inproceedings{Devil10b,
  author    = {Ch. Vaudable and N. Rollet and L. Devillers},
  title     = {Annotation of Affective Interaction in Real-life Dialogs Collected in a Call-center},
  booktitle = {Proc. 3rd Intern. Workshop on Emotion - LREC'10},
  year      = 2010,
  month     = may,
  eventdate = {2010-05-23},
  address   = {Valletta, Malta}
}

@inproceedings{Devil10a,
  author    = {M. Brendel and R. Zaccarelli and B. Schuller and L. Devillers},
  title     = {Towards measuring similarity between emotional corpora},
  booktitle = {Proc. 3rd Intern. Workshop on Emotion - LREC'10},
  year      = 2010,
  month     = may,
  eventdate = {2010-05-23},
  address   = {Valletta, Malta}
}

@inproceedings{SNOEREN10.258,
  author    = {Natalie D.
Snoeren and Martine Adda-Decker and Gilles Adda},
  title     = {The Study of Writing Variants in an Under-resourced Language: Some Evidence from Mobile {N-Deletion} in {Luxembourgish}},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {The national language of the Grand-Duchy of Luxembourg, Luxembourgish, has often been characterized as one of Europe's under-described and under-resourced languages. Because of a limited written production of Luxembourgish, poorly observed writing standardization (as compared to other languages such as English and French) and a large diversity of spoken varieties, the study of Luxembourgish poses many interesting challenges to automatic speech processing studies as well as to linguistic enquiries. In the present paper, we make use of large corpora to focus on typical writing and derived pronunciation variants in Luxembourgish, elicited by mobile -n deletion (hereafter shortened to MND). Using transcriptions from the House of Parliament debates and 10k words from news reports, we examine the reality of MND variants in written transcripts of speech.
The goal of this study is manyfold: quantify the potential of variation due to MND in written Luxembourgish, check the mandatory status of the MND rule and discuss the arising problems for automatic spoken Luxembourgish processing.},
  url       = {http://www-tlp.limsi.fr/public/258_Paper.pdf}
}

% LREC'10 record. The abstract had words glued together wherever the exporter
% joined wrapped lines; the missing spaces are restored.
@inproceedings{CALZOLARI10.370,
  author    = {Nicoletta Calzolari and Claudia Soria and Riccardo Del Gratta and Sara Goggi and Valeria Quochi and Irene Russo and Khalid Choukri and Joseph Mariani and Stelios Piperidis},
  title     = {The {LREC} Map of Language Resources and Technologies},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {In this paper we present the LREC Map of Language Resources and Tools, an innovative feature introduced with this LREC. The purpose of the Map is to shed light on the vast amount of resources and tools that represent the background of the research presented at LREC, in the attempt to fill in a gap in the community knowledge about the resources and tools that are used or created worldwide. It also aims at a change of culture in the field, actively engaging each researcher in the documentation task about resources. The Map has been developed on the basis of the information provided by LREC authors during the submission of papers to the LREC 2010 conference and the LREC workshops, and contains information about almost 2000 resources.
The paper illustrates the motivation behind this initiative, its main characteristics, its relevance and future impact in the field, the metadata used to describe the resources, and finally presents some of the most relevant findings.},
  url       = {http://www-tlp.limsi.fr/public/370_Paper.pdf}
}

% LREC'10 record. Missing inter-word spaces restored in the abstract, and the
% mis-encoded apostrophe in "assessors' work" repaired.
@inproceedings{MOREAU10.372,
  author    = {Nicolas Moreau and Olivier Hamon and Djamel Mostefa and Sophie Rosset and Olivier Galibert and Lori Lamel and Jordi Turmo and Pere R. Comas and Paolo Rosso and Davide Buscaldi and Khalid Choukri},
  title     = {Evaluation Protocol and Tools for Question-Answering on Speech Transcripts},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {Question Answering (QA) technology aims at providing relevant answers to natural language questions. Most Question Answering research has focused on mining document collections containing written texts to answer written questions. In addition to written sources, a large (and growing) amount of potentially interesting information appears in spoken documents, such as broadcast news, speeches, seminars, meetings or telephone conversations. The QAST track (Question-Answering on Speech Transcripts) was introduced in CLEF to investigate the problem of question answering in such audio documents. This paper describes in detail the evaluation protocol and tools designed and developed for the CLEF-QAST evaluation campaigns that have taken place between 2007 and 2009. We first remind the data, question sets, and submission procedures that were produced or set up during these three campaigns. As for the evaluation procedure, the interface that was developed to ease the assessors' work is described.
In addition, this paper introduces a methodology for a semi-automatic evaluation of QAST systems based on time slot comparisons. Finally, the QAST Evaluation Package 2007-2009 resulting from these evaluation campaigns is also introduced.},
  url       = {http://www-tlp.limsi.fr/public/372_Paper.pdf},
  keywords  = {question-answering},
}

% LREC'10 record. Missing inter-word spaces restored in the abstract.
@inproceedings{NEMOTO10.386,
  author    = {Rena Nemoto and Martine Adda-Decker and Jacques Durand},
  title     = {Word Boundaries in {French}: Evidence from Large Speech Corpora},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {The goal of this paper is to investigate French word segmentation strategies using phonemic and lexical transcriptions as well as prosodic and part-of-speech annotations. Average fundamental frequency (f0) profiles and phoneme duration profiles are measured using 13 hours of broadcast news speech to study prosodic regularities of French words. Some influential factors are taken into consideration for f0 and duration measurements: word syllable length, word-final schwa, part-of-speech. Results from average f0 profiles confirm word final syllable accentuation and from average duration profiles, we can observe long word final syllable length. Both are common tendencies in French. From noun phrase studies, results of average f0 profiles illustrate higher noun first syllable after determiner. Inter-vocalic duration profile results show long inter-vocalic duration between determiner vowel and preceding word vowel. These results reveal measurable cues contributing to word boundary location.
Further studies will include more detailed within syllable f0 patterns, other speaking styles and languages.},
  url       = {http://www-tlp.limsi.fr/public/386_Paper.pdf}
}

% LREC'10 record. Missing inter-word spaces restored in the abstract; the
% straight double quotes around "euh" are replaced by TeX quotes.
@inproceedings{VASILESCU10.481,
  author    = {Ioana Vasilescu and Sophie Rosset and Martine Adda-Decker},
  title     = {On the Role of Discourse Markers in Interactive Spoken Question Answering Systems},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {This paper presents a preliminary analysis of the role of some discourse markers and the vocalic hesitation ``euh'' in a corpus of spoken human utterances collected with the Ritel system, an open domain and spoken dialog system. The frequency and contextual combinatory of classical discourse markers and of the vocalic hesitation have been studied. This analysis pointed out some specificity in terms of combinatory of the analyzed items. The classical discourse markers seem to help initiating larger discursive blocks both at initial and medial positions of the on-going turns.
The vocalic hesitation stand also for marking the user's embarrassments and wish to close the dialog.},
  url       = {http://www-tlp.limsi.fr/public/481_Paper.pdf},
  keywords  = {human-machine dialog, dialog analysis, speech analysis, discourse markers, disfluences},
}

% LREC'10 record. Missing spaces between words and after sentence-ending
% periods restored in the abstract.
@inproceedings{BERNARD10.518,
  author    = {Guillaume Bernard and Sophie Rosset and Martine Adda-Decker and Olivier Galibert},
  title     = {A Question-answer Distance Measure to Investigate {QA} System Progress},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {The performance of question answering system is evaluated through successive evaluations campaigns. A set of questions are given to the participating systems which are to find the correct answer in a collection of documents. The creation process of the questions may change from one evaluation to the next. This may entail an uncontroled question difficulty shift. For the QAst 2009 evaluation campaign, a new procedure was adopted to build the questions. Comparing results of QAst 2008 and QAst 2009 evaluations, a strong performance loss could be measured in 2009 for French and English, while the Spanish systems globally made progress. The measured loss might be related to this new way of elaborating questions. The general purpose of this paper is to propose a measure to calibrate the difficulty of a question set. In particular, a reasonable measure should output higher values for 2009 than for 2008. The proposed measure relies on a distance measure between the critical elements of a question and those of the associated correct answer.
An increase of the proposed distance measure for French and English 2009 evaluations as compared to 2008 could be established. This increase correlates with the previously observed degraded performances. We conclude on the potential of this evaluation criterion: the importance of such a measure for the elaboration of new question corpora for questions answering systems and a tool to control the level of difficulty for successive evaluation campaigns.},
  url       = {http://www-tlp.limsi.fr/public/518_Paper.pdf},
  keywords  = {question-answering},
}

% LREC'10 record. Missing inter-word spaces restored in the abstract.
@inproceedings{ROUAS10.704,
  author    = {Jean-Luc Rouas and Mayumi Beppu and Martine Adda-Decker},
  title     = {Comparison of Spectral Properties of Read, Prepared and Casual Speech in {French}},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {In this paper, we investigate the acoustic properties of phonemes in three speaking styles: read speech, prepared speech and spontaneous speech. Our aim is to better understand why speech recognition systems still fails to achieve good performances on spontaneous speech. This work follows the work of Nakamura et al. on Japanese speaking styles, with the difference that we here focus on French. Using Nakamura's method, we use classical speech recognition features, MFCC, and try to represent the effects of the speaking styles on the spectral space. Two measurements are defined in order to represent the spectral space reduction and the spectral variance extension. Experiments are then carried on to investigate if indeed we find some differences between the three speaking styles using these measurements. We finally compare our results to those obtained by Nakamura on Japanese to see if the same phenomenon appears.
We happen to find some cues, and it also seems that phone duration also plays an important role regarding spectral reduction, especially for spontaneous speech.},
  url       = {http://www-tlp.limsi.fr/public/704_Paper.pdf}
}

% LREC'10 record (the file also lists this paper as Crego10contrastive).
% Accented names use LaTeX escapes; missing inter-word spaces restored.
@inproceedings{MAX10.823,
  author    = {Aur{\'e}lien Max and Josep Maria Crego and Fran{\c{c}}ois Yvon},
  title     = {Contrastive Lexical Evaluation of Machine Translation},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {This paper advocates a complementary measure of translation performance that focuses on the constrastive ability of two or more systems or system versions to adequately translate source words. This is motivated by three main reasons : 1) existing automatic metrics sometimes do not show significant differences that can be revealed by fine-grained focussed human evaluation, 2) these metrics are based on direct comparisons between system hypotheses with the corresponding reference translations, thus ignoring the input words that were actually translated, and 3) as these metrics do not take input hypotheses from several systems at once, fine-grained contrastive evaluation can only be done indirectly.
This proposal is illustrated on a multi-source Machine Translation scenario where multiple translations of a source text are available. Significant gains (up to +1.3 BLEU point) are achieved on these experiments, and contrastive lexical evaluation is shown to provide new information that can help to better analyse a system's performance.},
  url       = {http://www-tlp.limsi.fr/public/823_Paper.pdf}
}

% LREC'10 record. The mis-encoded apostrophe in "Wikipedia's" (title and
% abstract) is repaired and missing inter-word spaces are restored.
@inproceedings{MAX10.827,
  author    = {Aur{\'e}lien Max and Guillaume Wisniewski},
  title     = {Mining Naturally-occurring Corrections and Paraphrases from {Wikipedia}'s Revision History},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {Naturally-occurring instances of linguistic phenomena are important both for training and for evaluating automatic text processing. When available in large quantities, they also prove interesting material for linguistic studies. In this article, we present WiCoPaCo (Wikipedia Correction and Paraphrase Corpus), a new freely-available resource built by automatically mining Wikipedia's revision history. The WiCoPaCo corpus focuses on local modifications made by human revisors and include various types of corrections (such as spelling error or typographical corrections) and rewritings, which can be categorized broadly into meaning-preserving and meaning-altering revisions. We present an initial hand-built typology of these revisions, but the resource allows for any possible annotation scheme. We discuss the main motivations for building such a resource and describe the main technical details guiding its construction. We also present applications and data analysis on French and report initial results on spelling error correction and morphosyntactic rewriting.
The WiCoPaCo corpus can be freely downloaded from http://wicopaco.limsi.fr.},
  url       = {http://www-tlp.limsi.fr/public/827_Paper.pdf}
}

% LREC'10 record. The first two authors were separated by a comma, which made
% BibTeX read them as one malformed name; names must be joined with "and".
% Missing inter-word spaces in the abstract are restored.
@inproceedings{BRENDEL10.403,
  author    = {Matthias Brendel and Riccardo Zaccarelli and Laurence Devillers},
  title     = {Building a System for Emotions Detection from Speech to Control an Affective Avatar},
  booktitle = {Proceedings of the Seventh conference on International Language Resources and Evaluation (LREC'10)},
  year      = {2010},
  month     = may,
  eventdate = {2010-05-19/2010-05-21},
  address   = {Valletta, Malta},
  publisher = {European Language Resources Association (ELRA)},
  isbn      = {2-9517408-6-7},
  language  = {english},
  abstract  = {In this paper we describe a corpus set together from two sub-corpora. The CINEMO corpus contains acted emotional expression obtained by playing dubbing exercises. This new protocol is a way to collect mood-induced data in large amount which show several complex and shaded emotions. JEMO is a corpus collected with an emotion-detection game and contains more prototypical emotions than CINEMO. We show how the two sub-corpora balance and enrich each other and result in a better performance. We built male and female emotion models and use Sequential Fast Forward Feature Selection to improve detection performances. After feature-selection we obtain good results even with our strict speaker independent testing method. The global corpus contains 88 speakers (38 females, 50 males). This study has been done within the scope of the ANR (National Research Agency) Affective Avatar project which deals with building a system of emotions detection for monitoring an Artificial Agent by voice.},
  url       = {http://www-tlp.limsi.fr/public/403_Paper.pdf}
}

@inproceedings{prosody10LD,
  author    = {Marie Tahon and Laurence Devillers},
  title     = {Acoustic Measures characterizing anger across corpora collected in artificial or natural context},
  booktitle = { Proc.
of the 5th International Conference Speech Prosody},
  ADDRESS = {Chicago},
  YEAR = 2010,
  MONTH = may,
  abstract = {This paper aims at studying the difference of acoustic manifestations for anger across corpora collected in artificial, manipulated or natural context. It also aims at finding measures for naturalness in emotive corpora. Actually it is quite difficult to evaluate the degree of naturalness of a corpus except considering the task. As for more acted corpora, anger is often stronger; we can imagine that it exists a kind of distance between anger and the all corpus. This distance is here evaluated with basic acoustic descriptors in several corpora collected. We show some differences between the acoustic features obtained for anger in these different contexts and propose measures of naturalness.},
  keywords = {Emotion-detection, anger, naturalness, prototypical emotion},
  URL = {http://www-tlp.limsi.fr/public/100850.pdf}
}

% Speech Prosody 2010: lexical f0 and duration patterns in French broadcast news.
@INPROCEEDINGS{prosody10RN,
  AUTHOR = {Rena Nemoto and Martine Adda-Decker and Jacques Durand},
  TITLE = {{Investigation of lexical f0 and duration patterns in French using large broadcast news speech corpora}},
  BOOKTITLE = {Proc. of the 5th International Conference Speech Prosody},
  ADDRESS = {Chicago},
  YEAR = 2010,
  MONTH = may,
  abstract = {This work aims at improving our knowledge of links between prosody and pronunciation variants in French. An original methodology is proposed to study prosodic regularities of French words via average f0 profiles, by making use of automatic processing and 13 hours of broadcast news speech. Investigated influential factors include word syllable length, duration, word-final schwa, parts of speech. The following questions are addressed: can specific lexical f0 profiles be measured automatically using large corpora? If so, how do they vary with respect to the cited influential factors? Results confirm the known tendency of word-final syllable accentuation.
They also highlight some word-initial accentuation. Higher average f0 profiles are measured for increasing segment durations (locally decreasing speaking rate), but also for words ending with schwas. Future studies include phrase boundary annotation and the extension to a larger variety of speaking styles and languages.},
  keywords = {f0 profile, syllabic word length, lexical duration, word-final schwa, French},
  URL = {http://www-tlp.limsi.fr/public/100059.pdf}
}

% Speech Prosody 2010: sentence segmentation of spoken Czech.
@INPROCEEDINGS{prosody10JK,
  AUTHOR = {Jachym Kolar and Yang Liu},
  TITLE = {{Comparing and Combining Modeling Techniques for Sentence Segmentation of Spoken Czech Using Textual and Prosodic Information}},
  BOOKTITLE = {Proc. of the 5th International Conference Speech Prosody},
  ADDRESS = {Chicago},
  YEAR = 2010,
  MONTH = may,
  abstract = {This paper deals with automatic sentence boundary detection in spoken Czech using both textual and prosodic information. This task is important to make automatic speech recognition (ASR) output more readable and easier for downstream language processing modules. We compare and combine three statistical models hidden Markov model, maximum entropy, and adaptive boosting. We evaluate these methods on two Czech corpora, broadcast news and broadcast conversations, using both manual and ASR transcripts. Our results show that superior results are achieved when all the three models are combined via posterior probability interpolation, and that there is substantial difference among the three methods when using different knowledge sources, as well as in different genres. Feature analysis also reveals significant differences in prosodic feature usage patterns between the two genres.
},
  keywords = {sentence segmentation, prosody, HMM, maximum entropy, boosting},
  URL = {http://www-tlp.limsi.fr/public/100021.pdf}
}

% SLTU 2010 papers (Penang, Malaysia).
@INPROCEEDINGS{sltu10fin,
  AUTHOR = {Lori Lamel and Bianca Vieru},
  TITLE = {{Development of a Speech-to-text transcription system for Finnish}},
  BOOKTITLE = {{The second International Workshop on Spoken Languages Technologies for Under-resourced languages (SLTU10)}},
  PAGES = {62--67},
  ADDRESS = {Penang, Malaysia},
  YEAR = 2010,
  MONTH = may,
  abstract = {This paper describes the development of a speech-to-text transcription system for the Finnish language. Finnish is a Finno-Ugric language spoken by about 6 million of people living in Finland, but also by some minorities in Sweden, Norway, Russia and Estonia. System development was carried out without any detailed manual transcriptions, relying instead on several sources of audio and textual data were found on the web. Some of the audio sources were associated with approximate (and usually partial) texts, which were used to provide estimates of system performance.},
  URL = {http://www-tlp.limsi.fr/public/sltu10fin.pdf}
}

@INPROCEEDINGS{sltu10lux,
  AUTHOR = {Martine Adda-Decker and Lori Lamel and Natalie D. Snoeren},
  TITLE = {{Initializing Acoustic Phone Models of Under-resourced Languages: A Case-study of Luxembourgish}},
  BOOKTITLE = {{The second International Workshop on Spoken Languages Technologies for Under-resourced languages (SLTU10)}},
  PAGES = {74--80},
  ADDRESS = {Penang, Malaysia},
  YEAR = 2010,
  MONTH = may,
  abstract = {Luxembourgish, has often been characterized as one of Europe's under-described and under-resourced languages. In this contribution we report on our ongoing work to take Luxembourgish on board as an e-language : an electronically searchable spoken language. More specifically, we focus on the issue of producing acoustic seed models for Luxembourgish.
A phonemic inventory was defined and linked to inventories from major neighboring languages (German, French and English), with the help of the IPA symbol set. Acoustic seed model sets were composed using monolingual German, French or English acoustic model sets and corresponding forced alignment segmentations were compared. Next a super-set of multilingual acoustic seeds was used putting together the three language-dependent sets. The language-identity of the aligned acoustic models provides information about the overall acoustic adequacy of both the cross-language phonemic correspondences and the acoustic models. Furthermore some information can be gleaned on inter-language distances : the German acoustic models provided the best match with 54.3\% of the segments aligned using German seeds, 35.3\% using the English ones and only 10.4\% using the French acoustic models. Since Luxembourgish is considered a Western Germanic language close to German, this result is in line with its linguistic typology.},
  URL = {http://www-tlp.limsi.fr/public/sltu10lux.pdf}
}

% ICASSP 2010; booktitle comes from the `icassp` @STRING macro defined at the top of the file.
@INPROCEEDINGS{icassp10bac,
  AUTHOR = {Viet-Bac Le and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Multi-style MLP Features for BN Transcription}},
  BOOKTITLE = icassp,
  PAGES = {4866--4869},
  ADDRESS = {Dallas, Texas},
  YEAR = 2010,
  MONTH = mar,
  abstract = {It has become common practice to adapt acoustic models to specific-conditions (gender, accent, bandwidth) in order to improve the performance of speech-to-text (STT) transcription systems. With the growing interest in the use of discriminative features produced by a multi layer perceptron (MLP) in such systems, the question arise of whether it is necessary to specialize the MLP to particular conditions, and if so, how to incorporate the condition-specific MLP features in the system.
This paper explores three approaches (adaptation, full training, and feature merging) to use condition-specific MLP features in a state-of-the-art BN STT system for French. The third approach without condition-specific adaptation was found to outperform the original models with condition-specific adaptation, and was found to perform almost as well as full training of multiple condition-specific HMMs.},
  keywords = {Speech recognition, speech-to-text transcription, mlpfeat},
  URL = {http://www-tlp.limsi.fr/public/0004866.pdf}
}

% Handbook chapter; the empty editor/abstract/keywords/url placeholders were dropped.
@INCOLLECTION{chilhandbook10,
  AUTHOR = {A. Waibel and R. Stiefelhagen and R. Carlson and J. Casas and J. Kleindienst and L. Lamel and O. Lanz and D. Mostefa and M. Omologo and F. Pianesi and L. Polymenakos and G. Potamianos and J. Soldatos and G. Sutschet and J. Terken},
  TITLE = {{Computers in the Human Interaction Loop}},
  BOOKTITLE = {{Handbook on Ambient Intelligence and Smart Environments}},
  PUBLISHER = {Springer},
  YEAR = 2010,
  CHAPTER = 20,
  PAGES = {1071--1116}
}

%%%%%%%%%%% 2009 section

@article{PubLIMSI-97,
  author = {Martin, Jean-Claude and Caridakis, G. and Devillers, Laurence and Karpouzis, K. and Abrillian, Sarkis},
  title = {Manual annotation and automatic image processing of multimodal emotional behaviors : validating the annotation of {TV} interviews},
  AERES = {ACL},
  GROUP = {LIMSI,AMI,TLP},
  year = {2009},
  pages = {69--76},
  journal = {Personal and Ubiquitous Computing},
  volume = {13},
  number = {1},
}

@article{PubLIMSI-668,
  author = {Van Schooten, Boris and Op Den Akker, R.
and Rosset, Sophie and Galibert, Olivier and Max, Aur{\'e}lien and Illouz, Gabriel},
  title = {Follow-up question handling in the {IMIX} and {Ritel} systems : a comparative study},
  AERES = {ACL},
  GROUP = {LIMSI,TLP,ILES},
  year = {2009},
  pages = {97--118},
  journal = {Natural Language Engineering},
  volume = {15},
  number = {1},
  keywords = {spoken dialog system, question-answering},
}

% The journal field also carries the special-issue title.
@article{PubLIMSI-791,
  author = {Mariani, Joseph-Jean},
  title = {Research infrastructures for human language technologies : a vision from {France}},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {569--584},
  journal = {Speech Communication - Research Challenges in Speech Technology: A Special Issue in Honour of Rolf Carlson and Bj{\"o}rn Granstr{\"o}m},
  volume = {51},
  number = {7},
}

@article{PubLIMSI-803,
  author = {Bonneau-Maynard, H. and Quignard, M. and Denis, A.},
  title = {{MEDIA} : a semantically annotated corpus of task oriented dialogs in {French}. Results of the {French} {MEDIA} evaluation campaign},
  AERES = {ACL},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {329--354},
  journal = {Language Resources and Evaluation},
  volume = {43},
  number = {4},
  abstract = {The aim of the French MEDIA project was to define a protocol for the evaluation of speech understanding modules for dialog systems. Accordingly, a corpus of 1,257 real spoken dialogs related to hotel reservation and tourist information was recorded, transcribed and semantically annotated, and a semantic attribute-value representation was defined in which each conceptual relationship was represented by the names of the attributes. Two semantic annotation levels are distinguished in this approach. At the first level, each utterance is considered separately and the annotation represents the meaning of the statement without taking into account the dialog context.
The second level of annotation then corresponds to the interpretation of the meaning of the statement by taking into account the dialog context; in this way a semantic representation of the dialog context is defined. This paper discusses the data collection, the detailed definition of both annotation levels, and the annotation scheme. Then the paper comments on both evaluation campaigns which were carried out during the project and discusses some results.},
  keywords = {Dialog system Speech understanding Corpus Annotation Evaluation},
  url = {http://www-tlp.limsi.fr/public/2009-HBM_1359578645.pdf},
}

% TAL journal, volume 50, number 3.
@article{PubLIMSI-851,
  author = {Sokolovska, N. and Capp{\'e}, Olivier and Yvon, Fran{\c{c}}ois},
  title = {S{\'e}lection de caract{\'e}ristiques pour les champs al{\'e}atoires conditionnels par p{\'e}nalisation {L1}},
  AERES = {ACLN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {139--171},
  journal = {TAL (Traitement Automatique des Langues)},
  volume = {50},
  number = {3},
}

@article{PubLIMSI-852,
  author = {Allauzen, Alexandre and Wisniewski, Guillaume},
  title = {Mod{\`e}les discriminants pour l'alignement mot-{\`a}-mot},
  AERES = {ACLN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {173--203},
  journal = {TAL (Traitement Automatique des Langues)},
  volume = {50},
  number = {3},
  keywords = {mtalign},
}

% month uses the standard macro plus the conference days; the year stays in `year` only.
@inproceedings{PubLIMSI-698,
  author = {Boula De Mareuil, Philippe and Rilliard, Albert and Allauzen, Alexandre},
  title = {Perception of the evolution of prosody in the {French} broadcast news style},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,AA,TLP},
  year = {2009},
  pages = {2531--2534},
  month = sep # "~6--10",
  address = {Brighton, UK},
}

@inproceedings{PubLIMSI-715,
  author = {Tomeh, Nadi and Cancedda, N.
and Dymetman, M.},
  title = {Complexity-based phrase-table filtering for statistical machine translation},
  booktitle = {Machine Translation Summit (MT Summit 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {8p},
  month = aug # "~26--30",
  address = {Ottawa, Canada},
}

% CLEF 2009 QAst track papers; month spans 30 September to 2 October.
@inproceedings{PubLIMSI-774,
  author = {Turmo, J. and Comas, P. and Rosset, Sophie and Galibert, Olivier and Moreau, N. and Mostefa, D. and Rosso, P. and Buscaldi, D.},
  title = {Overview of {QAST} 2009},
  booktitle = {Workshop of the Cross-Language Evaluation Forum (CLEF 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {197--211},
  publisher = {Springer},
  series = {Revised selected papers, LNCS 6241, C. Peters et al. (Eds)},
  month = sep # "~30--" # oct # "~2",
  address = {Corfu, Greece},
  keywords = {question-answering},
}

@inproceedings{PubLIMSI-775,
  author = {Bernard, Guillaume and Rosset, Sophie and Galibert, Olivier and Bilinski, Eric and Adda, Gilles},
  title = {The {LIMSI} participation in the {QAst} 2009 track},
  booktitle = {Workshop of the Cross-Language Evaluation Forum (CLEF 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {289--296},
  publisher = {Springer},
  series = {Revised selected papers, LNCS 6241, C. Peters et al.
(Eds)},
  month = sep # "~30--" # oct # "~2",
  address = {Corfu, Greece},
  keywords = {question-answering},
}

% 2009 conference papers; month = standard macro plus the conference days.
@inproceedings{PubLIMSI-776,
  author = {Garcia-Fernandez, Anne and Rosset, Sophie and Vilnat, Anne},
  title = {Collecte et analyses de r{\'e}ponses naturelles pour les syst{\`e}mes de questions-r{\'e}ponses},
  booktitle = {Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles (TALN 2009)},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP,ILES},
  year = {2009},
  pages = {10p},
  month = jun # "~24--26",
  address = {Senlis, France},
  keywords = {question-answering, human production, dialog system},
}

@inproceedings{PubLIMSI-782,
  author = {Woehrling, C{\'e}cile and Boula De Mareuil, Philippe and Adda-Decker, Martine},
  title = {Linguistically-motivated automatic classification of regional {French} varieties},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {2183--2186},
  month = sep # "~6--10",
  address = {Brighton, UK},
}

@inproceedings{PubLIMSI-796,
  author = {Mariani, Joseph-Jean},
  title = {Language technology infrastructures in support to multilingualism},
  booktitle = {International Universal Communication Symposium},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {7p},
  month = dec # "~3--4",
  address = {Tokyo, Japan},
}

@inproceedings{PubLIMSI-837,
  author = {Snoeren, Natalie and Adda-Decker, Martine and Adda, Gilles},
  title = {Pronunciation and writing variants in {Luxembourgish} : the case of mobile n-deletion in large corpora},
  booktitle = {Language \& Technology Conference : Human Language Technologies as a Challenge for Computer Science and Linguistics (L\&TC 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {6p},
  month = nov # "~6--8",
  address = {Poznan, Poland},
}

@inproceedings{PubLIMSI-839,
  author = {Torreira, F.
and Adda-Decker, Martine and Ernestus, M.},
  title = {The {Nijmegen} corpus of casual {French}},
  booktitle = {Journ{\'e}es Linguistiques de Nantes},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {65--67},
  month = jun # "~18--19",
  address = {Nantes, France},
}

% 2009 conference papers; month = standard macro plus the conference days.
@inproceedings{PubLIMSI-840,
  author = {Adda-Decker, Martine and Nemoto, Rena and Durand, J.},
  title = {Strat{\'e}gies de d{\'e}marcation du mot en fran{\c{c}}ais: une {\'e}tude exp{\'e}rimentale sur grand corpus},
  booktitle = {Journ{\'e}es Linguistiques de Nantes},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {91--96},
  month = jun # "~18--19",
  address = {Nantes, France},
}

@inproceedings{PubLIMSI-842,
  author = {Misra, H. and Yvon, Fran{\c{c}}ois and Joemon, M. and Capp{\'e}, Olivier},
  title = {Text segmentation via topic modeling : an analytical study},
  booktitle = {ACM Conference on Information and Knowledge Management (CIKM 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {1553--1556},
  month = nov # "~2--6",
  address = {Hong Kong, China},
}

% NOTE(review): `series` below still holds a "vol. xx" placeholder plus editor names; the real LNCS volume is not recorded here.
@inproceedings{PubLIMSI-871,
  author = {Vaudable, Christophe and Devillers, Laurence and Balague, C.},
  title = {Study of consumer's emotion during product interviews},
  booktitle = {International Conference on Affective Computing and Intelligent Interaction (ACII 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {6p},
  series = {Springer, LNCS, vol. xx. J. Cohn, A. Nijholt and M. Pantic (Eds.)},
  month = sep # "~10--12",
  address = {Amsterdam, The Netherlands},
}

@inproceedings{PubLIMSI-872,
  author = {Rollet, Nicolas and Delaborde, Agn{\`e}s and Devillers, Laurence},
  title = {Protocol {CINEMO} : the use of fiction for collecting emotional data in naturalistic controlled oriented context},
  booktitle = {International Conference on Affective Computing and Intelligent Interaction (ACII 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {6p},
  series = {Springer, LNCS, vol. xx. J. Cohn, A. Nijholt and M.
Pantic (Eds.)},
  month = sep # "~10--12",
  address = {Amsterdam, The Netherlands},
}

@inproceedings{PubLIMSI-856,
  author = {Kaglik, A. and Boula De Mareuil, Philippe},
  title = {Perception d'un accent {\'e}tranger et part de la prosodie selon l'{\^a}ge de premi{\`e}re exposition {\`a} la {L2} : transfert ou ph{\'e}nom{\`e}ne universel en acquisition ?},
  booktitle = {Journ{\'e}es d'Etudes Linguistiques de l'Universit{\'e} de Nantes (JEL 2009)},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {7--13},
  month = jun # "~18--19",
  address = {Nantes, France},
}

% Edited volume: the editor role goes in the `editor` field. Written as
% "Mariani, Joseph-Jean, (editor)" in `author`, BibTeX parses "(editor)" as the first name.
@book{PubLIMSI-667,
  editor = {Mariani, Joseph-Jean},
  title = {Spoken language processing : analysis, coding, synthesis and recognition},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2009},
  publisher = {John Wiley \& sons},
}

@inproceedings{PubLIMSI-792,
  author = {Mariani, Joseph-Jean},
  title = {Introduction to session 1 ``broadening the coverage, addressing the gaps'', the {European} language resources and technologies forum: ``shaping the future of the multilingual digital {Europe}''},
  booktitle = {European Language Resources and Technologies Forum},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2009},
  month = feb # "~12--13",
  address = {Vienna, Austria},
}

@inproceedings{PubLIMSI-793,
  author = {Mariani, Joseph-Jean},
  title = {The international challenge of natural language processing},
  booktitle = {ATALA 50th anniversary conference},
  AERES = {ACTN},
  GROUP = {LIMSI,TLP},
  year = {2009},
  month = jun # "~23",
  address = {Paris, France},
}

@incollection{PubLIMSI-764,
  author = {Turmo, J. and Comas, P. and Rosset, Sophie and Lamel, Lori and Moreau, N. and Mostefa, D.},
  title = {Overview of {QAST} 2008},
  booktitle = {Lecture Notes in Computer Science : evaluating systems for multilingual and multimodal information access.
9th Workshop of the cross-language evaluation forum, CLEF 2008},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2009},
  publisher = {Springer Verlag},
  keywords = {question-answering},
}

% Chapters of "Computers in the human interaction loop" (Springer Verlag, 2009).
@incollection{PubLIMSI-765,
  author = {Turmo, J. and Surdeanu, M. and Galibert, Olivier and Rosset, Sophie},
  title = {Language technologies : question answering in speech transcripts},
  booktitle = {Computers in the human interaction loop},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {75--86},
  publisher = {Springer Verlag},
  ISBN = {978-1-84882-053-1},
  keywords = {question-answering},
}

@incollection{PubLIMSI-778,
  author = {Pnevmatikakis, A. and Ekenel, H. and Barras, Claude and Hernando, J.},
  title = {Multimodal person identification},
  booktitle = {Computers in the human interaction loop},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {23--31},
  publisher = {Springer Verlag},
}

@incollection{PubLIMSI-788,
  author = {Mariani, Joseph-Jean},
  title = {Pr{\'e}face. Spoken language processing : analysis, coding, synthesis and recognition},
  booktitle = {Spoken language processing : analysis, coding, synthesis and recognition},
  AERES = {OS},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {4p},
  publisher = {Herm{\`e}s Science},
}

% NOTE(review): the three @phdthesis entries below have no `school` field, which the
% standard styles require; the awarding institution is not recorded here.
@phdthesis{PubLIMSI-711,
  author = {Ferr{\`a}s Font, Marc},
  title = {Maximum-likelihood linear regression coefficients as features for speaker recognition},
  AERES = {TH},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

@phdthesis{PubLIMSI-714,
  author = {Galibert, Olivier},
  title = {Approches et m{\'e}thodologies pour la r{\'e}ponse automatique {\`a} des questions adapt{\'e}es {\`a} un cadre int{\'e}ractif en domaine ouvert},
  AERES = {TH},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

@phdthesis{PubLIMSI-729,
  author = {Woehrling, C{\'e}cile},
  title = {Accents r{\'e}gionaux en fran{\c{c}}ais : perception, analyse et mod{\'e}lisation {\`a} partir de grands corpus},
  AERES = {TH},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

@inproceedings{PubLIMSI-797,
  author = {Mariani,
Joseph-Jean},
  title = {L'apport des technologies de la langue au multilinguisme},
  booktitle = {Forum des Droits sur l'Internet},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

% The venue was stored in `howpublished`, which @inproceedings ignores in the
% standard styles; it is moved to the required `booktitle` field here and above.
@inproceedings{PubLIMSI-798,
  author = {Mariani, Joseph-Jean},
  title = {Pour une {\'e}thique de la recherche en sciences et technologies de l'information et de la communication},
  booktitle = {S{\'e}minaire ``Ethique et analyse de l'information'' du CT Connaissances du P{\^o}le Cap Digital},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

@inproceedings{PubLIMSI-795,
  author = {Mariani, Joseph-Jean},
  title = {Sharing the effort to produce the needed language resources},
  booktitle = {Workshop ``New Horizons for Linguistic Resources in a Global Context''},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2009},
}

@inproceedings{PubLIMSI-859,
  author = {Delaborde, Agn{\`e}s and Tahon, Marie and Barras, Claude and Devillers, Laurence},
  title = {A wizard-of-oz game for collecting emotional audio data in a children-robot interaction},
  booktitle = {AFFINE '09. International Workshop on Affective-Aware Virtual Agents and Social Robots},
  AERES = {COM},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {3p},
}

% Same paper as Allauzen09limsi later in this file (same title, pages and abstract); both keys are kept.
@inproceedings{PubLIMSI-4135,
  author = {Allauzen, Alexandre and Crego, Josep-Maria and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title = {{LIMSI}'s statistical translation systems for {WMT}'09},
  booktitle = {Fourth Workshop on Statistical Machine Translation (WMT 2009)},
  AERES = {ACTI},
  GROUP = {LIMSI,TLP,ILES},
  year = {2009},
  pages = {100--104},
  institution = {Athens University of Economics and Business},
  abstract = {This paper describes our Statistical Machine Translation systems for the WMT09 (en:fr) shared task. For this evaluation, we have developed four systems, using two different MT Toolkits: our primary submission, in both directions, is based on Moses, boosted with contextual information on phrases, and is contrasted with a conventional Moses-based system.
Additional contrasts are based on the Ncode toolkit, one of which uses (part of) the English/French GigaWord parallel corpus},
  url = {http://www-tlp.limsi.fr/public/WMTallauzen2009_1370615887.pdf},
}

% NOTE(review): the @inproceedings entries below carry `institution` but no `booktitle`.
% The standard styles ignore `institution` for @inproceedings and warn about the missing
% `booktitle`; these look like reports, so confirm whether @techreport was intended.
@inproceedings{PubLIMSI-727,
  author = {Poirier, Nicolas},
  title = {D{\'e}veloppement d'interface web {PHP/MySQL} pour la gestion des publications des membres du groupe au format {BibTeX} et de filtres d'exportation vers la base de la biblioth{\`e}que},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {IFIPS/LIMSI},
}

@inproceedings{PubLIMSI-811,
  author = {Ben Ayed, Maaouia},
  title = {Erreurs de transcription automatique de la parole : analyses {\`a} partir de corpus en vue d'une mod{\'e}lisation acoustique enrichie},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {Universit{\'e} Paris 8/LIMSI},
  keywords = {Conception et d{\'e}veloppement de syst{\`e}me intelligent},
}

@inproceedings{PubLIMSI-827,
  author = {Le Hai, S.},
  title = {Mod{\'e}lisation du langage dans un espace continu},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {ENS Cachan/LIMSI},
  keywords = {Math{\'e}matiques, Vision, Apprentissage},
}

@inproceedings{PubLIMSI-828,
  author = {Sehili, Mohamed El Amine},
  title = {R{\'e}seaux de consensus pour la traduction automatique},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {Universit{\'e} Paris 11/LIMSI-CNRS},
  keywords = {SETI},
}

% "and others" is BibTeX's token for a truncated author list; the former
% "(Collectif de participants), Et al" was parsed as a person named "Et al".
@inproceedings{PubLIMSI-799,
  author = {Samassekou, A. and Mariani, Joseph-Jean and others},
  title = {{Bamako} commitment on universal multilingualism},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {International Forum on multilingualism},
}

@inproceedings{PubLIMSI-800,
  author = {Samassekou, A.
and Mariani, Joseph-Jean and others},
  title = {Action plan on the {Bamako} commitment on universal multilingualism},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {International Forum on multilingualism},
}

% NOTE(review): the @inproceedings entries below carry `institution` but no `booktitle`.
% The standard styles ignore `institution` for @inproceedings and warn about the missing
% `booktitle`; these look like reports, so confirm whether @techreport was intended.
@inproceedings{PubLIMSI-804,
  author = {Mariani, Joseph-Jean},
  title = {Shaping the future of the multilingual digital europe},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {Report on session 3 ``Evaluation \& Validation'', European Language Resources and Technologies Forum Vienna, Austrian Academy of Sciences, February 12-13, 2009},
}

@inproceedings{PubLIMSI-805,
  author = {Mariani, Joseph-Jean and Besnier, J. and Bord{\'e}, J. and Cornu, J. and Farge, M. and Ganascia, J. and Haton, J. and Serverin, E.},
  title = {Pour une {\'e}thique de la recherche en sciences et technologies de l'information},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {Rapport du Comit{\'e} d'Ethique du CNRS, Septembre 2009},
}

@inproceedings{PubLIMSI-855,
  author = {Mariani, Joseph-Jean and Gruas, Thibault},
  title = {Internet et d{\'e}veloppement durable II : langue et internet},
  GROUP = {LIMSI,TLP},
  year = {2009},
  institution = {Rapport du Forum des Droits sur l'Internet, 22 d{\'e}cembre 2009},
}

@inproceedings{PubLIMSI-858,
  author = {Sokolovska, N. and Lavergne, Thomas and Capp{\'e}, Olivier and Yvon, Fran{\c{c}}ois},
  title = {Efficient learning of sparse conditional random fields for supervised sequence labelling},
  GROUP = {LIMSI,TLP},
  year = {2009},
  pages = {21p},
  abstract = {Conditional Random Fields (CRFs) constitute a popular and efficient approach for supervised sequence labelling. CRFs can cope with large description spaces and can integrate some form of structural dependency between labels. In this contribution, we address the issue of efficient feature selection for CRFs based on imposing sparsity through an L1 penalty. We first show how sparsity of the parameter set can be exploited to significantly speed up training and labelling.
We then introduce coordinate descent parameter update schemes for CRFs with L1 regularization. We finally provide some empirical comparisons of the proposed approach with state-of-the-art CRF training strategies. In particular, it is shown that the proposed approach is able to take profit of the sparsity to speed up processing and handle larger dimensional models.},
}

% Volume and issue were stored together as volume = {8 (4)} with an empty `number`;
% they are split into `volume` and `number`. No page range is recorded.
@Article{Lamel09a,
  author = {L. Lamel and A. Messaoudi and J.-L. Gauvain},
  affiliation = {LIMSI/CNRS BP 133 91430 Orsay Cedex France},
  title = {{Automatic Speech-to-Text Transcription in Arabic}},
  journal = {Transactions on Asian Language Information Processing, Special Issue on Arabic Natural Language Processing in the ACM},
  volume = {8},
  number = {4},
  url = {http://www-tlp.limsi.fr/public/limsi_publi_qpr8_acm09_arabic.pdf},
  year = {2009}
}

% TALN'09 poster: multi-source statistical MT with lexical reinforcement.
@InProceedings{Crego09plusieurs,
  author = {Crego, Josep Maria and Aur{\'e}lien Max and Fran{\c{c}}ois Yvon},
  title = {Plusieurs langues (bien choisies) valent mieux qu'une: traduction statistique multi-source par renforcement lexical},
  booktitle = {Acte de la Conf{\'e}rence sur le Traitement Automatique des Langues Naturelles, session poster (TALN'09)},
  type = {national},
  year = 2009,
  address = {Senlis, France},
  abstract = {Statistical Machine Translation (SMT) systems integrate various models that exploit all available features during decoding to produce the best possible translation hypotheses. Correctly translating polysemous words, such as the French word avocat into English (lawyer or avocado) requires integrating complex models. Such translation lexical ambiguities, however, depend on the language pair considered. If one knows, for instance, that avocat was translated into Spanish as aguacate, then translating it into English is no longer ambiguous (avocado). Thus, in this example, the knowledge of the Spanish translation allows to reinforce the choice of the appropriate English word for the pair French/English system.
In this article, we present an approach in which documents available in several languages are used to reinforce the lexical choices made by a SMT system. In particular, we show that gains can be obtained on several metrics when using auxiliary translations produced by human translators.}
}

% EAMT 2009. The field name is `editor`; `editors` is not a BibTeX field and was silently ignored.
@InProceedings{Crego09gappy,
  author = {Josep Maria Crego and Fran{\c{c}}ois Yvon},
  title = {Gappy translation units under left-to-right {SMT} decoding},
  booktitle = {Proceedings of the meeting of the European Association for Machine Translation (EAMT)},
  editor = {Lluis Marquez and Harold Somers},
  pages = {66--73},
  year = 2009,
  address = {Barcelona, Spain},
  keywords = {Statistical Machine Translation},
  abstract = {This paper presents an extension for a bilingual n-gram statistical machine translation (SMT) system based on allowing translation units with gaps. Our gappy translation units can be seen as a first step towards introducing hierarchical units similar to those employed in hierarchical MT systems. Our goal is double. On the one hand we aim at capturing the benefits of the higher generalization power shown by hierarchical systems. On the other hand, we want to avoid the computational burden of decoding based on parsing techniques, which among other drawbacks, make difficult the introduction of the required target language model costs. Our experiments show slight but consistent improvements for Chinese-to-English machine translation.
Accuracy results are competitive with those achieved by a state-of-the-art phrase-based system.}
}

% EACL'09: analogical learning applied to medical term translation.
@Inproceedings{Langlais09improvements,
  title = {Improvements in Analogical Learning: Application to Translating multi-Terms of the Medical Domain},
  author = {Philippe Langlais and Fran{\c{c}}ois Yvon and Pierre Zweigenbaum},
  year = 2009,
  booktitle = {Proceedings of the European Conference on Computational Linguistics (EACL'09)},
  pages = {487--495},
  address = {Athens, Greece},
  url = {http://www.aclweb.org/anthology/E09-1056},
  keywords = {Machine Translation, Terminology, Analogical Learning},
  abstract = {Handling terminology is an important matter in a translation workflow. However, current Machine Translation (MT) systems do not yet propose anything proactive upon tools which assist in managing terminological databases. In this work, we investigate several enhancements to analogical learning and test our implementation on translating terms of the medical domain. We show that the analogical engine works equally well when translating from and into a morphologically rich language, or when dealing with language pairs written in different scripts. Combining it to a phrase-based statistical engine leads to significant improvements.}
}

% Same paper as PubLIMSI-4135 earlier in this file; both keys are kept.
% Authors were written "Last First" without a comma, so BibTeX took the given
% names as surnames; the "Last, First" form is used instead.
@InProceedings{Allauzen09limsi,
  author = {Allauzen, Alexandre and Crego, Josep and Max, Aur{\'e}lien and Yvon, Fran{\c{c}}ois},
  title = {{LIMSI}'s Statistical Translation Systems for {WMT}'09},
  booktitle = {Proceedings of the Fourth Workshop on Statistical Machine Translation},
  month = mar,
  year = {2009},
  address = {Athens, Greece},
  pages = {100--104},
  url = {http://www.aclweb.org/anthology/W09-0417},
  keywords = {Statistical Machine Translation},
  abstract = {This paper describes our Statistical Machine Translation systems for the WMT09 (en:fr) shared task.
For this evaluation, we have developed four systems, using two different MT Toolkits: our primary submission, in both directions, is based on Moses, boosted with contextual information on phrases, and is contrasted with a conventional Moses-based system. Additional contrasts are based on the Ncode toolkit, one of which uses (part of) the English/French GigaWord parallel corpus.} }

% Two chapters of the same edited Springer volume.
@incollection{chil09pid,
  title     = {Multimodal Person Identification},
  author    = {Aristodemos Pnevmatikakis and Hazim K. Ekenel and Claude Barras and Javier Hernando},
  chapter   = {4},
  pages     = {23--31},
  url       = {http://www-tlp.limsi.fr/public/CHIL_book_pid-springer.pdf},
  editor    = {Alexander Waibel and Rainer Stiefelhagen},
  year      = {2009},
  booktitle = {Computers in the Human Interaction Loop series},
  series    = {Human-Computer Interaction Series},
  publisher = {Springer},
  address   = {London}
}

% Every author is separated by a free-standing " and "; a fused "andJing"
% would make BibTeX read two people as one name.
@incollection{chil09asr,
  title     = {Automatic Speech Recognition},
  author    = {Gerasimos Potamianos and Lori Lamel and Matthias W{\"o}lfel and Jing Huang and Etienne Marcheret and Claude Barras and Xuan Zhu and John McDonough and Javier Hernando and Dusan Macho and Climent Nadeu},
  chapter   = {6},
  pages     = {43--59},
  url       = {http://www-tlp.limsi.fr/public/CHIL_book_asr-springer.pdf},
  editor    = {Alexander Waibel and Rainer Stiefelhagen},
  year      = {2009},
  booktitle = {Computers in the Human Interaction Loop series},
  series    = {Human-Computer Interaction Series},
  publisher = {Springer},
  address   = {London}
}

@Article{ieee09TPLL,
  author   = {Thomas Pellegrini and Lori Lamel},
  title    = {Automatic Word Decompounding for {ASR} in a Morphologically Rich Language: Application to {Amharic}},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  year     = 2009,
  month    = jul,
  volume   = 17,
  number   = 10,
  abstract = {This paper investigates a data-driven word decompounding algorithm for use in automatic speech recognition.
An existing algorithm, called ``Morfessor,'' has been enhanced in order to address the problem of increased phonetic confusability arising from word decompounding by incorporating phonetic properties and some constraints on recognition units derived from forced alignments experiments. Speech recognition experiments have been carried out on a broadcast news task for the Amharic language to validate the approach. The out of vocabulary (OOV) word rates were reduced by 35\% to 50\% and a small reduction in word error rate (WER) has been achieved. The algorithm is relatively language independent and requires minimal adaptation to be applied to other languages.},
  pages    = {863--873}
}

% Volume, issue, month and pages are not recorded for this article, so those
% fields are omitted rather than left empty.
@Article{ieee09MFCB,
  author   = {Ferras, M. and Leung, C. C. and Barras, C. and Gauvain, J. L.},
  title    = {Comparison of Speaker Adaptation Methods as Feature Extraction for {SVM}-Based Speaker Recognition},
  journal  = {IEEE Transactions on Audio, Speech, and Language Processing},
  year     = 2009,
  abstract = {In the last years the speaker recognition field has made extensive use of speaker adaptation techniques. Adaptation allows speaker model parameters to be estimated using less speech data than needed for maximum likelihood (ML) training. The Maximum-A-Posteriori (MAP) and Maximum Likelihood Linear Regression (MLLR) techniques have been typically used for adaptation. Recently, MAP and MLLR adaptation have been incorporated in the feature extraction stage of Support Vector Machine (SVM) based speaker recognition systems. Two approaches to feature extraction use a SVM to classify either the MAP-adapted Gaussian mean vector parameters (GSV-SVM) or the Maximum-Likelihood Linear Regression (MLLR) transform coefficients (MLLR-SVM). In this paper we provide an experimental analysis of the GSV-SVM and MLLR-SVM approaches. We largely focus on the latter by exploring constrained and unconstrained transforms and different choices of the acoustic model.
A channel-compensated front-end is used to prevent the MLLR transforms to adapt to channel components in the speech data. Additional acoustic models were trained using Speaker Adaptive Training (SAT) to better estimate the speaker MLLR transforms. We provide results on the NIST 2005 and 2006 Speaker Recognition Evaluation (SRE) data and fusion results on the SRE 2006 data. The results show that using the compensated front-end, SAT models and multiple regression classes bring major performance improvements.},
  url      = {http://www-tlp.limsi.fr/public/IEEEFerras2010_1375102768.pdf}
}

@Article{talip09arabic,
  author  = {Lori Lamel and Abdel Messaoudi and Jean-Luc Gauvain},
  title   = {{Automatic Speech-to-Text Transcription in Arabic}},
  journal = {ACM Transactions on Asian Language Information Processing, Special Issue on Arabic Natural Language Processing},
  year    = 2009,
  month   = dec,
  volume  = 8,
  number  = 4,
  url     = {http://www-tlp.limsi.fr/public/acm09_arabic.pdf},
  pages   = {1--18}
}

% The author field is closed by exactly one brace; a second brace there would
% end the whole entry and silently drop title, booktitle, year and the rest.
% NOTE(review): pages and URL are identical to those of interspeech08leung and
% look copied from that entry -- confirm against the SPECOM-2009 proceedings.
@INPROCEEDINGS{specom09,
  author    = {E. Arisoy and T. Pellegrini and M. Saraclar and L. Lamel},
  title     = {{Enhanced Morfessor Algorithm with Phonetic Features: application to Turkish}},
  booktitle = {13th International Conference on Speech and Computer, SPECOM-2009},
  address   = {St Petersburg, Russia},
  year      = 2009,
  month     = jun,
  pages     = {1945--1948},
  url       = {http://www-tlp.limsi.fr/public/IS080922.PDF}
}

% NOTE(review): the page range is the same as on interspeech08leung and
% specom09 -- verify it for this paper.
@INPROCEEDINGS{interspeech09mfb,
  author    = {M. Faouzi BenZeghiba and J. L. Gauvain and L. Lamel},
  title     = {{Language Score Calibration using Adapted Gaussian Back-end}},
  booktitle = {Interspeech'09},
  address   = {Brighton, UK},
  year      = 2009,
  month     = sep,
  pages     = {1945--1948},
  abstract  = {Generative Gaussian back-end and discriminative logistic regression are the most used approaches for language score fusion and calibration. Combination of these two approaches can significantly improve the performance.
This paper proposes the use of an adapted Gaussian back-end, where the mean of the language-dependent Gaussian is adapted from the mean of a language-specific background Gaussian via maximum a posteriori estimation algorithm. Experiments are conducted using the LRE-07 evaluation data. Compared to the conventional Gaussian back-end approach for a closed set task, relative improvements in the Cavg of 50\%, 17\% and 4.2\% are obtained on the 30s, 10s and 3s conditions, respectively. Besides this, the estimated scores are better calibrated. A combination with logistic regression results in a system with the best calibrated scores.},
  keywords  = {Language recognition, Gaussian back-end, Adaptation, lid},
  URL       = {http://www-tlp.limsi.fr/public/IS090552.PDF}
}

%%%%%%%%% 2008 section

% AERES and GROUP are non-standard fields (ignored by the standard styles) and
% are kept as they are.
@article{PubLIMSI-362,
  author   = {Zweigenbaum, Pierre and Grau, Brigitte and Ligozat, Anne-Laure and Robba, Isabelle and Rosset, Sophie and Tannier, Xavier and Vilnat, Anne},
  title    = {Apports de la linguistique dans les syst{\`e}mes de recherche d'informations pr{\'e}cises},
  AERES    = {ACL},
  GROUP    = {LIMSI,LIR,TLP},
  year     = {2008},
  pages    = {41--62},
  journal  = {Revue Fran{\c{c}}aise de Linguistique Appliqu{\'e}e},
  volume   = {XIII},
  number   = {1},
  keywords = {named entity, question-answering},
}

% NOTE(review): same paper as the entry keyed prosody08boula later in this
% file; consider keeping a single entry.
% The conference days are appended to the month macro; the year lives only in
% the year field so it is not printed twice.
@inproceedings{PubLIMSI-388,
  author    = {Boula De Mareuil, Philippe and Rilliard, Albert and Allauzen, Alexandre},
  title     = {A diachronic study of prosody through {French} audio archives},
  booktitle = {International Conference on Speech Prosody (SP 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP,AA},
  year      = {2008},
  pages     = {531--534},
  month     = may # "~6--9",
  address   = {Campinas, Brazil},
}

@inproceedings{PubLIMSI-440,
  author    = {Dal, G. and Fradin, B. and Plancq, C. and Grabar, N. and Lignon, S. and Namer, F.
and Yvon, Fran{\c{c}}ois and Zweigenbaum, Pierre},
  title     = {Quelques pr{\'e}alables au calcul de la productivit{\'e} des r{\`e}gles constructionnelles et premiers r{\'e}sultats},
  booktitle = {Congr{\`e}s Mondial de Linguistique Fran{\c{c}}aise (CMLF 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP,LIR},
  year      = {2008},
  pages     = {17p},
  month     = jul # "~9--12",
  address   = {Paris, France},
}

@inproceedings{PubLIMSI-472,
  author    = {Devillers, Laurence and Vidrascu, Laurence},
  title     = {Real-life emotions detection on human-human spoken dialogs},
  booktitle = {Information Processing and Management of Uncertainty in Knowledge-Based Systems (IPMU 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {1590--1596},
  month     = jun # "~22--27",
  address   = {Malaga, Spain},
}

% The publisher is Springer, as on the two other entries in this file for the
% same CLEF 2008 volume (LNCS 5706); the volume editors are named in series.
@inproceedings{PubLIMSI-496,
  author    = {Rosset, Sophie and Galibert, Olivier and Bernard, Guillaume and Bilinski, Eric and Adda, Gilles},
  title     = {The {LIMSI} participation to the {QAst} track},
  booktitle = {Workshop of the Cross-Language Evaluation Forum (CLEF 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {289--296},
  publisher = {Springer},
  series    = {Revised selected papers. LNCS 5706, C. Peters et al. (Eds)},
  month     = sep # "~17--19",
  address   = {Aarhus, Denmark},
  keywords  = {question-answering},
}

@inproceedings{PubLIMSI-497,
  author    = {Turmo, J. and Comas, P. and Rosset, Sophie and Lamel, Lori and Moreau, N. and Mostefa, D.},
  title     = {Overview of {QAST} 2008},
  booktitle = {Workshop of the Cross-Language Evaluation Forum (CLEF 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {314--324},
  publisher = {Springer},
  series    = {Revised selected papers, LNCS 5706, C. Peters et al.
(Eds)},
  month     = sep # "~17--19",
  address   = {Aarhus, Denmark},
  keywords  = {question-answering},
}

@inproceedings{PubLIMSI-533,
  author    = {Nemoto, Rena and Vasilescu, Ioana and Adda-Decker, Martine},
  title     = {Speech errors on frequently observed homophones in {French} : perceptual evaluation vs automatic classification},
  booktitle = {International Conference on Language Resources and Evaluation (LREC 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {2189--2195},
  month     = may # "~26--" # jun # "~1",
  address   = {Marrakech, Morocco},
}

@inproceedings{PubLIMSI-773,
  author    = {Rosset, Sophie and Galibert, Olivier and Bernard, Guillaume and Bilinski, Eric and Adda, Gilles},
  title     = {The {LIMSI} multilingual, multitask {QAst} system},
  booktitle = {Workshop of the Cross-Language Evaluation Forum (CLEF 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {480--487},
  publisher = {Springer},
  series    = {Revised Selected Papers. LNCS 5706, C. Peters et al. (Eds)},
  month     = sep # "~17--19",
  address   = {Aarhus, Denmark},
  keywords  = {question-answering},
}

@inproceedings{PubLIMSI-3937,
  author    = {Seppi, Dino and Batliner, A. and Schuller, Bjoern and Steidl, S. and Vogt, T. and Wagner, J. and Devillers, Laurence and Vidrascu, Laurence and Amir, N. and Aharonson, V.},
  title     = {Patterns, prototypes, performance: classifying emotional user states},
  booktitle = {Annual Conference of the International Speech Communication Association (INTERSPEECH 2008)},
  AERES     = {ACTI},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {601--604},
  month     = sep # "~22--26",
  address   = {Brisbane, Australia},
}

% Edited proceedings volume: the four people are its editors, so they go in
% the editor field. A trailing "(editors)" inside a name list is parsed by
% BibTeX as part of the last person's name.
@book{PubLIMSI-474,
  editor    = {Devillers, Laurence and Martin, Jean-Claude and Cowie, R. and Douglas-Cowie, E.},
  title     = {Actes du deuxi{\`e}me workshop ``Corpora for research on emotion and affect'', workshop satellite {LREC} 2008},
  AERES     = {DO},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {115p},
  publisher = {ELRA/European Language Resources Association},
}

@incollection{PubLIMSI-339,
  author    = {d'Alessandro, Christophe and Boula De Mareuil, Philippe and Garcia, M. and Bailly, G. and Morel, M. and Raake, A. and Bechet, F. and V{\'e}ronis, J. and Prudon, R.},
  title     = {La campagne {EvaSy} d'{\'e}valuation de la synth{\`e}se de la parole {\`a} partir du texte},
  booktitle = {L'{\'e}valuation technologique dans le domaine du traitement automatique de la langue : l'exp{\'e}rience du programme technolangue},
  AERES     = {OS},
  GROUP     = {LIMSI,AA,TLP},
  year      = {2008},
  pages     = {183--208},
  publisher = {Herm{\`e}s},
}

@incollection{PubLIMSI-340,
  author    = {Mariani, Joseph-Jean},
  title     = {Pr{\'e}face},
  booktitle = {L'{\'e}valuation des technologies de traitement de la langue : les campagnes Technolangue},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {15--18},
  publisher = {Herm{\`e}s},
}

@incollection{PubLIMSI-485,
  author    = {Barras, Claude},
  title     = {Etude de grands corpus de parole : apports de la transcription automatique},
  booktitle = {Donn{\'e}es orales - Les enjeux de la transcription},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {258--268},
  publisher = {Presses Universitaires de Perpignan},
}

@incollection{PubLIMSI-495,
  author    = {Bonneau-Maynard, H. and Denis, A. and Bechet, F. and Devillers, Laurence and Lefevre, F. and Quignard, M.
and Rosset, Sophie and Villaneau, J.},
  title     = {{MEDIA} : {\'e}valuation de la compr{\'e}hension dans les syst{\`e}mes de dialogue},
  booktitle = {L'{\'e}valuation des technologies de traitement de la langue : les campagnes technolangue (Trait{\'e} IC2, S{\'e}rie Cognition et Traitement de l'Information)},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {209--231},
  publisher = {Herm{\`e}s},
  keywords  = {spoken language understanding, corpus annotation, evaluation},
}

@incollection{PubLIMSI-550,
  author    = {Adda-Decker, Martine},
  title     = {Language Identification},
  booktitle = {Spoken language processing},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {279--320},
  publisher = {Wiley},
  keywords  = {lid},
}

% NOTE(review): both theses lack the "school" field that phdthesis requires;
% add it once the awarding university is confirmed.
@phdthesis{PubLIMSI-347,
  author = {Pellegrini, Thomas},
  title  = {Transcription automatique de langues peu dot{\'e}es},
  AERES  = {TH},
  GROUP  = {LIMSI,TLP},
  year   = {2008},
}

@phdthesis{PubLIMSI-348,
  author = {Vieru-Dimulescu, B.},
  title  = {Caract{\'e}risation et identification d'accents {\'e}trangers en fran{\c{c}}ais},
  AERES  = {TH},
  GROUP  = {LIMSI,TLP},
  year   = {2008},
}

% The next three records carry an institution and no booktitle, so they are
% typed as technical reports (the type that uses "institution") rather than
% conference papers. NOTE(review): presumably student/internship reports --
% confirm, and complete the placeholder institution of PubLIMSI-501.
@techreport{PubLIMSI-398,
  author      = {Layachi, Omar},
  title       = {Syst{\`e}me temps r{\'e}el de reconnaissance d'{\'e}tat affectif dans la voix},
  GROUP       = {LIMSI,TLP},
  year        = {2008},
  institution = {Ecole Polytechnique/LIMSI},
}

@techreport{PubLIMSI-501,
  author      = {Najar, Salma},
  title       = {Apprentissage discriminant pour la traduction de la parole},
  GROUP       = {LIMSI,TLP},
  year        = {2008},
  institution = {Universit{\'e} ??/LIMSI},
}

@techreport{PubLIMSI-532,
  author      = {Marc, A.},
  title       = {Exploitation de {Wikip{\'e}dia} pour l'apprentissage de paraphrases},
  GROUP       = {LIMSI,TLP},
  year        = {2008},
  pages       = {32p},
  institution = {ENSIIE Evry/LIMSI},
}

% No venue, publisher or institution is recorded, so this is typed as misc,
% which has no required fields. NOTE(review): confirm what kind of document
% this is.
@misc{PubLIMSI-516,
  author = {Yvon, Fran{\c{c}}ois},
  title  = {R{\'e}orthographier des sms},
  GROUP  = {LIMSI,TLP},
  year   = {2008},
}

%%%%%%%

@INCOLLECTION{Gauvain08a,
  AUTHOR    = {Jean-Luc Gauvain and Lori Lamel},
  TITLE     = {{Speech recognition
systems}},
  BOOKTITLE = {{Spoken Language Processing}},
  EDITOR    = {J. Mariani},
  AERES     = {OS},
  GROUP     = {LIMSI,TLP},
  year      = {2008},
  pages     = {239--278},
  publisher = {Wiley},
}

% year holds only the four-digit year; the month goes in its own field as a
% standard macro so styles can sort and format the date.
@INPROCEEDINGS{interspeech09nbest,
  author    = {Julien Despres and Petr Fousek and Jean-Luc Gauvain and Sandrine Gay and Yvan Josse and Lori Lamel and Abdel Messaoudi},
  title     = {{Modeling Northern and Southern Varieties of Dutch for STT}},
  booktitle = {Interspeech'09},
  address   = {Brighton, UK},
  year      = 2009,
  month     = sep,
  pages     = {96--99},
  abstract  = {This paper describes how the Northern (NL) and Southern (VL) varieties of Dutch are modeled in the joint Limsi-Vecsys Research speech-to-text transcription systems for broadcast news (BN) and conversational telephone speech (CTS). Using the Spoken Dutch Corpus resources (CGN), systems were developed and evaluated in the 2008 N-Best benchmark. Modeling techniques that are used in our systems for other languages were found to be effective for the Dutch language, however it was also found to be important to have acoustic and language models, and statistical pronunciation generation rules adapted to each variety. This was in particular true for the MLP features which were only effective when trained separately for Dutch and Flemish. The joint submissions obtained the lowest WERs in the benchmark by a significant margin.
},
  keywords  = {speech recognition, Dutch, Flemish, CGN, Nbest, broadcast news, conversational telephone speech, MLP},
  URL       = {http://www-tlp.limsi.fr/public/IS090162.PDF}
}

% Interspeech 2009 papers: four-digit year plus a separate month macro.
@INPROCEEDINGS{interspeech09errors,
  author    = {Ioana Vasilescu and Martine Adda-Decker and Lori Lamel and Pierre Hallé},
  title     = {{A Perceptual Investigation of Speech Transcription Errors Involving Frequent Near-Homophones in French and American English}},
  booktitle = {Interspeech'09},
  address   = {Brighton, UK},
  year      = 2009,
  month     = sep,
  pages     = {144--147},
  abstract  = {This article compares the errors made by automatic speech recognizers to those made by humans for near-homophones in American English and French. This exploratory study focuses on the impact of limited word context and the potential resulting ambiguities for automatic speech recognition (ASR) systems and human listeners. Perceptual experiments using 7-gram chunks centered on incorrect or correct words output by an ASR system, show that humans make significantly more transcription errors on the first type of stimuli, thus highlighting the local ambiguity. The long-term aim of this study is to improve the modeling of such ambiguous items in order to reduce ASR errors.},
  keywords  = {American English, French, ASR, speech perception, speech ambiguity, near-homophones},
  URL       = {http://www-tlp.limsi.fr/public/IS090012.PDF}
}

@INPROCEEDINGS{interspeech09cw,
  author    = {Cécile Woehrling and Philippe Boula de Mareüil and Martine Adda-Decker},
  title     = {{Linguistically-Motivated Automatic Classification of Regional French Varieties}},
  booktitle = {Interspeech'09},
  address   = {Brighton, UK},
  year      = 2009,
  month     = sep,
  pages     = {2183--2186},
  abstract  = {The goal of this study is to automatically differentiate French varieties (standard French and French varieties spoken in the South of France, Alsace, Belgium and Switzerland) by applying a linguistically-motivated approach.
We took advantage of automatic phoneme alignment to measure vowel formants, consonant (de)voicing, pronunciation variants as well as prosodic cues. These features were then used to identify French varieties by applying classification techniques. On large corpora of hundreds of speakers, over 80\% correct identification scores were obtained. The confusions between varieties and the features used (by decision trees) are linguistically grounded.},
  keywords  = {language variety identification, regional French accents, classification},
  URL       = {http://www-tlp.limsi.fr/public/IS090365.PDF}
}

% No abstract or URL is recorded for this entry, so those fields are omitted
% rather than left empty.
@INPROCEEDINGS{acm09,
  AUTHOR    = {Julien Law-To and G. Grefenstette and Jean-Luc Gauvain},
  TITLE     = {{VoxaleadNews: Robust Automatic Segmentation of Video into Browsable Content}},
  BOOKTITLE = {{Yahoo challenge of the Multimedia Grand Challenge 2009 at the ACM Multimedia Conference}},
  PAGES     = {1119--1120},
  ADDRESS   = {Beijing},
  YEAR      = 2009,
  MONTH     = oct,
  keywords  = {Speech recognition, speech-to-text transcription}
}

% booktitle uses the icassp string macro defined at the top of the file.
@INPROCEEDINGS{icassp09luo,
  AUTHOR    = {Jun Luo and Lori Lamel and Jean-Luc Gauvain},
  TITLE     = {{Modeling characters versus words for Mandarin speech recognition}},
  BOOKTITLE = icassp,
  PAGES     = {4325--4328},
  ADDRESS   = {Taipei, Taiwan},
  YEAR      = 2009,
  MONTH     = apr,
  abstract  = {Word based models are widely used in speech recognition since they typically perform well. However, the question of whether it is better to use a word-based or a character-based model warrants being for the Mandarin Chinese language. Since Chinese is written without any spaces or word delimiters, a word segmentation algorithm is applied in a pre-processing step prior to training a word-based language model. Chinese characters carry meaning and speakers are free to combine characters to construct new words. This suggests that character information can also be useful in communication.
This paper explores both word-based and character-based models, and their complementarity. Although word-based modeling is found to outperform character-based modeling, increasing the vocabulary size from 56 k to 160 k words did not lead to a gain in performance. Results are reported for the Gale Mandarin speech-to-text task.},
  keywords  = {Speech recognition, language modeling, Mandarin Chinese, speech-to-text transcription},
  URL       = {http://www-tlp.limsi.fr/public/0004325.pdf}
}

% Three authors, each separated by " and "; without the separator BibTeX would
% read "Claude Barras Jean-Luc Gauvain" as a single person.
@INPROCEEDINGS{icassp09marc,
  AUTHOR    = {Marc Ferras and Claude Barras and Jean-Luc Gauvain},
  TITLE     = {{Lattice-based MLLR for speaker recognition}},
  BOOKTITLE = icassp,
  PAGES     = {4537--4540},
  ADDRESS   = {Taipei, Taiwan},
  YEAR      = 2009,
  MONTH     = apr,
  abstract  = {Maximum-Likelihood Linear Regression (MLLR) transform coefficients have shown to be useful features for text-independent speaker recognition systems. These use MLLR coefficients computed on a Large Vocabulary Continuous Speech Recognition System (LVCSR) as features and Support Vector machines (SVM) classification. However, performance is limited by transcripts, which are often erroneous with high word error rates (WER) for spontaneous telephone speech applications. In this paper, we propose using lattice-based MLLR to overcome this issue. Using word lattices instead of 1-best hypotheses, more hypotheses can be considered for MLLR estimation and, thus, better models are more likely to be used. As opposed to standard MLLR, language model probabilities are taken into account as well. We show how systems using lattice MLLR outperform standard MLLR systems in the Speaker Recognition Evaluation (SRE) 2006. Comparison to other standard acoustic systems is provided as well.},
  keywords  = {Speaker recognition, MLLR, lattice},
  URL       = {http://www-tlp.limsi.fr/public/0004537.pdf}
}

@INPROCEEDINGS{icassp09mfb,
  AUTHOR    = { Mohamed F.
Ben Zeghiba and Jean-Luc Gauvain and Lori Lamel},
  TITLE     = {{Gaussian Backend design for open-set language detection}},
  BOOKTITLE = icassp,
  PAGES     = {4349--4352},
  ADDRESS   = {Taipei, Taiwan},
  YEAR      = 2009,
  MONTH     = apr,
  abstract  = {This paper proposes a new approach to the challenging open-set language detection task. Most state-of-the-art approaches make use of data sources with several out-of-set languages to model such languages. In the proposed approach, no additional data from out-of-set languages is required, only data from the target languages is used. Experiments are conducted using the LRE-05 and the LRE-07 evaluation data sets with the 30s condition. A Cavg of 4.5\% and 3.4\% is obtained on these data sets, respectively. These results are comparable with other reported results.},
  keywords  = {Language recognition, Open-set, Phonotactic approach, Gaussian Backend, Adaptation, lid},
  URL       = {http://www-tlp.limsi.fr/public/0004349.pdf}
}

% No page numbers are recorded for this workshop paper, so the pages field is
% omitted. Words split by PDF line-break hyphens in the abstract are rejoined.
@InProceedings{Langlais08translating,
  author    = {Philippe Langlais and François Yvon and Pierre Zweigenbaum},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Langlais08translating.pdf},
  title     = {Translating Medical Words by Analogy},
  booktitle = {Proceedings of the workshop on Intelligent Data Analysis in bioMedicine and Pharmacology (IDAMAP) 2008},
  year      = 2008,
  address   = {Washington, DC},
  abstract  = {Term translation has become a recurring need in medical informatics. This creates an interest for robust methods which can translate medical words in various languages. We propose a novel, analogy-based method to generate word translations. It relies on a partial bilingual lexicon and solves bilingual analogical equations to create candidate translations. To evaluate the potential of this method, we tested it to translate a set of 1,306 French medical words to English.
It could propose a correct translation for up to 67.9\% of the input words, with a precision of up to 80.2\% depending on the number of selected candidates. We compare it to previous methods including word alignment in parallel corpora and edit distance to a list of words, and show how these methods can complement each other.},
  keywords  = {Analogical Learning, Machine Translation}
}

% COLING 2008 paper.
@InProceedings{Langlais08scalingup,
  author    = {Philippe Langlais and François Yvon},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Langlais08scalingup.pdf},
  title     = {Scaling up analogical learning},
  booktitle = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING 2008)},
  pages     = {49--52},
  year      = 2008,
  address   = {Manchester, UK},
  abstract  = {Recent years have witnessed a growing interest in analogical learning for NLP applications. If the principle of analogical learning is quite simple, it does involve complex steps that seriously limit its applicability. In this study, we investigate different algorithms and data-structure for making analogical learning practical.
We analyze their behavior on a (translation) task whose dimensionality is far larger than the ones usually tackled by analogical learning.},
  keywords  = {Analogical Learning, Algorithmics, Machine Translation}
}

% Accented letters written as LaTeX escapes are wrapped in their own brace
% group so BibTeX treats each as a single letter when sorting and abbreviating.
@InProceedings{Misra08incoherent,
  author    = {Hemant Misra and François Yvon and Olivier Capp{\'e}},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Misra08incoherent.pdf},
  title     = {Using {LDA} to detect semantically incoherent documents},
  booktitle = {Proceedings of Twelfth Conference on Computational Natural Language Learning (CoNLL'2008)},
  pages     = {41--48},
  year      = 2008,
  address   = {Manchester, UK}
}

@InProceedings{Kobus08normalizing,
  author    = {Catherine Kobus and François Yvon and G{\'e}raldine Damnati},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Kobus08normalizing.pdf},
  title     = {Normalizing {SMS}: are two metaphors better than one ?},
  booktitle = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING 2008)},
  pages     = {441--448},
  year      = 2008,
  address   = {Manchester, UK},
  abstract  = {Electronic written texts used in computer-mediated interactions (e-mails, blogs, chats, etc) present major deviations from the norm of the language. This paper presents a comparative study of systems aiming at normalizing the orthography of French SMS messages: after discussing the linguistic peculiarities of these messages, and possible approaches to their automatic normalization, we present, evaluate and contrast two systems, one drawing inspiration from the Machine Translation task; the other using techniques that are commonly used in automatic speech recognition devices.
Combining both approaches, our best normalization system achieves about 11\% Word Error Rate on a test set of about 3,000 unseen messages},
  keywords  = {{SMS}, Finite-State Transducers}
}

@InProceedings{Moreau08robust,
  author    = {Erwan Moreau and François Yvon and Olivier Capp{\'e}},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Moreau08robust.pdf},
  title     = {Robust Similarity Measures for Named Entities Matching},
  booktitle = {Proceedings of the 22nd International Conference on Computational Linguistics (COLING 2008)},
  pages     = {593--600},
  year      = 2008,
  address   = {Manchester, UK},
  abstract  = {Matching coreferent named entities without prior knowledge requires good similarity measures. Soft-TFIDF is a fine-grained measure which performs well in this task. We propose to enhance this kind of metrics, through a generic model in which measures may be mixed, and show experimentally the relevance of this approach.},
  keywords  = {Named Entities, Approximative string matching}
}

% French-language version of the ICML 2008 paper (see the note field). No page
% numbers are recorded, so the pages field is omitted.
@InProceedings{Sokolovska08analyse,
  author    = {Nataliya Sokolovska and Olivier Cappé and François Yvon},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Sokolovska08analyse.pdf},
  title     = {Analyse asymptotique de l'apprentissage semi-supervisé pour les modèles probabilistes discriminants},
  booktitle = {Actes de la Conférence d'Apprentissage (CAP'08)},
  type      = {national},
  year      = 2008,
  address   = {Porquerolles, France},
  note      = {Voir l'article ICML 2008 pour une version plus complète},
  abstract  = {Le but de l'apprentissage semi-supervisé est d'améliorer l'efficacité de l'apprentissage supervisé en utilisant des données non-étiquetées. Cet objectif est difficile à atteindre dans les cas des modèles discriminants. Nous introduisons, dans cet article, une méthode originale et simple pour intégrer des données non étiquetées dans une fonction objectif semi-supervisée. Nous démontrons alors que l'estimateur semi-supervisé correspondant est asymptotiquement optimal.
Le cas de la régression logistique est illustré par des résultats d'expériences.},
  keywords  = {semi-supervised learning, logistic regression}
}

% booktitle uses the Odyssey string macro defined at the top of the file. The
% workshop days are appended to the month macro; no page numbers are recorded.
@INPROCEEDINGS{odyssey08Ferras,
  AUTHOR    = {Marc Ferras and Cheung Chi Leung and Claude Barras and Jean-Luc Gauvain},
  TITLE     = {{MLLR Techniques for Speaker Recognition}},
  BOOKTITLE = Odyssey,
  YEAR      = 2008,
  MONTH     = jan # "~21--24",
  ADDRESS   = {Stellenbosch, South Africa},
  abstract  = {Maximum-Likelihood Linear Regression (MLLR) and Constrained MLLR (CMLLR) have been recently used for feature extraction in speaker recognition. These systems use (C)MLLR transforms as features that are modeled with Support Vector Machines (SVM). This paper evaluates and compares several of these approaches for the NIST Speaker Recognition task. Single CMLLR and up to 4-phonetic-class MLLR transforms are explored using Gaussian Mixture Models (GMM) and large-vocabulary speech recognition Hidden Markov Models (HMM), using both speaker recognition and speech recognition cepstral front-ends and normalizations. Results for the individual systems as well as in combination with two standard cepstral systems are provided.
Relative gains of 3\% and 12\% were obtained when combining the best performing CMLLR-based and MLLR-based systems with two standard cepstral systems, respectively.},
  URL       = {http://www-tlp.limsi.fr/public/Odyssey08Ferras.pdf}
}

% ICML 2008 paper. The proceedings editors go in the standard "editor" field,
% which is the one bibliography styles actually read.
@InProceedings{Sokolovska08asymptotics,
  author    = {Nataliya Sokolovska and Olivier Cappé and François Yvon},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Sokolovska08asymptotics.pdf},
  title     = {The asymptotics of semi-supervised learning in discriminative probabilistic models},
  booktitle = {Proceedings of the international conference on Machine Learning (ICML'08)},
  editor    = {Andrew McCallum and Sam Roweis},
  pages     = {984--991},
  year      = 2008,
  address   = {Helsinki, Finland},
  abstract  = {Semi-supervised learning aims at taking advantage of unlabeled data to improve the efficiency of supervised learning procedures. For discriminative models however, this is a challenging task. In this contribution, we introduce an original methodology for using unlabeled data through the design of a simple semi-supervised objective function. We prove that the corresponding semi-supervised estimator is asymptotically optimal.
The practical consequences of this result are discussed for the case of the logistic regression model.},
  keywords  = {semi-supervised learning, logistic regression}
}

% Accented letters are written as brace-wrapped escapes so BibTeX treats each
% as a single letter when sorting and abbreviating names.
@InProceedings{Dechelotte08limsi,
  author    = {Daniel D{\'e}chelotte and Gilles Adda and Alexandre Allauzen and H{\'e}l{\`e}ne Bonneau-Maynard and Olivier Galibert and Jean-Luc Gauvain and Philippe Langlais and Fran{\c{c}}ois Yvon},
  title     = {{LIMSI's Statistical Translation Systems for {WMT}'08}},
  booktitle = {Proceedings of the Third Workshop on Statistical Machine Translation},
  month     = jun,
  year      = {2008},
  address   = {Columbus, Ohio},
  pages     = {107--110},
  url       = {http://www.aclweb.org/anthology/W/W08/W08-0310}
}

% No page numbers are recorded for this workshop paper, so the pages field is
% omitted.
@InProceedings{Lavergne08detecting,
  author    = {Thomas Lavergne and Tanguy Urvoy and François Yvon},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Lavergne08detecting.pdf},
  title     = {Detecting Fake Content with Relative Entropy Scoring},
  booktitle = {Proceedings of the 2nd International Workshop on Uncovering Plagiarism, Authorship, and Social Software Misuse PAN-08},
  year      = 2008,
  address   = {Patras, Greece},
  abstract  = {How to distinguish natural texts from artificially generated ones ? Fake content is commonly encountered on the Internet, ranging from web scraping to random word salads. Most of this fake content is generated for spam purpose. In this paper, we present two methods to deal with this problem.
The first one uses classical language models, while the second one is a novel approach using short range information between words.},
  keywords  = {Language Modeling, Web Spam Filtering}
}

% The compound surname is braced so BibTeX keeps "Boula de Mareüil" as one last name.
@inproceedings{prosody08boula,
  author    = {Philippe {Boula de Mareüil} and Albert Rilliard and Alexandre Allauzen},
  title     = {{A diachronic study of prosody through French audio archives}},
  booktitle = {Speech Prosody},
  month     = may # "~6--9",
  year      = 2008,
  address   = {Campinas, Brazil},
  pages     = {531--534},
  abstract  = {This study makes use of advances in automatic speech processing to analyse French audiovisual archives. A 10-hour corpus covering five decades of broadcast news is investigated from the angle of prosody evolution. Two prosodic features are explored: word-initial stress (especially in the case of proper names) and penultimate vowel lengthening (especially penultimate nasal vowels before a pause), which may give an impression of emphatic style. Our measurements suggest that the following features as well as mean pitch have decreased since the forties: pitch rise and vowel duration associated with initial stress, and prepausal penultimate lengthening. In the meantime, the onsets of supposedly stressed initial syllables have become longer while speech rate has not changed. This puzzling outcome raises interesting questions for research on French prosody.}
}

@inproceedings{interspeech08leung,
  author    = {Cheung-Chi Leung and Marc Ferras and Claude Barras and Jean-Luc Gauvain},
  title     = {{Comparing Prosodic Models for Speaker Recognition}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  pages     = {1945--1948},
  abstract  = {Recently, speaker verification systems using different kinds of prosodic features have been proposed.
Although it has been shown that most of these speaker verification systems can improve system performance using score-level fusion with state-of-the-art cepstral-based systems, a systematic comparison of the prosodic modelling algorithms used in these prosodic systems has not yet been performed. This motivated us to review the proposed prosodic modelling algorithms and compare them using a common experimental condition. These experiments explored different approaches in the sampling/segmentation of prosodic contours and the selection of prosodic features. They show that simple prosodic systems with features extracted from fixed-size contour segments, without knowledge of phone/pseudo-syllable level information, still provide significant performance improvement when fused with a state-of-the-art cepstral-based system. Moreover, some prosodic systems are shown to be complementary to each other. Fusion of these systems with the cepstral-based system can provide further performance improvement on the speaker verification task.},
  keywords  = {Speaker recognition, prosodic features},
  url       = {http://www-tlp.limsi.fr/public/IS080922.PDF}
}

@inproceedings{interspeech08luo,
  author    = {Jun Luo and Cheung-Chi Leung and Marc Ferras and Claude Barras},
  title     = {{Parallelized factor analysis and feature normalization for automatic speaker verification}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  abstract  = {Factor analysis (FA) is one of the key advances presented in recent speaker verification evaluations. This technique is able to successfully remove session variability effects and it is currently used in many state-of-the-art automatic speaker verification systems. This paper addresses several practical issues in using an FA model in order to speed up model training and to achieve good performance. A parallelized training algorithm as well as maximum-likelihood estimation are proposed for fast training.
The front-end feature normalization techniques are also investigated in the context of FA model. We demonstrate that factor analysis is very robust, and can be successfully applied to various kinds of feature normalization. Moreover, the proposed parallelized MLE implementation speeds up the training procedure from several days to several hours without sacrificing the performance.},
  pages     = {1409--1412},
  keywords  = {speaker verification, factor analysis, maximum-likelihood estimation, parallelization},
  url       = {http://www-tlp.limsi.fr/public/IS080967.PDF}
}

@inproceedings{interspeech08fousek,
  author    = {Petr Fousek and Lori Lamel and Jean-Luc Gauvain},
  title     = {{Transcribing Broadcast Data Using MLP Features}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  pages     = {1433--1436},
  abstract  = {This paper describes incorporating discriminative features from a multi layer perceptron (MLP) into a state-of-the-art Arabic broadcast data transcription system based on cepstral features. The MLP features are based on a recently proposed Bottle-Neck architecture with long-term warped LP-TRAP speech representation at the input. It is shown that the previously reported improvements on a development Arabic transcription system carry through to a full system at a state-of-the-art level. SAT, CMLLR and MLLR adaptation techniques are shown to be useful for both MLP and combined features, though to a lesser degree than for PLPs. Without adaptation, MLP features obtain superior performance to cepstral features in all test conditions, and with adaptation both feature sets give comparable results. Combining the features, either by feature concatenation or system hypotheses, gives significant gains.
Gains from MMI model training seem to be additive to the gain coming from discriminative MLP features.},
  keywords  = {MLP, LP-TRAP, broadcast transcription, bottleneck, discriminative training, mlpfeat},
  url       = {http://www-tlp.limsi.fr/public/IS080833.PDF}
}

@inproceedings{interspeech08arabic,
  author    = {Lori Lamel and Abdel Messaoudi and Jean-Luc Gauvain},
  title     = {{Investigating Morphological Decomposition for Transcription of Arabic Broadcast News and Broadcast Conversation Data}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  pages     = {1429--1432},
  abstract  = {One of the challenges of Arabic speech recognition is to deal with the huge lexical variety. Morphological decomposition has been proposed to address this problem by increasing lexical coverage, thereby reducing errors that are due to words that are unknown to the system. In our previous attempts to develop an Arabic speech-to-text (STT) transcription system with morphological decomposition, an increase in word error rate of about 2\% absolute was observed relative to a comparable word based system. Based on an error analysis and a comparison of our approach with that of other sites, two modifications were made. The first modification was to not decompose the most frequent words; and the second to not decompose the prefix 'Al' for words starting with a solar consonant since due to assimilation with the following consonant, deletion of the prefix was one of the most frequent errors.
Comparable recognition performance was achieved using word-based and morphologically decomposed language models, and since the errors made by the systems are different, combining the two gave a performance gain.},
  keywords  = {Morphological decomposition, Arabic speech recognition},
  url       = {http://www-tlp.limsi.fr/public/IS080551.PDF}
}

@inproceedings{interspeech08lre,
  author    = {Mohamed Faouzi BenZeghiba and Jean-Luc Gauvain and Lori Lamel},
  title     = {{Context-dependent phone models and models adaptation for phonotactic language recognition}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  pages     = {313--316},
  abstract  = {The performance of a PPRLM language recognition system depends on the quality and the consistency of phone decoders. To improve the performance of the decoders, this paper investigates the use of context-dependent instead of context-independent phone models, and the use of CMLLR for model adaptation. This paper also discusses several improvements to the LIMSI 2007 NIST LRE system, including the use of a 4-gram language model, score calibration and fusion using the FoCal Multi-class toolkit (with large development data) and better decoding parameters such as phone insertion penalty. The improved system is evaluated on the NIST LRE-2005 and the LRE-2007 evaluation data sets.
Despite its simplicity, the system achieves for the 30s condition a Cavg of 2.4\% and 1.6\% on these data sets, respectively.},
  keywords  = {context-dependent, Phone lattice, CMLLR adaptation, language recognition, lid},
  url       = {http://www-tlp.limsi.fr/public/IS081117.PDF}
}

% The compound surname is braced so BibTeX keeps "Boula de Mareüil" as one last name.
@inproceedings{interspeech08cw,
  author    = {Cécile Woehrling and Philippe {Boula de Mareüil} and Martine Adda-Decker and Lori Lamel},
  title     = {{A corpus-based prosodic study of Alsatian, Belgian and Swiss French}},
  booktitle = {InterSpeech'08},
  address   = {Brisbane, Australia},
  month     = sep # "~22--26",
  year      = 2008,
  pages     = {780--783},
  abstract  = {The object of this paper is a prosodic study of the French language as it is spoken in Alsace, Belgium and Switzerland, also compared with standard French through large corpora (over 100 hours) of scripted and spontaneous speech. The data were segmented into phones by automatic alignment; pitch values were extracted and averaged over segments. Two features are addressed: initial stress (through pitch and duration correlates) and penultimate lengthening. Different patterns enable us to distinguish the three varieties under investigation. Swiss speakers exhibit pitch rise and polysyllabic word onset lengthening in clitic-nonclitic sequences, while Alsatians tend to lengthen the initial vowel of nonclitic words. Belgians show prepausal penultimate lengthening whereas the Swiss tend to lengthen the last two prepausal vowels.
},
  keywords  = {regional accents, French prosody},
  url       = {http://www-tlp.limsi.fr/public/IS080905.PDF}
}

% DOI landing page of tsd08fousek: http://dx.doi.org/10.1007/978-3-540-87391-4_39
@inproceedings{tsd08fousek,
  author    = {Petr Fousek and Lori Lamel and Jean-Luc Gauvain},
  title     = {{On the Use of MLP Features for Broadcast News Transcription}},
  booktitle = {Text, Speech and Dialogue},
  series    = {Lecture Notes in Computer Science},
  volume    = {5246},
  pages     = {303--310},
  year      = 2008,
  publisher = {Springer Verlag, Berlin/Heidelberg},
  doi       = {10.1007/978-3-540-87391-4_39},
  url       = {http://www-tlp.limsi.fr/public/08-mlp_features.pdf},
  keywords  = {Multi-Layer Perceptron, speech recognition, mlpfeat}
}

% The url previously repeated the "10.1007/" DOI prefix twice and could not resolve.
@inproceedings{gotal08lamel,
  author    = {Lori Lamel and Jean-Luc Gauvain},
  title     = {Speech processing for audio indexing},
  booktitle = {Proceedings of the 6th International Conference on Natural Language Processing, GoTAL 2008 - Advances in Natural Language Processing},
  series    = {Lecture Notes in Computer Science},
  volume    = {5221},
  pages     = {4--15},
  year      = 2008,
  publisher = {Springer Verlag, Berlin/Heidelberg},
  url       = {http://dx.doi.org/10.1007/978-3-540-85287-2_2},
  keywords  = {speech recognition}
}

% NOTE(review): the year field used to read "September 22-26, 2008", i.e. the
% InterSpeech'08 dates found in the neighbouring entries. Only the year is kept
% here; add the ICASSP month once it has been confirmed.
@inproceedings{icassp08fousek,
  author    = {Frantisek Grezl and Petr Fousek},
  title     = {{Optimizing Bottle-Neck Features for LVCSR}},
  booktitle = {ICASSP'08},
  address   = {Las Vegas, Nevada, USA},
  year      = 2008,
  pages     = {4729--4732},
  abstract  = {This work continues in development of the recently proposed Bottle-Neck features for ASR. A five-layers MLP used in bottleneck feature extraction allows to obtain arbitrary feature size without dimensionality reduction by transforms, independently on the MLP training targets. The MLP topology number and sizes of layers, suitable training targets, the impact of output feature transforms, the need of delta features, and the dimensionality of the final feature vector are studied with respect to the best ASR result.
Optimized features are employed in three LVCSR tasks: Arabic broadcast news, English conversational telephone speech and English meetings. Improvements over standard cepstral features and probabilistic MLP features are shown for different tasks and different neural net input representations. A significant improvement is observed when phoneme MLP training targets are replaced by phoneme states and when delta features are added.},
  keywords  = {Bottle-neck, MLP structure, features, LVCSR, mlpfeat},
  url       = {http://www-tlp.limsi.fr/public/0004729.pdf}
}

@inproceedings{Langlais08analogical,
  author    = {Philippe Langlais and François Yvon and Pierre Zweigenbaum},
  title     = {Analogical translation of medical words in different languages},
  booktitle = {Proceedings of the 6th International Conference on Natural Language Processing, GoTAL 2008 - Advances in Natural Language Processing},
  series    = {Lecture Notes in Computer Science},
  volume    = {5221},
  pages     = {284--295},
  year      = 2008,
  publisher = {Springer Verlag, Berlin/Heidelberg},
  url       = {http://dx.doi.org/10.1007/978-3-540-85287-2_27},
  keywords  = {Analogical learning, Terminology, Machine Translation}
}

@inproceedings{Kobus08transcrire,
  author    = {Catherine Kobus and François Yvon and G{\'e}raldine Damnati},
  title     = {Transcrire les {SMS} comme on reconnaît la parole},
  booktitle = {Actes de la Conférence sur le Traitement Automatique des Langues (TALN'08)},
  pages     = {128--138},
  type      = {national},
  year      = 2008,
  address   = {Avignon, France},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Kobus08transcrire.pdf},
  abstract  = {This paper presents a system aiming at normalizing the orthography of SMS messages, using techniques that are commonly used in automatic speech recognition devices.
We describe a baseline system and various evolutions, which are shown to significantly improve the quality of the output normalizations.},
  keywords  = {{SMS}, Finite-State Transducers}
}

@inproceedings{Moreau08appariemment,
  author    = {Erwan Moreau and François Yvon and Olivier Capp{\'e}},
  title     = {Appariement d'entités nommées coréférentes : combinaisons de mesures de similarité par apprentissage supervisé},
  booktitle = {Actes de la Conférence sur le Traitement Automatique des Langues (TALN'08)},
  pages     = {488--497},
  year      = 2008,
  type      = {national},
  address   = {Avignon, France},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Moreau08appariemment.pdf},
  abstract  = {Matching named entities consists in grouping the different forms under which an entity may occur. Textual similarity measures are the usual tools for this task. We propose to combine several measures in order to improve the performance. We show the relevance of supervised learning in this objective through experiences with two corpora, especially in the case of the C4.5 algorithm.},
  keywords  = {Named Entities, Approximative string matching}
}

@inproceedings{lienard08,
  author    = {Jean-Sylvain Lienard and Claude Barras and François Signol},
  title     = {Using sets of combs to control pitch estimation errors},
  booktitle = {Acoustics'08 Paris},
  address   = {Paris, France},
  month     = jun,
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/Combs_Ac08.pdf}
}

@inproceedings{signol08,
  author    = {François Signol and Claude Barras and Jean-Sylvain Lienard},
  title     = {Evaluation of the Pitch Estimation Algorithms in the monopitch and multipitch cases},
  booktitle = {Acoustics'08 Paris},
  address   = {Paris, France},
  month     = jun,
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/Eval_Ac08.pdf}
}

@inproceedings{jep08rn,
  author    = {Rena Nemoto and Ioana Vasilescu and Martine Adda-Decker},
  title     = {{Mots fréquents homophones en français : analyse acoustique et classification automatique par fouille
de données}},
  booktitle = jep,
  year      = 2008,
  month     = jun # "~9--13",
  address   = {Avignon},
  url       = {http://www-tlp.limsi.fr/public/105_jep_1632.pdf}
}

@inproceedings{jep08cg,
  author    = {Cédric Gendrot and Martine Adda-Decker and Jacqueline Vaissière},
  title     = {{Les voyelles /i/ et /y/ du français : focalisation et variations formantiques}},
  booktitle = jep,
  year      = 2008,
  month     = jun # "~9--13",
  address   = {Avignon},
  url       = {http://www-tlp.limsi.fr/public/050_jep_1663.pdf}
}

% The surname was hyphenated ("Boula-de-Mareüil"); it is now spelled normally and
% braced so BibTeX keeps it as one last name.
@inproceedings{jep08cw,
  author    = {Cécile Woehrling and Philippe {Boula de Mareüil} and Martine Adda-Decker},
  title     = {{Aspects prosodiques du français parlé en Alsace, Belgique et Suisse}},
  booktitle = jep,
  year      = 2008,
  month     = jun # "~9--13",
  address   = {Avignon},
  url       = {http://www-tlp.limsi.fr/public/003_jep_1586.pdf}
}

@inproceedings{jep08pbm,
  author    = {Philippe {Boula de Mareüil} and Albert Rilliard and Alexandre Allauzen},
  title     = {{Étude diachronique de l'accent initial au travers d'archives audio}},
  booktitle = jep,
  year      = 2008,
  month     = jun # "~9--13",
  address   = {Avignon},
  url       = {http://www-tlp.limsi.fr/public/104_jep_1621.pdf}
}

@inproceedings{jep08aa,
  author    = {Alexandre Allauzen},
  title     = {{Modèles discriminants pour la prédiction d'erreur dans les réseaux de confusion}},
  booktitle = jep,
  year      = 2008,
  month     = jun # "~9--13",
  address   = {Avignon},
  url       = {http://www-tlp.limsi.fr/public/087_jep_1604.pdf}
}

@inproceedings{nbest08,
  author    = {Julien Despres and Petr Fousek and Jean-Luc Gauvain and Sandrine Gay and Yvan Josse and Lori Lamel and Abdel Messaoudi},
  title     = {{The LIMSI-Vecsys Research Systems for N-Best 2008}},
  booktitle = {N-Best: North- and South-Dutch Benchmark Evaluation of Speech recognition Technology workshop},
  month     = sep,
  address   = {Soesterberg, NL},
  year      = {2008},
  abstract  = {This document describes the speech recognizers jointly submitted by the LIMSI and Vecsys Research to the N-Best 2008 evaluation.
The aim of this evaluation was to perform automatic speech recognition (ASR) for the Dutch language. Northern Dutch and Southern Dutch (also known as Dutch - NL, and Flemish - VL) have been processed with two different data types per accent (broadcast news - BN, and conversational telephone speech - CTS). The speech recognizers use multiple decoding passes with models (lexicon, acoustic models, language models) trained for the four different transcription tasks: BN-NL, BN-VL, CTS-NL and CTS-VL. Four primary systems (one for each accent-domain task) have been trained for the primary training condition and the unlimited decoding. The primary submission is also a less than 10xRT contrastive submission. Four contrastive systems have a processing time of 1xRT. The case-sensitive word error rates (WER) of the primary LIMSI/Vecsys Research systems on the N-Best development data are 9.5\% for BN-NL, 8.7\% for BN-VL, 31.6\% for CTS-NL and 41.9\% for CTS-VL.},
  url       = {http://www-tlp.limsi.fr/public/nbest2008_description.pdf}
}

@inproceedings{qast08,
  author    = {J. Turmo and P. Comas and L. Lamel and S. Rosset and N. Moreau and D.
Mostefa},
  title     = {{Overview of QAST 2008 - Question Answering on Speech Transcriptions}},
  booktitle = {CLEF 2008 workshop},
  month     = sep,
  address   = {Aarhus, Denmark},
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/turmoCLEF2008-QASToverview.pdf},
  keywords  = {question-answering}
}

% ELRA is recorded as publisher: in the editor field it was parsed as a person's name.
% The conference days move from the free-form date field into month.
@inproceedings{ALLAUZEN08.856,
  author    = {Alexandre Allauzen and Hélène Bonneau-Maynard},
  title     = {Training and Evaluation of POS Taggers on the French MULTITAG Corpus},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/856_paper.pdf},
  language  = {english}
}

@inproceedings{Moreau08semiautomatic,
  author    = {Erwan Moreau and François Yvon and Olivier Capp{\'e}},
  title     = {Semi-automatic labeling of (coreferent) named entities: an experimental study},
  booktitle = {Proceedings of the LREC'08 Workshop on ``Resources and Evaluation for Identity Matching, Entity Resolution and Entity Management''},
  year      = 2008,
  address   = {Marrakech, Morocco},
  url       = {http://www.limsi.fr/Individu/yvon/publications/sources/Moreau08semiautomatic.pdf},
  abstract  = {In this paper, we investigate the problem of matching coreferent named entities extracted from text collections in a robust way: our long-term goal is to build similarity methods without (or with the minimum amount of) prior knowledge. In this framework, string similarity measures are the main tool at our disposal.
Here we focus on the problem of evaluating such a task, especially in finding a methodology to label the data in a semi-automatic way.}
}

% LREC'08 entries: ELRA is recorded as publisher (in the editor field it was parsed
% as a person's name) and the conference days are carried by month.
@inproceedings{TONEY08.825,
  author    = {Dave Toney and Sophie Rosset and Aurélien Max and Olivier Galibert and Eric Bilinski},
  title     = {{An Evaluation of Spoken and Textual Interaction in the RITEL Interactive Question Answering System}},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/825_paper.pdf},
  language  = {english},
  keywords  = {dialog system, evaluation}
}

@inproceedings{DEVILLERS08.322,
  author    = {Laurence Devillers and Jean-Claude Martin},
  title     = {Coding Emotional Events in Audiovisual Corpora},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/322_paper.pdf},
  language  = {english}
}

@inproceedings{GARNIERRIZET08.652,
  author    = {Martine Garnier-Rizet and Gilles Adda and Frederik Cailliau and Jean-Luc Gauvain and Sylvie Guillemin-Lanne and Lori Lamel and Stephan Vanni and Claire Waast-Richard},
  title     = {CallSurf: Automatic Transcription, Indexing and Structuration of Call Center Conversational Speech for Knowledge Extraction and Query by Content},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/652_paper.pdf},
  language  = {english}
}

@inproceedings{ADDADECKER08.855,
  author    = {Martine Adda-Decker and Thomas Pellegrini and Eric Bilinski and Gilles Adda},
  title     =
{Developments of Letzebuergesch Resources for Automatic Speech Processing and Linguistic Studies},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/855_paper.pdf},
  language  = {english}
}

% LREC'08 entries: ELRA is recorded as publisher (in the editor field it was parsed
% as a person's name) and the conference days are carried by month.
@inproceedings{NEMOTO08.554,
  author    = {Rena Nemoto and Ioana Vasilescu and Martine Adda-Decker},
  title     = {Speech Errors on Frequently Observed Homophones in French: Perceptual Evaluation vs Automatic Classification},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/554_paper.pdf},
  language  = {english}
}

@inproceedings{LAMEL08.511,
  author    = {Lori Lamel and Sophie Rosset and Christelle Ayache and Djamel Mostefa and Jordi Turmo and Pere Comas},
  title     = {{Question Answering on Speech Transcriptions: the QAST evaluation in CLEF}},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/511_paper.pdf},
  language  = {english},
  keywords  = {question-answering}
}

@inproceedings{ADDADECKER08.788,
  author    = {Martine Adda-Decker and Claude Barras and Gilles Adda and Patrick Paroubek and Philippe {Boula de Mareüil} and Benoit Habert},
  title     = {Annotation and analysis of overlapping speech in political interviews},
  booktitle = {Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC'08)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Marrakech, Morocco},
  month     = may # "~28--30",
  year      = {2008},
  url
= {http://www-tlp.limsi.fr/public/788_paper.pdf},
  language  = {english}
}

@inproceedings{Pellegrini08sltu,
  author    = {Thomas Pellegrini and Lori Lamel},
  title     = {{Are Audio or textual training data more important for ASR in less-represented languages?}},
  booktitle = {1st International Workshop on Spoken Languages Technologies for Under-resourced languages (SLTU)},
  pages     = {2--6},
  address   = {Hanoi, Vietnam},
  month     = may # "~5--7",
  year      = {2008},
  url       = {http://www-tlp.limsi.fr/public/pellegrini_sltu_08.pdf}
}

@article{clavel08,
  author    = {C. Clavel and I. Vasilescu and L. Devillers and G. Richard and T. Ehrette},
  title     = {{Fear-type recognition for future audio-based surveillance systems}},
  journal   = spcom,
  volume    = {50},
  number    = {6},
  month     = jun,
  pages     = {487--503},
  year      = 2008,
  url       = {http://www-tlp.limsi.fr/public/2008_403_5_1375184251.pdf}
}

@article{tal_boula08,
  author    = {Philippe {Boula de Mareüil} and Bianca Vieru-Dimulescu and Cécile Woehrling and Martine Adda-Decker},
  title     = {{Accents étrangers et régionaux en français}},
  journal   = TAL,
  volume    = {49},
  number    = {3},
  year      = 2008,
  url       = {http://www-tlp.limsi.fr/public/2008_tal_Boula-et-al.pdf}
}

% NOTE(review): the url below is identical to the one in tal_boula08, which looks
% like a copy-paste slip; confirm the correct PDF for this article.
@article{tal_mad08,
  author    = {Martine Adda-Decker and Cédric Gendrot and Noël Nguyen},
  title     = {{Contributions du traitement automatique de la parole à l'étude des voyelles orales du français}},
  journal   = TAL,
  volume    = {49},
  year      = 2008,
  url       = {http://www-tlp.limsi.fr/public/2008_tal_Boula-et-al.pdf}
}

@article{tal_iv08,
  author    = {Ioana Vasilescu and Martine Adda-Decker and Rena Nemoto},
  title     = {{Caractéristiques acoustiques et prosodiques des hésitations vocaliques dans trois langues}},
  journal   = TAL,
  volume    = {49},
  year      = 2008,
  url       = {http://www-tlp.limsi.fr/public/2008_tal_Vasilescu-et-al.pdf}
}

@inproceedings{EGC08,
  author    = {Rena Nemoto and Martine
Adda-Decker and Ioana Vasilescu},
  title     = {{Fouille de données audio pour la classification automatique de mots homophones}},
  booktitle = {EGC 2008},
  month     = jan # "~29--" # feb # "~1",
  address   = {Sophia-Antipolis},
  year      = {2008},
  pages     = {445--456},
  url       = {http://www-tlp.limsi.fr/public/egc08RN+entete.pdf}
}

@article{Rosset08A,
  author    = {Sophie Rosset and Delphine Tribout and Lori Lamel},
  title     = {{Multi-level Information and Automatic dialog Act Detection in Human-Human Spoken Dialogs}},
  journal   = spcom,
  volume    = {50},
  number    = {1},
  year      = 2008,
  url       = {http://www-tlp.limsi.fr/public/},
  keywords  = {dialog act, human human spoken dialog}
}

@inproceedings{qast07,
  author    = {J. Turmo and P. Comas and C. Ayache and D. Mostefa and S. Rosset and L. Lamel},
  title     = {{Overview of the QAST 2007}},
  booktitle = {Working Notes for the CLEF 2007 Workshop},
  month     = sep,
  address   = {Budapest, Hungary},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/turmoCLEF2007-QASToverview.pdf},
  keywords  = {question-answering}
}

@inproceedings{limsiqast07,
  author    = {S. Rosset and O. Galibert and G. Adda and E. Bilinski},
  title     = {{The LIMSI participation to the QAst track}},
  booktitle = {Working Notes for the CLEF 2007 Workshop},
  month     = sep,
  address   = {Budapest, Hungary},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/rossetCLEF2007.pdf},
  keywords  = {question-answering}
}

% The venue is stored in address, like the other entries in this file.
@inproceedings{qast_asru07,
  author    = {S. Rosset and O. Galibert and G. Adda and E. Bilinski},
  title     = {{The LIMSI Qast systems: comparison between human and automatic rules generation for question-answering on speech transcriptions}},
  booktitle = {IEEE ASRU},
  month     = dec,
  address   = {Kyoto, Japan},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/Rosset07_et_al2.pdf},
  keywords  = {question-answering}
}

@inproceedings{srsl07,
  author    = {J. Villaneau and S. Rosset and O.
Galibert},
  title     = {{Semantic relations for an oral and interactive question-answering system}},
  booktitle = {Workshop on the semantic representation of spoken language - SRSL7},
  month     = nov,
  address   = {Salamanca, Spain},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/final_srsl07.pdf},
  keywords  = {semantic relations, information extraction}
}

@inproceedings{jsl-interspeech07,
  author    = {J.-S. Liénard and F. Signol and C. Barras},
  title     = {{Speech fundamental frequency estimation using the Alternate Comb}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070976.PDF}
}

@inproceedings{ritel-interspeech07,
  author    = {B. van Schooten and S. Rosset and O. Galibert and A. Max and R. op den Akker and G. Illouz},
  title     = {{Handling speech input in the Ritel QA dialogue system}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070803.PDF},
  keywords  = {spoken dialog system, ritel}
}

% NOTE(review): the url below is identical to the one in ritel-interspeech07, which
% looks like a copy-paste slip; confirm the correct PDF for this paper.
@inproceedings{Allauzen07,
  author    = {Alexandre Allauzen},
  title     = {{Error detection in confusion network}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070803.PDF}
}

@inproceedings{Lamel07,
  author    = {Lori Lamel and Abdel Messaoudi and Jean-Luc Gauvain},
  title     = {{Improved Acoustic Modeling for Transcribing Arabic Broadcast Data}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070512.PDF}
}

@inproceedings{Woehrling07B,
  author    = {C. Woehrling and P.
{Boula de Mareüil}},
  title     = {{Comparing Praat and Snack formant measurements on two large corpora of northern and southern French}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070688.PDF}
}

@inproceedings{Pellegrini07,
  author    = {Thomas Pellegrini and Lori Lamel},
  title     = {{Using phonetic features in unsupervised word decompounding for ASR with application to a less-represented language}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070319.PDF}
}

@inproceedings{Dechelotte07is,
  author    = {Daniel D{\'e}chelotte and Holger Schwenk and Gilles Adda and Jean-Luc Gauvain},
  title     = {{Improved Machine Translation of Speech-to-Text outputs}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS071156.PDF}
}

@inproceedings{Devillers07Batliner,
  author    = {Anton Batliner and Stefan Steidl and Björn Schuller and Dino Seppi and Thurid Vogt and Laurence Devillers and Laurence Vidrascu and Noam Amir and Loic Kessous and Vered Aharonson},
  title     = {{The impact of F0 extraction errors on the classification of prominence and emotion}},
  booktitle = {ICPhS 2007.
16th International Congress of Phonetic Science},
  address   = {Saarbrücken, Germany},
  month     = aug # "~6--10",
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/icphs07_1168.pdf}
}

@inproceedings{Devillers07A,
  author    = {Björn Schuller and Anton Batliner and Dino Seppi and Stefan Steidl and Thurid Vogt and Johannes Wagner and Laurence Devillers and Laurence Vidrascu and Noam Amir and Loic Kessous and Vered Aharonson},
  title     = {{The Relevance of Feature Type for the Automatic Classification of Emotional User States: Low Level Descriptors and Functionals}},
  booktitle = {InterSpeech'07},
  address   = {Antwerp, Belgium},
  month     = aug,
  year      = 2007,
  url       = {http://www-tlp.limsi.fr/public/IS070399.PDF}
}

@incollection{Devillers-vidrascuSC07,
  author    = {L. Devillers and L. Vidrascu},
  title     = {{Emotion recognition}},
  booktitle = {Speaker characterization},
  year      = {2007},
  editor    = {Christian Müller and Susanne Schötz},
  publisher = {Springer-Verlag}
}

% Devillers, L., Vidrascu, Laurence, Real-life emotions detection on human-human medical call center interactions, MAVEBA 2007. 5th International Workshop on Models and Analysis of Vocal Emissions for Biomedical Applications. 2007. Florence, Italy, December 13-15, 2007. ??.

@inproceedings{Devillers-vidrascu07,
  author    = {L. Devillers and L. Vidrascu},
  title     = {{Positive and Negative emotional states behind the laugh in spontaneous spoken dialogs}},
  booktitle = {ICPHS 2007 workshop Laughter},
  address   = {Saarbrücken, Germany},
  pages     = {37--40},
  url       = {http://www-tlp.limsi.fr/public/},
  year      = {2007}
}

@inproceedings{vidrascu07,
  author    = {L. Vidrascu and L. Devillers},
  title     = {{Five emotion classes detection in real-world call center data: the use of various types of paralinguistic features}},
  booktitle = {International workshop on Paralinguistic Speech - between models and data, ParaLing},
  year      = {2007}
}

@inproceedings{Devillers-zara07,
  author    = {A. Zara and V. Maffiolo and J-C. Martin and L.
Devillers},
  title     = {{Collection and Annotation of a Corpus of Human-Human Multimodal Interactions: Emotion and Others Anthropomorphic Characteristics}},
  booktitle = {ACII},
  year      = {2007}
}

% Author lists must separate every name with a single " and "; a doubled "and" or a comma between two people yields bogus names.
@inproceedings{Devillers-schroeder07,
  author    = {M. Schroeder and L. Devillers and K. Karpouzis and J-C. Martin and C. Pelachaud and Ch. Peter and H. Pirker and B. Schuller and J. Tao and I. Wilson},
  title     = {{What should a generic emotion markup language be able to represent?}},
  booktitle = {ACII},
  address   = {Lisbon, Portugal},
  pages     = {440--451},
  year      = {2007}
}

@inproceedings{Devillers-douglas07,
  author    = {E. Douglas-Cowie and R. Cowie and Sneddon and Cox and Lowry and McRorie and J.-C. Martin and L. Devillers and S. Abrilian and A. Batliner},
  title     = {{The HUMANE Database: addressing the needs of the affective computing community}},
  booktitle = {ACII},
  address   = {Lisbon, Portugal},
  pages     = {488--500},
  month     = sep # "~12--14,",
  year      = {2007}
}

@inproceedings{Boula07A,
  author    = {Philippe Boula de Mareüil and Martine Adda-Decker and Cécile Woehrling},
  title     = {{Analysis of oral and nasal vowel realisation in northern and southern French varieties}},
  booktitle = {ICPhS 2007. 16th International Congress of Phonetic Science},
  address   = {Saarbrücken, Germany},
  month     = aug # "~6--10,",
  year      = {2007},
  pages     = {2221--2224},
  url       = {http://www-tlp.limsi.fr/public/icphs07_1240.pdf}
}

@inproceedings{Vieru07C,
  author    = {B. Vieru-Dimulescu and P. Boula de Mareüil and M. Adda-Decker},
  title     = {{Characterizing non-native French Accents using automatic alignment}},
  booktitle = {16th International Congress of Phonetic Science, ICPhS},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  pages     = {2217--2220},
  url       = {http://www-tlp.limsi.fr/public/VieruBoulaMadda_Icphs07.pdf}
}

@inproceedings{Vieru07B,
  author    = {B. Vieru-Dimulescu and P. Boula de Mareüil and M.
Adda-Decker},
  title     = {{Identification of foreign-accented French using data-mining techniques}},
  booktitle = {International workshop on Paralinguistic Speech - between models and data, ParaLing 2007},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/VieruBoulaMadda_ParaLing07.pdf}
}

@inproceedings{Adda2BarrasBoulaHabertParoubek07,
  author    = {Gilles Adda and Martine Adda-Decker and Claude Barras and Philippe Boula de Mareüil and Benoît Habert and Patrick Paroubek},
  title     = {{Speech Overlap and Interplay with Disfluencies in Political Interviews}},
  booktitle = {International workshop on Paralinguistic Speech - between models and data, ParaLing 2007},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  pages     = {41--46},
  url       = {http://www-tlp.limsi.fr/public/paraling07A2BBHP.pdf}
}

@inproceedings{Vieru07A,
  author    = {B. Vieru-Dimulescu and P. Boula de Mareüil},
  title     = {{Identification de 6 accents étrangers en français utilisant des techniques de fouilles de données}},
  booktitle = {7èmes Journées Jeunes Chercheurs en Parole},
  month     = jul,
  address   = {Paris},
  year      = {2007},
  pages     = {152--155},
  url       = {http://www-tlp.limsi.fr/public/Vieru_rjcp07.pdf}
}

% NOTE(review): the url below is only the public directory, no file name — confirm the intended document.
@inproceedings{Woehrling07A,
  author    = {C. Woehrling and P. Boula de Mareüil},
  title     = {{Comparaison entre l'extraction de formants par Praat et Snack sur deux grands corpus de français du nord et du sud}},
  booktitle = {7èmes Journées Jeunes Chercheurs en Parole},
  month     = jul,
  address   = {Paris},
  year      = {2007},
  pages     = {160--163},
  url       = {http://www-tlp.limsi.fr/public/}
}

@INPROCEEDINGS{PBM07A, AUTHOR = { P.
Boula de Mareüil},
  title     = {{Traitement du schwa : de la synthèse à l'alignement automatique}},
  booktitle = {5èmes Journées d'Études Linguistiques de Nantes},
  month     = jun,
  address   = {Nantes},
  year      = {2007},
  pages     = {181--189},
  url       = {http://www-tlp.limsi.fr/public/jel2007PBM.pdf}
}

@inproceedings{MADDA07E,
  author    = {Martine Adda-Decker},
  title     = {{Problèmes posés par le schwa en reconnaissance et en alignement automatiques de la parole}},
  booktitle = {5èmes Journées d'Études Linguistiques de Nantes},
  month     = jun,
  address   = {Nantes},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/jel2007-MartineAdda.pdf}
}

% ICPhS 2007 papers: the venue city is spelled Saarbrücken, as in the other ICPhS entries of this file.
@inproceedings{MADDA07D,
  author    = {Pierre Hallé and Martine Adda-Decker},
  title     = {{Voicing assimilation in journalistic speech}},
  booktitle = {ICPhS},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  pages     = {493--496},
  url       = {http://www-tlp.limsi.fr/public/icphs07_1385phmad.pdf}
}

@inproceedings{MADDA07C,
  author    = {Martine Adda-Decker and Pierre Hallé},
  title     = {{Bayesian framework for voicing alternation \& assimilation studies on large corpora in French}},
  booktitle = {ICPhS},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  pages     = {613--616},
  url       = {http://www-tlp.limsi.fr/public/icphs07-1562madph.pdf}
}

@inproceedings{MADDA07B,
  author    = {Cédric Gendrot and Martine Adda-Decker},
  title     = {{Impact of duration and vowel inventory size on formant values of oral vowels: an automated formant analysis from eight languages}},
  booktitle = {ICPhS},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  pages     = {1417--1420},
  url       = {http://www-tlp.limsi.fr/public/icphs07_1481_cgmad.pdf}
}

@inproceedings{Vasilescu07A,
  author    = {Ioana Vasilescu and Rena Nemoto and Martine Adda-Decker},
  title     = {{Vocalic Hesitations vs Vocalic Systems: A Cross-Language Comparison}},
  booktitle = {ICPhS},
  month     = aug,
  address   = {Saarbrücken},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/icphs2007ivrnmafinal.pdf}
}

@ARTICLE{MADDA07A,
author = {Martine Adda-Decker},
  title     = {{Corpus pour la transcription automatique de l'oral}},
  journal   = {{Revue Française de Linguistique Appliquée}},
  volume    = {XII},
  number    = {1},
  pages     = {71--84},
  month     = jun,
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/RFLA-Adda-Decker-v2.pdf}
}

% Publication status ("to appear") belongs in note, not inside the booktitle text.
@inproceedings{RT07asr,
  author    = {Lori Lamel and Eric Bilinski and Jean-Luc Gauvain and Gilles Adda and Claude Barras and Xuan Zhu},
  title     = {{The LIMSI RT07 Lecture Transcription System}},
  booktitle = {{Lecture Notes in Computer Science}},
  publisher = sv,
  editor    = {S. Renals and S. Bengio and J. Fiscus},
  month     = may,
  address   = {Bethesda, MD},
  year      = {2007},
  note      = {to appear},
  url       = {http://www-tlp.limsi.fr/public/LIMSI_rt07asr_final.pdf}
}

@inproceedings{RT07diar,
  author    = {Xuan Zhu and Claude Barras and Lori Lamel and Jean-Luc Gauvain},
  title     = {{Multi-Stage Speaker Diarization for Conference and Lecture Meetings}},
  booktitle = {{Lecture Notes in Computer Science}},
  publisher = sv,
  editor    = {S. Renals and S. Bengio and J. Fiscus},
  month     = may,
  address   = {Bethesda, MD},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/LIMSI_rt07diar.pdf}
}

@inproceedings{Clear07SID,
  author    = {Claude Barras and Xuan Zhu and Cheung-Chi Leung and Jean-Luc Gauvain and Lori Lamel},
  title     = {{The CLEAR'07 LIMSI System for Acoustic Speaker Identification in Seminars}},
  booktitle = {{Lecture Notes in Computer Science, Proc. CLEAR'07 Evaluation Campaign and Workshop - Classification of Events, Activities and Relationships}},
  publisher = sv,
  EDITOR = { R.
Stiefelhagen},
  month     = may,
  address   = {Baltimore},
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/clear07_barras_draft.pdf}
}

% ICASSP 2007 papers; months use the standard macros so that styles can abbreviate or localise them.
@inproceedings{Jennequin07A,
  author    = {Nicolas Jennequin and Jean-Luc Gauvain},
  title     = {{Modeling Duration via Lattice Rescoring}},
  booktitle = icassp,
  pages     = {641--644},
  address   = {Honolulu, Hawaii},
  year      = {2007},
  month     = apr,
  url       = {http://www-tlp.limsi.fr/public/0400641.pdf}
}

@inproceedings{Lamel07A,
  author    = {Lori Lamel and Jean-Luc Gauvain and Gilles Adda and Claude Barras and Eric Bilinski and Olivier Galibert and Agusti Pujol and Holger Schwenk and Xuan Zhu},
  title     = {{The LIMSI 2006 TC-STAR EPPS Transcription Systems}},
  booktitle = icassp,
  pages     = {997--1000},
  address   = {Honolulu, Hawaii},
  year      = {2007},
  month     = apr,
  url       = {http://www-tlp.limsi.fr/public/0400997.pdf}
}

@inproceedings{Ferras07A,
  author    = {Marc Ferras and Cheung-Chi Leung and Claude Barras and Jean-Luc Gauvain},
  title     = {{Constrained MLLR for Speaker Recognition}},
  booktitle = icassp,
  pages     = {53--56},
  address   = {Honolulu, Hawaii},
  year      = {2007},
  month     = apr,
  url       = {http://www-tlp.limsi.fr/public/0400053.pdf}
}

@inproceedings{Gales07,
  author    = {M.J.F. Gales and X. Liu and R. Sinha and P.C. Woodland and K. Yu and S. Matsoukas and T. Ng and K. Nguyen and L. Nguyen and J-L Gauvain and L. Lamel and A. Messaoudi},
  title     = {{Speech Recognition System Combination for Machine Translation}},
  booktitle = icassp,
  pages     = {1277--1280},
  address   = {Honolulu, Hawaii},
  year      = {2007},
  month     = apr,
  url       = {http://www-tlp.limsi.fr/public/0401277.pdf}
}

@inproceedings{Clavel07a,
  author    = {C. Clavel and L. Devillers and G. Richard and I. Vasilescu and T.
Ehrette},
  title     = {{Detection and Analysis of Abnormal Situations Through Fear-Type Acoustic Manifestations}},
  booktitle = icassp,
  pages     = {21--24},
  address   = {Honolulu, Hawaii},
  year      = {2007},
  month     = apr,
  url       = {http://www-tlp.limsi.fr/public/0400021.pdf}
}

@inproceedings{Schwenk:2007:taln,
  author    = {Holger Schwenk and Daniel Déchelotte and Hélène Bonneau-Maynard and Alexandre Allauzen},
  title     = {Modèles statistiques enrichis par la syntaxe pour la traduction automatique},
  booktitle = taln,
  address   = {Toulouse},
  month     = jun,
  year      = {2007},
  pages     = {253--262}
}

% The third author's name was fused with the following "and"; the conference days ride along in the month field.
@inproceedings{Maynard:2007:SSST,
  author    = {Hélène Bonneau-Maynard and Alexandre Allauzen and Daniel Déchelotte and Holger Schwenk},
  title     = {{Combining Morphosyntactic Enriched Representation with $n$-best Reranking in Statistical Translation}},
  booktitle = {{HLT/NAACL workshop on Syntax and Structure in Statistical Translation}},
  address   = {Rochester},
  month     = apr # "~22--27,",
  year      = {2007},
  pages     = {65--71}
}

@article{Matusov07combination,
  author    = {Evgeny Matusov and Gregor Leusch and Rafael E. Banchs and Nicola Bertoldi and Daniel Déchelotte and Marcello Federico and Muntsin Kolss and Young-Suk Lee and José B. Mariño and Matthias Paulik and Salim Roukos and Holger Schwenk and Hermann Ney},
  title     = {System combination for machine translation of spoken and written language},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  year      = {2007},
  volume    = {16},
  number    = {7},
  pages     = {1222--1237},
  astext    = {Evgeny Matusov, Gregor Leusch, Rafael E. Banchs, Nicola Bertoldi, Daniel Déchelotte, Marcello Federico, Muntsin Kolss, Young-Suk Lee, José B. Mariño, Matthias Paulik, Salim Roukos, Holger Schwenk, and Hermann Ney.
IEEE Transactions on Audio, Speech, and Language Processing, 16(7):1222-1237, 2007.} }

% Day ranges are appended to the month macro; year stays purely numeric; "in press" is a note, not a page range.
@inproceedings{Daniel:2007:mtsummit,
  author    = {Daniel Déchelotte and Holger Schwenk and Hélène Bonneau-Maynard and Alexandre Allauzen and Gilles Adda},
  title     = {A state-of-the-art Statistical Machine Translation System based on {Moses}},
  booktitle = {MT Summit},
  address   = {Copenhagen},
  month     = sep # "~10--14,",
  year      = {2007},
  pages     = {127--133}
}

@inproceedings{Schwenk:2007:iwslt,
  author    = {Patrik Lambert and Marta R. Costa-jussá and Josep M. Crego and Maxim Khalilov and José B. Mariño and Rafael E. Banchs and José A.R. Fonollosa and Holger Schwenk},
  title     = {The {TALP} {Ngram}-based {SMT} System for {IWSLT} 2007},
  booktitle = {International Workshop on Spoken Language Translation (IWSLT)},
  month     = oct # "~15--16,",
  address   = {Trento},
  year      = {2007},
  note      = {in press},
  categ     = {International_Conference},
  topics    = {Machine_translation}
}

% NOTE(review): booktitle macro is ACL but the url file name suggests EMNLP-CoNLL 2007 — confirm the venue.
@inproceedings{Schwenk:2007:acl,
  author    = {Holger Schwenk and Marta R. Costa-jussa and Jose A. R. Fonollosa},
  title     = {{Smooth Bilingual N-gram Translation}},
  booktitle = acl,
  pages     = {430--438},
  address   = {Prague},
  month     = jun,
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/EMNLP-CoNLL200745.pdf}
}

@inproceedings{Schwenk:2007:,
  author    = {Holger Schwenk},
  title     = {{Building a statistical machine translation system for French using the Europarl corpus}},
  booktitle = {ACL Workshop on Statistical Machine Translation},
  address   = {Prague},
  month     = jun,
  year      = {2007},
  url       = {http://www-tlp.limsi.fr/public/WMT25.pdf}
}

@article{Schwenk:2007:csl,
  author    = {Holger Schwenk},
  title     = {Continuous Space Language Models},
  journal   = csl,
  year      = {2007},
  volume    = {21},
  pages     = {492--518}
}

@inproceedings{ritel-interspeech06,
  author    = {S. Rosset and O. Galibert and G. Illouz and A.
Max},
  title     = {{Integrating Spoken Dialog and Question Answering: the Ritel Project}},
  booktitle = {InterSpeech'06},
  address   = {Pittsburgh, USA},
  year      = {2006},
  url       = {http://www-tlp.limsi.fr/public/Ritel_is06.pdf},
  keywords  = {spoken dialog system}
}

@article{tal_ritel06,
  author    = {S. Rosset and O. Galibert and G. Illouz and A. Max},
  title     = {{Interaction et recherche d'information : le projet Ritel}},
  journal   = TAL,
  volume    = {46},
  number    = {46-3},
  year      = {2006},
  url       = {http://www-tlp.limsi.fr/public/Tal_ritel.pdf},
  keywords  = {spoken dialog system}
}

% The author list must not be wrapped in an extra brace pair: that makes both people one indivisible name.
% NOTE(review): the page range was reversed (274-267); 247--267 is the assumed intended range — confirm against the journal.
@article{PBM06,
  author    = {P. Boula de Mareüil and B. Vieru-Dimulescu},
  title     = {{The contribution of prosody to the perception of foreign accent}},
  journal   = {{Phonetica}},
  volume    = {63},
  pages     = {247--267},
  year      = {2006},
  url       = {http://www-tlp.limsi.fr/public/Boula_Vieru_phonetica06.pdf}
}

@incollection{MaddaLamel06,
  author    = {Martine Adda-Decker and Lori Lamel},
  title     = {{Dictionaries for Multilingual Speech Processing}},
  booktitle = {{Multilingual Speech Processing}},
  editor    = {Katrin Kirchhoff and Tanja Schultz},
  publisher = {Elsevier},
  year      = {2006},
  chapter   = {5},
  keywords  = {multilingualasr},
  pages     = {305--322}
}

@ARTICLE{Woehrling06B, author = {{C. Woehrling and P.
Boula de Mareüil}},
  title     = {{Identification d'accents régionaux en français : perception et analyse}},
  journal   = {Parole},
  volume    = {37},
  pages     = {25--65},
  year      = {2006},
  url       = {http://www-tlp.limsi.fr/public/parole06.pdf}
}

% Empty url/pages placeholders are dropped: an empty field is equivalent to a missing one and only triggers tool warnings.
@article{RT06ears,
  author    = {Spyros Matsoukas and Jean-Luc Gauvain and Gilles Adda and Thomas Colthurst and Chia-Lin Kao and Owen Kimball and Lori Lamel and Fabrice Lefevre and Jeff Ma and John Makhoul and Long Nguyen and Rohit Prasad and Richard Schwartz and Holger Schwenk and Bing Xiang},
  title     = {{Advances in Transcription of Broadcast News and Conversational Telephone Speech within the Combined EARS BBN/LIMSI System}},
  journal   = {IEEE Transactions on Audio, Speech and Language Processing},
  volume    = {14},
  number    = {5},
  pages     = {1541--1556},
  year      = {2006}
}

@article{Barras06B,
  author    = {Claude Barras and Xuan Zhu and Sylvain Meignier and Jean-Luc Gauvain},
  title     = {{Multistage Speaker Diarization of Broadcast News}},
  journal   = {IEEE Transactions on Audio, Speech and Language Processing},
  volume    = {14},
  number    = {5},
  pages     = {1505--1512},
  year      = {2006}
}

@inproceedings{Xuan06A,
  author    = {Xuan Zhu and Claude Barras and Lori Lamel and Jean-Luc Gauvain},
  title     = {{Speaker Diarization: from Broadcast News to Lectures}},
  booktitle = {{Lecture Notes in Computer Science, Proc. CLEAR'06 Evaluation Campaign and Workshop - Classification of Events, Activities and Relationships}},
  publisher = sv,
  editor    = {R. Stiefelhagen},
  month     = may,
  address   = {Washington},
  year      = {2006},
  pages     = {396--406}
}

@inproceedings{Lamel06B,
  author    = {Lori Lamel and Eric Bilinski and Gilles Adda and Jean-Luc Gauvain and Holger Schwenk},
  title     = {{The LIMSI RT06s Lecture Transcription System}},
  booktitle = {Lecture Notes in Computer Science, Vol. 4299 - Proc. 3rd Joint Workshop on Multimodal Interaction and Related Machine Learning Algorithms (MLMI 2006)},
  publisher = sv,
  editor    = {S. Renals and S. Bengio and J.
Fiscus},
  month     = may,
  address   = {Washington},
  year      = {2006}
}

@inproceedings{Schwenk:2006:IWSLT,
  author    = {Holger Schwenk and Marta R. Costa-juss\`a and Jos\'e A. R. Fonollosa},
  title     = {Continuous Space Language Models for the {IWSLT} 2006 Task},
  booktitle = iwslt,
  month     = nov,
  year      = {2006},
  pages     = {166--173}
}

@inproceedings{Barras06A,
  author    = {Claude Barras and Xuan Zhu and Jean-Luc Gauvain and Lori Lamel},
  title     = {{The CLEAR'06 LIMSI Acoustic Speaker Identification System for CHIL Seminars}},
  booktitle = {Lecture Notes in Computer Science - CLEAR'06 Evaluation Campaign and Workshop - Classification of Events, Activities and Relationships},
  publisher = sv,
  editor    = {R. Stiefelhagen},
  month     = apr,
  address   = {Southampton},
  year      = {2006}
}

% Interspeech/ICSLP 2006 papers (booktitle comes from the icslp string macro).
@inproceedings{Pellegrini06C,
  author    = {Thomas Pellegrini and Lori Lamel},
  title     = {{Investigating Automatic Decomposition for ASR in Less Represented Languages}},
  booktitle = icslp,
  year      = {2006},
  month     = sep,
  address   = {Pittsburgh},
  pages     = {285--288},
  url       = {http://www-tlp.limsi.fr/public/IS061776.PDF}
}

@inproceedings{Woehrling06A,
  author    = {Cécile Woehrling and Philippe Boula de Mareüil},
  title     = {{Identification of regional accents in French: perception and categorization}},
  booktitle = icslp,
  year      = {2006},
  month     = sep,
  address   = {Pittsburgh},
  pages     = {1511--1514},
  url       = {http://www-tlp.limsi.fr/public/IS061261.PDF}
}

@inproceedings{Vieru06A,
  author    = {Bianca Vieru-Dimulescu and Philippe Boula de Mareüil},
  title     = {{Perceptual identification and phonetic analysis of 6 foreign accents in French}},
  booktitle = icslp,
  year      = {2006},
  month     = sep,
  address   = {Pittsburgh},
  pages     = {441--444},
  url       = {http://www-tlp.limsi.fr/public/IS061251.PDF}
}

@inproceedings{Devil06D,
  author    = {Laurence Devillers and Laurence Vidrascu},
  title     = {{Real-life emotions detection with lexical and paralinguistic cues on Human-Human call center dialogs}},
  BOOKTITLE
= icslp,
  year      = {2006},
  month     = sep,
  address   = {Pittsburgh},
  pages     = {801--804},
  url       = {http://www-tlp.limsi.fr/public/IS061636.PDF}
}

@article{AIAI06,
  author    = {J.-C. Martin and G. Caridakis and L. Devillers and K. Karpouzis and S. Abrilian},
  title     = {{Manual Annotation and Automatic Image Processing of Multimodal Emotional Behaviours: Validating the Annotation of TV Interviews}},
  journal   = {{Special issue of the Journal on Personal and Ubiquitous Computing on Emerging Multimodal Interfaces, AIAI}},
  year      = {2006},
  publisher = {Springer}
}

% The first word of the title was mis-encoded (a stray byte inside the accent escape); restored to the intended accent command.
@article{Devillers-RIA06,
  author    = {L. Devillers and L. Vidrascu},
  title     = {{Repr\'esentation et D\'etection des \'emotions dans des donn\'ees issues de dialogues enregistr\'es dans des centres d'appels : des \'emotions mixtes dans des donn\'ees r\'eelles}},
  booktitle = {Interaction Emotionnelle},
  journal   = {Revue Des Sciences et Technologies de l'Information, s\'erie Revue d'Intelligence Artificielle},
  year      = {2006},
  volume    = {20},
  number    = {4--5}
}

% The field name is pages (not page, which styles ignore); the two editors are separated by "and".
@article{Martin-HUMA06,
  author    = {J-C. Martin and R. Niewiadomski and L. Devillers and S. Buisine and C. Pelachaud},
  title     = {{Multimodal Complex Emotions: Gesture Expressivity And Blended Facial Expressions Interviews}},
  journal   = {Special issue of the Journal of Humanoid Robotics},
  year      = {2006},
  volume    = {3},
  number    = {3},
  month     = sep,
  pages     = {269--291},
  editor    = {C. Pelachaud and L. Canamero}
}

@article{Martin-RIA06,
  author    = {J-C. Martin and S. Abrilian and L. Devillers and M. Lamolle and M. Mancini and C. Pelachaud},
  title     = {{Du corpus vidéo à l'agent expressif : utilisation des différents niveaux de représentations multimodales et émotionnelles}},
  booktitle = {Interaction Emotionnelle},
  journal   = {Revue Des Sciences et Technologies de l'Information, série Revue d'Intelligence Artificielle},
  year      = {2006},
  volume    = {20},
  number    = {4--5}
}

@article{Clavel06,
  author    = {C. Clavel and I. Vasilescu and G. Richard and L.
Devillers},
  title     = {{De la construction du corpus émotionnel au système de détection : le point de vue applicatif de la surveillance dans les lieux publics}},
  booktitle = {Interaction Emotionnelle},
  journal   = {Revue Des Sciences et Technologies de l'Information, série Revue d'Intelligence Artificielle},
  year      = {2006},
  volume    = {20},
  number    = {4--5}
}

% An edited workshop volume (editors only, no authors): typed as proceedings; the venue description is kept as a note.
% NOTE(review): assumes this record denotes the whole workshop proceedings — confirm.
@proceedings{Devillers-WSLREC06,
  editor    = {L. Devillers and J-C. Martin and R. Cowie and E. Douglas-Cowie and A. Batliner},
  title     = {{Corpora for Research on Emotion and Affect}},
  note      = {Actes du workshop WP09, workshop satellite de LREC 2006},
  year      = {2006}
}

@inproceedings{Devillers-panelLREC06,
  author    = {N. Campbell and L. Devillers and E. Douglas-Cowie and V. Aubergé and A. Batliner and J. Tao},
  title     = {{Resources for the Processing of Affect in Interactions}},
  booktitle = {LREC},
  year      = {2006}
}

@inproceedings{Batliner06,
  author    = {A. Batliner and S. Steidl and B. Schuller and D. Seppi and K. Laskowski and T. Vogt and L. Devillers and L. Vidrascu and N. Amir and L. Kessous and V. Aharonson},
  title     = {{CEICES: Combining Efforts for Improving automatic Classification of Emotional user States: a « forced co-operation » initiative}},
  booktitle = {IS-LTC, fifth slovenian and first international language technologies conference},
  year      = {2006}
}

@inproceedings{Devil-buisine06,
  author    = {S. Buisine and R. Niewiadomski and J-C. Martin and L. Devillers and C. Pelachaud},
  title     = {{Perception of blended emotions: from Video corpus to expressive Agent}},
  booktitle = {IVA},
  year      = {2006}
}

@inproceedings{clavel-sp06,
  author    = {C. Clavel and I. Vasilescu and G. Richard and L. Devillers},
  title     = {{Voice and Unvoiced content of fear-type emotions in the Safe Corpus}},
  booktitle = {Speech Prosody},
  year      = {2006}
}

@inproceedings{Devillers-waka06,
  author    = {S. Buisine and R. Niewiadomski and JC. Martin and L. Devillers and C.
Pelachaud},
  title     = {{Perception d'Emotions Mélangées : Du Corpus Vidéo à l'Agent Expressif}},
  booktitle = {Waka},
  year      = {2006}
}

@inproceedings{Devillers-vidrascuWSLREC06,
  author    = {L. Vidrascu and L. Devillers},
  title     = {{Real-life emotions in naturalistic data recorded in a medical call center}},
  booktitle = {LREC'06 workshop: Emotion},
  year      = {2006}
}

@inproceedings{Devillers-clavelWSLREC06,
  author    = {C. Clavel and I. Vasilescu and L. Devillers and G. Richard and T. Ehrette},
  title     = {{The SAFE Corpus: illustrating extreme emotions in dynamic situation}},
  booktitle = {LREC'06 workshop: Emotion},
  year      = {2006}
}

% Booktitles below come from the Odyssey and jep string macros; months use the standard macros.
@inproceedings{Dong06B,
  author    = {Dong Zhu and Martine Adda-Decker},
  title     = {{Language identification using lattice-based phonotactic and syllabotactic approaches}},
  booktitle = Odyssey,
  year      = {2006},
  month     = jun,
  address   = {San Juan},
  keywords  = {lid},
  url       = {http://www-tlp.limsi.fr/public/zhu_odyssey.pdf}
}

@inproceedings{Dong06,
  author    = {Dong Zhu and Martine Adda-Decker},
  title     = {{Identification automatique des langues : combinaison d'approches phonotactiques à base de treillis de phones et de syllabes}},
  booktitle = jep,
  year      = {2006},
  month     = jun,
  address   = {Dinard},
  url       = {http://www-tlp.limsi.fr/public/jep2006_zhu_madda_versionfinale.pdf}
}

@inproceedings{Vieru06B,
  author    = {B. Vieru-Dimulescu and P.
Boula de Mareüil },
  title     = {{Identification perceptive d'accents étrangers en français}},
  booktitle = jep,
  year      = {2006},
  month     = jun,
  address   = {Dinard},
  pages     = {163--166},
  url       = {http://www-tlp.limsi.fr/public/jep06bv.pdf}
}

@inproceedings{Pellegrini06B,
  author    = {Thomas Pellegrini and Lori Lamel},
  title     = {{Experiences de transcription automatique d'une langue rare}},
  booktitle = jep,
  year      = {2006},
  month     = jun,
  address   = {Dinard},
  url       = {http://www-tlp.limsi.fr/public/jep06pellegrini.pdf}
}

@inproceedings{Gauvain06B,
  author    = {Fabrice Lefevre and Jean-Luc Gauvain},
  title     = {{Transformation lineaire discriminante pour l'apprentissage des HMM a analyse factorielle}},
  booktitle = jep,
  year      = {2006},
  month     = jun,
  address   = {Dinard},
  url       = {http://www-tlp.limsi.fr/public/jep06lefevre.pdf}
}

@inproceedings{MAdda06A,
  author    = {Martine Adda-Decker},
  title     = {{De la reconnaissance automatique de la parole a l'analyse linguistique de corpus oraux}},
  booktitle = jep,
  year      = {2006},
  month     = jun,
  address   = {Dinard},
  url       = {http://www-tlp.limsi.fr/public/jep06madda.pdf}
}

% NOTE(review): the TC-STAR 2006 entries carry a placeholder url with no file name (".pdf") — confirm the real documents.
@inproceedings{Lamel06A,
  author    = {Lori Lamel and Jean-Luc Gauvain and Gilles Adda and Claude Barras and Eric Bilinski and Olivier Galibert and Agusti Pujol and Holger Schwenk and Xuan Zhu},
  title     = {{The LIMSI 2006 TC-STAR Transcription Systems}},
  booktitle = tcstar06,
  year      = {2006},
  month     = jun,
  address   = {Barcelona},
  pages     = {123--128},
  url       = {http://www-tlp.limsi.fr/public/.pdf}
}

@inproceedings{Jennequin06A,
  author    = {Nicolas Jennequin and Jean-Luc Gauvain},
  title     = {{Lattice Rescoring Experiments with Duration Models}},
  booktitle = tcstar06,
  year      = {2006},
  month     = jun,
  address   = {Barcelona},
  pages     = {155--158},
  url       = {http://www-tlp.limsi.fr/public/.pdf}
}

@inproceedings{Dechelotte06B,
  author    = {Daniel Dechelotte and Holger Schwenk and Jean-Luc Gauvain},
  title     = {{The 2006 LIMSI Statistical Machine Translation System for TC-STAR}},
  booktitle = tcstar06,
  year      = {2006},
  month     = jun,
  ADDRESS =
{Barcelona},
  pages     = {25--30},
  url       = {http://www-tlp.limsi.fr/public/.pdf}
}

% LREC 2006 papers (booktitle from the lrec string macro); page ranges use the double hyphen.
@inproceedings{Garcia06A,
  author    = {Marie-Neige Garcia and Christophe d'Alessandro and G. Bailly and Philippe Boula de Mareüil and M. Morel},
  title     = {{A joint prosody evaluation of French text-to-speech systems: the EVASY Prosody campaign}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {307--310},
  url       = {http://www-tlp.limsi.fr/public/lrec06prosody.pdf}
}

@inproceedings{Alessandro06A,
  author    = {P. Boula de Mareüil and C. d'Alessandro and Alexandre Raake and G. Bailly and M.-N. Garcia and M. Morel},
  title     = {{A joint intelligibility evaluation of French text to speech systems: the EVASY/SUS campaign}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {2034--2037},
  url       = {http://www-tlp.limsi.fr/public/lrec06evaSy.pdf}
}

@inproceedings{Pellegrini06A,
  author    = {Thomas Pellegrini and Lori Lamel},
  title     = {{Experimental detection of vowel pronunciation variants in Amharic}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {1005--1008},
  url       = {http://www-tlp.limsi.fr/public/lrec06TP.pdf}
}

@inproceedings{Devil06C,
  author    = {Laurence Devillers and Jean-Claude Martin and Sarkis Abrilian},
  title     = {{Annotation of Emotions in Real-Life Video Interviews: Variability between Coders}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {2004--2009},
  url       = {http://www-tlp.limsi.fr/public/lrec06abrilian.pdf}
}

@inproceedings{Devil06B,
  author    = {J.C. Martin and G. Caridakis and L. Devillers and K. Karpouzis and S. Abrilian},
  title     = {{Manual Annotation and Image Processing of Multimodal Emotional Behaviours in TV Interviews}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {1127--1132},
  url       = {http://www-tlp.limsi.fr/public/lrec06JCM.pdf}
}

@inproceedings{Devil06A,
  author    = {L. Devillers and R. Cowie and J-C. Martin and E.
Douglas-Cowie},
  title     = {{Real life emotions in French and English TV video clips: an integrated annotation protocol combining continuous and discrete approaches}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {1105--1110},
  url       = {http://www-tlp.limsi.fr/public/lrec06devil.pdf}
}

@inproceedings{Clavel06A,
  author    = {Chloe Clavel and Ioana Vasilescu and Laurence Devillers and Thibaut Ehrette and Gaël Richard},
  title     = {{The SAFE Corpus: fear-type emotions detection for surveillance applications}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {1099--1104},
  url       = {http://www-tlp.limsi.fr/public/lrec06clavel.pdf}
}

@inproceedings{Maynard06A,
  author    = {H. Bonneau-Maynard and C. Ayache and F. Bechet and A. Denis and A. Kuhn and F. Lefevre and D. Mostefa and M. Quignard and S. Rosset and C. Servan and J. Villaneau},
  title     = {{Results of the French Evalda-Media evaluation campaign for literal understanding}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {2054--2059},
  url       = {http://www-tlp.limsi.fr/public/lrec06media.pdf},
  keywords  = {spoken language understanding, evaluation, corpus annotation}
}

@inproceedings{Rosset06A,
  author    = {Sophie Rosset and Sandra Petel},
  title     = {{The Ritel Corpus - An annotated Human-Machine open-domain question answering spoken dialog corpus}},
  booktitle = lrec,
  year      = {2006},
  month     = may,
  address   = {Genoa},
  pages     = {1640--1643},
  url       = {http://www-tlp.limsi.fr/public/lrec06SR.pdf},
  keywords  = {dialog system, corpus annotation}
}

% ICASSP page numbers carry a volume prefix ("I-"); only the range separator is normalised.
@inproceedings{Lefevre06A,
  author    = {Fabrice Lefevre and Jean-Luc Gauvain},
  title     = {{Discriminant Initialization for Factor Analyzed HMM Training}},
  booktitle = icassp,
  year      = {2006},
  month     = may,
  address   = {Toulouse},
  pages     = {I-285--288},
  url       = {http://www-tlp.limsi.fr/public/icassp06FL.pdf}
}

@INPROCEEDINGS{Messaoudi06A, AUTHOR = {Abdel.
Messaoudi and Jean-Luc Gauvain and Lori Lamel},
  title     = {{Arabic Broadcast News Transcription using a One Million Word Vocalized Vocabulary}},
  booktitle = icassp,
  year      = {2006},
  month     = may,
  address   = {Toulouse},
  pages     = {I-1093--1096},
  url       = {http://www-tlp.limsi.fr/public/icassp06arabic.pdf}
}

@inproceedings{Gauvain06A,
  author    = {Christopher White and Izhak Shafran and Jean-Luc Gauvain},
  title     = {{Discriminative Classifiers for Language Recognition}},
  booktitle = icassp,
  year      = {2006},
  month     = may,
  address   = {Toulouse},
  pages     = {I-213--216},
  keywords  = {lid},
  url       = {http://www-tlp.limsi.fr/public/icassp06LID.pdf}
}

@inproceedings{Dechelotte06A,
  author    = {Daniel Dechelotte and Holger Schwenk and Jean-Luc Gauvain},
  title     = {{Transcription et traduction de débats parlementaires}},
  booktitle = {Reconnaissance des Formes et Intelligence Artificielle},
  year      = {2006},
  month     = jan,
  address   = {Tours}
}

% A space is required between the initial and the surname, otherwise the whole token is parsed as the last name.
@inproceedings{Maynard05B,
  author    = {H. Bonneau-Maynard and F. Lefevre},
  title     = {{A 2+1-Level Stochastic Understanding Model}},
  booktitle = ieeeasr,
  month     = nov,
  address   = {San Juan, Puerto Rico},
  year      = {2005},
  url       = {http://www-tlp.limsi.fr/public/hbm_asru2005.pdf}
}

@inproceedings{Galibert05B,
  author    = {Olivier Galibert and Gabriel Illouz and Sophie Rosset},
  title     = {{Ritel+ : dialogue homme-machine a domaine ouvert}},
  booktitle = {Proceedings of TALN},
  month     = jun,
  address   = {Dourdan},
  year      = {2005},
  pages     = {439--444},
  url       = {http://www-tlp.limsi.fr/public/ritel_taln05.pdf},
  keywords  = {spoken dialog system, ritel}
}

@inproceedings{Rosset05D,
  author    = {Sophie Rosset and Olivier Galibert and Gabriel Illouz},
  title     = {{Human-Computer Dialog System in an Open Domain}},
  booktitle = {Workshop on Multimodal Interaction and Related Machine Learning Algorithms (MLMI)},
  month     = jul,
  address   = {Edinburgh},
  year      = {2005},
  url       = {http://www-tlp.limsi.fr/public/ritel_mlmi05.pdf},
  keywords  = {spoken dialog system, ritel}
}

@INPROCEEDINGS{Rosset05C,
AUTHOR = {Sophie Rosset and Delphine Tribout},
  title     = {{Detection automatique d'actes de dialogue}},
  booktitle = {Proceedings of TALN},
  month     = jun,
  address   = {Dourdan},
  year      = {2005},
  pages     = {283--292},
  url       = {http://www-tlp.limsi.fr/public/taln05_AD.pdf},
  keywords  = {dialog act detection, human human dialog}
}

@inproceedings{Boula05A,
  author    = {Philippe Boula de Mareuil and Benoit Habert and Frederique Benard and Martine Adda-Decker and Claude Barras and Gilles Adda and Patrick Paroubek},
  title     = {{A quantitative study of disfluencies in French broadcast interviews}},
  booktitle = {Proceedings of Disfluency In Spontaneous Speech (DISS) Workshop},
  month     = sep,
  address   = {Aix-en-Provence},
  year      = {2005},
  url       = {http://www-tlp.limsi.fr/public/diss05_copte.pdf}
}

@inproceedings{Candea05A,
  author    = {Maria Candea and Ioana Vasilescu and Martine Adda-Decker},
  title     = {{Inter- and intra-language acoustic analysis of autonomous fillers}},
  booktitle = {Proceedings of Disfluency In Spontaneous Speech (DISS) Workshop},
  month     = sep,
  address   = {Aix-en-Provence},
  year      = {2005},
  url       = {http://www-tlp.limsi.fr/public/diss05_filler.pdf}
}

@inproceedings{Xuan05C,
  author    = {Xuan Zhu and Cheung-Chi Leung and Claude Barras and Lori Lamel and Jean-Luc Gauvain},
  title     = {{Speech activity detection and speaker identification for CHIL}},
  booktitle = {Workshop on Multimodal Interaction and Related Machine Learning Algorithms (MLMI)},
  address   = {Edinburgh},
  month     = jul,
  year      = {2005},
  url       = {http://www-tlp.limsi.fr/public/mlmi05-limsidsad.pdf}
}

% Volume and issue are separate fields: "39(1)" becomes volume 39, number 1.
@article{Mariani05A,
  author    = {J.J. Mariani},
  title     = {{Developing Language Technologies with the Support of Language Resources and Evaluation Programs}},
  journal   = {Language Resources and Evaluation},
  volume    = {39},
  number    = {1},
  pages     = {35--44},
  year      = {2005}
}

@article{Lamel05D,
  author    = {J. Goldman and S. Renals and S. Bird and F. de Jong and M. Federico and C. Fleischhauer and M. Kornbluh and L. Lamel and D. W. Oard and F. Sebastiani and C.
Stewart and R. Wright},
  TITLE = {{Spoken Word Audio Collections}},
  JOURNAL = {International Journal of Digital libraries},
  VOLUME = {5(4)},
  PAGES = {287-298},
  YEAR = 2005
}

@INPROCEEDINGS{Lamel05C,
  AUTHOR = {Lori Lamel and Holger Schwenk and Jean-Luc Gauvain and Gilles Adda and Eric Bilinski},
  TITLE = {{Improvements in Transcribing Lectures and Seminars}},
  BOOKTITLE = {Workshop on Multimodal Interaction and Related Machine Learning Algorithms (MLMI)},
  ADDRESS = {Edinburgh},
  YEAR = 2005
}

@INPROCEEDINGS{Schwenk05C,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Training Neural Network Language Models On Very Large Corpora}},
  BOOKTITLE = {Joint Human Language Technology Conference and Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  MONTH = {October},
  ADDRESS = {Vancouver},
  PAGES = {201--208},
  YEAR = 2005,
  URL = {http://www-tlp.limsi.fr/public/emnlp05.pdf}
}

% IEEE ASR workshop papers (San Juan, November 2005); the venue comes from the ieeeasr macro

@INPROCEEDINGS{Canseco05A,
  AUTHOR = {Leonardo Canseco and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{A Comparative Study Using Manual and Automatic Transcriptions for Diarization}},
  BOOKTITLE = ieeeasr,
  MONTH = {November},
  ADDRESS = {San Juan, Porto Rico},
  YEAR = 2005,
  URL = {http://www-tlp.limsi.fr/public/CansecoASRU05.pdf}
}

@INPROCEEDINGS{Dechelot05A,
  AUTHOR = {Daniel Dechelotte and Holger Schwenk and Jean-Luc Gauvain and Olivier Galibert and Lori Lamel},
  TITLE = {{Investigating Translation of Parliament Speeches}},
  BOOKTITLE = ieeeasr,
  MONTH = {November},
  ADDRESS = {San Juan, Porto Rico},
  YEAR = 2005,
  URL = {http://www-tlp.limsi.fr/public/DechelotteASRU05.pdf}
}

@INPROCEEDINGS{Gauvain05C,
  AUTHOR = {R. Prasad and S. Matsoukas and C.-L. Kao and J.Z. Ma and D.-X. Xu and T. Colthurst and O. Kimball and R. Schwartz and J.L. Gauvain and L. Lamel and H. Schwenk and G. Adda and F.
Lefevre},
  TITLE = {{The 2004 BBN/LIMSI 20xRT English Conversational Telephone Speech Recognition System}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051574.PDF}
}

% InterSpeech 2005 papers (Lisbon, September)

@INPROCEEDINGS{Messaoudi05A,
  AUTHOR = {Abdel Messaoudi and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Modeling Vowels for Arabic BN Transcription}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051588.PDF}
}

@INPROCEEDINGS{Gauvain05B,
  AUTHOR = {Jean-Luc Gauvain and Gilles Adda and Martine Adda-Decker and Alexandre Allauzen and Veronique Gendner and Lori Lamel and Holger Schwenk},
  TITLE = {{Where Are We in Transcribing French Broadcast News?}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052392.PDF}
}

@INPROCEEDINGS{Xuan05B,
  AUTHOR = {Xuan Zhu and Claude Barras and Sylvain Meignier and Jean-Luc Gauvain},
  TITLE = {{Combining Speaker Identification and BIC for Speaker Diarization}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051821.PDF}
}

% Two authors: names are separated by "and", never by a comma
% (a comma would make BibTeX read the pair as one "Last, First" name).

@INPROCEEDINGS{Madda05C,
  AUTHOR = {Cedric Gendrot and Martine Adda-Decker},
  TITLE = {{Impact of Duration on F1/F2 Formant Values of Oral Vowels: An Automatic Analysis of Large Broadcast News Corpora}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051895.PDF}
}

@INPROCEEDINGS{Schwenk05B,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Building Continuous Space Language Models for Transcribing European Languages}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052366.PDF}
}

@INPROCEEDINGS{Allauzen05B,
  AUTHOR = {Alexandre Allauzen and Jean-Luc
Gauvain},
  TITLE = {{Diachronic Vocabulary Adaptation for Broadcast News Transcription}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  keywords = { lmadapt },
  URL = {http://www-tlp.limsi.fr/public/IS052002.PDF}
}

@INPROCEEDINGS{Lamel05B,
  AUTHOR = {Lori Lamel and Gilles Adda and Eric Bilinski and Jean-Luc Gauvain},
  TITLE = {{Transcribing Lectures and Seminars}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051589.PDF}
}

% ACII and IVA 2005 papers

@INPROCEEDINGS{Vidrascu05B,
  AUTHOR = {Laurence Vidrascu and Laurence Devillers},
  TITLE = {{Real-life Emotions Representation and Detection in Call Centers}},
  BOOKTITLE = {ACII},
  ADDRESS = {Beijing},
  YEAR = 2005,
  MONTH = {October},
}

@INPROCEEDINGS{Devil05K,
  AUTHOR = {J.C. Martin and L. Devillers and S. Abrilian and M. Lamolle and M. Mancini and C. Pelachaud},
  TITLE = {{Levels of Representation in the annotation of emotion for the specification of expressivity in ECAs}},
  BOOKTITLE = {IVA},
  ADDRESS = {Kos},
  YEAR = 2005,
  MONTH = {September},
}

@INPROCEEDINGS{Devil05J,
  AUTHOR = {Jean-Claude Martin and Sarkis Abrilian and Laurence Devillers},
  TITLE = {{Annotating Multimodal Behaviors Occurring during Non Basic Emotions}},
  BOOKTITLE = {ACII},
  ADDRESS = {Beijing},
  YEAR = 2005,
  MONTH = {October},
}

@INPROCEEDINGS{Devil05I,
  AUTHOR = {Laurence Devillers and Sarkis Abrilian and Jean-Claude Martin},
  TITLE = {{Representing Real-life Emotions in Audiovisual Data with Non Basic Emotional Patterns and Context Features}},
  BOOKTITLE = {ACII},
  ADDRESS = {Beijing},
  YEAR = 2005,
  MONTH = {October},
}

@ARTICLE{Devil05H,
  AUTHOR = {Laurence Devillers and Laurence Vidrascu and Lori Lamel},
  TITLE = {{Challenges in real-life emotion annotation and machine learning based detection}},
  JOURNAL = {Journal of Neural Networks},
  VOLUME = {18/4},
  YEAR = 2005
}

@INPROCEEDINGS{Devil05G,
  AUTHOR = {S. Abrilian and J.-C. Martin and L.
Devillers},
  TITLE = {{A Corpus-Based Approach for the Modeling of Multimodal Emotional Behaviors for the Specification of Embodied Agents}},
  BOOKTITLE = {HCI International},
  ADDRESS = {Las Vegas},
  YEAR = 2005,
  MONTH = {July},
  PAGES = {}
}

@INPROCEEDINGS{Devil05F,
  AUTHOR = {S. Abrilian and L. Devillers and J.-C. Martin},
  TITLE = {{EmoTV1: Annotation of Real-life Emotions for the Specification of Multimodal Affective Interfaces}},
  BOOKTITLE = {HCI International},
  ADDRESS = {Las Vegas},
  YEAR = 2005,
  MONTH = {July},
  PAGES = {}
}

% NOTE(review): the first two author names follow the spelling used in Devil05K
% (M. Lamolle, M. Mancini) -- confirm against the published paper.

@INPROCEEDINGS{Devil05E,
  AUTHOR = {M. Lamolle and M. Mancini and C. Pelachaud and S. Abrilian and J-C. Martin and L. Devillers},
  TITLE = {{Contextual Factors and Adaptative Multimodal Human-Computer Interaction: Multi-level Specification of Emotion and Expressivity in Embodied Conversational Agents}},
  BOOKTITLE = {CONTEXT},
  ADDRESS = {Paris},
  YEAR = 2005,
  MONTH = {June},
  PAGES = {}
}

@INPROCEEDINGS{Devil05D,
  AUTHOR = {Laurence Vidrascu and Laurence Devillers},
  TITLE = {{Annotation and Detection of Blended Emotions in Real Human-Human Dialogs Recorded in a Call Center}},
  BOOKTITLE = {ICME},
  ADDRESS = {Amsterdam},
  YEAR = 2005,
  MONTH = {June},
  PAGES = {}
}

@INPROCEEDINGS{Devil05B,
  AUTHOR = {Ellen Douglas-Cowie and Laurence Devillers and Jean-Claude Martin and Roddy Cowie and Suzie Savvidou and Sarkis Abrilian and Cate Cox},
  TITLE = {{Multimodal Databases of Everyday Emotion: Facing up to Complexity}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052018.PDF}
}

@INPROCEEDINGS{Vidrascu05A,
  AUTHOR = {Laurence Vidrascu and Laurence Devillers},
  TITLE = {{Detection of Real-Life Emotions in Call Centers}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052175.PDF}
}

@INPROCEEDINGS{Galibert05A,
  AUTHOR = {Olivier Galibert and Gabriel Illouz and Sophie Rosset},
  TITLE = {{An Open-Domain,
Human-Computer Dialog System}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052393.PDF},
  keywords = {spoken dialog system, ritel},
}

% InterSpeech 2005 papers (Lisbon, September), continued

@INPROCEEDINGS{Rosset05B,
  AUTHOR = {Sophie Rosset and Delphine Tribout},
  TITLE = {{Multi-Level Information and Automatic Dialog Acts Detection in Human-Human Spoken Dialogs}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052349.PDF},
  keywords = {dialog act detection, human human dialog},
}

@INPROCEEDINGS{Maynard05A,
  AUTHOR = {H. Bonneau-Maynard and S. Rosset and C. Ayache and A. Kuhn and D. Mostefa},
  TITLE = {{Semantic Annotation of the French Media Dialog Corpus}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052010.PDF},
  keywords = {spoken language understanding, corpus annotation},
}

@INPROCEEDINGS{Vasilescu05A,
  AUTHOR = {Ioana Vasilescu and Maria Candea and Martine Adda-Decker},
  TITLE = {{Perceptual Salience of Language-Specific Acoustic Differences in Autonomous Fillers Across Eight Languages}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS051976.PDF}
}

@INPROCEEDINGS{Madda05B,
  AUTHOR = {Martine Adda-Decker and Lori Lamel},
  TITLE = {{Do Speech Recognizers Prefer Female Speakers?}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/IS052391.PDF}
}

@INPROCEEDINGS{Dong05A,
  AUTHOR = {Dong Zhu and Martine Adda-Decker and Fabien Antoine},
  TITLE = {{Different Size Multilingual Phone Inventories and Context-Dependent Acoustic Models for Language Identification}},
  BOOKTITLE = {InterSpeech},
  MONTH = {September},
  ADDRESS = {Lisbon},
  YEAR = 2005,
  PAGES = {},
  keywords = { lid },
  URL =
{http://www-tlp.limsi.fr/public/IS051735.PDF}
}

@ARTICLE{Madda05A,
  AUTHOR = {Martine Adda-Decker and Philippe Boula de Mareuil and Gilles Adda and Lori Lamel},
  TITLE = {{Investigating syllabic structures and their variation in spontaneous French}},
  JOURNAL = spcom,
  VOLUME = 46,
  PAGES = {119-139},
  YEAR = 2005,
  URL = {http://www-tlp.limsi.fr/public/speechcomSyll.pdf}
}

@INCOLLECTION{Schwenk05A,
  AUTHOR = {Yoshua Bengio and Holger Schwenk and Jean-Sebastien Senecal and Frederic Morin and Jean-Luc Gauvain},
  TITLE = {{Neural Probabilistic Language Models}},
  BOOKTITLE = {Innovations in Machine Learning: Theory and Applications},
  PUBLISHER = {Springer Verlag},
  YEAR = 2005,
  EDITOR = {D. Holmes and L.C. Jain},
  PAGES = {},
}

% The journal name comes from the TAL string macro defined at the top of the file.

@ARTICLE{Gauvain05A,
  AUTHOR = {Jean-Luc Gauvain and Gilles Adda and Lori Lamel and Fabrice Lefevre and Holger Schwenk},
  TITLE = {{Transcription de la parole conversationnelle}},
  JOURNAL = TAL,
  VOLUME = 45,
  NUMBER = {3},
  PAGES = {},
  YEAR = 2005,
  URL = {http://www-tlp.limsi.fr/public/tal05_cts.pdf}
}

@ARTICLE{Lefevre05A,
  AUTHOR = {Fabrice Lefevre and Jean-Luc Gauvain and Lori Lamel},
  TITLE = {{Genericity and portability for task-dependent speech recognition}},
  JOURNAL = csl,
  VOLUME = 19,
  PAGES = {345-363},
  YEAR = 2005,
  URL = {}
}

@INPROCEEDINGS{Lamel05A,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Alternate Phone Models for Conversational Speech}},
  BOOKTITLE = icassp,
  YEAR = 2005,
  MONTH = {April},
  URL = {http://www-tlp.limsi.fr/public/Lamel_ICASSP05.pdf}
}

@INPROCEEDINGS{Allauzen05A,
  AUTHOR = {Alexandre Allauzen and Jean-Luc Gauvain},
  TITLE = {{Open vocabulary ASR for audiovisual document indexation}},
  BOOKTITLE = icassp,
  YEAR = 2005,
  MONTH = {April},
  URL = {http://www-tlp.limsi.fr/public/Allauzen_ICASSP05.pdf}
}

%%%%%%%%%% 2004 section

@INPROCEEDINGS{Ruiz04A,
  AUTHOR = {Marta Ruiz Costa-jussà and Jean-Luc Gauvain and Olivier Galibert},
  TITLE = {{Normalización de textos y selección del vocabulario para estimar el modelo de lenguaje de un sistema de transcripción de
noticias }},
  BOOKTITLE = {III Jornadas en Tecnología del Habla},
  ADDRESS = {Valencia},
  YEAR = 2004,
  MONTH = {November}
}

% MIDL workshop papers (Paris, November 2004); the venue comes from the MIDL macro

@INPROCEEDINGS{Madda04D,
  AUTHOR = {Cédric Gendrot and Martine Adda-Decker},
  TITLE = {{Analyses formantiques de corpus d'\'emissions journalistiques en français et allemand}},
  BOOKTITLE = MIDL,
  MONTH = {November},
  ADDRESS = {Paris},
  YEAR = 2004,
  PAGES = {7-12},
  URL = {http://www-tlp.limsi.fr/public/midl04formant.pdf}
}

@INPROCEEDINGS{Gauvain04F,
  AUTHOR = {Jean-Luc Gauvain and Abdel Messaoudi and Holger Schwenk},
  TITLE = {{Improving Language Recognition using Phone Lattices}},
  BOOKTITLE = MIDL,
  MONTH = {November},
  ADDRESS = {Paris},
  keywords = { lid },
  YEAR = 2004
}

@INPROCEEDINGS{Madda04E,
  AUTHOR = {Ioana Vasilescu and Maria Candea and Martine Adda-Decker},
  TITLE = {{Hésitations autonomes dans 8 langues : une étude acoustique et perceptive}},
  BOOKTITLE = MIDL,
  MONTH = {November},
  ADDRESS = {Paris},
  YEAR = 2004,
  PAGES = {25-30},
  URL = {http://www-tlp.limsi.fr/public/midl04filledp.pdf}
}

@INPROCEEDINGS{Zhu04A,
  AUTHOR = {Dong Zhu and Fabien Antoine and Martine Adda-Decker},
  TITLE = {{Modélisation acoustiques multilingues et structurations syllabiques pour l'IAL}},
  BOOKTITLE = MIDL,
  MONTH = {November},
  ADDRESS = {Paris},
  YEAR = 2004,
  PAGES = {79-84},
  URL = {http://www-tlp.limsi.fr/public/midl04syllabo.pdf}
}

@INPROCEEDINGS{Schwenk04F,
  AUTHOR = {Holger Schwenk},
  TITLE = {{Efficient Training of Large Neural Networks for Language Modeling}},
  PAGES = {3059--3062},
  BOOKTITLE = ijcnn,
  YEAR = 2004
}

@INPROCEEDINGS{Barras04E,
  AUTHOR = {Claude Barras and Xuan Zhu and Sylvain Meignier and Jean-Luc Gauvain},
  TITLE = {{Improving Speaker Diarization}},
  BOOKTITLE = {Proc. DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_diarization.pdf}
}

@INPROCEEDINGS{Schwenk04E,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Using neural network language models for LVCSR}},
  BOOKTITLE = {Proc.
DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_limsi_lm.pdf}
}

% DARPA RT04 workshop papers (Palisades NY, November 2004)

@INPROCEEDINGS{Gauvain04E,
  AUTHOR = {R. Prasad and S. Matsoukas and C.-L. Kao and J. Ma and D.-X. Xu and T. Colthurst and G. Thattai and O. Kimball and R. Schwartz and J.-L. Gauvain and L. Lamel and H. Schwenk and G. Adda and F. Lefevre},
  TITLE = {{The 2004 BBN/LIMSI 20xRT English Conversational Telephone Speech System}},
  BOOKTITLE = {Proc. DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_bbn_limsi_eng_cts20xrt.pdf}
}

@INPROCEEDINGS{Lamel04E,
  AUTHOR = {L. Nguyen and S. Abdou and M. Afify and J. Makhoul and S. Matsoukas and R. Schwartz and B. Xiang and L. Lamel and J.L. Gauvain and G. Adda and H. Schwenk and F. Lefevre},
  TITLE = {{The 2004 BBN/LIMSI 10xRT English Broadcast News Transcription System}},
  BOOKTITLE = {Proc. DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_bbn_limsi_eng_bn10xrt.pdf}
}

@INPROCEEDINGS{Lamel04D,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Alternate phone models for CTS}},
  BOOKTITLE = {Proc. DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_limsi_altphone.pdf}
}

@INPROCEEDINGS{Canseco04D,
  AUTHOR = {Leonardo Canseco-Rodriguez and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Towards using STT for Broadcast News Speaker Diarization}},
  BOOKTITLE = {Proc. DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_limsi_sttspkr.pdf}
}

@INPROCEEDINGS{Messaoudi04D,
  AUTHOR = {Abdel Messaoudi and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{The LIMSI RT04 BN Arabic system}},
  BOOKTITLE = {Proc.
DARPA RT04},
  MONTH = {November},
  ADDRESS = {Palisades NY},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/rt04f_limsi_arabic.pdf}
}

@INPROCEEDINGS{Barras04B,
  AUTHOR = {Claude Barras and Sylvain Meignier and Jean-Luc Gauvain},
  TITLE = {{Unsupervised Online Adaptation for Speaker Verification over the Telephone}},
  BOOKTITLE = {Odyssey},
  MONTH = {May-June},
  ADDRESS = {Toledo},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/odyssey04_barras.pdf}
}

@INPROCEEDINGS{Barras04A,
  AUTHOR = {Claude Barras and Gilles Adda and Martine Adda-Decker and Benoit Habert and Philippe Boula de Mareüil and Patrick Paroubek},
  TITLE = {{Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts}},
  BOOKTITLE = {LREC},
  MONTH = {May},
  ADDRESS = {Lisbon},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/lrec04_barras.pdf}
}

% JEP 2004 papers (Fez, April)

@INPROCEEDINGS{Madda04C,
  AUTHOR = {Martine Adda-Decker and Benoit Habert and Claude Barras and Gilles Adda and Philippe Boula de Mareüil and Patrick Paroubek},
  TITLE = {{Une étude des disfluences pour la transcription automatique de la parole spontanée et l'amélioration des modèles de langage}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_madda.pdf}
}

@INPROCEEDINGS{Allauzen04C,
  AUTHOR = {Alexandre Allauzen and Jean-Luc Gauvain},
  TITLE = {{Construction automatique du vocabulaire d'un système de transcription}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_allauzen.pdf}
}

@INPROCEEDINGS{Antoine04C,
  AUTHOR = {Fabien Antoine and Dong Zhu and Philippe Boula de Mareüil and Martine Adda-Decker},
  TITLE = {{Approches segmentales multilingues pour l'identification automatique de la langue : phones et syllabes}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_antoine.pdf}
}

@INPROCEEDINGS{Devil04D,
  AUTHOR
= {Laurence Devillers and Ioana Vasilescu},
  TITLE = {{Détection des émotions à partir d'indices lexicaux, dialogiques et prosodiques dans le dialogue oral}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_devil.pdf}
}

@INPROCEEDINGS{Gauvain04C,
  AUTHOR = {Jean-Luc Gauvain and Gilles Adda and Lori Lamel and Fabrice Lefevre and Holger Schwenk},
  TITLE = {{Transcription de la parole conversationnelle}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_jlg.pdf}
}

@INPROCEEDINGS{Maynard04C,
  AUTHOR = {H. Maynard and K. McTait and D. Mostefa and L. Devillers and S. Rosset and P. Paroubek and C. Bousquet and K. Choukri and J. Goulian and J.-Y. Antoine and F. Béchet and O. Bontron and L. Charnay and L. Romary and M. Vergnes and N. Vigouroux},
  TITLE = {{Constitution d'un corpus de dialogue oral pour l'évaluation automatique de la compréhension hors et en contexte du dialogue}},
  BOOKTITLE = {JEP},
  MONTH = {April},
  ADDRESS = {Fez},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep04_mctait.pdf},
  keywords = {spoken language understanding, corpus annotation},
}

@INPROCEEDINGS{Devil04C,
  AUTHOR = {Laurence Devillers and Ioana Vasilescu and Laurence Vidrascu},
  TITLE = {{Anger Versus Fear Detection in Recorded Conversations}},
  BOOKTITLE = {Speech Prosody},
  MONTH = {March},
  ADDRESS = {Nara},
  YEAR = 2004
}

% LREC 2004 papers (Lisbon, May)

@INPROCEEDINGS{Devil04B,
  AUTHOR = {Laurence Devillers and Laurence Vidrascu},
  TITLE = {{Reliability of Lexical and Prosodic Cues in two Real-life Spoken Dialog Corpora}},
  BOOKTITLE = {LREC},
  MONTH = {May},
  ADDRESS = {Lisbon},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/lrec04_devil.pdf}
}

@INPROCEEDINGS{Devil04A,
  AUTHOR = {L. Devillers and H. Maynard and S. Rosset and P. Paroubek and K. McTait and D. Mostefa and K. Choukri and L. Charnay and C. Bousquet and N. Vigouroux and F. Béchet and L. Romary and J.Y.
Antoine and J. Villaneau and M. Vergnes and J. Goulian},
  TITLE = {{The French MEDIA/EVALDA Project: the Evaluation of the Understanding Capability of Spoken Language Dialogue Systems}},
  BOOKTITLE = {LREC},
  MONTH = {May},
  ADDRESS = {Lisbon},
  YEAR = 2004,
  URL = {http://www-tlp.limsi.fr/public/lrec04_devil2.pdf},
  keywords = {spoken language understanding, evaluation},
}

@INPROCEEDINGS{Lienard04A,
  AUTHOR = {Jean-Sylvain Li\'enard and Martine Adda-Decker},
  TITLE = {{Indices prosodiques caract\'erisant un style d'\'elocution et ses variations individuelles}},
  BOOKTITLE = MIDL,
  MONTH = {November},
  ADDRESS = {Paris},
  YEAR = 2004,
  PAGES = {173-178},
  URL = {http://www-tlp.limsi.fr/public/midl04style.pdf}
}

% ICASSP 2004 papers (Montreal, May); the venue comes from the icassp macro

@INPROCEEDINGS{Chen04B,
  AUTHOR = {Langzhou Chen and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Lightly supervised acoustic model training using consensus networks}},
  BOOKTITLE = icassp,
  MONTH = {May},
  ADDRESS = {Montreal},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/icassp04_chen.pdf}
}

@INPROCEEDINGS{Gauvain04B,
  AUTHOR = {Richard Schwartz and Thomas Colthurst and Nicolae Duta and Herb Gish and Rukmini Iyer and Chia-Lin Kao and Daben Liu and Owen Kimball and J.
Ma and John Makhoul and Spyros Matsoukas and Long Nguyen and Mohamed Noamany and Rohit Prasad and Bing Xiang and Dongxin Xu and Jean-Luc Gauvain and Lori Lamel and Holger Schwenk and Gilles Adda and Langzhou Chen},
  TITLE = {{Speech recognition in multiple languages and domains: The 2003 BBN/LIMSI EARS system}},
  BOOKTITLE = icassp,
  MONTH = {May},
  ADDRESS = {Montreal},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/icassp04_jlg.pdf}
}

@INPROCEEDINGS{Lamel04B,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain and Gilles Adda and Martine Adda-Decker and Leonardo Canseco and Langzhou Chen and Olivier Galibert and Abdel Messaoudi and Holger Schwenk},
  TITLE = {{Speech Transcription in Multiple Languages}},
  BOOKTITLE = icassp,
  MONTH = {May},
  ADDRESS = {Montreal},
  YEAR = 2004,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/icassp04_ll.pdf}
}

% ICSLP 2004 papers (Jeju Island, October); the venue comes from the icslp macro

@INPROCEEDINGS{Schwenk04A,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Neural Network Language Models for Conversational Speech Recognition}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {1215-1218},
  URL = {http://www-tlp.limsi.fr/public/ThC3202o.5_p1215.pdf}
}

@INPROCEEDINGS{Gauvain04A,
  AUTHOR = {Jean-Luc Gauvain and Abdel Messaoudi and Holger Schwenk},
  TITLE = {{Language Recognition Using Phone Lattices}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {1283-1286},
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/TuA2301o.1_p1283.pdf}
}

@INPROCEEDINGS{Rosset04A,
  AUTHOR = {Sophie Rosset and Lori Lamel},
  TITLE = {{Automatic Detection of Dialog Acts Based on Multi-level Information}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {540-543},
  URL = {http://www-tlp.limsi.fr/public/TuB401o.2_p540.pdf},
  keywords = {dialog act detection, human human dialog},
}

@INPROCEEDINGS{Canseco04A,
  AUTHOR = {Leonardo Canseco-Rodriguez and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Speaker Diarization from
Speech Transcripts}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {1272-1275},
  URL = {http://www-tlp.limsi.fr/public/TuC2105o.3_p1272.pdf}
}

% Author names must be joined by the exact word "and"; any other word
% is read as part of a name and silently merges two authors.

@INPROCEEDINGS{Chen04A,
  AUTHOR = {Langzhou Chen and Jean-Luc Gauvain and Lori Lamel and Gilles Adda},
  TITLE = {{Dynamic Language Modeling for Broadcast News}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {1281-1284},
  URL = {http://www-tlp.limsi.fr/public/WeA3203p.2_p1281.pdf}
}

@INPROCEEDINGS{Messaoudi04A,
  AUTHOR = {Abdel Messaoudi and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Transcription of Arabic Broadcast News}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {521-524},
  URL = {http://www-tlp.limsi.fr/public/ThA2001o.2_p521.pdf}
}

@INPROCEEDINGS{Clavel04A,
  AUTHOR = {Chloe Clavel and Ioana Vasilescu and Laurence Devillers and Thibault Ehrette},
  TITLE = {{Fiction Database for Emotion Detection in Abnormal Situations}},
  BOOKTITLE = icslp,
  MONTH = {October},
  ADDRESS = {Jeju Island},
  YEAR = 2004,
  PAGES = {1105-1108},
  URL = {http://www-tlp.limsi.fr/public/ThC2701o.5_p1105.pdf}
}

%%%%%%%%%% 2003 section

% NOTE(review): HCL1 looks like the same chapter as Lamel03A (same authors, title,
% editor, publisher, year, chapter and pages; only the book title is worded
% differently) -- confirm which key is cited before merging.

@INCOLLECTION{HCL1,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain},
  TITLE = {Speech Recognition},
  BOOKTITLE = {The Oxford Handbook of Computational Linguistics},
  EDITOR = {R. Mitkov},
  PUBLISHER = {Oxford University Press},
  YEAR = 2003,
  CHAPTER = 16,
  PAGES = {305-322}
}

@INPROCEEDINGS{McTait03A,
  AUTHOR = {Kevin McTait and Martine Adda-Decker},
  TITLE = {{The 300k LIMSI German Broadcast News Transcription System}},
  BOOKTITLE = {ISCA Eurospeech},
  MONTH = {September},
  ADDRESS = {Geneva},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/ES030250.PDF}
}

@INPROCEEDINGS{Madda03D,
  AUTHOR = {P. Boula de Mareuil and M. Adda-Decker and V.
Gendner},
  TITLE = {{Liaisons in French: a corpus based study using morpho-syntactic information}},
  BOOKTITLE = {ICPhS},
  MONTH = {August},
  ADDRESS = {Barcelona},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/ICPhSliaison.pdf}
}

@INPROCEEDINGS{Madda03C,
  AUTHOR = {Martine Adda-Decker and Fabien Antoine and Philippe Boula de Mareuil and Ioana Vasilescu and Lori Lamel and Jacqueline Vaissiere and Edouard Geoffrois and Jean-Sylvain Liénard},
  TITLE = {{Phonetic knowledge, phonotactics and perceptual validation for automatic language identification}},
  BOOKTITLE = {ICPhS},
  MONTH = {August},
  ADDRESS = {Barcelona},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/ICPhSlid.pdf}
}

@INPROCEEDINGS{Madda03B,
  AUTHOR = {Martine Adda-Decker},
  TITLE = {{A corpus-based decompounding algorithm for German lexical modeling in LVCSR}},
  BOOKTITLE = {ISCA Eurospeech},
  MONTH = {September},
  ADDRESS = {Geneva},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/ES031038.PDF}
}

% Last author spelled as in the other entries of this file (Paroubek).

@INPROCEEDINGS{Madda03A,
  AUTHOR = {M. Adda-Decker and B. Habert and C. Barras and G. Adda and Ph. Boula de Mareuil and P. Paroubek},
  TITLE = {{A disfluency study for cleaning spontaneous speech automatic transcripts and improving speech language models}},
  BOOKTITLE = {ISCA DiSS '03 - Disfluency in Spontaneous Speech},
  MONTH = {September},
  ADDRESS = {Gothenburg, Sweden},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/diss03_disfluencies.pdf}
}

@INPROCEEDINGS{HBM03A,
  AUTHOR = {H. Bonneau-Maynard and S. Rosset},
  TITLE = {{A Semantic representation for spoken dialogs}},
  BOOKTITLE = {ISCA Eurospeech},
  MONTH = {September},
  ADDRESS = {Geneva},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/euro03HBMSR.pdf},
  keywords = {semantic representation, corpus annotation tool},
}

@INPROCEEDINGS{Amities03A,
  AUTHOR = {H. Hardy and K. Baker and H. Bonneau-Maynard and L. Devillers and S. Rosset and T.
Strzalkowski},
  TITLE = {{Semantic and Dialogic Annotation for Automated Multilingual Customer Service}},
  BOOKTITLE = {ISCA Eurospeech},
  MONTH = {September},
  ADDRESS = {Geneva},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/AnnotationHH032803.pdf},
  keywords = {corpus annotation},
}

% EACL 2003 (Budapest, April): main conference paper and evaluation workshop paper

@INPROCEEDINGS{Gendner03A,
  AUTHOR = {Veronique Gendner and Gabriel Illouz and Michele Jardino and Laura Monceaux and Patrick Paroubek and Isabelle Robba and Anne Vilnat},
  TITLE = {{PEAS, the First Instantiation of a Comparative Framework for Evaluating Parsers of French}},
  BOOKTITLE = {EACL},
  MONTH = {April},
  ADDRESS = {Budapest},
  YEAR = 2003,
  PAGES = {},
  URL = {}
}

@INPROCEEDINGS{Devillers03D,
  AUTHOR = {Laurence Devillers and Helene Maynard and Patrick Paroubek and Sophie Rosset},
  TITLE = {{The PEACE SLDS understanding evaluation paradigm of the French MEDIA campaign}},
  BOOKTITLE = {EACL Workshop on Evaluation Initiatives in NLP},
  MONTH = {April},
  ADDRESS = {Budapest},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/eacl2003peace.pdf},
  keywords = {spoken understanding, evaluation, metrics},
}

% Emotion detection papers, 2003

@INPROCEEDINGS{Devillers03C,
  AUTHOR = {Laurence Devillers and Ioana Vasilescu},
  TITLE = {{Prosodic cues for emotion characterization in real-life spoken dialogs}},
  BOOKTITLE = {ISCA Eurospeech},
  MONTH = {September},
  ADDRESS = {Geneva},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/1025anav.pdf}
}

@INPROCEEDINGS{Devillers03B,
  AUTHOR = {Laurence Devillers and Ioana Vasilescu and Catherine Mathon},
  TITLE = {{Prosodic cues for perceptual emotion detection in task-oriented Human-Human corpus}},
  BOOKTITLE = {ICPhS},
  MONTH = {August},
  ADDRESS = {Barcelona},
  YEAR = 2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/0572anav.pdf}
}

@INPROCEEDINGS{Devillers03A,
  AUTHOR = {Laurence Devillers and Lori Lamel and Ioana Vasilescu},
  TITLE = {{Emotion Detection in Task-Oriented Spoken Dialogs}},
  BOOKTITLE = {ICME},
  MONTH = {July},
  ADDRESS = {Baltimore},
  YEAR =
2003,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/ICME2003_4pages.pdf}
}

@INPROCEEDINGS{Schwenk03A,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Using Continuous Space Language Models for Conversational Speech Recognition}},
  BOOKTITLE = {ISCA \& IEEE Workshop on Spontaneous Speech Processing and Recognition},
  MONTH = {April},
  ADDRESS = {Tokyo},
  YEAR = 2003,
  URL = {http://www-tlp.limsi.fr/public/sspr03_ftp.pdf}
}

@INPROCEEDINGS{Lo03A,
  AUTHOR = {Yuen-Yee Lo and Jean-Luc Gauvain},
  TITLE = {{Tracking Topics in Broadcast News Data}},
  BOOKTITLE = {Proc. ISCA ITRW MSDR},
  MONTH = {April},
  ADDRESS = {Hong Kong},
  YEAR = 2003,
  URL = {http://www-tlp.limsi.fr/public/isca2003-yylo.pdf}
}

% Book chapters, 2003
% NOTE(review): Lamel03A looks like the same chapter as HCL1 (same authors, title,
% editor, publisher, year, chapter and pages) -- confirm which key is cited
% before merging.

@INCOLLECTION{Lamel03A,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain},
  TITLE = {Speech Recognition},
  BOOKTITLE = {OUP Handbook on Computational Linguistics},
  EDITOR = {R. Mitkov},
  PUBLISHER = {Oxford University Press},
  YEAR = 2003,
  CHAPTER = 16,
  PAGES = {305-322}
}

@INCOLLECTION{Gauvain03A,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel},
  TITLE = {Large Vocabulary Speech Recognition Based on Statistical Methods},
  BOOKTITLE = {Pattern Recognition in Speech and Language Processing},
  EDITOR = {W. Chou and F.
Juang},
  PUBLISHER = {CRC Press},
  YEAR = 2003,
  CHAPTER = 5,
  PAGES = {149-189}
}

@ARTICLE{Gauvain03B,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel},
  TITLE = {{Structuring Broadcast Audio for Information Access}},
  JOURNAL = {EURASIP Journal on Applied Signal Processing},
  VOLUME = 2003,
  NUMBER = {2},
  PAGES = {140-150},
  YEAR = 2003,
  URL = {}
}

@INPROCEEDINGS{Chen03B,
  AUTHOR = {Langzhou Chen and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Transcribing Mandarin Broadcast News}},
  BOOKTITLE = ieeeasr,
  MONTH = {November},
  ADDRESS = {St Thomas},
  YEAR = 2003,
  URL = {http://www-tlp.limsi.fr/public/asru03ma.pdf}
}

% ICASSP 2003 papers (Hong Kong, April); the venue comes from the icassp macro

@INPROCEEDINGS{Chen03A,
  AUTHOR = {Langzhou Chen and Jean-Luc Gauvain and Lori Lamel and Gilles Adda},
  TITLE = {{Unsupervised language model adaptation for broadcast news}},
  BOOKTITLE = icassp,
  MONTH = {April},
  ADDRESS = {Hong Kong},
  YEAR = 2003,
  PAGES = {I-220-223},
  URL = {http://www-tlp.limsi.fr/public/ica03CLZ.pdf},
  keywords = { lmadapt }
}

@INPROCEEDINGS{Gauvain03C,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel and Holger Schwenk and Gilles Adda and Langzhou Chen and Fabrice Lefevre},
  TITLE = {{Conversational telephone speech recognition}},
  BOOKTITLE = icassp,
  MONTH = {April},
  ADDRESS = {Hong Kong},
  YEAR = 2003,
  PAGES = {I-212-215},
  URL = {http://www-tlp.limsi.fr/public/ica03cts.pdf}
}

@INPROCEEDINGS{Barras03A,
  AUTHOR = {Claude Barras and Jean-Luc Gauvain},
  TITLE = {{Feature and score normalization for speaker verification of cellular data}},
  BOOKTITLE = icassp,
  MONTH = {April},
  ADDRESS = {Hong Kong},
  YEAR = 2003,
  PAGES = {II-49-52},
  URL = {http://www-tlp.limsi.fr/public/ica03CB.pdf}
}

% The journal name comes from the TAL string macro defined at the top of the file.

@ARTICLE{Allauzen03A,
  AUTHOR = {Alexandre Allauzen and Jean-Luc Gauvain},
  TITLE = {{Adaptation automatique du mod\`ele de langage d'un syst\`eme de transcription de journaux parl\'es}},
  JOURNAL = TAL,
  YEAR = {2003},
  VOLUME = {44},
  NUMBER = {1},
  keywords = { lmadapt },
  PAGES = {11-31}
}

%%%%%%%%%% 2002 section

@ARTICLE{Devillers02B,
  AUTHOR = {Laurence Devillers and H\'el\`ene Maynard
and Patrick Paroubek},
  TITLE = {{M\'ethodologies d'\'evaluation des syst\`emes de dialogue parl\'e: r\'eflexions et exp\'eriences autour de la compr\'ehension}},
  JOURNAL = TAL,
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/tal02_eval.pdf}
}

% ISLE workshop, LREC and ICSLP papers from 2002

@INPROCEEDINGS{Devillers02C,
  AUTHOR = {Laurence Devillers and Ioana Vasilescu and Lori Lamel},
  TITLE = {Annotation and Detection of Emotion in a Task-oriented Human-Human Dialog Corpus},
  BOOKTITLE = {ISLE workshop},
  MONTH = {Dec},
  ADDRESS = {Edinburgh},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/isle02em.pdf}
}

@INPROCEEDINGS{Rosset02C,
  AUTHOR = {J.Y. Antoine and C. Bousquet-Vernhettes and J. Goulian and M. Zakaria Kurdi and S. Rosset and N. Vigouroux and J. Villaneau},
  TITLE = {{Predictive and objective evaluation of speech understanding: the "challenge" evaluation campaign of the I3 speech workgroup of the French CNRS}},
  BOOKTITLE = lrec,
  MONTH = {May},
  ADDRESS = {Las Palmas},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/lrec02_eval_defi.pdf},
  keywords = { spoken language understanding, evaluation, metrics},
}

% Title is double-braced like most entries in this file, so that styles which
% downcase titles keep the capital of the proper noun.

@INPROCEEDINGS{Rosset02B,
  AUTHOR = {Vu Ngoc Tuan and Christophe d'Alessandro and Sophie Rosset},
  TITLE = {{A phonetic study of Vietnamese tones: acoustic and electrographic measurements}},
  BOOKTITLE = {ICSLP},
  MONTH = {Sep},
  ADDRESS = {Denver},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/icslp02_tcs.pdf},
}

@INPROCEEDINGS{Rosset02A,
  AUTHOR = {Sophie Rosset and Lori Lamel},
  TITLE = {Representing Dialog Progression for Dynamic State Assessment},
  BOOKTITLE = {ISLE workshop},
  MONTH = {Dec},
  ADDRESS = {Edinburgh},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/isle02axes.pdf},
  keywords = {dialog, corpus annotation},
}

@INPROCEEDINGS{Amities02A,
  AUTHOR = {H. Hardy and K. Baker and L. Devillers and L. Lamel and S. Rosset and T. Strzalkowski and C. Ursu and N.
Webb},
  TITLE = {Multi-layer Dialogue Annotation for Automated Multilingual Customer Service},
  BOOKTITLE = {ISLE workshop},
  MONTH = dec,
  ADDRESS = {Edinburgh},
  YEAR = 2002,
  URL = {http://www.research.att.com/~walker/isle-dtag-wrk/},
  keywords = {dialog, corpus annotation}
}

% 2002 JEP (Nancy) and ICSLP (Denver) papers.
% Accents inside a title are written uniformly as LaTeX escapes.
@INPROCEEDINGS{Allauzen02A,
  AUTHOR = {Alexandre Allauzen and Jean-Luc Gauvain},
  TITLE = {{Mise \`a jour automatique du mod\`ele de langage d'un syst\`eme de transcription}},
  BOOKTITLE = {Proc. XXIViemes JEP},
  MONTH = jun,
  ADDRESS = {Nancy},
  YEAR = 2002
}

@INPROCEEDINGS{Maynard02A,
  AUTHOR = {Helene Maynard and Fabrice Lefevre},
  TITLE = {{Apprentissage d'un module stochastique de compr\'ehension de parole}},
  BOOKTITLE = {Proc. XXIViemes JEP},
  MONTH = jun,
  ADDRESS = {Nancy},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/jep2002hbm.pdf}
}

@INPROCEEDINGS{Lefevre02B,
  AUTHOR = {Fabrice Lefevre and Helene Bonneau-Maynard},
  TITLE = {Issues in the Development of a Stochastic Speech Understanding System},
  BOOKTITLE = icslp,
  MONTH = sep,
  ADDRESS = {Denver},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/icslp02hbm.pdf}
}

@INPROCEEDINGS{Lefevre02A,
  AUTHOR = {Fabrice Lefevre and Jean-Luc Gauvain and Lori Lamel},
  TITLE = {{D\'eveloppement d'une technologie g\'en\'erique pour la reconnaissance de la parole ind\'ependante de la t\^ache}},
  BOOKTITLE = {Proc. XXIViemes JEP},
  MONTH = jun,
  ADDRESS = {Nancy},
  YEAR = 2002
}

@INPROCEEDINGS{Gendner02A,
  AUTHOR = {Veronique Gendner and Martine Adda-Decker},
  TITLE = {{Analyse comparative de corpus oraux et \'ecrits fran\c{c}ais: mots, lemmes et classes morpho-syntaxiques}},
  BOOKTITLE = {Proc.
XXIViemes JEP},
  MONTH = jun,
  ADDRESS = {Nancy},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/jep02POS.pdf}
}

% 2002 LREC, ICASSP, ICSLP and ISCA ITRW papers.
% The compound surname "Boula de Mareuil" is braced so BibTeX keeps it as one last name.
@INPROCEEDINGS{Devillers02A,
  AUTHOR = {Laurence Devillers and Sophie Rosset and Hélène Maynard and Lori Lamel},
  TITLE = {{Annotations for dynamic Diagnosis of the Dialog State}},
  BOOKTITLE = lrec,
  MONTH = may,
  ADDRESS = {Las Palmas},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/lrec02_dial.pdf},
  keywords = {dialog, corpus annotation}
}

@INPROCEEDINGS{Barras02A,
  AUTHOR = {Claude Barras and Alexandre Allauzen and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Transcribing Audio-Video Archives}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Orlando},
  YEAR = 2002,
  PAGES = {13--16},
  URL = {http://www-tlp.limsi.fr/public/ica02cb.pdf}
}

@INPROCEEDINGS{Madda02B,
  AUTHOR = {{Boula de Mareuil}, Ph. and M. Adda-Decker},
  TITLE = {{Studying Pronunciation Variants in French by using Alignment Techniques}},
  BOOKTITLE = icslp,
  MONTH = sep,
  ADDRESS = {Denver},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/icslp02variant.pdf}
}

@INPROCEEDINGS{Madda02A,
  AUTHOR = {Martine Adda-Decker and {Boula de Mareüil}, Philippe and Gilles Adda and Lori Lamel},
  TITLE = {{Investigating syllabic structure and its variation in speech from French radio interviews}},
  BOOKTITLE = {ISCA ITRW Pronunciation modeling and Lexicon Adaptation for Spoken Language},
  MONTH = sep,
  ADDRESS = {Estes Park},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/pmla02syll.pdf}
}

@INPROCEEDINGS{Schwenk02A,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Connectionist Language Modeling for Large Vocabulary Continuous Speech Recognition}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Orlando},
  YEAR = 2002,
  PAGES = {765--768},
  URL = {http://www-tlp.limsi.fr/public/ica02hs.pdf}
}

@INPROCEEDINGS{Lamel02B,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain and Gilles Adda},
  TITLE = {{Unsupervised Acoustic Model Training}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Orlando},
  YEAR = 2002,
PAGES = {877-880}, URL = {http://www-tlp.limsi.fr/public/ica02light.pdf} } @ARTICLE{Gauvain02A, AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda}, TITLE = {{The LIMSI Broadcast News Transcription System}}, JOURNAL = spcom, VOLUME = 37, NUMBER = {1-2}, PAGES = {89-108}, YEAR = 2002, URL = {http://www-tlp.limsi.fr/public/spcH4_limsi.pdf} } @INCOLLECTION{Gauvain02B, AUTHOR = {J.L. Gauvain and L.F. Lamel}, TITLE = {Systèmes de reconnaissance, de compréhension et de dialogue}, BOOKTITLE = {Reconnaissance de la parole Traitement automatique du langage parlé}, VOLUME = 2, PUBLISHER = {Hermes Lavoisier}, YEAR = 2002, EDITOR = {J. Mariani}, PAGES = {47-83}, } @ARTICLE{Lamel02A, AUTHOR = {L. Lamel and J.L. Gauvain and G. Adda}, TITLE = {{Lightly supervised and unsupervised acoustic model training}}, JOURNAL = {Computer Speech and Language}, VOLUME = 16, NUMBER = {1}, PAGES = {115-229}, YEAR = 2002, URL = {http://www-tlp.limsi.fr/public/csl01.pdf} } @ARTICLE{Lamel02C, AUTHOR = {L. Lamel and S. Bennacef and J.L. Gauvain and H. Dartigues and J.N. Temem}, TITLE = {{User Evaluation of the MASK Kiosk}}, JOURNAL = {Speech Communication}, VOLUME = 38, NUMBER = {1-2}, PAGES = {131-139}, YEAR = 2002, URL = {http://www-tlp.limsi.fr/public/spc02mask.pdf} } @INPROCEEDINGS{Lamel02D, AUTHOR = {Lori Lamel}, TITLE = {{Some Issues in Speech Recognizer Portability}}, BOOKTITLE = {ISCA SALTMIL SIG workshop at LREC'02 on Portability Issues in Human Language Technologies}, MONTH = {Jun}, ADDRESS = {Las Palmas}, YEAR = 2002, URL = {http://www-tlp.limsi.fr/public/lrec02port.pdf} } @INPROCEEDINGS{Lamel02E, AUTHOR = {L. Lamel and J.L. Gauvain}, TITLE = {{Automatic Processing of Broadcast Audio in Multiple Languages}}, BOOKTITLE = {Proc. Eusipco02}, MONTH = {Sep}, ADDRESS = {Toulouse}, YEAR = 2002, URL = {http://www-tlp.limsi.fr/public/eusipco02.pdf} } @INPROCEEDINGS{Lo02A, AUTHOR = {Yuen-Yee Lo and Jean-Luc Gauvain}, TITLE = {{The LIMSI Topic Tracking System for TDT2002}}, BOOKTITLE = {Proc. 
DARPA Topic Detection and Tracking Workshop},
  MONTH = nov,
  ADDRESS = {Gaithersburg},
  YEAR = 2002,
  URL = {http://www-tlp.limsi.fr/public/tdt02.pdf}
}

%%%%%%%%%% 2001 section
@ARTICLE{Matrouf01A,
  AUTHOR = {D. Matrouf and J.L. Gauvain},
  TITLE = {{Utilisation des modèles de Markov cachés pour le débruitage}},
  JOURNAL = {Traitement du signal},
  VOLUME = 18,
  NUMBER = {3},
  PAGES = {213--218},
  YEAR = 2001
}

@INPROCEEDINGS{Rosset01A,
  AUTHOR = {S. Rosset and L. Lamel},
  TITLE = {{Gestionnaire de dialogue pour un système d'informations à reconnaissance vocale}},
  BOOKTITLE = {TALN'01},
  MONTH = jul,
  ADDRESS = {Tours},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/taln01sr.pdf},
  keywords = {dialog management, dialog system}
}

% NOTE(review): the third author was a bare surname; initials J.L. are assumed
% from the other Rosset/Lamel/Gauvain entries in this file. Confirm.
@INPROCEEDINGS{Rosset01B,
  AUTHOR = {S. Rosset and L. Lamel and J.L. Gauvain},
  TITLE = {{Gestion de dialogue, compréhension et traitement des erreurs}},
  BOOKTITLE = {Journées Atala},
  MONTH = jun,
  ADDRESS = {Paris},
  YEAR = 2001,
  keywords = {dialog management, dialog system}
}

@ARTICLE{Paroubek01A,
  AUTHOR = {P. Paroubek},
  TITLE = {{Coup d'oeil d'un expert sur l'évaluation en ingénieurie de la langue et de la parole}},
  JOURNAL = {Lettre d'information ELRA},
  VOLUME = 6,
  NUMBER = {3},
  PAGES = {4--5},
  MONTH = jul,
  YEAR = 2001
}

% NOTE(review): the citation key says 02 but YEAR is 2001; the key is kept so
% existing citations still resolve. Confirm which one is right.
@ARTICLE{Paroubek02A,
  AUTHOR = {P. Paroubek},
  TITLE = {{Workshop sur l'évaluation des systèmes de dialogue et de traitement du language naturel à EACI'01}},
  JOURNAL = {Lettre d'information ELRA},
  VOLUME = 6,
  NUMBER = {3},
  PAGES = {5--6},
  MONTH = jul,
  YEAR = 2001
}

@ARTICLE{Barras01A,
  AUTHOR = {C. Barras and E. Geoffrois and Z. Wu and M. Liberman},
  TITLE = {{Transcriber: development and use of a tool for assisting speech corpora production}},
  JOURNAL = spcom,
  VOLUME = 33,
  NUMBER = {1-2},
  PAGES = {5--22},
  MONTH = jan,
  YEAR = 2001
}

@ARTICLE{Barras01B,
  AUTHOR = {C.
Barras},
  TITLE = {{Reconnaissance automatique de la parole~: vers l'indexation automatique d'archives sonores}},
  JOURNAL = {Orsay Infos},
  VOLUME = 62,
  PAGES = {8--10},
  MONTH = jan,
  YEAR = 2001
}

% 2001 entries (continued): reports, workshop papers and one journal article.
@INPROCEEDINGS{Barras01C,
  AUTHOR = {C. Barras and Y. de Kercadio},
  TITLE = {{Indexation automatique de documents audiovisuels}},
  BOOKTITLE = {ASTI, 1ères Rencontres des Sciences et Technologies de l'Information},
  MONTH = apr,
  ADDRESS = {Paris - La Villette},
  YEAR = 2001
}

@ARTICLE{Barras01D,
  AUTHOR = {C. Barras},
  TITLE = {{Techniques de balisage acoustique et méthodes d'évaluation}},
  JOURNAL = {Notes et Documents LIMSI},
  VOLUME = 10,
  MONTH = mar,
  YEAR = 2001
}

@INPROCEEDINGS{Maynard01A,
  AUTHOR = {H\'el\`ene Bonneau-Maynard and Fabrice Lefevre},
  TITLE = {{Investigating Stochastic Speech Understanding}},
  BOOKTITLE = {IEEE Automatic Speech Recognition and Understanding Workshop},
  MONTH = dec,
  ADDRESS = {Madonna di Campiglio},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/asru01hbmfl.pdf}
}

@ARTICLE{Madda01A,
  AUTHOR = {Martine Adda-Decker},
  TITLE = {{Towards Multilingual Interoperability in Speech Technology}},
  JOURNAL = spcom,
  VOLUME = 35,
  NUMBER = {1-2},
  PAGES = {5--20},
  MONTH = aug,
  keywords = { multilingualasr },
  YEAR = 2001
}

@INPROCEEDINGS{Lo01A,
  AUTHOR = {Yuen-Yee Lo and Jean-Luc Gauvain},
  TITLE = {{The LIMSI Topic Tracking System for TDT2001}},
  BOOKTITLE = {Proc. DARPA Topic Detection and Tracking Workshop},
  MONTH = nov,
  ADDRESS = {Gaithersburg},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/limsi_trk01.pdf}
}

@INPROCEEDINGS{Chen01A,
  AUTHOR = {L. Chen and J.L. Gauvain and L. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {{Language Model Adaptation for Broadcast News Transcription}},
  BOOKTITLE = {Proc. ISCA ITRW 2001 Adaptation Methods for Speech Recognition},
  MONTH = aug,
  ADDRESS = {Sophia-Antipolis},
  YEAR = 2001,
  keywords = { lmadapt },
  URL = {http://www-tlp.limsi.fr/public/itrw01chen.pdf}
}

@INPROCEEDINGS{Lefevre01B,
  AUTHOR = {F.
Lefevre and J.L. Gauvain and L. Lamel},
  TITLE = {{Genericity and Adaptability Issues for Task-Independent Speech Recognition}},
  BOOKTITLE = {Proc. ISCA ITRW 2001 Adaptation Methods for Speech Recognition},
  MONTH = aug,
  ADDRESS = {Sophia-Antipolis},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/itrw01fabfinal.pdf}
}

% Eurospeech 2001 (Aalborg) papers; BOOKTITLE comes from the eurospeech string macro.
@INPROCEEDINGS{Chen01B,
  AUTHOR = {L. Chen and J.L. Gauvain and L. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {{Using Information Retrieval Methods for Language Model Adaptation}},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Aalborg},
  YEAR = 2001,
  keywords = { lmadapt },
  URL = {http://www-tlp.limsi.fr/public/euro01chen.pdf}
}

@INPROCEEDINGS{Lefevre01C,
  AUTHOR = {F. Lefevre and J.L. Gauvain and L. Lamel},
  TITLE = {{Improving Genericity for Task-Independent Speech Recognition}},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Aalborg},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/euro01fab.pdf}
}

@ARTICLE{Gauvain01A,
  AUTHOR = {J.L. Gauvain and L. Lamel and G.
Adda},
  TITLE = {{Audio Partitioning and Transcription for Broadcast Data Indexation}},
  JOURNAL = {Multimedia Tools and Applications},
  VOLUME = 14,
  NUMBER = 2,
  PAGES = {187--200},
  YEAR = 2001,
  URL = {http://www.wkap.nl/issuetoc.htm/1380-7501+14+2+2001}
}

% 2001 ACL, HLT and ICASSP papers. Page numbers that are not known are simply omitted.
@INPROCEEDINGS{Gauvain01B,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel and Gilles Adda and Martine Adda-Decker and Claude Barras and Langzhou Chen and Yannick de Kercadio},
  TITLE = {{Processing Broadcast Audio for Information Access}},
  BOOKTITLE = {ACL 39th annual meeting},
  MONTH = jul,
  ADDRESS = {Toulouse},
  YEAR = 2001,
  PAGES = {2--9},
  URL = {http://www-tlp.limsi.fr/public/acl01.pdf}
}

@INPROCEEDINGS{Lamel01A,
  AUTHOR = {Lori Lamel and Jean-Luc Gauvain and Gilles Adda},
  TITLE = {{Investigating Lightly Supervised Acoustic Model Training}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Salt Lake City},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/ica01light.pdf}
}

@INPROCEEDINGS{Lamel01B,
  AUTHOR = {Lori Lamel and Fabrice Lefevre and Jean-Luc Gauvain and Gilles Adda},
  TITLE = {{Portability Issues for Speech Recognition Technologies}},
  BOOKTITLE = {Proceedings of HLT 2001},
  MONTH = mar,
  ADDRESS = {San Diego},
  YEAR = 2001,
  PAGES = {9--16},
  URL = {http://www-tlp.limsi.fr/public/hlt01.pdf}
}

@INPROCEEDINGS{Barras01E,
  AUTHOR = {Claude Barras and Lori Lamel and Jean-Luc Gauvain},
  TITLE = {{Automatic Transcription of Compressed Broadcast Audio}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Salt Lake City},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/ica01cb.pdf}
}

@INPROCEEDINGS{Lefevre01A,
  AUTHOR = {Fabrice Lefevre and Jean-Luc Gauvain and Lori Lamel},
  TITLE = {{Towards Task-Independent Speech Recognition}},
  BOOKTITLE = icassp,
  MONTH = may,
  ADDRESS = {Salt Lake City},
  YEAR = 2001,
  URL = {http://www-tlp.limsi.fr/public/ica01fl.pdf}
}

%%%%%%%%%% 2000 section
@ARTICLE{Holger00A,
  AUTHOR = {H. Schwenk and Y.
Bengio},
  TITLE = {{Boosting Neural Networks}},
  JOURNAL = {Neural Computation},
  VOLUME = 12,
  NUMBER = 8,
  PAGES = {1869--1887},
  YEAR = 2000
}

% 2000 ICSLP and ISCA ITRW papers, plus two Speech Communication articles.
@INPROCEEDINGS{Holger00B,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Combining Multiple Speech Recognizers using Voting and Language Model Information}},
  BOOKTITLE = icslp,
  PAGES = {II:915--918},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/icslp00_holger.pdf}
}

@INPROCEEDINGS{Holger00C,
  AUTHOR = {Holger Schwenk and Jean-Luc Gauvain},
  TITLE = {{Improved ROVER using Language Model Information}},
  BOOKTITLE = {ISCA ITRW Workshop on Automatic Speech Recognition: Challenges for the new Millenium},
  PAGES = {47--52},
  ADDRESS = {Paris},
  MONTH = sep,
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/asr00_holger.pdf}
}

@ARTICLE{Lamel00A,
  AUTHOR = {L. Lamel and S. Rosset and J.L. Gauvain and S. Bennacef and M. Garnier-Rizet and B. Prouts},
  TITLE = {{The LIMSI ARISE System}},
  JOURNAL = spcom,
  VOLUME = 31,
  NUMBER = {4},
  PAGES = {339--354},
  MONTH = aug,
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/spc00arise.pdf},
  keywords = {dialog management, dialog system}
}

@ARTICLE{Lamel00B,
  AUTHOR = {L. Lamel and J.L. Gauvain},
  TITLE = {{Speaker Verification over the Telephone}},
  JOURNAL = spcom,
  VOLUME = 31,
  NUMBER = {2-3},
  PAGES = {141--154},
  MONTH = jun,
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/spc00sv.pdf}
}

@INPROCEEDINGS{Lamel00C,
  AUTHOR = {L. Lamel and J.L. Gauvain and G. Adda},
  TITLE = {{Lightly Supervised Acoustic Model Training}},
  BOOKTITLE = {ISCA ITRW Workshop on Automatic Speech Recognition: Challenges for the new Millenium},
  MONTH = sep,
  ADDRESS = {Paris},
  PAGES = {150--154},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/asr2000_light.pdf}
}

@INPROCEEDINGS{Lamel00D,
  AUTHOR = {L. Lamel and S. Rosset and J.L. Gauvain},
  TITLE = {{Considerations in the Design and Evaluation of Spoken Language Dialog Systems}},
  BOOKTITLE = {Proc.
ICSLP'2000},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  PAGES = {IV-5-8},
  URL = {http://www-tlp.limsi.fr/public/icslp00_slds.pdf}
}

% NOTE(review): "6 part 3" is split into VOLUME 6 and NUMBER 3; confirm the issue number.
@ARTICLE{Lamel00E,
  AUTHOR = {L. Lamel and W. Minker and P. Paroubek},
  TITLE = {{Towards Best Practice in the Development and Evaluation of Speech Recognition Components of a Spoken Language Dialogue System}},
  JOURNAL = {Natural Language Engineering},
  MONTH = oct,
  VOLUME = 6,
  NUMBER = 3,
  PAGES = {305--322},
  YEAR = 2000,
  URL = {ftp://www.limsi.fr/Individu/pap/nle99.pdf}
}

@INPROCEEDINGS{Rosset00A,
  AUTHOR = {S. Rosset and S. Bennacef and L. Lamel},
  TITLE = {{Stratégies pour un système de dialogue oral homme-machine}},
  BOOKTITLE = {23èmes Journées d'Etude sur la Parole},
  MONTH = jun,
  ADDRESS = {Aussois},
  YEAR = 2000,
  PAGES = {329--332}
}

% The volume editors are carried in EDITOR rather than inside BOOKTITLE.
@INCOLLECTION{Madda00A,
  AUTHOR = {M. Adda-Decker and L. Lamel},
  TITLE = {{The use of lexica in automatic speech recognition}},
  BOOKTITLE = {Lexicon Development for Speech and Language Processing},
  EDITOR = {{Van Eynde}, F. and D. Gibbon},
  PUBLISHER = {Kluwer},
  YEAR = 2000
}

% NOTE(review): the URL file name "phon00decomp" looks like it belongs to the
% morphological decomposition paper (key Madda00E); confirm the two URLs are not swapped.
@INPROCEEDINGS{Madda00B,
  AUTHOR = {M. Adda-Decker and L. Lamel},
  TITLE = {{Modeling Reduced Pronunciations in German}},
  BOOKTITLE = {Proc. Workshop on Phonetics and Phonology in ASR, PHONUS 5},
  MONTH = mar,
  ADDRESS = {Saarbrücken},
  YEAR = 2000,
  PAGES = {145--159},
  URL = {http://www-tlp.limsi.fr/public/phon00decomp.pdf}
}

@INPROCEEDINGS{Madda00C,
  AUTHOR = {M. Adda-Decker and G. Adda and L. Lamel},
  TITLE = {{Investigating text normalization and pronunciation variants for German broadcast transcription}},
  BOOKTITLE = {Proc. ICSLP'2000},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  PAGES = {266--269},
  URL = {http://www-tlp.limsi.fr/public/icslp00_h4ger.pdf}
}

@INPROCEEDINGS{Madda00D,
  AUTHOR = {M. Adda-Decker and L. Lamel},
  TITLE = {{Systèmes d'alignement automatique \& études de variantes de prononciation}},
  BOOKTITLE = {Proc.
XXIIIièmes Journées d'Études sur la Parole},
  MONTH = jun,
  ADDRESS = {Aussois},
  YEAR = 2000,
  PAGES = {189--192},
  URL = {http://www-tlp.limsi.fr/public/jep00align.pdf}
}

% NOTE(review): the URL file name "phon00reco" may be swapped with the one on
% key Madda00B ("phon00decomp"); confirm.
@INPROCEEDINGS{Madda00E,
  AUTHOR = {M. Adda-Decker and G. Adda},
  TITLE = {Morphological Decomposition for {ASR} in {German}},
  BOOKTITLE = {Proc. Workshop on Phonetics and Phonology in ASR, PHONUS 5},
  MONTH = mar,
  ADDRESS = {Saarbrücken},
  YEAR = 2000,
  PAGES = {129--143},
  URL = {http://www-tlp.limsi.fr/public/phon00reco.pdf}
}

@ARTICLE{Gauvain00A,
  AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda},
  TITLE = {{Transcribing Broadcast News for Audio and Video Indexing}},
  JOURNAL = {Communications of the ACM},
  VOLUME = 43,
  NUMBER = {2},
  PAGES = {64--70},
  MONTH = feb,
  YEAR = 2000,
  URL = {http://www.acm.org/pubs/citations/journals/cacm/2000-43-2/p64-gauvain/}
}

@INPROCEEDINGS{Gauvain00B,
  AUTHOR = {J.L. Gauvain},
  TITLE = {{Systèmes de reconnaissance à grands vocabulaires : Progrès et défis}},
  BOOKTITLE = {Proc. XXIIIièmes JEP},
  MONTH = jun,
  ADDRESS = {Aussois},
  YEAR = 2000
}

@INPROCEEDINGS{Gauvain00C,
  AUTHOR = {J.L. Gauvain and L. Lamel and Y. de Kercadio and G. Adda},
  TITLE = {{Transcription and Indexation of Broadcast Data}},
  BOOKTITLE = icassp,
  MONTH = jun,
  ADDRESS = {Istanbul},
  YEAR = 2000,
  PAGES = {1663--1666},
  URL = {http://www-tlp.limsi.fr/public/ica00h4.pdf}
}

@INPROCEEDINGS{Gauvain00D,
  AUTHOR = {F. de Jong and J.L. Gauvain and D. Hiemstra and K. Netter},
  TITLE = {{Language-Based Multimedia Information Retrieval}},
  BOOKTITLE = {RIAO},
  MONTH = apr,
  ADDRESS = {Paris},
  YEAR = 2000
}

% Author names must be separated by the literal word "and" for BibTeX to split them.
@INPROCEEDINGS{Gauvain00E,
  AUTHOR = {B. Prouts and J.-L. Gauvain},
  TITLE = {{An Audio Transcriber for Broadcast Document Indexation}},
  BOOKTITLE = {RIAO},
  MONTH = apr,
  ADDRESS = {Paris},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/riao00.pdf}
}

@ARTICLE{Gauvain00F,
  AUTHOR = {J.L. Gauvain and L.
Lamel},
  TITLE = {{Large Vocabulary Continuous Speech Recognition: Advances and Applications}},
  JOURNAL = {Proceedings of the IEEE},
  VOLUME = 88,
  NUMBER = {8},
  PAGES = {1181--1200},
  MONTH = aug,
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/ieee00.pdf}
}

% 2000 symposium, ICSLP, TREC and NIST workshop papers.
% Every author is separated by "and"; a comma would merge two people into one name.
@INPROCEEDINGS{Gauvain00G,
  AUTHOR = {J.-L. Gauvain and G. Adda and M. Adda-Decker and C. Barras and L. Chen and M. Jardino and L. Lamel and H. Schwenk},
  TITLE = {{An Overview of Speech Recognition Activities at LIMSI}},
  BOOKTITLE = {Sino-French Symposium on Speech and Language Processing},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/TLPreco00.pdf}
}

@INPROCEEDINGS{Gauvain00H,
  AUTHOR = {J.-L. Gauvain and L. Lamel},
  TITLE = {{Fast Decoding for Indexation of Broadcast Data}},
  BOOKTITLE = {Proc. ICSLP'2000},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  PAGES = {IV-794-798},
  URL = {http://www-tlp.limsi.fr/public/icslp00_10x.pdf}
}

% NOTE(review): "Y. Kercadio" normalised to "Y. de Kercadio" as spelled in the
% other entries of this file; confirm.
@INPROCEEDINGS{Gauvain00I,
  AUTHOR = {J.L. Gauvain and L. Lamel and C. Barras and G. Adda and Y. de Kercadio},
  TITLE = {{The LIMSI SDR system for TREC-9}},
  BOOKTITLE = {Proc. of the Text Retrieval Conference, TREC-9},
  MONTH = nov,
  ADDRESS = {Gaithersburg},
  YEAR = 2000,
  PAGES = {335--341},
  URL = {http://www-tlp.limsi.fr/public/sdr00.pdf}
}

@INPROCEEDINGS{Gauvain00J,
  AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda},
  TITLE = {{The LIMSI 1999 Hub-4E Transcription System}},
  BOOKTITLE = {Proc. NIST Speech Transcription Workshop},
  MONTH = may,
  ADDRESS = {College Park, MD},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/hub4_00.pdf}
}

@INPROCEEDINGS{Chen00A,
  AUTHOR = {L. Chen and L. Lamel and G. Adda and J.L. Gauvain},
  TITLE = {{Broadcast News Transcription in Mandarin}},
  BOOKTITLE = {Proc. ICSLP'2000},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000,
  PAGES = {II-1015-1018},
  URL = {http://www-tlp.limsi.fr/public/icslp00_h4m.pdf}
}

@INPROCEEDINGS{Mariani00A,
  AUTHOR = {J.
Mariani},
  TITLE = {{Support To French language processing research}},
  BOOKTITLE = {Sino-French Symposium on Speech and Language Processing},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000
}

% 2000 invited talks and LREC/ICSLP papers. Unknown page numbers are omitted.
% NOTE(review): spelling corrected in two titles ("losing", "contrastive", "for")
% and in the venue name COCOSDA; confirm against the original programmes.
@INPROCEEDINGS{Mariani00B,
  AUTHOR = {J. Mariani},
  TITLE = {{Are we losing ground to the US? A contrastive analysis of the EU versus US frameworks}},
  BOOKTITLE = {HLT Open House},
  MONTH = sep,
  ADDRESS = {Luxembourg},
  YEAR = 2000
}

@INPROCEEDINGS{Mariani00C,
  AUTHOR = {J. Mariani and K. Choukri},
  TITLE = {{Overview of recent activities in Europe}},
  BOOKTITLE = {COCOSDA Workshop},
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000
}

@INPROCEEDINGS{Mariani00D,
  AUTHOR = {J. Mariani},
  TITLE = {{Spoken language processing for multilingual interactions}},
  BOOKTITLE = {IST Conference},
  MONTH = nov,
  ADDRESS = {Nice},
  YEAR = 2000
}

@INPROCEEDINGS{Maynard00A,
  AUTHOR = {H. Bonneau-Maynard and L. Devillers},
  TITLE = {{A framework for evaluating contextual understanding}},
  BOOKTITLE = icslp,
  MONTH = oct,
  ADDRESS = {Beijing},
  YEAR = 2000
}
% URL = {http://www-tlp.limsi.fr/public/cbmi99-olive.pdf}

% NOTE(review): the URL file name says "lrec02" although YEAR is 2000; confirm the link.
@INPROCEEDINGS{Maynard00B,
  AUTHOR = {H. Bonneau-Maynard and L. Devillers and S. Rosset},
  TITLE = {{Predictive performance of dialog systems}},
  BOOKTITLE = lrec,
  MONTH = may,
  ADDRESS = {Athens},
  YEAR = 2000,
  URL = {http://www-tlp.limsi.fr/public/lrec02hbm.pdf}
}

@INPROCEEDINGS{Geoffrois00A,
  AUTHOR = {E. Geoffrois and C. Barras and S. Bird and Z. Wu},
  TITLE = {{Transcribing with annotation graphs}},
  BOOKTITLE = lrec,
  MONTH = may,
  ADDRESS = {Athens},
  YEAR = 2000
}

@INPROCEEDINGS{Paroubek00A,
  AUTHOR = {P. Paroubek},
  TITLE = {{Categorial data-specification for control task formalization and validation in quantitative black box evaluation}},
  BOOKTITLE = {LREC Sat.
workshop on Using evaluation within HLT programs~: Results and Trends},
  ADDRESS = {Athens},
  MONTH = may,
  YEAR = 2000
}

% Venue names reuse the lrec and taln string macros defined at the top of the file.
@INPROCEEDINGS{Paroubek00B,
  AUTHOR = {P. Paroubek},
  TITLE = {{Language resources as by-product of evaluation: The Multitag example}},
  BOOKTITLE = lrec,
  MONTH = may,
  ADDRESS = {Athens},
  YEAR = 2000
}

@INPROCEEDINGS{Paroubek00C,
  AUTHOR = {P. Paroubek and M. Rajman},
  TITLE = {{Multitag, une ressource linguistique produit du paradigme d'évaluation}},
  BOOKTITLE = taln,
  MONTH = oct,
  ADDRESS = {Lausanne},
  YEAR = 2000
}

%%%%%%%%%% 1999 section
@INPROCEEDINGS{Temem99A,
  AUTHOR = {J.N. Temem and L. Lamel and J.L. Gauvain},
  TITLE = {{The MASK Demonstrator: An Emerging Technology for User-Friendly Passengers Kiosk}},
  BOOKTITLE = {Proc. World Congress on Railway Research},
  MONTH = oct,
  ADDRESS = {Toulouse},
  YEAR = 1999
}

@INPROCEEDINGS{Gauvain99D,
  AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda},
  TITLE = {{Audio Partitioning and Transcription for Broadcast Data Indexation}},
  BOOKTITLE = {Proc. CBMI'99},
  MONTH = oct,
  ADDRESS = {Toulouse},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/cbmi99.pdf}
}

@INPROCEEDINGS{deJong99A,
  AUTHOR = {F. de Jong and J.L. Gauvain and J. den Hartog and K. Netter},
  TITLE = {{Olive}: Speech Based Video Retrieval},
  BOOKTITLE = {Proc. CBMI'99},
  MONTH = oct,
  ADDRESS = {Toulouse},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/cbmi99-olive.pdf}
}

@TECHREPORT{Demars99A,
  AUTHOR = {C. Demars},
  TITLE = {{Représentations bidimensionnelles d'un signal de parole. Eléments de monographie, version révisée et augmentée}},
  NOTE = {Electronic publication},
  INSTITUTION = {LIMSI-CNRS},
  YEAR = {1999},
  PAGES = {1--246},
  URL = {http://www.limsi.fr/Individu/chrd/TOCHTML2001.html}
}

% NOTE(review): "Y. Kercadio" normalised to "Y. de Kercadio" as spelled in the
% other entries of this file; confirm.
@INPROCEEDINGS{Gauvain99C,
  AUTHOR = {J.-L. Gauvain and Y. de Kercadio and L. Lamel and G. Adda},
  TITLE = {{The LIMSI SDR System for TREC-8}},
  BOOKTITLE = {Proc.
of the Text Retrieval Conference, TREC-8, notebook},
  MONTH = nov,
  ADDRESS = {Gaithersburg},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/sdr99.pdf}
}

% 1999 journal article and workshop papers.
@ARTICLE{Madda99C,
  AUTHOR = {M. Adda-Decker and L. Lamel},
  TITLE = {Pronunciation variants across system configuration, language and speaking style},
  JOURNAL = spcom,
  VOLUME = 29,
  NUMBER = {2-4},
  PAGES = {83--98},
  MONTH = nov,
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/spc99pron.pdf}
}

% NOTE(review): society name corrected from "American Society of America" to
% "Acoustical Society of America"; confirm the exact proceedings title.
@INPROCEEDINGS{Doval99A,
  AUTHOR = {B. Doval and C. d'Alessandro},
  TITLE = {On the waveforms and spectra of glottal flow models},
  BOOKTITLE = {Proc. of the 137th Congress of the Acoustical Society of America},
  MONTH = mar,
  ADDRESS = {Berlin},
  YEAR = 1999
}

@INPROCEEDINGS{Wanneroy99A,
  AUTHOR = {R. Wanneroy and E. Bilinski and C. Barras and M. Adda-Decker and E. Geoffrois},
  TITLE = {Acoustic-phonetic modeling of non-native speech for language identification},
  BOOKTITLE = {Proc. of the MIST workshop (ESCA-NATO)},
  MONTH = sep,
  ADDRESS = {Leusden},
  YEAR = 1999,
  keywords = { lid }
}

@INPROCEEDINGS{Madda99B,
  AUTHOR = {Martine Adda-Decker},
  TITLE = {Towards Multilingual Interoperability in Automatic Speech Recognition},
  BOOKTITLE = {Proc. of the MIST workshop (ESCA-NATO)},
  MONTH = sep,
  ADDRESS = {Leusden},
  YEAR = 1999,
  keywords = { multilingualasr },
  URL = {http://www-tlp.limsi.fr/public/mist99.pdf}
}

% Authors are separated by "and"; the key spelling (Heinrich) is kept for existing citations.
@INPROCEEDINGS{Heinrich99A,
  AUTHOR = {N. Henrich and B. Doval and C.
d'Alessandro},
  TITLE = {Glottal open quotient estimation using linear prediction},
  BOOKTITLE = {International Workshop on Models and Analysis of Vocal Emissions for biomedical applications},
  MONTH = sep,
  ADDRESS = {Firenze},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/voiceglottalope.pdf}
}

% 1999 book chapters and journal articles.
% Publisher and journal names reuse the sv, jasa, csl and spcom string macros.
@INCOLLECTION{Lienard99C,
  AUTHOR = {Jean-Sylvain Lienard},
  TITLE = {Speech and voice perception: beyond pattern recognition},
  BOOKTITLE = {Speech Processing, Recognition and Artificial Neural Networks},
  PUBLISHER = sv,
  YEAR = 1999,
  PAGES = {85--112}
}

% The title holds only the article title; the journal name lives in JOURNAL.
% "Di Benedetto" is braced so it is parsed as one surname.
@ARTICLE{Lienard99B,
  AUTHOR = {J.S. Lienard and {Di Benedetto}, M.G.},
  TITLE = {Effect of vocal effort on spectral properties of vowels},
  JOURNAL = jasa,
  VOLUME = {106},
  NUMBER = {1},
  PAGES = {411--422},
  YEAR = 1999
}

@INPROCEEDINGS{Minker99E,
  AUTHOR = {W. Minker and M. Gavalda and A. Waibel},
  TITLE = {Hidden Understanding Models for Machine Translation},
  BOOKTITLE = {Proceedings ESCA Tutorial and Research Workshop (ETRW) on Interactive Dialogue in Multi-Modal Systems},
  MONTH = jun,
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/esca99-minker.pdf}
}

@ARTICLE{Minker99D,
  AUTHOR = {W. Minker},
  TITLE = {Stochastically-based Semantic Analysis for Machine Translation},
  JOURNAL = csl,
  VOLUME = 13,
  NUMBER = 2,
  YEAR = 1999
}

@ARTICLE{Minker99C,
  AUTHOR = {W. Minker},
  TITLE = {Design Considerations for Knowledge Source Representations of a Stochastically-based Natural Language Understanding Component},
  JOURNAL = spcom,
  VOLUME = 28,
  NUMBER = 2,
  YEAR = 1999
}

@INCOLLECTION{Minker99B,
  AUTHOR = {F. Neel and W.
Minker},
  TITLE = {Multimodal Interactive Speech Systems},
  BOOKTITLE = {Computational Models of Speech Pattern Processing},
  PUBLISHER = {Springer Verlag},
  ADDRESS = {Berlin/Heidelberg},
  YEAR = 1999
}

@BOOK{Minker99A,
  AUTHOR = {W. Minker},
  TITLE = {Compr\'ehension automatique de la parole spontan\'ee},
  PUBLISHER = {L'Harmattan},
  ADDRESS = {Paris},
  YEAR = 1999
}

% ICPhS 1999 (San Francisco) papers.
% The compound surname "Boula de Mareuil" is braced so it is parsed as one last name.
@INPROCEEDINGS{Mareuil99A,
  AUTHOR = {{Boula de Mareuil}, Philippe and Cristobal Corredor-Ardoy and Martine Adda-Decker},
  TITLE = {Multilingual Automatic Phoneme Clustering},
  BOOKTITLE = {14th International Congress of Phonetic Sciences},
  MONTH = aug,
  ADDRESS = {San Francisco},
  YEAR = 1999,
  PAGES = {1209--1212}
}

% NOTE(review): this entry previously carried the same single author as Lienard99A,
% which looks like a copy-paste slip given the key prefix. The author list below is
% the presumed one for this paper; confirm against the ICPhS 1999 proceedings.
@INPROCEEDINGS{Madda99A,
  AUTHOR = {Martine Adda-Decker and {Boula de Mareuil}, Philippe and Lori Lamel},
  TITLE = {Pronunciation Variants in {French}: Schwa and Liaison},
  BOOKTITLE = {14th International Congress of Phonetic Sciences},
  MONTH = aug,
  ADDRESS = {San Francisco},
  YEAR = 1999,
  PAGES = {2239--2242}
}

@INPROCEEDINGS{Lienard99A,
  AUTHOR = {Jean-Sylvain Li{\'e}nard},
  TITLE = {A Hierarchical Model of Speech and Voice Perception based on Time Resolution},
  BOOKTITLE = {14th International Congress of Phonetic Sciences},
  MONTH = aug,
  ADDRESS = {San Francisco},
  YEAR = 1999,
  PAGES = {1839--1842}
}

@ARTICLE{Beaujard99B,
  AUTHOR = {Christel Beaujard and Mich\`ele Jardino},
  TITLE = {Classification de mots non étiquetés par des méthodes statistiques},
  JOURNAL = {Mathématique Informatique et Sciences Humaines},
  NUMBER = 147,
  MONTH = sep,
  YEAR = 1999,
  PAGES = {7--23},
  URL = {http://www-tlp.limsi.fr/public/SFC99-article.pdf}
}

@INPROCEEDINGS{Beaujard99A,
  AUTHOR = {Christel Beaujard and Mich\`ele Jardino},
  TITLE = {Language Modeling based on Automatic Word Concatenations},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {1563--1566}
}

@INPROCEEDINGS{VuNgoc99A,
  AUTHOR = {Tuan Vu Ngoc and Christophe d'Alessandro},
  TITLE = {Robust Glottal Closure Detection using the Wavelet Transform},
  BOOKTITLE = eurospeech,
  MONTH
= sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {2805--2808},
  URL = {http://www-tlp.limsi.fr/public/euro99_tuan.pdf}
}

% Eurospeech 1999 (Budapest) papers; BOOKTITLE comes from the eurospeech string macro.
% Acronyms in unbraced titles are protected so styles do not lowercase them.
@INPROCEEDINGS{Matrouf99A,
  AUTHOR = {Driss Matrouf and Martine Adda-Decker and Jean-Luc Gauvain and Lori Lamel},
  TITLE = {Comparing Different Model Configurations for Language Identification using a Phonotactic Approach},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {387--390},
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/euro99lid.pdf}
}

@INPROCEEDINGS{Rosset99A,
  AUTHOR = {Sophie Rosset and Samir Bennacef and Lori Lamel},
  TITLE = {Design Strategies for Spoken Language Dialog Systems},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {1535--1538},
  URL = {http://www-tlp.limsi.fr/public/euro99_dial.pdf}
}

@INPROCEEDINGS{Gadda99A,
  AUTHOR = {Gilles Adda and Mich\`ele Jardino and Jean-Luc Gauvain},
  TITLE = {Language Modeling for Broadcast News Transcription},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {1759--1762}
}

@INPROCEEDINGS{Gauvain99B,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel and Gilles Adda and Mich\`ele Jardino},
  TITLE = {Recent Advances in Transcribing Television and Radio Broadcasts},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {655--658},
  URL = {http://www-tlp.limsi.fr/public/euro99_hub4.pdf}
}

%don't have source
@INPROCEEDINGS{Lamel99C,
  AUTHOR = {Els den Os and Lou Boves and Lori Lamel and Paolo Baggia},
  TITLE = {Overview of the {ARISE} Project},
  BOOKTITLE = eurospeech,
  MONTH = sep,
  ADDRESS = {Budapest},
  YEAR = 1999,
  PAGES = {1527--1530}
}

@INPROCEEDINGS{Gauvain99A,
  AUTHOR = {Jean-Luc Gauvain and Lori Lamel and Gilles Adda and Mich\`ele Jardino},
  TITLE = {The {LIMSI} 1998 {Hub-4E} Transcription System},
  BOOKTITLE = {Proc.
of the DARPA Broadcast News Workshop},
  MONTH = feb,
  ADDRESS = {Herndon, VA},
  YEAR = 1999,
  PAGES = {99--104},
  URL = {http://www-tlp.limsi.fr/public/hub4_99.pdf}
}

% 1999 DARPA workshop and ICASSP (Phoenix) papers.
% Acronyms and proper nouns in unbraced titles are protected against style recasing.
@INPROCEEDINGS{Mariani99A,
  AUTHOR = {Joseph Mariani and Patrick Paroubek},
  TITLE = {Human Language Technologies Evaluation in the {European} Framework},
  BOOKTITLE = {Proc. of the DARPA Broadcast News Workshop},
  MONTH = feb,
  ADDRESS = {Herndon, VA},
  YEAR = 1999,
  PAGES = {237--242}
}

@INPROCEEDINGS{Lamel99B,
  AUTHOR = {Martine Adda-Decker and Gilles Adda and Jean-Luc Gauvain and Lori Lamel},
  TITLE = {Large Vocabulary Speech Recognition in {French}},
  BOOKTITLE = icassp,
  MONTH = mar,
  ADDRESS = {Phoenix},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/icassp99bref.pdf}
}

@INPROCEEDINGS{Lamel99A,
  AUTHOR = {Lori Lamel and Sophie Rosset and Jean-Luc Gauvain and Samir Bennacef},
  TITLE = {The {LIMSI} {ARISE} System for Train Travel Information},
  BOOKTITLE = {IEEE International Conference On Acoustics, Speech, and Signal Processing},
  MONTH = mar,
  ADDRESS = {Phoenix},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/ica99arise.pdf}
}

@INPROCEEDINGS{Holger99A,
  AUTHOR = {H. Schwenk},
  TITLE = {Using Boosting to Improve a Hybrid {HMM}/Neural Network Speech Recognizer},
  BOOKTITLE = {IEEE International Conference On Acoustics, Speech, and Signal Processing},
  MONTH = mar,
  ADDRESS = {Phoenix},
  YEAR = 1999,
  URL = {http://www-tlp.limsi.fr/public/icassp99_holger.pdf}
}

%%%%%%%%%% 1998 section
@ARTICLE{Holger98A,
  AUTHOR = {H.
Schwenk},
  TITLE = {The {Diabolo} Classifier},
  JOURNAL = {Neural Computation},
  VOLUME = 10,
  NUMBER = 8,
  PAGES = {2175--2200},
  YEAR = 1998
}

@INPROCEEDINGS{Devillers98A,
  AUTHOR = {Laurence Devillers and H\'el\`ene Bonneau-Maynard},
  TITLE = {Evaluation of Dialog Strategies for a Tourist Information Retrieval System},
  BOOKTITLE = icslp,
  VOLUME = {4},
  MONTH = dec,
  ADDRESS = {Sydney, Australia},
  YEAR = 1998,
  PAGES = {1187--1190},
  URL = {http://www-tlp.limsi.fr/public/icslp98dial.pdf}
}

% Authors were separated by a comma, which BibTeX reads as a single
% "Last, First" name; they must be joined with " and ".
@INPROCEEDINGS{Maynard98A,
  AUTHOR = {H. Bonneau-Maynard and L. Devillers},
  TITLE = {Dialog Strategies in a tourist information spoken dialog system},
  BOOKTITLE = {specom},
  MONTH = oct,
  ADDRESS = {St Petersbourg, Russia},
  YEAR = 1998,
  PAGES = {}
}

@INPROCEEDINGS{Lamel98E,
  AUTHOR = {Lori Lamel},
  TITLE = {Spoken Language Dialog System Development and Evaluation at {LIMSI}},
  BOOKTITLE = {1998 International Symposium on Spoken Dialogue},
  MONTH = nov,
  ADDRESS = {Sydney, Australia},
  YEAR = 1998,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/issd98.pdf}
}

@INPROCEEDINGS{Lamel98D,
  AUTHOR = {L. Lamel and S. Bennacef and J.L. Gauvain and Hervé Dartigues and Jean-Noël Temem},
  TITLE = {User {Evaluation} of the {MASK} {Kiosk}},
  BOOKTITLE = icslp,
  VOLUME = {7},
  MONTH = dec,
  ADDRESS = {Sydney, Australia},
  YEAR = 1998,
  PAGES = {2875--2878},
  URL = {http://www-tlp.limsi.fr/public/icslp98mask.pdf}
}

@INPROCEEDINGS{Matrouf98B,
  AUTHOR = {D. Matrouf and M. Adda-Decker and L. Lamel and J.L. Gauvain},
  TITLE = {Language {Identification} {Incorporating} {Lexical} {Information}},
  BOOKTITLE = icslp,
  VOLUME = {2},
  MONTH = dec,
  ADDRESS = {Sydney, Australia},
  YEAR = 1998,
  PAGES = {181--184},
  URL = {http://www-tlp.limsi.fr/public/icslp98lid.pdf}
}

@INPROCEEDINGS{Gauvain98D,
  AUTHOR = {J.L. Gauvain and L. Lamel and G.
Adda},
  TITLE = {Partitioning and Transcription of Broadcast News Data},
  BOOKTITLE = icslp,
  VOLUME = {4},
  MONTH = dec,
  ADDRESS = {Sydney, Australia},
  YEAR = 1998,
  PAGES = {1335--1338},
  URL = {http://www-tlp.limsi.fr/public/icslp98h4.pdf}
}

@INPROCEEDINGS{AlessandroDoval98A,
  AUTHOR = {C. d'Alessandro and B. Doval},
  TITLE = {Voice quality modification using periodic-aperiodic decomposition and spectral processing of the voice source signal},
  BOOKTITLE = {International workshop on Speech Synthesis, ESCA},
  ADDRESS = {Jenolan Caves, Australia},
  MONTH = nov,
  YEAR = 1998,
  PAGES = {277--282},
  URL = {http://www-tlp.limsi.fr/public/tts98atr.pdf}
}

% "B3 Partners" is a collective author: the extra braces stop BibTeX from
% splitting it into first name "B3" and surname "Partners".
@INPROCEEDINGS{AlessandroB398,
  AUTHOR = {C. d'Alessandro and {B3 Partners}},
  TITLE = {Joint evaluation of Text-To-Speech synthesis in {French} within the {AUPELF ARC-B3} project},
  BOOKTITLE = {International workshop on Speech Synthesis, ESCA},
  ADDRESS = {Jenolan Caves, Australia},
  MONTH = nov,
  YEAR = 1998,
  PAGES = {11--16},
  URL = {http://www-tlp.limsi.fr/public/tts98b3.pdf}
}

% NOTE(review): the AUTHOR field used to repeat the previous entry
% ("C. d'Alessandro and B3 Partners"). The key and the tts98BdM.pdf file name
% point to Boula de Mareüil and d'Alessandro -- confirm against the paper.
@INPROCEEDINGS{BoulaAlessandro98,
  AUTHOR = {P. Boula de Mareüil and C. d'Alessandro},
  TITLE = {Text chunking for prosodic phrasing in {French}},
  BOOKTITLE = {International workshop on Speech Synthesis, ESCA},
  ADDRESS = {Jenolan Caves, Australia},
  MONTH = nov,
  YEAR = 1998,
  PAGES = {127--132},
  URL = {http://www-tlp.limsi.fr/public/tts98BdM.pdf}
}

@INPROCEEDINGS{Kercadio98A,
  AUTHOR = {Y. de Kercadio},
  TITLE = {An improved {Earley} parser with {LTAG}},
  BOOKTITLE = {TAG+ Workshop},
  MONTH = jul,
  ADDRESS = {Philadelphia},
  YEAR = 1998,
  URL = {http://www-tlp.limsi.fr/public/tagworkshop98.pdf}
}

@INPROCEEDINGS{Lamel98C,
  AUTHOR = {L.F. Lamel and S. Rosset and J.L. Gauvain and S.K. Bennacef and M. Garnier-Rizet and B.
Prouts},
  TITLE = {The {LIMSI ARISE} System},
  BOOKTITLE = ivtta,
  MONTH = sep,
  ADDRESS = {Torino, Italy},
  YEAR = 1998,
  PAGES = {209--214},
  URL = {http://www-tlp.limsi.fr/public/ivtta98.pdf}
}

@INPROCEEDINGS{Madda98C,
  AUTHOR = {Martine Adda-Decker and Lori Lamel},
  TITLE = {Pronunciation Variants Across Systems, Languages and Speaking Style},
  BOOKTITLE = {ESCA Workshop on Modeling Pronunciation Variation for Automatic Speech Recognition},
  MONTH = may,
  ADDRESS = {Rolduc, NL},
  YEAR = 1998,
  PAGES = {1--6},
  URL = {http://www-tlp.limsi.fr/public/pron98.pdf}
}

@INPROCEEDINGS{Madda98B,
  AUTHOR = {M. Adda-Decker and G. Adda and J.L. Gauvain and L. Lamel},
  TITLE = {Elements pour la mise au point de système de reconnaissance grand vocabulaire de fran\c{c}ais},
  BOOKTITLE = {Proc. XXIIièmes Journ\'ees d'Etudes sur la Parole},
  MONTH = jun,
  ADDRESS = {Martigny},
  YEAR = 1998,
  PAGES = {367--370}
}

@INPROCEEDINGS{Matrouf98A,
  AUTHOR = {D. Matrouf and J.L. Gauvain},
  TITLE = {Utilisation des Modèles de Markov Cachés pour le Débruitage},
  BOOKTITLE = {Proc. XXIIièmes Journ\'ees d'Etudes sur la Parole},
  MONTH = jun,
  ADDRESS = {Martigny},
  YEAR = 1998,
  PAGES = {327--330}
}

@INPROCEEDINGS{Jardino98B,
  AUTHOR = {M. Jardino},
  TITLE = {Evaluation de modèle de langage à base de trigrammes de classes et de mots, avec le Jeu de {Shannon}},
  BOOKTITLE = {Proc. XXIIièmes Journ\'ees d'Etudes sur la Parole},
  MONTH = jun,
  ADDRESS = {Martigny},
  YEAR = 1998,
  PAGES = {363--366}
}

@INPROCEEDINGS{Beaujard98B,
  AUTHOR = {C. Beaujard and M. Jardino},
  TITLE = {Un modèle de langage mixte basé sur la similarité des mots dans un système de reconnaissance de parole},
  BOOKTITLE = {Proc. XXIIièmes Journ\'ees d'Etudes sur la Parole},
  MONTH = jun,
  ADDRESS = {Martigny},
  YEAR = 1998,
  PAGES = {343--346}
}

@INPROCEEDINGS{Corredor98B,
  AUTHOR = {C. Corredor-Ardoy and P. Boula de Mareüil and M. Adda-Decker and L. Lamel and J.L.
Gauvain},
  TITLE = {Classement automatique de phonèmes dans un cadre multilingue},
  BOOKTITLE = {Proc. XXIIièmes Journ\'ees d'Etudes sur la Parole},
  MONTH = jun,
  ADDRESS = {Martigny},
  YEAR = 1998,
  PAGES = {75--78}
}

@INPROCEEDINGS{Gauvain98B,
  AUTHOR = {J.L. Gauvain and L. Lamel},
  TITLE = {Recent Activities in Spoken Language Processing at {LIMSI}},
  BOOKTITLE = jasa,
  VOLUME = {103, 5, Pt. 2 (Proceedings of the 135th ASA and the 16th ICA)},
  ADDRESS = {Seattle, WA},
  MONTH = jun,
  YEAR = 1998,
  PAGES = {2816, (603--604)},
  URL = {http://www-tlp.limsi.fr/public/asaica98.pdf}
}

@INPROCEEDINGS{Mariani98B,
  AUTHOR = {J. Mariani},
  TITLE = {The {Aupelf-Uref} Evaluation-Based Language Engineering Actions and Related Projects},
  BOOKTITLE = lrec,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may,
  YEAR = 1998,
  PAGES = {123--128}
}
% URL = {http://www-tlp.limsi.fr/public/lrec98aupelf.pdf}

@INPROCEEDINGS{Chase98B,
  AUTHOR = {L. Chase},
  TITLE = {A Review of the {American} {Switchboard} and {Callhome} Speech Recognition Evaluation Programs},
  BOOKTITLE = lrec,
  PAGES = {789--793},
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {II},
  ADDRESS = {Granada},
  MONTH = may
}

@INPROCEEDINGS{Chase98A,
  AUTHOR = {L. Chase},
  TITLE = {Evaluating Word Confidence Annotation for Speech Recognition Systems},
  BOOKTITLE = lrec,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may,
  YEAR = 1998,
  PAGES = {167--173}
}
% URL = {http://www-tlp.limsi.fr/public/lrec98chase.pdf}

@INPROCEEDINGS{disc98A,
  AUTHOR = {L. Dybkjaer and N.O. Bernsen and R. Carlson and L. Chase and N. Dahlbäck and K. Failenschmid and U. Heid and P. Heisterkamp and A. Jönsson and H. Kamp and I. Karlsson and J.V. Kuppevelt and L. Lamel and P. Paroubek and D. Williams},
  TITLE = {The {Disc} Approach to Spoken Language Systems Development and Evaluation},
  BOOKTITLE = lrec,
  EDITOR = {A.
Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may,
  YEAR = 1998,
  PAGES = {185--189}
}

@INPROCEEDINGS{babel98A,
  AUTHOR = {P. Roach and S. Arnfield and W. Barry and S. Dimitrova and M. Boldea and A. Fourcin and W. Gonet and R. Gubrynowicz and E. Hallum and L. Lamel and K. Marasek and A. Marchal and E. Meister and K. Vicsi},
  TITLE = {{Babel}: A Database of {Central} and {Eastern European} Languages},
  BOOKTITLE = lrec,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may,
  YEAR = 1998,
  PAGES = {371--374}
}

@INPROCEEDINGS{habert-et-al98b,
  AUTHOR = {B. Habert and G. Adda and M. Adda-Decker and P. Boula de Mareüil and S. Ferrari and O. Ferret and G. Illouz and P. Paroubek},
  TITLE = {Towards Tokenization Evaluation},
  BOOKTITLE = lrec,
  PAGES = {427--431},
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may
}

@ARTICLE{YvonAupelfB398,
  AUTHOR = {F. Yvon and P. Boula de Mareüil and C. d'Alessandro and V. Aubergé and M. Bagein and G. Bailly and F. Béchet and S. Foukia and J.-P. Goldman and E. Keller and V. Pagel and F. Sannier and J. Véronis and D. O'Shaughnessy and B. Zellner},
  TITLE = {Objective evaluation of grapheme to phoneme conversion for Text-To-Speech synthesis in {French}},
  JOURNAL = csl,
  VOLUME = {12},
  NUMBER = {4},
  MONTH = dec,
  YEAR = 1998,
  PAGES = {393--410}
}

% Author spellings (Mareüil, Aubergé, Béchet, Véronis) aligned with the
% YvonAupelfB398 entry, which lists the same people.
@INPROCEEDINGS{Aupelf98B3,
  AUTHOR = {P. Boula de Mareüil and F. Yvon and C. d'Alessandro and V. Aubergé and M. Bagein and G. Bailly and F. Béchet and S. Foukia and J.P. Goldman and E. Keller and D. O'Shaughnessy and V. Pagel and F. Sannier and J. Véronis and B. Zellner},
  TITLE = {Evaluation of Grapheme-to-Phoneme Conversion for Text-to-Speech Synthesis in {French}},
  BOOKTITLE = lrec,
  PAGES = {641--644},
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A.
Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may
}

@INPROCEEDINGS{Gadda-et-al98A,
  AUTHOR = {G. Adda and J. Mariani and J. Lecomte and P. Paroubek and M. Rajman},
  TITLE = {The {GRACE} {French} Part-of-Speech Tagging Evaluation Task},
  BOOKTITLE = lrec,
  PAGES = {433--441},
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may
}

% A doubled "and and" in AUTHOR produced an empty name; removed.
@INPROCEEDINGS{Madda98A,
  AUTHOR = {M. Adda-Decker and G. Adda and J.L. Gauvain and L. Lamel},
  TITLE = {On the Use of Speech \& Text Corpora for Automatic Speech Recognition in {French}},
  BOOKTITLE = lrec,
  PAGES = {783--788},
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {II},
  ADDRESS = {Granada},
  MONTH = may,
  URL = {http://www-tlp.limsi.fr/public/lrec98madda.pdf}
}

@BOOK{Minker98D,
  AUTHOR = {W. Minker},
  TITLE = {Speech Understanding for Spoken Language Systems - Portability Across Domains and Languages},
  PUBLISHER = {H\"ansel-Hohenhausen},
  ADDRESS = {Frankfurt},
  NOTE = {Deutsche Hochschulschriften 2569},
  YEAR = 1998
}

@ARTICLE{Minker98C,
  AUTHOR = {W. Minker},
  TITLE = {Stochastic versus rule-based speech understanding for information retrieval},
  JOURNAL = spcom,
  VOLUME = 25,
  NUMBER = 4,
  PAGES = {223--247},
  YEAR = 1998
}

@INPROCEEDINGS{Minker98B,
  AUTHOR = {W. Minker and L. Chase},
  TITLE = {Evaluating Parses for Spoken Language Dialogue Systems},
  BOOKTITLE = lrecWp,
  YEAR = 1998,
  ADDRESS = {Granada},
  MONTH = may,
  URL = {http://www-tlp.limsi.fr/public/lrec98W_minker.pdf}
}

@INPROCEEDINGS{Minker98A,
  AUTHOR = {W. Minker},
  TITLE = {Evaluation Methodologies for Interactive Speech Systems},
  BOOKTITLE = lrec,
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {I},
  ADDRESS = {Granada},
  MONTH = may,
  PAGES = {199--206},
  URL = {http://www-tlp.limsi.fr/public/lrec98minker.pdf}
}

@INPROCEEDINGS{Jardino98A,
  AUTHOR = {M. Jardino and F. Bimbot and S. Igounet and K. Smaïli and I. Zitouni and M.
El-Beze},
  TITLE = {A First Evaluation Campaign for Language Models},
  BOOKTITLE = lrec,
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {II},
  ADDRESS = {Granada},
  MONTH = may,
  PAGES = {801--805}
}

@INPROCEEDINGS{Lamel98B,
  AUTHOR = {L. Lamel and G. Adda and M. Adda-Decker and C. Corredor-Ardoy and J.J. Gangolf and J.L. Gauvain},
  TITLE = {A Multilingual Corpus for Language Identification},
  BOOKTITLE = lrec,
  YEAR = 1998,
  EDITOR = {A. Rubio and N. Gallardo and R. Castro and A. Tejada},
  VOLUME = {II},
  ADDRESS = {Granada},
  MONTH = may,
  PAGES = {1115--1122},
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/lrec98ideal.pdf}
}

@INPROCEEDINGS{Beaujard98A,
  AUTHOR = {Christel Beaujard and Michèle Jardino},
  TITLE = {Classification de mots non étiquetés par des méthodes statistiques},
  BOOKTITLE = {6e rencontres de la Societe Francophone de Classification 1998},
  ADDRESS = {Montpellier},
  MONTH = sep,
  YEAR = 1998,
  PAGES = {21--24},
  URL = {http://www-tlp.limsi.fr/public/sfc98.pdf}
}

% NOTE(review): Lamel98A and Gauvain98A both give PAGES = 75--79 although
% they come from different proceedings; one is probably a copy-paste slip.
@INPROCEEDINGS{Lamel98A,
  AUTHOR = {L. Lamel and J.L. Gauvain},
  TITLE = {Speaker Verification Over the Telephone},
  BOOKTITLE = rla2c,
  ADDRESS = {Avignon},
  MONTH = apr,
  YEAR = 1998,
  VOLUME = {},
  PAGES = {75--79},
  URL = {http://www-tlp.limsi.fr/public/rla2c98.pdf}
}

@INPROCEEDINGS{Corredor98A,
  AUTHOR = {C. Corredor-Ardoy and L. Lamel and M. Adda-Decker and J.L. Gauvain},
  TITLE = {Multilingual Phone Recognition of Spontaneous Telephone Speech},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Seattle},
  MONTH = may,
  YEAR = 1998,
  VOLUME = {I},
  PAGES = {413--416},
  URL = {http://www-tlp.limsi.fr/public/ica98phrec.pdf}
}

@INPROCEEDINGS{Gauvain98A,
  AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda},
  TITLE = {The {LIMSI} 1997 {Hub-4E} Transcription System},
  BOOKTITLE = darpabntuw,
  ADDRESS = {Lansdowne, VA},
  MONTH = feb,
  YEAR = 1998,
  VOLUME = {},
  PAGES = {75--79},
  URL = {http://www-tlp.limsi.fr/public/hub4_98.pdf}
}

@INPROCEEDINGS{Mariani98A,
  AUTHOR = {J.J. Mariani and L.
Lamel},
  TITLE = {An Overview of {EU} Programs Related to Conversational/Interactive Systems},
  BOOKTITLE = darpabntuw,
  ADDRESS = {Lansdowne, VA},
  MONTH = feb,
  YEAR = 1998,
  VOLUME = {},
  PAGES = {247--253},
  URL = {http://www-tlp.limsi.fr/public/convsys98.pdf}
}

@ARTICLE{Yegna98A,
  AUTHOR = {B. Yegnanarayana and C. d'Alessandro and V. Darsinos},
  TITLE = {An iterative algorithm for decomposition of speech signals into periodic and aperiodic components},
  JOURNAL = {IEEE Trans. on Speech and Audio Processing},
  MONTH = jan,
  YEAR = 1998,
  VOLUME = 6,
  NUMBER = 1,
  PAGES = {1--11}
}

% JOURNAL now uses the file's jasa macro instead of a second inline spelling.
@ARTICLE{Alessandro98B,
  AUTHOR = {C. d'Alessandro and S. Rosset and J.P. Rossi},
  TITLE = {The pitch of short-duration fundamental frequency glissandos},
  JOURNAL = jasa,
  MONTH = oct,
  YEAR = 1998,
  VOLUME = 104,
  NUMBER = 4,
  PAGES = {2339--2348}
}

@ARTICLE{Alessandro98A,
  AUTHOR = {C. d'Alessandro and V. Darsinos and B. Yegnanarayana},
  TITLE = {Effectiveness of a periodic and aperiodic decomposition method for analysis of voice sources},
  JOURNAL = {IEEE Trans. on Speech and Audio Processing},
  MONTH = jan,
  YEAR = 1998,
  VOLUME = 6,
  NUMBER = 1,
  PAGES = {12--23}
}

@INPROCEEDINGS{Beaujard97A,
  AUTHOR = {Christel Beaujard and Michele Jardino and Helene Bonneau-Maynard},
  TITLE = {Evaluation of a class-based language model in a speech recognizer},
  BOOKTITLE = {specom},
  MONTH = oct,
  ADDRESS = {Cluj-Napoca, Roumanie},
  YEAR = 1997,
  PAGES = {45--50},
  URL = {http://www-tlp.limsi.fr/public/specom97.pdf}
}

@INPROCEEDINGS{Gauvain97E,
  AUTHOR = {J.L. Gauvain and L. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Transcription of Broadcast News},
  BOOKTITLE = eurospeech,
  ADDRESS = {Rhodes},
  MONTH = sep,
  YEAR = 1997,
  VOLUME = {2},
  PAGES = {907--910},
  URL = {http://www-tlp.limsi.fr/public/euro97h4.pdf}
}

% Surname hyphenated as in the other entries so that BibTeX does not treat
% "Corredor" as a given name.
@INPROCEEDINGS{Corredor97A,
  AUTHOR = {C. Corredor-Ardoy and J.L. Gauvain and M. Adda-Decker and L.
Lamel},
  TITLE = {Language Identification with Language-Independent Acoustic Models},
  BOOKTITLE = eurospeech,
  ADDRESS = {Rhodes},
  MONTH = sep,
  YEAR = 1997,
  VOLUME = {1},
  PAGES = {5--8},
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/euro97lid.pdf}
}

@INPROCEEDINGS{Gadda97D,
  AUTHOR = {G. Adda and M. Adda-Decker and J.L. Gauvain and L. Lamel},
  TITLE = {Text Normalization and Speech Recognition in {French}},
  BOOKTITLE = eurospeech,
  ADDRESS = {Rhodes},
  MONTH = sep,
  YEAR = 1997,
  VOLUME = {5},
  PAGES = {2711--2714},
  URL = {http://www-tlp.limsi.fr/public/euro97bref.pdf}
}

@INPROCEEDINGS{Minker97A,
  AUTHOR = {W. Minker},
  TITLE = {Stochastically-Based Natural Language Understanding Across Tasks and Languages},
  BOOKTITLE = eurospeech,
  ADDRESS = {Rhodes},
  MONTH = sep,
  YEAR = 1997,
  VOLUME = {3},
  PAGES = {1423--1426},
  URL = {http://www-tlp.limsi.fr/public/euro97minker.pdf}
}

@INPROCEEDINGS{AlessandroDoval97,
  AUTHOR = {C. d'Alessandro and B. Doval},
  TITLE = {Spectral representation and modelling of glottal flow signals},
  BOOKTITLE = {ESCA Workshop Larynx 97},
  ADDRESS = {Marseille},
  MONTH = jun,
  YEAR = 1997,
  PAGES = {87--90},
  URL = {http://www-tlp.limsi.fr/public/larynx97.pdf}
}

@INPROCEEDINGS{Doval97B,
  AUTHOR = {B. Doval and C. d'Alessandro and B. Diard},
  TITLE = {Spectral methods for voice source parameter estimation},
  BOOKTITLE = eurospeech,
  ADDRESS = {Rhodes},
  MONTH = sep,
  YEAR = 1997,
  VOLUME = {1},
  PAGES = {533--536},
  URL = {http://www-tlp.limsi.fr/public/euro97doval.pdf}
}

@INPROCEEDINGS{Madda97B,
  AUTHOR = {M. Adda-Decker and L. Lamel},
  TITLE = {The use of lexica in automatic speech recognition},
  BOOKTITLE = {Course notes for ELSNET's 5TH European Summer School on Language and Speech Communication: Lexicon Development for Language and Speech Processing},
  ORGANIZATION = {ELSNET},
  ADDRESS = {Leuven},
  YEAR = 1997,
  MONTH = jul
}

@INPROCEEDINGS{Doval97A,
  AUTHOR = {B. Doval and C.
d'Alessandro},
  TITLE = {Spectral Correlates of Glottal Waveform Models: An Analytic Study},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Munich},
  MONTH = apr,
  YEAR = 1997,
  VOLUME = {II},
  PAGES = {1295--1298},
  URL = {http://www-tlp.limsi.fr/public/ica97doval.pdf}
}

@INPROCEEDINGS{Gauvain97D,
  AUTHOR = {J.L. Gauvain},
  TITLE = {Evaluation et avancées en reconnaissance de la parole : de "Resource Management" à "Broadcast News"},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {11--12}
}

@INPROCEEDINGS{Madda97A,
  AUTHOR = {J.M. Dolmazon and F. Bimbot and G. Adda and M. El Bèze and J. C. Caërou and J. Zeiliger and M. Adda-Decker},
  TITLE = {{Organisation de la première campagne AUPELF pour l'évaluation des systèmes de dictée vocale}},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {13--18}
}

@INPROCEEDINGS{Gadda97A,
  AUTHOR = {G. Adda and M. Adda-Decker and J. L. Gauvain and L. Lamel},
  TITLE = {Le système de dictée vocale du {LIMSI} pour l'évaluation {AUPELF'97}},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {35--40}
}

@INPROCEEDINGS{Matrouf97C,
  AUTHOR = {D. Matrouf and J.L. Gauvain},
  TITLE = {Model Compensation for Additive and Convolutive Noises in Training and Test Data},
  BOOKTITLE = {International workshop on Robust Speech Recognition for Unknown Communication Channels, ESCA-NATO},
  ADDRESS = {Pont-à-Mousson, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {207--210}
}

@INPROCEEDINGS{Matrouf97B,
  AUTHOR = {D. Matrouf},
  TITLE = {Adaptation aux bruits additifs et convolutifs},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {63--69}
}

@INPROCEEDINGS{Jardino97A,
  AUTHOR = {M. Jardino and C. Beaujard},
  TITLE = {Rôle du contexte dans les modèles de langage "n-classes"},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {71--74}
}

@INPROCEEDINGS{Jardino97B,
  AUTHOR = {M. El-Bèze and M. Jardino and F.
Bimbot},
  TITLE = {Une approche alternative pour le calcul de perplexité},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {79--84}
}

@INPROCEEDINGS{Paroubek97A,
  AUTHOR = {P. Paroubek and G. Adda and J. Mariani and M. Rajman},
  TITLE = {Les proc\'edures de mesure automatique de l'action {GRACE} pour l'\'evaluation des assignateurs de Parties du Discours pour le Fran\c{c}ais},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {245--252}
}

@INPROCEEDINGS{Gadda97B,
  AUTHOR = {G. Adda and M. Adda-Decker},
  TITLE = {Normalisation de textes en fran\c{c}ais : une \'etude quantitative pour la reconnaissance de la parole},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {289--296}
}

@INPROCEEDINGS{Gadda97C,
  AUTHOR = {G. Adda and M. de Calmès and L. Lamel and G. Pérennou and M. Rajman and S. Rosset and J. Zeiliger},
  TITLE = {Ressources pour l'apprentissage, le développement et l'évaluation des systèmes de dictée vocale en français : corpus de texte, de parole et lexical},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {305--309}
}

@INPROCEEDINGS{Alessandro97A,
  AUTHOR = {C. d'Alessandro and V. Aubergé and G. Bailly and F. Béchet and P. Boula de Mareüil and S. Foukia and J.P. Goldman and J.F. Isabelle and E. Keller and A. Marchal and P. Mertens and V. Pagel and D. O'Shaughnessy and G. Richard and M-H. Talon and E. Wehrli and F. Yvon},
  TITLE = {Vers l'évaluation de systèmes de synthèse de parole à partir du texte en français},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {393--397}
}

@INPROCEEDINGS{Mareuil97A,
  AUTHOR = {P. Boula de Mareüil},
  TITLE = {Conversion graphème-phonème : de la formalisation à l'évaluation},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {399--406}
}

@INPROCEEDINGS{Rosset97A,
  AUTHOR = {S. Rosset and S. Bennacef and J.L. Gauvain and L. Devillers and L.
Lamel},
  TITLE = {Corpus oral de renseignements touristiques},
  BOOKTITLE = jst,
  ADDRESS = {Avignon, France},
  MONTH = apr,
  YEAR = 1997,
  PAGES = {399--406},
  URL = {http://www-tlp.limsi.fr/public/jst97corpusB2.pdf}
}
% NOTE(review): the PAGES of Rosset97A (399--406) are identical to those of
% Mareuil97A in the same proceedings -- probably a copy-paste slip; verify.

@INCOLLECTION{Gauvain97A,
  AUTHOR = {J.L. Gauvain and S.K. Bennacef and L. Devillers and L.F. Lamel and S. Rosset},
  TITLE = {Spoken Language Component of the {MASK} Kiosk},
  BOOKTITLE = {Human Comfort and Security of Information Systems},
  PUBLISHER = {Springer},
  YEAR = 1997,
  EDITOR = {K. Varghese and S. Pfleger},
  PAGES = {93--103},
  URL = {http://www-tlp.limsi.fr/public/mask95hcs.pdf}
}

@INPROCEEDINGS{Gauvain97B,
  AUTHOR = {J.L. Gauvain and G. Adda and L. Lamel and M. Adda-Decker},
  TITLE = {Transcribing Broadcast News Shows},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Munich},
  MONTH = apr,
  YEAR = 1997,
  VOLUME = {II},
  PAGES = {715--719},
  URL = {http://www-tlp.limsi.fr/public/ica97hub4.pdf}
}

@INPROCEEDINGS{Gauvain97C,
  AUTHOR = {J.L. Gauvain and G. Adda and L. Lamel and M. Adda-Decker},
  TITLE = {Transcribing Broadcast News: The {LIMSI} {Nov96} {Hub4} System},
  BOOKTITLE = arpasltw,
  ADDRESS = {Chantilly, Virginia},
  MONTH = feb,
  YEAR = 1997,
  PAGES = {56--63},
  URL = {http://www-tlp.limsi.fr/public/darpa97hub4.pdf}
}

@INPROCEEDINGS{Jardino97C,
  AUTHOR = {F. Bimbot and M. El-Beze and M. Jardino},
  TITLE = {An Alternative Scheme for Perplexity Estimation},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Munich},
  MONTH = apr,
  YEAR = 1997,
  VOLUME = {II},
  PAGES = {1483--1486}
}

@INPROCEEDINGS{Lamel97A,
  AUTHOR = {L. Lamel and J.L. Gauvain},
  TITLE = {Speaker Recognition with the {Switchboard} corpus},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Munich},
  MONTH = apr,
  YEAR = 1997,
  VOLUME = {II},
  PAGES = {1067--1070},
  URL = {http://www-tlp.limsi.fr/public/ica97sv.pdf}
}

@ARTICLE{Lamel97B,
  AUTHOR = {L.F. Lamel and S.K. Bennacef and S. Rosset and L. Devillers and S. Foukia and J.J. Gangolf and J.L.
Gauvain},
  TITLE = {The {LIMSI RailTel} System: Field trial of a telephone service for rail travel information},
  JOURNAL = spcom,
  VOLUME = 23,
  MONTH = oct,
  YEAR = 1997,
  PAGES = {67--82},
  URL = {http://www-tlp.limsi.fr/public/spc97rtel.pdf}
}

@ARTICLE{Billi97A,
  AUTHOR = {R. Billi and L.F. Lamel},
  TITLE = {{RailTel}: Railway Telephone Services},
  JOURNAL = spcom,
  VOLUME = 23,
  MONTH = oct,
  YEAR = 1997,
  PAGES = {63--65},
  URL = {http://www-tlp.limsi.fr/public/spc97intro.pdf}
}

@INPROCEEDINGS{Matrouf97A,
  AUTHOR = {D. Matrouf and J.L. Gauvain},
  TITLE = {Model Compensation for Noises in Training and Test Data},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Munich},
  MONTH = apr,
  YEAR = 1997,
  VOLUME = {II},
  PAGES = {831--834},
  URL = {http://www-tlp.limsi.fr/public/ica97matrouf.pdf}
}

% "A.J. Robinson, H.J.M. Steeneken" was comma-separated, which BibTeX parses
% as one "Last, First" name; the two authors are now joined with " and ".
@ARTICLE{Young97A,
  AUTHOR = {S.J. Young and M. Adda-Decker and X. Aubert and C. Dugast and J.L. Gauvain and D.J. Kershaw and L. Lamel and D.A. Leeuwen and D. Pye and A.J. Robinson and H.J.M. Steeneken and P.C. Woodland},
  TITLE = {Multilingual large vocabulary speech recognition: the {European} {SQALE} project},
  JOURNAL = csl,
  VOLUME = {11},
  NUMBER = {1},
  MONTH = jan,
  YEAR = 1997,
  PAGES = {73--89}
}

@ARTICLE{Gauvain96F,
  AUTHOR = {J.L. Gauvain and L. Lamel},
  TITLE = {Large Vocabulary Continuous Speech Recognition: from Laboratory Systems towards Real-World Applications},
  JOURNAL = ieice,
  VOLUME = {J79-D-II},
  MONTH = dec,
  YEAR = 1996,
  PAGES = {2005--2021},
  URL = {http://www-tlp.limsi.fr/public/ieice96.pdf}
}

@INCOLLECTION{Lamel96I,
  AUTHOR = {L. Lamel and M. Adda-Decker and G. Adda and J.-L. Gauvain},
  TITLE = {Reconnaissance Multilingue de Grands Vocabulaires},
  BOOKTITLE = {Fondements et Perspectives en Traitement Automatique de la Parole},
  PUBLISHER = {AUPELF-UREF},
  YEAR = 1996,
  EDITOR = {H. Meloni},
  PAGES = {}
}

@INCOLLECTION{Lienard96A,
  AUTHOR = {J.S.
Liénard},
  TITLE = {Perception et Variabilité},
  BOOKTITLE = {Fondements et Perspectives en Traitement Automatique de la Parole},
  PUBLISHER = {AUPELF-UREF},
  YEAR = 1996,
  EDITOR = {H. Meloni},
  PAGES = {}
}

@TECHREPORT{Demars96A,
  AUTHOR = {C. Demars},
  TITLE = {{Repr\'esentations bidimensionnelles d'un signal de parole. {E}l\'ements de monographie}},
  MONTH = aug,
  NOTE = {Notes et Documents LIMSI},
  NUMBER = 9611,
  INSTITUTION = {LIMSI-CNRS},
  YEAR = 1996,
  PAGES = {1--175}
}

% Standard styles require NOTE for this entry type and ignore PUBLISHER, so
% the publisher text is repeated in NOTE to make it appear in the output.
% Product names are braced as whole words ("{TE}DPhone" would have been
% lower-cased to "TEdphone" by sentence-casing styles).
@UNPUBLISHED{Schiel96A,
  AUTHOR = {F. Schiel and L. F. Lamel},
  TITLE = {{TED} corpus: {CDROMs} {TEDSpeeches} (1-5), {TEDLaryngo}, {TEDPhone}},
  YEAR = 1996,
  PUBLISHER = {CNRS-LIMSI and U. Munich},
  NOTE = {CNRS-LIMSI and U. Munich}
}

@INPROCEEDINGS{Matrouf96A,
  AUTHOR = {D. Matrouf and J.-L. Gauvain},
  TITLE = {Techniques de compensation pour la reconnaissance de la parole bruit\'ee},
  BOOKTITLE = {21\`emes Journ\'ees d'\'etudes sur la parole},
  MONTH = jun,
  YEAR = 1996,
  PAGES = {}
}

@INPROCEEDINGS{Minker96A,
  AUTHOR = {W. Minker and S. Bennacef},
  TITLE = {Compr\'ehension et \'evaluation dans le domaine {ATIS}},
  BOOKTITLE = {21\`emes Journ\'ees d'\'etudes sur la parole},
  MONTH = jun,
  YEAR = 1996,
  PAGES = {},
  URL = {http://www-tlp.limsi.fr/public/jep96minker.pdf}
}

@INPROCEEDINGS{Minker96B,
  AUTHOR = {W. Minker and S. Bennacef and J.L. Gauvain},
  TITLE = {A Stochastic Case Frame Approach for Natural Language Understanding},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {1013--1016},
  URL = {http://www-tlp.limsi.fr/public/icslp96minker.pdf}
}

@INPROCEEDINGS{Bennacef96A,
  AUTHOR = {S. Bennacef and L. Devillers and S. Rosset and L. Lamel},
  TITLE = {Dialog in the {RailTel} Telephone-Based System},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {550--553},
  URL = {http://www-tlp.limsi.fr/public/icslp96rtel.pdf}
}

% don't have source file
@INPROCEEDINGS{Babel96,
  AUTHOR = {Peter Roach and Simon Arnfield and W. Barry and J. Baltova and M. Boldea and A. Fourcin and W. Gonet and R.
Gubrynowicz and E. Hallum and L. Lamel and K. Marasek and A. Marchal and E. Meister and K. Vicsi},
  TITLE = {{BABEL}: An {Eastern European} Multi-language Database},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {}
}

@INPROCEEDINGS{Lamel96D,
  AUTHOR = {L. Lamel and M. Adda-Decker and J.L. Gauvain and G. Adda},
  TITLE = {Spoken Language Processing in a Multilingual context},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {2203--2206},
  NOTE = {invited paper},
  URL = {http://www-tlp.limsi.fr/public/icslp96ml.pdf}
}

@INPROCEEDINGS{Gauvain96E,
  AUTHOR = {J.L. Gauvain and J.J. Gangolf and L. Lamel},
  TITLE = {Speech Recognition for an Information Kiosk},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {849--852},
  URL = {http://www-tlp.limsi.fr/public/icslp96maskreco.pdf}
}

@INPROCEEDINGS{Lamel96C,
  AUTHOR = {L. Lamel and G. Adda},
  TITLE = {On Designing Pronunciation Lexicons for Large Vocabulary, Continuous Speech Recognition},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {6--9},
  URL = {http://www-tlp.limsi.fr/public/icslp96lex.pdf}
}

@INPROCEEDINGS{Lamel96H,
  AUTHOR = {L. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Les lexiques de prononciation dans les syst\`emes de reconnaissance de la parole},
  BOOKTITLE = {Proc. S\'eminaire GDR-PRC CHM Lexique et communication parl\'ee},
  ADDRESS = {Toulouse},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {1--10}
}

@INPROCEEDINGS{Life96A,
  AUTHOR = {A. Life and I. Salter and J.N. Temem and F. Bernard and S. Rosset and S. Bennacef and L. Lamel},
  TITLE = {Data Collection for the {Mask} Kiosk: {WOz} vs Prototype System},
  BOOKTITLE = icslp,
  ADDRESS = {Philadelphia},
  MONTH = oct,
  YEAR = 1996,
  PAGES = {1672--1675},
  URL = {http://www-tlp.limsi.fr/public/icslp96maskwoz.pdf}
}

@INPROCEEDINGS{Lamel96B,
  AUTHOR = {L.F. Lamel and J.L. Gauvain and S.K. Bennacef and L. Devillers and S. Foukia and J.J. Gangolf and S.
Rosset},
  TITLE = {Field Trials of a Telephone Service for Rail Travel Information},
  BOOKTITLE = ivtta,
  MONTH = sep,
  YEAR = 1996,
  PAGES = {111--116},
  URL = {http://www-tlp.limsi.fr/public/ivtta96.pdf}
}

@CONFERENCE{Lamel96A,
  AUTHOR = {L. Lamel},
  TITLE = {Spoken Language Systems Development at {LIMSI}},
  BOOKTITLE = {Dutch Language \& Speech Technology programme Workshop, Amsterdam},
  MONTH = jun,
  YEAR = 1996,
  NOTE = {presentation}
}

@INPROCEEDINGS{Madda96B,
  AUTHOR = {M. Adda-Decker},
  TITLE = {Read and Spontaneous Speech Recognition Activities at {LIMSI}},
  BOOKTITLE = {Multi-Lingual Spontaneous Speech Recognition in Real Environments},
  MONTH = jun,
  YEAR = 1996,
  NOTE = {to appear}
}

% ADDRESSE was a misspelled field name, silently ignored by every style;
% renamed to ADDRESS so the location is actually used.
@INPROCEEDINGS{Madda96C,
  AUTHOR = {M. Adda-Decker and L.F. Lamel and J.-L. Gauvain and G. Adda},
  TITLE = {Activities in Multilingual Speech Recognition at {LIMSI}},
  BOOKTITLE = {Proc. of the CRIM/FORWISS Workshop on Progress and Prospects of Speech Research and Technology},
  ADDRESS = {Montreal},
  MONTH = oct,
  YEAR = 1996,
  NOTE = {invited}
}

@INPROCEEDINGS{Gauvain96C,
  AUTHOR = {J.L. Gauvain and L. F. Lamel and G. Adda and D. Matrouf},
  TITLE = {{The LIMSI 1995 Hub3 System}},
  BOOKTITLE = arpasltw,
  ADDRESS = {Harriman},
  MONTH = feb,
  YEAR = 1996,
  URL = {http://www-tlp.limsi.fr/public/arpa96nab.pdf}
}

@ARTICLE{Mariani96A,
  AUTHOR = {J. Mariani and J.L. Gauvain and L. Lamel},
  TITLE = {{Comments on `Towards increasing speech recognition error rates,' by H. Bourlard, H. Hermansky, and N. Morgan}},
  JOURNAL = spcom,
  VOLUME = 18,
  NUMBER = 3,
  MONTH = may,
  YEAR = 1996,
  PAGES = {249--252}
}

@INPROCEEDINGS{Mariani96B,
  AUTHOR = {J. Mariani},
  TITLE = {Language resources and evaluation: a survey},
  BOOKTITLE = {Workshop on Integration of Language and Speech},
  ORGANIZATION = {ELSNET Goes East and IMACS},
  ADDRESS = {Moscow},
  MONTH = nov,
  YEAR = 1996,
  PAGES = {}
}

@INCOLLECTION{Mertens96A,
  AUTHOR = {P. Mertens and F. Beaugendre and C.
d'Alessandro},
  TITLE = {Automatic stylization of intonation: application to speech synthesis},
  BOOKTITLE = {Progress in Speech Synthesis},
  PUBLISHER = sv,
  YEAR = 1996,
  PAGES = {347--364},
  EDITOR = {R. Van Santen and R. Sproat and J. Hirschberg and J. Olive}
}

@INCOLLECTION{Richard96A,
  AUTHOR = {G. Richard and C. d'Alessandro},
  TITLE = {Modification of the aperiodic component of speech signals for synthesis},
  BOOKTITLE = {Progress in Speech Synthesis},
  PUBLISHER = sv,
  YEAR = 1996,
  PAGES = {41--56},
  EDITOR = {R. Van Santen and R. Sproat and J. Hirschberg and J. Olive}
}

@INPROCEEDINGS{Temen96A,
  AUTHOR = {J.-N. Temem and J.-L. Gauvain},
  TITLE = {Le syst\`eme de dialogue oral du projet {MASK}},
  BOOKTITLE = {L'interface des mondes r\'eels et virtuels},
  ADDRESS = {Montpellier},
  MONTH = may,
  YEAR = 1996
}

@ARTICLE{Richard96B,
  AUTHOR = {G. Richard and C. d'Alessandro},
  TITLE = {Analysis/synthesis of speech aperiodic component},
  JOURNAL = spcom,
  YEAR = 1996,
  VOLUME = 19,
  PAGES = {221--224}
}

@INCOLLECTION{Alessandro96C,
  AUTHOR = {C. d'Alessandro and M. Garnier-Rizet and P. Boula de Mare\"uil},
  TITLE = {Synth\`ese de la parole \`a partir du texte},
  BOOKTITLE = {Fondements et perspectives en traitement automatique de la parole},
  PUBLISHER = {AUPELF-UREF},
  YEAR = 1996,
  EDITOR = {Henri M\'eloni},
  PAGES = {81--96}
}

@INCOLLECTION{Alessandro96D,
  AUTHOR = {C. d'Alessandro and J.S. Li\'enard},
  TITLE = {Synthetic Speech Generation},
  BOOKTITLE = {Survey of the State of the Art of Human Language Technology},
  YEAR = 1996,
  EDITOR = {Ron Cole},
  URL = {http://www.cse.ogi.edu/CSLU/HLTsurvey/}
}

% ADDRESSE was a misspelled field name, silently ignored by every style;
% renamed to ADDRESS so the location is actually used.
@CONFERENCE{Lamel96G,
  AUTHOR = {L. Lamel and J.L. Gauvain},
  TITLE = {{LIMSI} {March'96} Speaker Recognition Evaluation},
  BOOKTITLE = {NIST/NSF Speaker Recognition Workshop},
  ADDRESS = {Linthicum},
  MONTH = mar,
  YEAR = 1996,
  NOTE = {presentation}
}

@CONFERENCE{Lamel96F,
  AUTHOR = {L. Lamel and D. Pye and X.
Aubert},
  TITLE = {Speech Recognizer Quality Assessment for Linguistic Engineering - {SQALE}},
  BOOKTITLE = arpasltw,
  ADDRESS = {Harriman},
  MONTH = Feb,
  YEAR = 1996,
  NOTE = {presentation}
}

@INPROCEEDINGS{Gauvain96A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and D. Matrouf},
  TITLE = {Developments in Continuous Speech Dictation using the 1995 {ARPA} {NAB} News Task},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Atlanta},
  MONTH = May,
  YEAR = 1996,
  VOLUME = {I},
  PAGES = {73--76},
  URL = {http://www-tlp.limsi.fr/public/ica96nab.pdf}
}

@INPROCEEDINGS{AddaDecker96A,
  AUTHOR = {M. Adda-Decker and G. Adda and L.F. Lamel and J.-L. Gauvain},
  TITLE = {Developments in Large Vocabulary, Continuous Speech Recognition of {German}},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Atlanta},
  MONTH = May,
  YEAR = 1996,
  URL = {http://www-tlp.limsi.fr/public/ica96ger.pdf}
}

@INPROCEEDINGS{Jardino96A,
  AUTHOR = {M. Jardino},
  TITLE = {Multilingual stochastic n-gram class language models},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Atlanta},
  MONTH = May,
  YEAR = 1996,
  URL = {http://www-tlp.limsi.fr/public/ica96jardino.pdf}
}

@INCOLLECTION{Neel96A,
  AUTHOR = {F. N\'eel and G. Chollet and L. Lamel and W. Minker and A. Constantinescu},
  TITLE = {Reconnaissance et compr\'ehension de la parole : \'evaluation et applications},
  BOOKTITLE = {Fondements et perspectives en traitement automatique de la parole},
  PUBLISHER = {AUPELF-UREF},
  YEAR = 1996,
  EDITOR = {Henri M\'eloni}
}

@INPROCEEDINGS{Mareuil96A,
  AUTHOR = {P. Boula de Mare\"uil},
  TITLE = {Pour une approche par r\`egles en transcription graph\`eme-phon\`eme},
  BOOKTITLE = {Lexique et Communication Parl\'ee},
  ORGANIZATION = {S\'eminaire GDR-PRC},
  ADDRESS = {Universit\'e Paul Sabatier, Toulouse},
  YEAR = 1996,
  MONTH = Oct
}

@INPROCEEDINGS{Mareuil96B,
  AUTHOR = {P. Boula de Mare\"uil},
  TITLE = {Les liaisons et la synth\`ese vocale},
  BOOKTITLE = {21\`emes Journ\'ees d'\'etudes sur la parole},
  MONTH = Jun,
  YEAR = 1996
}

@INPROCEEDINGS{Adda95A,
  AUTHOR = {G.
Adda and P. Blache and J. Mariani and P. Paroubek and M. Rajman},
  TITLE = {Action GRACE. Mise en place du paradigme d'\'evaluation. Application au domaine de l'analyse morpho-syntaxique},
  BOOKTITLE = {Le Traitement Automatique du Langage Naturel, 14, 15 et 16, Marseille},
  MONTH = Jun,
  YEAR = 1995,
  EDITOR = {Philippe Blache},
  ORGANIZATION = {GDR-PRC Communication Homme-Machine, P\^ole Langage Naturel},
  PAGES = {72--77}
}

@ARTICLE{Alessandro95A,
  AUTHOR = {C. d'Alessandro and P. Depalle and X. Rodet},
  TITLE = {Machines \`a chanter},
  JOURNAL = {R\'esonance},
  YEAR = 1995,
  VOLUME = 8,
  PAGES = {8--13},
  NOTE = {IRCAM-Centre Pompidou}
}

@INPROCEEDINGS{Alessandro95B,
  AUTHOR = {C. d'Alessandro and V. Darsinos and B. Yegnanarayana},
  TITLE = {Evaluation of periodic/aperiodic decomposition for analysis of aperiodicities in the voice source},
  BOOKTITLE = {Proceedings of ISMA, International Symposium on Musical Acoustics},
  ADDRESS = {Dourdan},
  MONTH = Jul,
  PAGES = {446--452},
  YEAR = 1995
}

% NOTE(review): volume, number and pages were empty placeholders and are still
% to be supplied.
@ARTICLE{Alessandro95C,
  AUTHOR = {C. d'Alessandro and P. Mertens},
  TITLE = {Automatic intonation stylisation using a model of pitch perception},
  JOURNAL = csl,
  MONTH = Jul,
  YEAR = 1995
}

@INPROCEEDINGS{Alessandro95D,
  AUTHOR = {C. d'Alessandro and S. Rosset and O. Piot},
  TITLE = {Measurement of pitch perception for {F0} glides},
  ADDRESS = {Madrid},
  BOOKTITLE = eurospeech,
  MONTH = Sep,
  PAGES = {957--960},
  YEAR = 1995
}

% NOTE(review): the page range below is identical to Alessandro95B, and the
% month differs from the other ICASSP 1995 (Detroit) entry in this file, which
% says May -- verify both against the proceedings.
@INPROCEEDINGS{Alessandro95E,
  AUTHOR = {C. d'Alessandro and B. Yegnanarayana},
  TITLE = {Decomposition of speech signals into deterministic and stochastic components},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Detroit},
  MONTH = Apr,
  PAGES = {446--452},
  YEAR = 1995
}

@INCOLLECTION{Alessandro95F,
  AUTHOR = {C. d'Alessandro and M. Castellengo},
  TITLE = {The pitch of short-duration vibrato tones: experimental data and numerical model},
  BOOKTITLE = {Vibrato},
  PUBLISHER = {Singular Publishing Group},
  YEAR = 1995,
  EDITOR = {P.H.
Dejonckere and M. Hirano and J. Sundberg},
  PAGES = {83--92}
}

@INPROCEEDINGS{Bennacef95B,
  AUTHOR = {S. K. Bennacef and F. N\'eel and H. B. Maynard},
  TITLE = {An Oral Dialogue Model based on Speech Acts Categorization},
  BOOKTITLE = {Proc. of ESCA Workshop spoken dialogue systems, Vigs{\o}, Denmark},
  MONTH = {May 30-June 2},
  YEAR = 1995
}

@INCOLLECTION{LeeGauvain95A,
  AUTHOR = {C. H. Lee and J.-L. Gauvain},
  TITLE = {Adaptive learning in acoustic and language modeling},
  BOOKTITLE = {Speech Recognition and Coding, New Advances and Trends, NATO ASI Series},
  EDITOR = {A. Rubio Ayuso and J. Lopez Soler},
  PUBLISHER = sv,
  YEAR = 1995,
  PAGES = {14--31}
}

@INPROCEEDINGS{DarsinosAlessandro95A,
  AUTHOR = {V. Darsinos and C. d'Alessandro and B. Yegnanarayana},
  TITLE = {Evaluation of a periodic/aperiodic speech decomposition algorithm},
  BOOKTITLE = eurospeech,
  ORGANIZATION = {European Speech Communication Association},
  ADDRESS = {Madrid},
  YEAR = 1995,
  MONTH = Sep,
  PAGES = {393--396}
}

@INPROCEEDINGS{Gauvain95A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and M. Adda-Decker},
  TITLE = {Developments in Large Vocabulary Dictation: The LIMSI Nov94 NAB System},
  BOOKTITLE = arpaslt,
  MONTH = Jan,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/slt95.pdf}
}

@INPROCEEDINGS{Gauvain95B,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and M. Adda-Decker},
  TITLE = {{Developments in Continuous Speech Dictation using the ARPA WSJ Task}},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Detroit},
  MONTH = May,
  YEAR = 1995,
  PAGES = {65--68},
  URL = {http://www-tlp.limsi.fr/public/ica95lv.pdf}
}

@INPROCEEDINGS{Gauvain95C,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and B. Prouts},
  TITLE = {Experiments with Speaker Verification over the Telephone},
  BOOKTITLE = eurospeech,
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/euro95sv.pdf}
}

% NOTE(review): the second author was entered as "M. Adda"; every other entry
% in this file spells this co-author "M. Adda-Decker" -- confirm.
@INPROCEEDINGS{Gauvain95D,
  AUTHOR = {L.F. Lamel and M. Adda-Decker and J.-L.
Gauvain},
  TITLE = {Issues in Large Vocabulary, Multilingual Speech Recognition},
  BOOKTITLE = eurospeech,
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/euro95sqale.pdf}
}

@INPROCEEDINGS{Lamel95A,
  AUTHOR = {L.F. Lamel and S. K. Bennacef and H. Bonneau-Maynard and S. Rosset and J.-L. Gauvain},
  TITLE = {Recent Developments in Spoken Language Systems for Information Retrieval},
  BOOKTITLE = {ESCA ETRW Spoken Dialog Systems, Vigs{\o}, Denmark},
  MONTH = {May 30-June 2},
  YEAR = 1995,
  PAGES = {17--20},
  URL = {http://www-tlp.limsi.fr/public/esca-sls95.pdf}
}

@ARTICLE{Lamel95B,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {A Phone-based Approach to Non-Linguistic Speech Feature Identification},
  JOURNAL = csl,
  MONTH = Jan,
  YEAR = 1995,
  VOLUME = 9,
  NUMBER = 1,
  PAGES = {87--103},
  URL = {http://www-tlp.limsi.fr/public/csl95-nl.pdf}
}

@INPROCEEDINGS{Lamel95C,
  AUTHOR = {L.F. Lamel and S. Rosset and S.K. Bennacef and H. Bonneau-Maynard and L. Devillers and J.-L. Gauvain},
  TITLE = {Development of Spoken Language Corpora for Travel Information},
  BOOKTITLE = eurospeech,
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995,
  VOLUME = 3,
  PAGES = {1961--1964},
  URL = {http://www-tlp.limsi.fr/public/euro95slc.pdf}
}

@INPROCEEDINGS{MertensAlessandro95A,
  AUTHOR = {P. Mertens and C. d'Alessandro},
  TITLE = {Pitch contour stylization using a tonal perception model},
  BOOKTITLE = {XIIIth International Congress of Phonetic Sciences},
  ADDRESS = {Stockholm},
  YEAR = 1995,
  MONTH = Aug,
  VOLUME = 4,
  PAGES = {228--232}
}

% NOTE(review): the other ieeeasr entries dated Dec 1995 give Snowbird as the
% address -- confirm that Salt Lake City is intended here.
@INPROCEEDINGS{Gauvain95E,
  AUTHOR = {J.-L. Gauvain and S. Bennacef and L. Devillers and L. Lamel},
  TITLE = {Spoken Language System Development for the MASK kiosk},
  BOOKTITLE = ieeeasr,
  ADDRESS = {Salt Lake City},
  MONTH = Dec,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/asru95mask.pdf}
}

@INPROCEEDINGS{Lamel95D,
  AUTHOR = {L. Lamel and Y.
Sagisaka},
  TITLE = {Large Vocabulary, Multilingual Speech Recognition: Session Overview},
  BOOKTITLE = ieeeasr,
  ADDRESS = {Snowbird},
  MONTH = Dec,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/asru95intro.pdf}
}

@INPROCEEDINGS{Lamel95F,
  AUTHOR = {K. Choukri and S. Itahashi and L. Lamel and M. Liberman},
  TITLE = {Multi-lingual Spoken Language Resources},
  BOOKTITLE = ieeeasr,
  ADDRESS = {Snowbird},
  MONTH = Dec,
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/asru95c.pdf}
}

@INPROCEEDINGS{Lamel95H,
  AUTHOR = {L. Lamel and R. DeMori},
  TITLE = {Speech Recognition of European Languages},
  BOOKTITLE = ieeeasr,
  ADDRESS = {Snowbird},
  MONTH = Dec,
  YEAR = 1995,
  PAGES = {51--54},
  URL = {http://www-tlp.limsi.fr/public/asru95ml.pdf}
}

@INPROCEEDINGS{Lienard95A,
  AUTHOR = {J.-S. Li\'enard},
  TITLE = {Speech Pattern Processing: integrating the linguistic and non-linguistic aspects of voice and speech},
  BOOKTITLE = {13th Int. Congress of Phonetic Sciences, Stockholm},
  YEAR = 1995,
  MONTH = Aug
}

@INPROCEEDINGS{Lienard95B,
  AUTHOR = {J.-S. Li\'enard and M. G. Di Benedetto},
  TITLE = {Characterization of the non-linguistic information of the vowels by matching vowel systems},
  BOOKTITLE = {13th Int. Congress of Phonetic Sciences, Stockholm},
  YEAR = 1995,
  MONTH = Aug
}

% "and others" is the BibTeX token for et al.; a literal "et al." would be
% parsed as part of the editor's name.
@INCOLLECTION{Lienard95C,
  AUTHOR = {J.-S. Li\'enard},
  TITLE = {From Speech Variability to Pattern Processing: a non-reductive view of Speech Processing},
  BOOKTITLE = {Levels in Speech Communication: Relations and Interactions},
  EDITOR = {C. Sorin and others},
  PUBLISHER = {Elsevier Science B.V.},
  YEAR = 1995,
  PAGES = {137--148}
}

% The institution was previously only mentioned at the end of the note; it is
% now in the field that technical reports require.
@TECHREPORT{Lienard95D,
  AUTHOR = {J.-S. Li\'enard},
  TITLE = {Projet d'action du LIMSI : Plate-Forme Multimodale},
  INSTITUTION = {LIMSI},
  MONTH = Nov,
  NOTE = {Rapport du groupe d'\'etude pr\'eliminaire},
  YEAR = 1995
}

% NOTE(review): the key says 95 but the report below is dated Jan 1996.
@TECHREPORT{Minker95C,
  AUTHOR = {W. Minker},
  TITLE = {Grapheme-to-phoneme conversion.
An approach based on hidden Markov models},
  NOTE = {Notes et Documents LIMSI},
  NUMBER = 9604,
  INSTITUTION = {LIMSI-CNRS},
  YEAR = 1996,
  MONTH = Jan
}

@INPROCEEDINGS{Beaugendre95A,
  AUTHOR = {F. Beaugendre},
  TITLE = {Generating French Intonation at different speaking rates},
  BOOKTITLE = eurospeech,
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995
}

% Journal articles take JOURNAL, not BOOKTITLE; with BOOKTITLE the venue was
% dropped from the formatted reference.
@ARTICLE{YegnanarayanaAlessandro95A,
  AUTHOR = {B. Yegnanarayana and C. d'Alessandro and V. Darsinos},
  TITLE = {Decomposition of speech signals into periodic and aperiodic components},
  JOURNAL = ieeetsa,
  YEAR = 1995,
  NOTE = {accepted for publication}
}

@ARTICLE{AlessandroDarsinos95A,
  AUTHOR = {C. d'Alessandro and V. Darsinos and B. Yegnanarayana},
  TITLE = {Significance of periodic and aperiodic decomposition for analysis of voice sources},
  JOURNAL = ieeetsa,
  YEAR = 1995,
  NOTE = {accepted for publication}
}

% NOTE(review): Richard96A has the same authors and title (book dated 1996,
% with pages) -- possibly the same chapter entered twice.
@INCOLLECTION{RichardAlessandro95A,
  AUTHOR = {G. Richard and C. d'Alessandro},
  TITLE = {Modification of the aperiodic component of speech signals for synthesis},
  BOOKTITLE = {Progress in Text-To-Speech synthesis},
  PUBLISHER = sv,
  YEAR = 1995
}

@INCOLLECTION{MertensBeaugendre95A,
  AUTHOR = {P. Mertens and F. Beaugendre and C. d'Alessandro},
  TITLE = {Comparing approaches to pitch contour stylization for speech synthesis},
  BOOKTITLE = {Progress in Text-To-Speech synthesis},
  PUBLISHER = sv,
  YEAR = 1995
}

@INPROCEEDINGS{Doval95A,
  AUTHOR = {B. Doval},
  TITLE = {Fundamental frequency estimation of musical sounds using statistical learning},
  BOOKTITLE = {International Symposium of Musical Acoustics, Dourdan},
  ADDRESS = {Dourdan},
  YEAR = 1995,
  MONTH = Jul
}

@TECHREPORT{Bataille95A,
  AUTHOR = {O. Bataille},
  TITLE = {Prise de son multimicrophone pour la reconnaissance de la parole dans un milieu bruit\'e},
  INSTITUTION = {LIMSI, Universit\'e Paris Sud},
  YEAR = 1995
}

@TECHREPORT{Bennacef95A,
  AUTHOR = {S. Bennacef},
  TITLE = {Mod\'elisation du dialogue oral Homme-Machine.
Mise en \oe{}uvre dans une application de demande d'information},
  INSTITUTION = {LIMSI, Universit\'e Paris Sud},
  NOTE = {PhD Thesis},
  MONTH = Jul,
  YEAR = 1995
}

% Authors must be separated by " and "; a comma made BibTeX read the pair as a
% single "Last, First" name.
@ARTICLE{Gauvain95F,
  AUTHOR = {J.-L. Gauvain and J.-N. Temem},
  TITLE = {Un syst\`eme de dialogue oral pour un kiosque de service ferroviaire},
  JOURNAL = {TELECOM, Revue de l'Ass. Amicale des Ing. de l'ENST},
  NUMBER = {106},
  MONTH = Dec,
  YEAR = 1995,
  PAGES = {22--25}
}

@TECHREPORT{Minker95A,
  AUTHOR = {W. Minker},
  TITLE = {An English Version of the LIMSI L'ATIS System},
  NOTE = {Notes et Documents LIMSI},
  NUMBER = 9512,
  INSTITUTION = {LIMSI-CNRS},
  YEAR = 1995,
  MONTH = Apr
}

@INPROCEEDINGS{Minker95B,
  AUTHOR = {W. Minker},
  TITLE = {Evaluation des {Syst\`emes} de Compr\'ehension},
  BOOKTITLE = {Ecole Th\'ematique, Fondements et Perspectives en Traitement Automatique de la Parole},
  ORGANIZATION = {GDR-PRC Communication Homme-Machine},
  ADDRESS = {Marseille-Luminy},
  YEAR = 1995,
  MONTH = Jul
}

% NOTE(review): same title as Minker95A, and the note text is identical to the
% one on Lamel94F -- this looks like a copy-paste leftover; verify.
@TECHREPORT{Minker95D,
  AUTHOR = {W. Minker},
  TITLE = {An English Version of the LIMSI L'ATIS System},
  INSTITUTION = {LIMSI},
  MONTH = Dec,
  NOTE = {LRE Project 62-056 Relator, deliverable 1.2 Catalog},
  YEAR = 1995,
  URL = {http://www-tlp.limsi.fr/public/lim9512.pdf}
}

@ARTICLE{Mareuil95A,
  AUTHOR = {P. Boula de Mare\"uil},
  TITLE = {Vers la phon\'etisation automatique des sigles},
  JOURNAL = {La linguistique},
  YEAR = 1995,
  VOLUME = 1
}

@INPROCEEDINGS{Mareuil95B,
  AUTHOR = {P. Boula de Mare\"uil},
  TITLE = {Conversion graph\`eme-phon\`eme: en marge de la syllabation et de la liaison},
  BOOKTITLE = {1\`eres Rencontres de Jeunes Chercheurs sur la Parole},
  ADDRESS = {Paris},
  YEAR = 1995
}

% don't have source
@INPROCEEDINGS{Lamel95E,
  AUTHOR = {D. Chan and A. Fourcin and L. Lamel and others},
  TITLE = {EUROM - A spoken language resource for the EU. The SAM Projects.},
  BOOKTITLE = eurospeech,
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995
}

@INPROCEEDINGS{Mariani95A,
  AUTHOR = {J.
Mariani},
  TITLE = {Language Resources and Evaluation: A survey},
  BOOKTITLE = {Workshop ELSNET},
  ADDRESS = {Moscow},
  MONTH = Nov,
  YEAR = 1995
}

@INPROCEEDINGS{Mariani95B,
  AUTHOR = {J. Mariani},
  TITLE = {Probl\'ematique g\'en\'erale de l'informatisation du fran\c{c}ais},
  BOOKTITLE = {Conf\'erence des Ministres francophones de la Recherche et de l'Enseignement Sup\'erieur},
  ADDRESS = {Cotonou, B\'enin},
  MONTH = Nov,
  YEAR = 1995
}

@INPROCEEDINGS{Mariani95C,
  AUTHOR = {J. Mariani},
  TITLE = {Speech Technology in the ESPRIT and IT programs},
  BOOKTITLE = {EU Speech Project Day},
  ADDRESS = {Madrid},
  MONTH = Sep,
  YEAR = 1995
}

@UNPUBLISHED{Typology_of_Speech_Resource,
  AUTHOR = {J. Zeiliger and L. Lamel and I. Trancoso},
  TITLE = {Typology of Speech Resources, LRE Project 62-056 Relator},
  MONTH = Oct,
  NOTE = {Deliverable 1.1 Assessment of the needs of LR in Europe},
  YEAR = 1994
}

% NOTE(review): Alessandro94F has the same authors, month, year and page range
% with a near-identical title -- probably the same paper entered twice.
@INPROCEEDINGS{Alessandro94A,
  AUTHOR = {C. d'Alessandro and P. Mertens and F. Beaugendre},
  TITLE = {Automatic stylization of intonation: application to speech synthesis},
  BOOKTITLE = {2nd International Workshop on Speech Synthesis},
  ORGANIZATION = {European Speech Communication Association and Institute of Electrical and Electronics Engineers},
  ADDRESS = {Mohonk},
  YEAR = 1994,
  MONTH = Sep,
  PAGES = {155--158}
}

@ARTICLE{Alessandro94B,
  AUTHOR = {C. d'Alessandro and M. Castellengo},
  TITLE = {The pitch of short-duration vibrato tones},
  JOURNAL = jasa,
  MONTH = Mar,
  YEAR = 1994,
  VOLUME = 95,
  NUMBER = 3,
  PAGES = {1617--1630}
}

@TECHREPORT{Alessandro94C,
  AUTHOR = {C. d'Alessandro and F. Beaugendre},
  TITLE = {French Speech Synthesis for TMC receivers (Synth\`ese de la parole en Fran\c{c}ais pour les r\'ecepteurs TMC)},
  INSTITUTION = {LIMSI},
  YEAR = 1994,
  NOTE = {Contrat CNRS/Philips Car Systems International (Wetzlar, Allemagne) CNRS No 770613. Final Report, 25 pages}
}

@INPROCEEDINGS{Alessandro94D,
  AUTHOR = {C. d'Alessandro and D.
Beautemps},
  TITLE = {Repr\'esentation, modification et synth\`ese du signal vocal par formes d'ondes \'el\'ementaires},
  BOOKTITLE = {Colloque International sur les Mod\`eles Physiques dans l'analyse, la synth\`ese, la production et la cr\'eation sonore, 1990, Grenoble},
  SERIES = {Publications dans: Mod\`eles physiques, cr\'eation musicale et ordinateur. Edition de la maison des sciences de l'homme, Paris},
  VOLUME = 1,
  YEAR = 1994,
  PAGES = {247--272}
}

% NOTE(review): Alessandro93C carries the same title and page range for the
% same conference -- probably the same paper entered twice.
@INPROCEEDINGS{Alessandro94E,
  AUTHOR = {C. d'Alessandro and M. Castellengo},
  TITLE = {The Pitch of short-duration vibrato tones: experimental data and numerical model},
  BOOKTITLE = {Proc. of Stockholm Music Acoustics Conference SMAC 93},
  SERIES = {Publications of the Royal Swedish Academy of Music},
  NUMBER = 79,
  YEAR = 1994,
  PAGES = {25--30}
}

@INPROCEEDINGS{Alessandro94F,
  AUTHOR = {C. d'Alessandro and P. Mertens and F. Beaugendre},
  TITLE = {Automatic stylisation of intonation: application to speech synthesis},
  BOOKTITLE = escaieee,
  MONTH = Sep,
  YEAR = 1994,
  PAGES = {155--158}
}

@INPROCEEDINGS{Alessandro94G,
  AUTHOR = {C. d'Alessandro and P. Mertens},
  TITLE = {Automatic intonation stylization using a model of pitch perception},
  BOOKTITLE = {127th meeting of the Acoust. Soc. of Am J.A.S.A},
  VOLUME = 95,
  YEAR = 1994,
  PAGES = {2949 (4aSP11)}
}

@INPROCEEDINGS{Bennacef94A,
  AUTHOR = {S.K. Bennacef and H. Bonneau-Maynard and J.L. Gauvain and L.F. Lamel and W. Minker},
  TITLE = {A spoken Language System For Information Retrieval},
  ADDRESS = {Yokohama, Japan},
  BOOKTITLE = icslp,
  MONTH = Sep,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/icslp94latis.pdf}
}

@INPROCEEDINGS{Gauvain94A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M.
Adda-Decker},
  TITLE = {Continuous Speech Dictation System in French},
  BOOKTITLE = icslp,
  ADDRESS = {Yokohama, Japan},
  MONTH = Sep,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/icslp94lv.pdf}
}

@INPROCEEDINGS{Gauvain94B,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Continuous Speech Dictation System at LIMSI},
  BOOKTITLE = {Proc. of the CRIM/FORWISS Workshop on Progress and Prospects of Speech Research and Technology, Munich},
  MONTH = Sep,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/forwiss94.pdf}
}

% NOTE(review): editor and pages were empty placeholders and are still to be
% supplied.
@INCOLLECTION{Gauvain94C,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and J. Mariani},
  TITLE = {Recent Progress in Speech-to-text Conversion at LIMSI},
  BOOKTITLE = {Esprit Speech Project Workshop book},
  PUBLISHER = sv,
  YEAR = 1994
}

% NOTE(review): the UNPUBLISHED type expects AUTHOR and NOTE; this entry has
% EDITOR and PUBLISHER instead, which the standard styles do not print for
% that type. Consider MISC or supplying the expected fields.
@UNPUBLISHED{Gauvain94D,
  EDITOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {Sous-corpus BREF Polyglot, Disque Bref Poly},
  PUBLISHER = {CNRS-LIMSI},
  MONTH = Dec,
  YEAR = 1994
}

@ARTICLE{Devillers94A,
  AUTHOR = {C. Dugast and L. Devillers and X. Aubert},
  TITLE = {Combining TDNN and HMM in a hybrid system for improved continuous-speech recognition},
  JOURNAL = ieeetsa,
  YEAR = 1994,
  VOLUME = 2
}

@ARTICLE{Gauvain94E,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Speaker-Independent Continuous Speech Dictation},
  JOURNAL = spcom,
  MONTH = Sep,
  YEAR = 1994,
  VOLUME = 15,
  PAGES = {21--37},
  URL = {http://www-tlp.limsi.fr/public/spcom94.pdf}
}

% The month field was misspelt (MOMTH) and therefore ignored by BibTeX.
@ARTICLE{Gauvain94F,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and J. Mariani},
  TITLE = {Speech to Text Conversion in French},
  JOURNAL = {International Journal of Pattern Recognition and Artificial Intelligence},
  MONTH = Jan,
  YEAR = 1994,
  VOLUME = 8,
  NUMBER = 1,
  PAGES = {99--131},
  URL = {http://www-tlp.limsi.fr/public/iprai94.pdf}
}

@INPROCEEDINGS{Gauvain94G,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M.
Adda-Decker},
  TITLE = {{The LIMSI Continuous Speech Dictation System}},
  BOOKTITLE = arpahlt,
  MONTH = Mar,
  YEAR = 1994,
  PAGES = {319--324},
  URL = {http://www-tlp.limsi.fr/public/hlt94.pdf}
}

@INPROCEEDINGS{Gauvain94H,
  AUTHOR = {J.-L. Gauvain and L.F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {{The LIMSI Continuous Speech Dictation System: Evaluation on the ARPA Wall Street Journal Task}},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Adelaide},
  MONTH = Apr,
  YEAR = 1994,
  VOLUME = 1,
  PAGES = {557--560},
  URL = {http://www-tlp.limsi.fr/public/ica94lv.pdf}
}

@ARTICLE{Gauvain94I,
  AUTHOR = {J.-L. Gauvain and C.H. Lee},
  TITLE = {{Maximum a Posteriori Estimation for Multivariate Gaussian Mixture Observations of Markov Chains}},
  JOURNAL = {IEEE Trans. on Speech and Audio Processing},
  MONTH = Apr,
  YEAR = 1994,
  VOLUME = 2,
  NUMBER = 2,
  PAGES = {291--298},
  URL = {http://www-tlp.limsi.fr/public/map93.pdf}
}

% A doubled "and" in the author list made BibTeX see a malformed extra name.
@INCOLLECTION{GeoffroisEdeline94A,
  AUTHOR = {E. Geoffrois and J.-M. Edeline and J.-F. Vibert},
  TITLE = {{Learning by Delay Modifications}},
  BOOKTITLE = CNS,
  EDITOR = {Frank H. Eeckman},
  PUBLISHER = {KLUWER},
  YEAR = 1994,
  PAGES = {133--138}
}

@INPROCEEDINGS{Jardino94A,
  AUTHOR = {M. Jardino and G. Adda},
  TITLE = {A class bigram model for very large corpus},
  BOOKTITLE = icslp,
  ADDRESS = {Yokohama, Japan},
  MONTH = Sep,
  YEAR = 1994
}

% NOTE(review): "IGGI-94" may be a typo for ICGI-94 (a grammatical inference
% colloquium held in Alicante) -- confirm before changing.
@INPROCEEDINGS{Jardino94B,
  AUTHOR = {M. Jardino and G. Adda},
  TITLE = {Automatic determination of a stochastic bi-gram class language model},
  BOOKTITLE = {Proc. of IGGI-94, Alicante, Espagne},
  YEAR = 1994,
  PAGES = {57}
}

@INPROCEEDINGS{Lamel94A,
  AUTHOR = {L.F. Lamel},
  TITLE = {Speech Recognizer Quality Assessment for Linguistic Engineering (SQALE)},
  BOOKTITLE = {LRE Journ\'ees du G\'enie Linguistique, Paris},
  MONTH = Jul,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/lre94sqale.pdf}
}

@INPROCEEDINGS{Lamel94B,
  AUTHOR = {L.F. Lamel and J.-L.
Gauvain},
  TITLE = {Language Identification Using Phone-based Acoustic Likelihoods},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Adelaide},
  MONTH = Apr,
  YEAR = 1994,
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/ica94lid.pdf}
}

%presentation only slides
@INPROCEEDINGS{Lamel94C,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain and M. Adda-Decker},
  TITLE = {Non-Linguistic Speech Feature Identification},
  BOOKTITLE = {Language Identification Review Meeting and Workshop},
  MONTH = Mar,
  YEAR = 1994,
  keywords = { lid },
  NOTE = {presentation}
}

@UNPUBLISHED{Lamel94D,
  AUTHOR = {L.F. Lamel and E. Klein and C. Matheson and I. Trancoso and J. Zeiliger},
  TITLE = {Experimental Distribution of Linguistic Resources},
  MONTH = Jun,
  NOTE = {Report for project LRE RELATOR, deliverables 4A and 5A},
  YEAR = 1994
}

@INPROCEEDINGS{Lamel94E,
  AUTHOR = {L.F. Lamel and F. Schiel and A. Fourcin and J. Mariani and H. Tillmann},
  TITLE = {The Translanguage English Database TED},
  BOOKTITLE = icslp,
  ADDRESS = {Yokohama, Japan},
  MONTH = Sep,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/icslp94ted.pdf}
}

% A stray trailing "I" after Trancoso made BibTeX take "I" as the surname.
% NOTE(review): technical reports require an INSTITUTION field, which is
% missing here and cannot be determined from this entry.
@TECHREPORT{Lamel94F,
  AUTHOR = {L.F. Lamel and I. Trancoso and J. Zeiliger},
  TITLE = {Report on the situation for all European languages: Spoken language resource},
  MONTH = Oct,
  NOTE = {LRE Project 62-056 Relator, deliverable 1.2 Catalog},
  YEAR = 1994
}

@INPROCEEDINGS{RaveraAlessandro94A,
  AUTHOR = {B. Ravera and C. d'Alessandro},
  TITLE = {Double-frequency and time-frequency analyses of modulated speech noises},
  BOOKTITLE = {Processing V, theories and applications, Elsevier. Proc. of EUSIPCO'94, 7th Europ. sig. Proc.Conf, Edinburgh},
  MONTH = Sep,
  YEAR = 1994
}

@INPROCEEDINGS{RaveraAlessandro94B,
  AUTHOR = {B. Ravera and C. d'Alessandro},
  TITLE = {Time frequency analysis of modulated speech noises},
  BOOKTITLE = {127th meeting of the Acoust. Soc.
of Am J.A.S.A},
  VOLUME = {95, Number 5, Pt 2},
  YEAR = 1994,
  PAGES = {2816 (1pSP8)}
}

% NOTE(review): RichardAlessandro94A has the same authors, month, year and
% pages with a near-identical title -- probably the same paper entered twice.
@INPROCEEDINGS{Richard94A,
  AUTHOR = {G. Richard and C. d'Alessandro},
  TITLE = {Time-domain analysis/synthesis of the aperiodic component of speech signals},
  BOOKTITLE = {2nd International Workshop on Speech Synthesis},
  ORGANIZATION = {European Speech Communication Association and Institute of Electrical and Electronics Engineers},
  ADDRESS = {Mohonk},
  YEAR = 1994,
  MONTH = Sep,
  PAGES = {5--8}
}

@INPROCEEDINGS{RichardAlessandro94A,
  AUTHOR = {G. Richard and C. d'Alessandro},
  TITLE = {Time domain analysis/synthesis of the aperiodic component of speech},
  BOOKTITLE = escaieee,
  MONTH = Sep,
  YEAR = 1994,
  PAGES = {5--8}
}

@INPROCEEDINGS{SteenekenLamel94A,
  AUTHOR = {H. J. M. Steeneken and L. F. Lamel},
  TITLE = {SQALE: Speech Recognizer Quality Assessment for Linguistic Engineering},
  BOOKTITLE = arpaslt,
  MONTH = Mar,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/slt94sqale.pdf}
}

% Editor list aligned with the Demars93A entry, which cites the same book.
@INCOLLECTION{Alessandro93A,
  AUTHOR = {C. d'Alessandro},
  TITLE = {Auditory-based Wavelet Representation of Speech},
  BOOKTITLE = {Visual Representations of Speech Signals},
  EDITOR = {Martin Cooke and Steve Beet and Malcolm Crawford},
  PUBLISHER = {John Wiley \& Sons},
  YEAR = 1993,
  CHAPTER = 8,
  PAGES = {131--138}
}

% NOTE(review): ORGANIZATION below actually holds the workshop name and place;
% it probably belongs in BOOKTITLE or a note.
@INPROCEEDINGS{Alessandro93B,
  AUTHOR = {C. d'Alessandro},
  TITLE = {A Numerical model of pitch perception for short-duration vocal tones: application to intonation analysis},
  ORGANIZATION = {Proc. of ESCA Workshop on Prosody, Lund, Sweden},
  BOOKTITLE = {Working papers in General linguistics and phonetics},
  VOLUME = 41,
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {234--237}
}

@INPROCEEDINGS{Alessandro93C,
  AUTHOR = {C. d'Alessandro and M.
Castellengo},
  TITLE = {The pitch of short-duration vibrato tones: experimental data and numerical model},
  BOOKTITLE = {Stockholm Music Acoustics Conference, Publications of the Royal Swedish Academy of Music, No 79},
  ORGANIZATION = {The Royal Swedish Academy of Music},
  ADDRESS = {Stockholm},
  YEAR = 1993,
  MONTH = Aug,
  PAGES = {25--30}
}

% don't have source
@INPROCEEDINGS{Bickley93A,
  AUTHOR = {C. A. Bickley and S. Hunnicutt and L. F. Lamel},
  TITLE = {Alternative strategies for creating AUTOCAD drawings},
  BOOKTITLE = {Proc. of ESCA Workshop on Speech and Language Technology for Disabled Persons},
  ADDRESS = {Stockholm, Sweden},
  MONTH = May,
  YEAR = 1993
}

@INPROCEEDINGS{Alessandro93D,
  AUTHOR = {L. J. Boe and M. Grosetti and P. E. Mounier-Kuhn and M. Cartier and C. d'Alessandro and H. Emerard and H. Meloni},
  TITLE = {Informatique et parole en {France}: un quart de si\`ecle apr\`es la rencontre},
  BOOKTITLE = {Actes du 3\`eme colloque Histoire de L'Informatique, Sophia Antipolis},
  MONTH = {13-15 Octobre},
  YEAR = 1993
}

% don't have source
@INPROCEEDINGS{Bonneau93A,
  AUTHOR = {H. Bonneau-Maynard and J.-L. Gauvain and D. Goodine and L. F. Lamel and J. Polifroni and S. Seneff},
  TITLE = {A French Version of the MIT-ATIS System: Portability Issues},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993
}

% Editors must be separated by " and "; the comma-separated form was parsed as
% a single "Last, First" name.
@INCOLLECTION{Demars93A,
  AUTHOR = {C. Demars},
  TITLE = {Time-instantaneous frequency representation},
  BOOKTITLE = {Visual Representations of Speech Signals},
  EDITOR = {Martin Cooke and Steve Beet and Malcolm Crawford},
  PUBLISHER = {John Wiley \& Sons Ltd},
  YEAR = 1993,
  PAGES = {221--228}
}

@UNPUBLISHED{GarofoloLamel93A,
  AUTHOR = {J.S. Garofolo and L.F. Lamel and W.M. Fisher and J.G. Fiscus and D. S. Pallett and N. L.
Dahlgren},
  TITLE = {Documentation for the DARPA TIMIT Acoustic-Phonetic Continuous Speech Corpus CDROM},
  MONTH = Feb,
  NOTE = {NTIS order number PB91-100354},
  YEAR = 1993
}

@INPROCEEDINGS{Gauvain93A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel},
  TITLE = {Identification of Non-Linguistic Speech Features},
  BOOKTITLE = arpahlt,
  MONTH = Mar,
  YEAR = 1993,
  PAGES = {96--101},
  URL = {http://www-tlp.limsi.fr/public/hlt93.pdf}
}

@INPROCEEDINGS{Gauvain93B,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Large Vocabulary Speech Recognition in English and French},
  BOOKTITLE = ieeeasr,
  MONTH = Dec,
  YEAR = 1993,
  URL = {http://www-tlp.limsi.fr/public/asru93lv.pdf}
}

% NOTE(review): the UNPUBLISHED type expects AUTHOR and NOTE; this entry has
% EDITOR and PUBLISHER instead, which the standard styles do not print for
% that type. Consider MISC or supplying the expected fields.
@UNPUBLISHED{Gauvain93C,
  EDITOR = {J.-L. Gauvain and L. F. Lamel},
  TITLE = {Sous-corpus BREF 80, Disques Bref 80-1 et Bref 80-2},
  MONTH = Mar,
  YEAR = 1993,
  PUBLISHER = {CNRS-LIMSI}
}

@INPROCEEDINGS{Gauvain93D,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {Speaker-Independent Continuous Speech Dictation},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993,
  URL = {http://www-tlp.limsi.fr/public/euro93lv.pdf}
}

% NOTE(review): the year was 1993, but a "Nov93" system cannot have been
% presented in March 1993, and the URL points at slt94.pdf; the year is set to
% 1994 to match. The key is kept as is because documents may already cite it.
@INPROCEEDINGS{Gauvain93E,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and G. Adda and M. Adda-Decker},
  TITLE = {{The LIMSI Nov93 WSJ System}},
  BOOKTITLE = arpaslt,
  MONTH = Mar,
  YEAR = 1994,
  URL = {http://www-tlp.limsi.fr/public/slt94.pdf}
}

@INPROCEEDINGS{Geoffrois93A,
  AUTHOR = {E. Geoffrois},
  TITLE = {A Pitch Contour Analysis Guided by Prosodic Event Detection},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  YEAR = 1993,
  MONTH = Sep,
  PAGES = {793--796}
}

@INPROCEEDINGS{Geoffrois93B,
  AUTHOR = {E. Geoffrois},
  TITLE = {Estimation of Prosodic Events from Japanese {F$_0$} Contours},
  BOOKTITLE = {Technical Report of IEICE},
  YEAR = 1993,
  MONTH = Jun,
  PAGES = {1--8}
}

@INPROCEEDINGS{Geoffrois93C,
  AUTHOR = {E.
Geoffrois},
  TITLE = {Prosodic Event Detection from {F$_0$} Contours Using the Fujisaki Model},
  BOOKTITLE = ASJSPRING,
  YEAR = 1993,
  MONTH = Mar,
  PAGES = {187--188}
}

% A space is needed after the initial; "G.Richard" was parsed as one surname.
@INPROCEEDINGS{GrauAlessandro93A,
  AUTHOR = {S. Grau and C. d'Alessandro and G. Richard},
  TITLE = {A Speech formant synthesizer based on harmonic + formant-waveforms representations},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {1697--1700}
}

@INPROCEEDINGS{Jardino93A,
  AUTHOR = {M. Jardino and G. Adda},
  TITLE = {Language modeling for {CSR} of large corpus using automatic classification of words},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {1191}
}

@INPROCEEDINGS{Jardino93B,
  AUTHOR = {M. Jardino and G. Adda},
  TITLE = {Automatic word classification using simulated annealing},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Minneapolis},
  MONTH = Apr,
  YEAR = 1993,
  PAGES = {II 41}
}

@ARTICLE{Lamel93A,
  AUTHOR = {L.F. Lamel},
  TITLE = {A knowledge-based system for stop consonant identification based on speech spectrogram reading},
  JOURNAL = csl,
  YEAR = 1993,
  MONTH = Apr,
  VOLUME = 7,
  NUMBER = 2,
  PAGES = {169--191},
  URL = {http://www-tlp.limsi.fr/public/lamel_csl_93.pdf}
}

@INPROCEEDINGS{Lamel93B,
  AUTHOR = {L.F. Lamel},
  TITLE = {COCOSDA Action NEWS},
  BOOKTITLE = {Workshop COCOSDA, Berlin},
  MONTH = Sep,
  YEAR = 1993
}

% NOTE(review): SERIES is not a field the standard styles print for journal
% articles; if 11 is the volume it should move to VOLUME -- confirm.
@ARTICLE{Lamel93C,
  AUTHOR = {L.F. Lamel},
  TITLE = {The DARPA HLT Workshop},
  JOURNAL = {ELSNEWS},
  MONTH = Apr,
  YEAR = 1993,
  SERIES = 11,
  NUMBER = {2(2)},
  NOTE = {Also published in the NESCA Newsletter}
}

@INPROCEEDINGS{Lamel93D,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {Cross-Lingual Experiments with Phone Recognition},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Minneapolis},
  MONTH = Apr,
  YEAR = 1993,
  URL = {http://www-tlp.limsi.fr/public/ica93.pdf}
}

% The third author is spelt "Prouts" in every other entry of this file,
% including the duplicate record of this paper (etrw93syn).
@INPROCEEDINGS{Lamel93E,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain and B. Prouts and C. Bouhier and R.
Boesch},
  TITLE = {Generation and Synthesis of Broadcast Messages},
  BOOKTITLE = {Proc. ESCA Workshop on Applications of Speech Technology, Lautrach},
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {207--210},
  URL = {http://www-tlp.limsi.fr/public/ast93.pdf}
}

@INPROCEEDINGS{Lamel93F,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {High Performance Speaker-Independent Phone Recognition Using {CDHMM}},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {121--124},
  URL = {http://www-tlp.limsi.fr/public/euro93ph.pdf}
}

@INPROCEEDINGS{Lamel93G,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {Identifying Non Linguistic Speech Features},
  BOOKTITLE = eurospeech,
  ADDRESS = {Berlin},
  MONTH = Sep,
  YEAR = 1993,
  PAGES = {23--31},
  URL = {http://www-tlp.limsi.fr/public/euro93nl.pdf}
}

@INPROCEEDINGS{LamelGauvain93L,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {{LIMSI WSJ System: Nov 92 evaluation}},
  BOOKTITLE = darpasnlw,
  MONTH = Jan,
  YEAR = 1993,
  NOTE = {presentation}
}

@INPROCEEDINGS{Lamel93H,
  AUTHOR = {L.F. Lamel and J.-L. Gauvain},
  TITLE = {Language Identification Using Phone-based Acoustic Likelihoods},
  BOOKTITLE = ieeeasr,
  MONTH = Dec,
  YEAR = 1993,
  keywords = { lid },
  URL = {http://www-tlp.limsi.fr/public/asru93lid.pdf}
}

% don't have source
% NOTE(review): volume, number and pages were empty placeholders and are still
% to be supplied.
@ARTICLE{LeeGauvain93A,
  AUTHOR = {C.H. Lee and J.-L. Gauvain and R. Pieraccini and L.R. Rabiner},
  TITLE = {Large Vocabulary Speech Recognition Using Subword Units},
  JOURNAL = spcom,
  YEAR = 1993,
  MONTH = Dec
}

% don't have source
% NOTE(review): volume, number and pages were empty placeholders and are still
% to be supplied.
@ARTICLE{Lee93A,
  AUTHOR = {C. H. Lee and J.-L. Gauvain and R. Pieraccini and L. R. Rabiner},
  TITLE = {Subword-Based Large-Vocabulary Speech Recognition},
  JOURNAL = atttj,
  YEAR = 1993,
  MONTH = {Sept/Oct}
}

% don't have source
@INPROCEEDINGS{LeeGauvain93B,
  AUTHOR = {C. H. Lee and J.-L.
Gauvain},
  TITLE = {{Bayesian} Technics in Stochastic Modeling},
  BOOKTITLE = ieeeasr,
  ADDRESS = {Snowbird},
  MONTH = dec,
  YEAR = 1993
}
% NOTE(review): "Technics" in LeeGauvain93B is probably "Techniques" -- confirm against the proceedings.

% don't have source
@INPROCEEDINGS{LeeGauvain93C,
  AUTHOR = {C. H. Lee and J.-L. Gauvain},
  TITLE = {Speaker Adaptation based on {MAP} Estimation of {HMM} Parameters},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Minneapolis},
  MONTH = apr,
  YEAR = 1993
}

@INPROCEEDINGS{MarqueBennacef93A,
  AUTHOR = {F. Marque and S. K. Bennacef and F. N\'eel and S. Trinh},
  TITLE = {{PAROLE} : A Vocal dialogue System for Air Traffic Control Training},
  BOOKTITLE = {Applications of Speech Technology, Lautrach, Germany},
  MONTH = "16--17~" # sep,
  YEAR = 1993
}

% NOTE(review): etrw93syn describes the same paper as Lamel93E (same title and URL);
% the key is kept so that existing citations still resolve.
@INPROCEEDINGS{etrw93syn,
  AUTHOR = {L. F. Lamel and J.-L. Gauvain and B. Prouts and C. Bouhier and R. Boesch},
  TITLE = {Generation and Synthesis of Broadcast Messages},
  BOOKTITLE = {Applications of Speech Technology, Lautrach, Germany},
  MONTH = "16--17~" # sep,
  URL = {http://www-tlp.limsi.fr/public/ast93.pdf},
  YEAR = 1993
}

@INPROCEEDINGS{RichardAlessandro93A,
  AUTHOR = {G. Richard and C. d'Alessandro and S. Grau},
  TITLE = {Musical noise synthesis using random wavelets},
  BOOKTITLE = {Proc. of Stockholm Music Acoustics Conference {SMAC} 93},
  SERIES = {Publications of the Royal Swedish Academy of Music},
  NUMBER = 79,
  YEAR = 1993,
  NOTE = {paru en 1994},
  PAGES = {580--583}
}

@ARTICLE{Alessandro92A,
  AUTHOR = {C. d'Alessandro and C. Demars},
  TITLE = {Repr\'esentations temps-fr\'equence du signal de parole},
  JOURNAL = {Traitement du Signal},
  YEAR = 1992,
  VOLUME = 9,
  NUMBER = 2,
  PAGES = {153--173}
}

@INPROCEEDINGS{Alessandro92B,
  AUTHOR = {C.
d'Alessandro},
  TITLE = {Speech Analysis and Synthesis Using an Auditory-based Wavelet Representation},
  BOOKTITLE = {Workshop Comparing Speech Signal Representation},
  ORGANIZATION = {European Speech Communication Association},
  ADDRESS = {Sheffield},
  YEAR = 1992,
  MONTH = apr,
  PAGES = {31--38}
}

@INPROCEEDINGS{Alessandro92C,
  AUTHOR = {C. d'Alessandro and G. Richard and S. Grau},
  TITLE = {Random wavelet representation of unvoiced speech},
  BOOKTITLE = {International Symposium on Time-Frequency and Time-Scale Analysis},
  ORGANIZATION = {Institute of Electrical and Electronics Engineers},
  ADDRESS = {Victoria, BC},
  YEAR = 1992,
  MONTH = oct,
  PAGES = {41--44}
}

@INPROCEEDINGS{Beaugendre92A,
  AUTHOR = {F. Beaugendre and C. d'Alessandro and A. Lacheret-Dujour and J. Terken},
  TITLE = {Un inventaire des mouvements m\'elodiques du Fran\c{c}ais},
  BOOKTITLE = {19\`emes Journ\'ees d'\'etudes sur la parole},
  ORGANIZATION = {Groupe Communication Parl\'ee, Soci\'et\'e Fran\c{c}aise d'Acoustique},
  ADDRESS = {Bruxelles},
  YEAR = 1992,
  MONTH = apr,
  PAGES = {351--356}
}

@INPROCEEDINGS{Beaugendre92B,
  AUTHOR = {F. Beaugendre and C. d'Alessandro and A. Lacheret-Dujour and J. Terken},
  TITLE = {A Perceptual Study of {French} Intonation},
  BOOKTITLE = icslp,
  ADDRESS = {Banff},
  YEAR = 1992,
  PAGES = {739--742}
}

@INPROCEEDINGS{Bennacef92A,
  AUTHOR = {S. K. Bennacef and K. Matrouf and F. N\'eel},
  TITLE = {Strat\'egie de dialogue pour traitement du langage parl\'e: Application aux renseignements telephoniques},
  BOOKTITLE = {S\'eminaire Dialogue, Dourdan, France},
  MONTH = "15--16~" # apr,
  YEAR = 1992
}

@TECHREPORT{Gangolf92A,
  AUTHOR = {R. Boesch and J.-J. Gangolf and J.-L. Gauvain and B. Labelle and B. Prouts and G. Qu\'enot and P. Wacrenier},
  TITLE = {Etude et r\'ealisation d'un syst\`eme \`a architecture parall\`ele d\'edi\'e \`a la reconnaissance de la parole},
  INSTITUTION = {Compte rendu de fin d'\'etude, Contrat MRT no.
89.S.0622},
  MONTH = nov,
  YEAR = 1992
}

@ARTICLE{Demars92A,
  AUTHOR = {C. Demars},
  TITLE = {M\'ethodes temps-fr\'equence-Op\'erations signaux test},
  JOURNAL = {Traitement du Signal},
  PAGES = {99--100},
  YEAR = 1992,
  NUMBER = 1,
  VOLUME = 9
}

@INPROCEEDINGS{Demars92B,
  AUTHOR = {C. Demars},
  TITLE = {Time-instantaneous frequency representation},
  BOOKTITLE = {ESCA Workshop Comparing Speech Signal Representation, Sheffield, England},
  MONTH = apr,
  YEAR = 1992,
  PAGES = {161--168}
}

@INPROCEEDINGS{Gauvain92A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel},
  TITLE = {Speaker-Independent Phone Recognition Using {BREF}},
  BOOKTITLE = darpasnlw,
  MONTH = feb,
  YEAR = 1992,
  URL = {http://www-tlp.limsi.fr/public/darpa92.pdf}
}

% The issue designation 2--3 belongs in NUMBER; article styles ignore SERIES.
@ARTICLE{Gauvain92B,
  AUTHOR = {J.-L. Gauvain and C. H. Lee},
  TITLE = {{Bayesian} Learning for Hidden {Markov} Model with {Gaussian} Mixture State Observation Densities},
  JOURNAL = spcom,
  VOLUME = {11},
  NUMBER = {2--3},
  MONTH = jun,
  YEAR = 1992,
  PAGES = {205--213},
  URL = {http://www-tlp.limsi.fr/public/spc92map.pdf}
}

% don't have source
@INPROCEEDINGS{Gauvain92C,
  AUTHOR = {J.-L. Gauvain and C. H. Lee},
  TITLE = {Improved acoustic modeling with {Bayesian} learning},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {San Francisco},
  MONTH = mar,
  YEAR = 1992,
  PAGES = {481--484}
}

@INPROCEEDINGS{GauvainLee92M,
  AUTHOR = {J.-L. Gauvain and C. H. Lee},
  TITLE = {{MAP} Estimation of Continuous Density {HMM}: Theory and Applications},
  BOOKTITLE = darpasnlw,
  MONTH = feb,
  YEAR = 1992,
  URL = {http://www-tlp.limsi.fr/public/darfeb92.pdf}
}

@INPROCEEDINGS{Geoffrois92A,
  AUTHOR = {E. Geoffrois and J.-F. Vibert},
  TITLE = {Mod\'elisation du r\'eaccord de neurones dans un syst\`eme sensoriel par modification de d\'elais de transmission},
  BOOKTITLE = NSI,
  YEAR = 1992,
  MONTH = may
}

@INPROCEEDINGS{Grau92A,
  AUTHOR = {S. Grau and C.
d'Alessandro},
  TITLE = {Analyse-synth\`ese par d\'ecomposition de la partie d\'eterministe et de la partie al\'eatoire du signal de parole},
  BOOKTITLE = {19\`emes Journ\'ees d'\'etudes sur la parole},
  ORGANIZATION = {Groupe Communication Parl\'ee, Soci\'et\'e Fran\c{c}aise d'Acoustique},
  ADDRESS = {Bruxelles},
  YEAR = 1992,
  MONTH = apr,
  PAGES = {313--318}
}

@INPROCEEDINGS{Lamel92A,
  AUTHOR = {L. F. Lamel},
  TITLE = {Extracting acoustic-phonetic information from the spectrogram},
  BOOKTITLE = {r\'eunion du GdR 134 CNRS TdSI, ENST, Paris},
  MONTH = jun,
  YEAR = 1992
}

% Report series and number split out of the former NOTE so that the
% INSTITUTION field required for a technical report is present.
@TECHREPORT{Lamel92B,
  AUTHOR = {L. F. Lamel},
  TITLE = {Report on Speech Corpora Development in the {U.S.}},
  INSTITUTION = {LIMSI},
  TYPE = {Notes et documents},
  NUMBER = {92-8},
  YEAR = 1992
}

@INPROCEEDINGS{Lamel92C,
  AUTHOR = {L. F. Lamel and J.-L. Gauvain},
  TITLE = {Continuous Speech Recognition at {LIMSI}},
  BOOKTITLE = {Final review of the DARPA Artificial Neural Network Technology (ANNT) Speech Program, Stanford, CA},
  MONTH = sep,
  YEAR = 1992,
  PAGES = {59--64},
  URL = {http://www-tlp.limsi.fr/public/rmsep92.pdf}
}

@INPROCEEDINGS{Lamel92D,
  AUTHOR = {L. F. Lamel and J.-L. Gauvain},
  TITLE = {Experiments on Speaker-Independent Phone Recognition Using {BREF}},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {San Francisco},
  MONTH = mar,
  YEAR = 1992,
  PAGES = {557--560},
  URL = {http://www-tlp.limsi.fr/public/ica92lim.pdf}
}

% slides only
@INPROCEEDINGS{Lamel92E,
  AUTHOR = {L. F. Lamel and J.-L. Gauvain},
  TITLE = {Large Vocabulary Speech Recognition at {LIMSI}},
  BOOKTITLE = {1st International Workshop on Spoken Language Translation, Warden, Germany},
  MONTH = oct,
  NOTE = {presentation},
  YEAR = 1992
}

% don't have source file
@INPROCEEDINGS{LeeGauvain92A,
  AUTHOR = {C. H. Lee and J.-L. Gauvain and R. Pieraccini and L. R.
Rabiner},
  TITLE = {Large Vocabulary Speech Recognition Using Subword Units},
  BOOKTITLE = {Fourth Australian International Conference on Speech Science and Technology (SST-92)},
  MONTH = dec,
  YEAR = 1992
}

@INPROCEEDINGS{Pieraccini92A,
  AUTHOR = {R. Pieraccini and E. Tzoukermann and Z. Gorelov and J.-L. Gauvain and E. Levin and C. H. Lee and J. G. Wilpon},
  TITLE = {A Speech Understanding System Based on Statistical Representation of Semantics},
  BOOKTITLE = ieeeicassp,
  MONTH = mar,
  YEAR = 1992,
  PAGES = {193--196}
}

@INPROCEEDINGS{Pieraccini92B,
  AUTHOR = {R. Pieraccini and E. Tzoukermann and Z. Gorelov and E. Levin and C. H. Lee and J.-L. Gauvain},
  TITLE = {Progress Report on the {Chronus} System: {ATIS} Benchmark Results},
  BOOKTITLE = darpasnlw,
  MONTH = feb,
  YEAR = 1992
}

@INPROCEEDINGS{Richard92A,
  AUTHOR = {G. Richard and C. d'Alessandro and S. Grau},
  TITLE = {Synth\`ese de bruit par formes d'ondes formantiques al\'eatoires},
  BOOKTITLE = {19\`emes Journ\'ees d'\'etudes sur la parole},
  ORGANIZATION = {Groupe Communication Parl\'ee, Soci\'et\'e Fran\c{c}aise d'Acoustique},
  ADDRESS = {Bruxelles},
  YEAR = 1992,
  MONTH = apr,
  PAGES = {71--76}
}

@INPROCEEDINGS{Richard92B,
  AUTHOR = {G. Richard and C. d'Alessandro and S. Grau},
  TITLE = {Unvoiced speech synthesis using {Poissonian} Random formant wave functions},
  BOOKTITLE = {Signal Processing VI, theories and applications, European Signal Processing Conference},
  ORGANIZATION = {European Signal Processing Association},
  ADDRESS = {Bruxelles},
  YEAR = 1992,
  MONTH = aug,
  PAGES = {347--350}
}

@INPROCEEDINGS{Wesfreid92A,
  AUTHOR = {E. Wesfreid and M. V. Wickerhauser and C. d'Alessandro and X. Fang},
  TITLE = {Speech signal segmentation via {Malvar} wavelets},
  BOOKTITLE = {International Conference on Wavelets and Applications},
  ADDRESS = {Toulouse},
  YEAR = 1992
}

@INPROCEEDINGS{Alessandro91A,
  AUTHOR = {C. d'Alessandro and D.
Beautemps},
  TITLE = {Transformation en ondelettes sur une \'echelle fr\'equentielle auditive},
  BOOKTITLE = {XIII\`emes colloque GRETSI},
  ORGANIZATION = {Groupe d'Etude sur le Traitement du Signal},
  ADDRESS = {Juan-les-Pins},
  YEAR = 1991,
  MONTH = sep,
  PAGES = {745--748}
}

@INPROCEEDINGS{Alessandro91B,
  AUTHOR = {C. d'Alessandro and C. Demars},
  TITLE = {Repr\'esentations temps-fr\'equence du signal de parole},
  BOOKTITLE = {S\'eminaire Traitement et Repr\'esentation du Signal de Parole},
  ORGANIZATION = {Groupe Communication Parl\'ee, Soci\'et\'e Fran\c{c}aise d'Acoustique},
  ADDRESS = {Le Mans},
  YEAR = 1991,
  MONTH = jun,
  PAGES = {11--41}
}

@INPROCEEDINGS{Alessandro91C,
  AUTHOR = {C. d'Alessandro and D. Beautemps},
  TITLE = {Un spectrographe et un Vocodeur par transform\'ee en ondelettes utilisant des contraintes auditives},
  BOOKTITLE = {S\'eminaire Traitement et Repr\'esentation du Signal de Parole},
  ORGANIZATION = {Groupe Communication Parl\'ee, Soci\'et\'e Fran\c{c}aise d'Acoustique},
  ADDRESS = {Le Mans},
  YEAR = 1991,
  MONTH = jun,
  PAGES = {61--69}
}

% NOTE(review): the leading "(5)" / "5:" in the former PAGES values of Alessandro91D and
% Alessandro91E is read here as the proceedings volume -- confirm.
@INPROCEEDINGS{Alessandro91D,
  AUTHOR = {C. d'Alessandro and M. Castellengo},
  TITLE = {Etude de la perception des notes courtes chant\'ees en pr\'esence de vibrato},
  BOOKTITLE = {XIIth International Congress of Phonetic Sciences},
  ADDRESS = {Aix-en-Provence},
  YEAR = 1991,
  MONTH = aug,
  VOLUME = 5,
  PAGES = {402--405}
}

@INPROCEEDINGS{Alessandro91E,
  AUTHOR = {C. d'Alessandro and D. Beautemps},
  TITLE = {Justification perceptive du spectrographe auditif},
  BOOKTITLE = {XIIth International Congress of Phonetic Sciences},
  ADDRESS = {Aix-en-Provence},
  YEAR = 1991,
  MONTH = aug,
  VOLUME = 5,
  PAGES = {86--89}
}

@INPROCEEDINGS{Alessandro91F,
  AUTHOR = {C. d'Alessandro and M. Castellengo},
  TITLE = {Etude, par la synth\`ese, de la perception du vibrato vocal dans les transitions de notes},
  BOOKTITLE = {Congr\`es International d'Audiophonologie, Bulletin d'Audiophonologie, Annales scientifiques de l'Universit\'e de franche-comt\'e, Vol. VII No.
5 \& 6},
  ADDRESS = {Besan\c{c}on},
  YEAR = 1991,
  MONTH = feb,
  PAGES = {551--564}
}

% NOTE(review): Alessandro91G appears to describe the same paper as Alessandro91B
% (same authors, title, month and pages); key kept so existing citations still resolve.
@INPROCEEDINGS{Alessandro91G,
  AUTHOR = {C. d'Alessandro and C. Demars},
  TITLE = {Repr\'esentations temps-fr\'equence du signal de parole},
  BOOKTITLE = {S\'eminaire Traitement et repr\'esentation du signal de parole SFA et GRECO-PRC Communication Homme-Machine, Le Mans},
  MONTH = jun,
  YEAR = 1991,
  PAGES = {11--41}
}

@INPROCEEDINGS{AugerDemar91A,
  AUTHOR = {F. Auger and C. Demars and T. Doligez and P. Flandrin and C. Lambert and J. Mars and F. Molinaro and J.-P. Ovarlez and O. Rioul},
  TITLE = {Rapport de synth\`ese: Op\'erations signaux-test, M\'ethodes temps-fr\'equence Fiches synth\'etiques},
  BOOKTITLE = {Assembl\'ee g\'en\'erale du GDR 134 Traitement du signal et Images, Batz sur Mer},
  MONTH = "8--9~" # apr,
  YEAR = 1991,
  PAGES = {24--25}
}

% don't have source file
@INPROCEEDINGS{Lamel91A,
  AUTHOR = {J. Baker and P. Bamberg and L. Gillick and L. F. Lamel and R. Roth and F. Scattone and D. Sturtevant and O. Ba and R. Benedict},
  TITLE = {{Dragon Systems} {Resource Management} Benchmark Results},
  BOOKTITLE = darpasnlw,
  MONTH = feb,
  YEAR = 1991
}

@INPROCEEDINGS{Demars91A,
  AUTHOR = {C. Demars},
  TITLE = {Utilisation de la fr\'equence instantan\'ee en traitement de la parole},
  BOOKTITLE = {S\'eminaire Traitement et repr\'esentation du signal de parole SFA et GRECO-PRC Communication Homme-Machine, Le Mans},
  MONTH = "4--5~" # jun,
  YEAR = 1991,
  PAGES = {57--60}
}

@INPROCEEDINGS{GauvainLee91,
  AUTHOR = {J.-L. Gauvain and C. H. Lee},
  TITLE = {{Bayesian} Learning of {Gaussian} Mixture Densities for Hidden {Markov} Models},
  BOOKTITLE = darpasnlw,
  PUBLISHER = {Morgan Kaufmann Publishers},
  MONTH = feb,
  YEAR = 1991,
  PAGES = {272},
  URL = {http://www-tlp.limsi.fr/public/darpa91.pdf}
}

%can't find latex file
@INPROCEEDINGS{Gauvain91A,
  AUTHOR = {J.-L. Gauvain and C. H.
Lee},
  TITLE = {{Bayesian} Learning for Hidden {Markov} Model with {Gaussian} Mixture State Observation Densities},
  BOOKTITLE = eurospeech,
  ADDRESS = {Genoa},
  MONTH = sep,
  YEAR = 1991,
  PAGES = {939--942}
}

% NOTE(review): Geoffrois91A and Geoffrois91B are theses filed as TECHREPORT without an
% INSTITUTION; convert to MASTERSTHESIS with a SCHOOL once the school is known.
@TECHREPORT{Geoffrois91A,
  AUTHOR = {E. Geoffrois},
  TITLE = {Frequency Receptive Field Plasticity in the Guinea Pig Thalamus: The Role of Delay Modifications in Learning},
  YEAR = 1991,
  NOTE = {Master Thesis}
}

@TECHREPORT{Geoffrois91B,
  AUTHOR = {E. Geoffrois},
  TITLE = {La plasticit\'e des champs r\'ecepteurs aux fr\'equences dans le thalamus auditif du cobaye~: Le r\^ole des modifications de d\'elais dans l'apprentissage},
  YEAR = 1991,
  NOTE = {M\'emoire de DEA}
}

@BOOK{Gauvain91B,
  AUTHOR = {J.-P. Haton and J.-M. Pierrel and G. Perennou and J. Caelen and J.-L. Gauvain},
  TITLE = {Reconnaissance automatique de la parole},
  PUBLISHER = {Dunod Informatique},
  ADDRESS = {Paris},
  MONTH = feb,
  YEAR = 1991
}

% LIMSI "Notes et documents" reports: series and number split out of the former NOTE
% so that the INSTITUTION field required for a technical report is present.
@TECHREPORT{Lamel91B,
  AUTHOR = {L. F. Lamel},
  TITLE = {{ABXtest} software and associated {ScoreABX} scoring Software},
  INSTITUTION = {LIMSI},
  TYPE = {Notes et documents},
  NUMBER = {91-23},
  YEAR = 1991
}

@TECHREPORT{Lamel91C,
  AUTHOR = {L. F. Lamel},
  TITLE = {{FCtest} Perceptual Test Facility},
  INSTITUTION = {LIMSI},
  TYPE = {Notes et documents},
  NUMBER = {91-22},
  YEAR = 1991
}

@TECHREPORT{Lamel91D,
  AUTHOR = {L. F. Lamel},
  TITLE = {{LIMREC} recording system},
  INSTITUTION = {LIMSI},
  TYPE = {Notes et documents},
  NUMBER = {91-24},
  YEAR = 1991
}

@INPROCEEDINGS{Lamel91E,
  AUTHOR = {L. F. Lamel and J.-L. Gauvain and M. Esk\'enazi},
  TITLE = {{BREF}, a Large Vocabulary Spoken Corpus for {French}},
  BOOKTITLE = eurospeech,
  ADDRESS = {Genoa},
  MONTH = sep,
  YEAR = 1991,
  PAGES = {505--508},
  URL = {http://www-tlp.limsi.fr/public/e91_0505.pdf}
}

@INPROCEEDINGS{Matrouf91A,
  AUTHOR = {K. Matrouf and F. N\'eel and J.-L. Gauvain and J.
Mariani},
  TITLE = {Syst\`eme de dialogue orient\'e par la t\^ache: {P}oste pilote vocal},
  BOOKTITLE = {2\`emes Journ\'ees Nationales du GRECO-PRC Communication Homme-Machine, Toulouse},
  MONTH = jan,
  YEAR = 1991
}

@INPROCEEDINGS{Pieraccini91A,
  AUTHOR = {R. Pieraccini and E. Levin and J.-L. Gauvain and Z. Gorelov and C. H. Lee and E. Tzoukermann and J. G. Wilpon},
  TITLE = {A Speech Understanding System Based on Statistical Representation of Semantics},
  BOOKTITLE = ieeeasr,
  MONTH = dec,
  YEAR = 1991
}

@INPROCEEDINGS{Alessandro90B,
  AUTHOR = {C. d'Alessandro and D. Beautemps},
  TITLE = {Repr\'esentation, modification et synth\`ese du signal vocal par formes d'ondes \'el\'ementaires},
  BOOKTITLE = {Colloque International ``Mod\`eles physiques, cr\'eation musicale et ordinateur''. Editions de la maison des sciences de l'homme},
  ADDRESS = {Grenoble},
  YEAR = 1990,
  PAGES = {247--272}
}

% NOTE(review): Demars90A looks like an internal LIMSI note filed as INPROCEEDINGS.
@INPROCEEDINGS{Demars90A,
  AUTHOR = {C. Demars},
  TITLE = {Repr\'esentations temps-fr\'equence et param\'etrisations d'un signal. El\'ements de monographie},
  BOOKTITLE = {Note interne LIMSI 90},
  MONTH = nov,
  YEAR = 1990,
  PAGES = {1--96}
}

@TECHREPORT{Gangolf90A,
  AUTHOR = {J.-J. Gangolf and J.-L. Gauvain and J. Mariani},
  TITLE = {Etude et r\'ealisation d'un syst\`eme d'authentification vocale},
  INSTITUTION = {Rapport de fin de contrat MRT},
  MONTH = dec,
  YEAR = 1990
}

% The former page range 1097-2000 would span about 900 pages; corrected to the four-page range.
@INPROCEEDINGS{Gauvain90A,
  AUTHOR = {J.-L. Gauvain and L. F. Lamel and M. Esk\'enazi},
  TITLE = {Design Considerations and Text Selection for {BREF}, a large {French} read-speech corpus},
  BOOKTITLE = icslp,
  ADDRESS = {Kobe, Japan},
  MONTH = nov,
  YEAR = 1990,
  VOLUME = 2,
  PAGES = {1097--1100},
  URL = {http://www-tlp.limsi.fr/public/kobe90.pdf}
}

@TECHREPORT{Geoffrois90A,
  AUTHOR = {E. Geoffrois},
  TITLE = {A Scheme for Automatic Determination of the Parameters of {Fujisaki's} Model of Intonation},
  INSTITUTION = {NTT},
  YEAR = 1990,
  MONTH = aug
}

@ARTICLE{Alessandro90A,
  AUTHOR = {C.
d'Alessandro},
  TITLE = {Time-frequency speech transformation based on an elementary waveform representation},
  JOURNAL = spcom,
  YEAR = 1990,
  VOLUME = 9,
  NUMBER = {5--6},
  PAGES = {419--431}
}

@INPROCEEDINGS{Lamel90A,
  AUTHOR = {L. F. Lamel and M. Eskenazi},
  TITLE = {Manipulation de param\`etres issus d'une analyse en formes d'ondes: Tests pr\'eliminaires},
  BOOKTITLE = {Proc. XVIIIemes Journ\'ees d'Etudes sur la Parole},
  YEAR = 1990
}

%presentation only slides
% NOTE(review): SERIES = 1 presumably denotes a supplement of volume 87 -- confirm.
@INPROCEEDINGS{Lamel90B,
  AUTHOR = {L. F. Lamel and M. Eskenazi},
  TITLE = {Preliminary experiments manipulating elementary waveform parameters},
  BOOKTITLE = jasa,
  VOLUME = 87,
  SERIES = 1,
  NOTE = {presentation},
  YEAR = 1990
}

@UNPUBLISHED{Lamel90C,
  AUTHOR = {L. F. Lamel and J. Garofolo and J. Fiscus and W. Fisher and D. S. Pallett},
  TITLE = {{TIMIT}: The {DARPA} Acoustic-Phonetic Speech Corpus},
  NOTE = {NTIS Publication},
  YEAR = 1990
}

% ICASSP 1990 was held in Albuquerque (Toronto hosted the 1991 edition).
@INPROCEEDINGS{Matrouf90A,
  AUTHOR = {K. Matrouf and J.-L. Gauvain and F. N\'eel and J. Mariani},
  TITLE = {Adapting Probability-Transitions in {DP} Matching Process for an Oral Task-Oriented Dialogue},
  BOOKTITLE = ieeeicassp,
  ADDRESS = {Albuquerque},
  MONTH = apr,
  YEAR = 1990,
  PAGES = {569--572}
}

@INPROCEEDINGS{Matrouf90B,
  AUTHOR = {K. Matrouf and J.-L. Gauvain and F. N\'eel and J. Mariani},
  TITLE = {An Oral Task-Oriented Dialogue for Air-Traffic Controller Training},
  BOOKTITLE = {SPIE's 1990 Technical Symposium on Optical Engineering and Photonics in Aerospace Sensing, Applications of Artificial Intelligence, Orlando},
  MONTH = apr,
  YEAR = 1990
}

@INPROCEEDINGS{Lamel89,
  AUTHOR = {L. F. Lamel},
  TITLE = {Some Perspectives on Speech Database Development},
  BOOKTITLE = {Proceedings of the ESCA Workshop on Speech Input/Output Assessment and Speech Databases},
  URL = {http://www-tlp.limsi.fr/public/esca89.pdf},
  YEAR = 1989
}

@INPROCEEDINGS{LamelKasselSeneff_timit86,
  AUTHOR = {L. F. Lamel and R. H. Kassel and S.
Seneff},
  TITLE = {Speech Database Development: Design and Analysis of the Acoustic-Phonetic Corpus},
  BOOKTITLE = { Proc. DARPA Speech Recognition Workshop, Report no. SAIC-86/1546},
  MONTH = feb,
  YEAR = 1986,
  URL = {http://www-tlp.limsi.fr/public/LamelKasselSeneff_timit86.pdf}
}