

de Recherche et d’Innovation
en Cybersécurité et Société
Gagnon, S.; Messaoudi, S.; Charbonneau, A.
Automated Text Classification based on an ontology standard: Application of the Extensible Business Reporting Language (XBRL) to Reuters Corpus Volume 1 (RCV1) Article de journal
Dans: CORIA 2011: COnference en Recherche d'Information et Applications - Conference on Information Retrieval and Applications, p. 151–158, 2011, ISBN: 978-2-35768-024-1.
Résumé | Liens | BibTeX | Étiquettes: Administrative data processing, Automated Text Classification, Automation, Classification (of information), Domain-specific ontologies, Extensible Business Reporting Language (XBRL), F measure, Financial news, Information retrieval, Ontology, Reuters, Reuters Corpus Volume 1 (RCV1), Text classification, Text processing
% Conference paper published in the CORIA 2011 proceedings (the url field points to the Scopus record).
% pubstate and tppubtype are kept exactly as exported; tppubtype is not a standard BibTeX field
% (presumably publication-manager metadata -- confirm before re-importing).
@inproceedings{gagnon_automated_2011,
  author    = {Gagnon, S. and Messaoudi, S. and Charbonneau, A.},
  title     = {Automated Text Classification based on an ontology standard: Application of the {Extensible Business Reporting Language} ({XBRL}) to {Reuters Corpus Volume 1} ({RCV1})},
  booktitle = {{CORIA} 2011: {COnference} en {Recherche} d'{Information} et {Applications} - Conference on Information Retrieval and Applications},
  pages     = {151--158},
  address   = {Avignon},
  year      = {2011},
  isbn      = {978-2-35768-024-1},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84869423599&partnerID=40&md5=1d12a436b5acb9f715fd6f1669e37be4},
  abstract  = {We demonstrate that applying a domain-specific ontology standard significantly improves Automated Text Classification (ATC). We use the Extensible Business Reporting Language (XBRL) to define a standard ontology and compare the performance of an ACT engine (IBM Classification Module v.8.6) against 2 other list of concepts, namely simple and hierarchical. Our sample of financial news is extracted from the Reuters Corpus Volume 1 (RCV1), where 2 experts in finance help us code 1000 of the 45000 news dealing with mergers and acquisitions. We report recall, precision, the F measure, and in addition a hierarchical measure adjusted for classification relevance in parent classes, as well as a more detailed measure evaluating the classification improvements at the level of each text.},
  keywords  = {Administrative data processing, Automated Text Classification, Automation, Classification (of information), Domain-specific ontologies, Extensible Business Reporting Language (XBRL), F measure, Financial news, Information retrieval, Ontology, Reuters, Reuters Corpus Volume 1 (RCV1), Text classification, Text processing},
  pubstate  = {published},
  tppubtype = {article},
}