

de Recherche et d’Innovation
en Cybersécurité et Société
El-Kass, W.; Gagnon, S.; Iglewski, M.
A visual and results-driven rules composition approach for better information extraction Article d'actes
Dans: Zaremba, M.; Sasiadek, J.; Dolgui, A. (Ed.): IFAC-PapersOnLine, p. 112–117, 2015, ISSN: 2405-8963, (Issue: 3 Journal Abbreviation: IFAC-PapersOnLine).
Résumé | Liens | BibTeX | Étiquettes: Automation, F-score, Flow visualization, Harmonic mean, Information analysis, Information extraction, Information extraction rules, Information retrieval, Rule based, Rule composition, Rules composition, Visual process, Visualization
@comment{Conference paper published in IFAC-PapersOnLine, issue 3.
  Names use the "Last, First" form joined by "and" so each person is
  parsed separately. The serial identifier is an ISSN, so it is stored
  in the issn field; the issue is stored in number. pubstate and
  tppubtype are kept for the publication-list tool that exported this entry.}
@inproceedings{el-kass_visual_2015,
  author       = {El-Kass, W. and Gagnon, S. and Iglewski, M.},
  title        = {A visual and results-driven rules composition approach for better information extraction},
  editor       = {Zaremba, M. and Sasiadek, J. and Dolgui, A.},
  booktitle    = {IFAC-PapersOnLine},
  shortjournal = {IFAC-PapersOnLine},
  volume       = {28},
  number       = {3},
  pages        = {112--117},
  year         = {2015},
  doi          = {10.1016/j.ifacol.2015.06.067},
  issn         = {2405-8963},
  url          = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84953876235&doi=10.1016%2fj.ifacol.2015.06.067&partnerID=40&md5=3cd38b1d6b9efc819cd882d181cdda92},
  abstract     = {We present a highly visual process for creating and combining elementary information extraction rules, based on their results, in order to find the rules combination that produces the most accurate information extraction results. A rule's accuracy is determined by its F-Score which is the harmonic mean of the precision and the recall of that rule. Rules are combined using logical OR and AND operators. Running a few hundreds rules combinations over a corpus, in order to determine their accuracies, can take days. Using our approach, millions of rules combinations can be tested and their accuracies (F-Score) can be calculated in few seconds. A prototype was created to demonstrate the effectiveness of our approach. © 2015, IFAC (International Federation of Automatic Control) Hosting by Elsevier Ltd. All rights reserved.},
  keywords     = {Automation, F-score, Flow visualization, Harmonic mean, Information analysis, Information extraction, Information extraction rules, Information retrieval, Rule based, Rule composition, Rules composition, Visual process, Visualization},
  pubstate     = {published},
  tppubtype    = {inproceedings}
}
Gagnon, S.; Messaoudi, S.; Charbonneau, A.
Automated Text Classification based on an ontology standard: Application of the Extensible Business Reporting Language (XBRL) to Reuters Corpus Volume 1 (RCV1) Article d'actes
Dans: CORIA 2011: COnference en Recherche d'Information et Applications - Conference on Information Retrieval and Applications, p. 151–158, 2011, ISBN: 978-2-35768-024-1.
Résumé | Liens | BibTeX | Étiquettes: Administrative data processing, Automated Text Classification, Automation, Classification (of information), Domain-specific ontologies, Extensible Business Reporting Language (XBRL), F measure, Financial news, Information retrieval, Ontology, Reuters, Reuters Corpus Volume 1 (RCV1), Text classification, Text processing
@comment{Paper in the CORIA 2011 conference proceedings, so the entry is
  typed as inproceedings and the proceedings title lives in booktitle only.
  The identifier is an ISBN and is stored in the isbn field. Acronyms and
  proper nouns in the title are braced so sentence-casing styles keep their
  capitals. pubstate and tppubtype are kept for the publication-list tool
  that exported this entry.}
@inproceedings{gagnon_automated_2011,
  author    = {Gagnon, S. and Messaoudi, S. and Charbonneau, A.},
  title     = {Automated Text Classification based on an ontology standard: Application of the {Extensible Business Reporting Language} ({XBRL}) to {Reuters Corpus Volume 1} ({RCV1})},
  booktitle = {CORIA 2011: COnference en Recherche d'Information et Applications - Conference on Information Retrieval and Applications},
  pages     = {151--158},
  address   = {Avignon},
  year      = {2011},
  isbn      = {978-2-35768-024-1},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84869423599&partnerID=40&md5=1d12a436b5acb9f715fd6f1669e37be4},
  abstract  = {We demonstrate that applying a domain-specific ontology standard significantly improves Automated Text Classification (ATC). We use the Extensible Business Reporting Language (XBRL) to define a standard ontology and compare the performance of an ACT engine (IBM Classification Module v.8.6) against 2 other list of concepts, namely simple and hierarchical. Our sample of financial news is extracted from the Reuters Corpus Volume 1 (RCV1), where 2 experts in finance help us code 1000 of the 45000 news dealing with mergers and acquisitions. We report recall, precision, the F measure, and in addition a hierarchical measure adjusted for classification relevance in parent classes, as well as a more detailed measure evaluating the classification improvements at the level of each text.},
  keywords  = {Administrative data processing, Automated Text Classification, Automation, Classification (of information), Domain-specific ontologies, Extensible Business Reporting Language (XBRL), F measure, Financial news, Information retrieval, Ontology, Reuters, Reuters Corpus Volume 1 (RCV1), Text classification, Text processing},
  pubstate  = {published},
  tppubtype = {inproceedings}
}