

de Recherche et d’Innovation
en Cybersécurité et Société
Allaoui, M.; Hedjam, R.; Bouanane, K.; Allili, M. S.; Kherfi, M. L.; Belhaouari, S. B.
Exploring non-negativity for improved manifold embedding: Application to t-SNE Article de journal
Dans: Knowledge-Based Systems, vol. 330, 2025, ISSN: 0950-7051.
Résumé | Liens | BibTeX | Étiquettes: Dimensionality reduction, Embedding technique, Embeddings, Gradient methods, Gradient-descent, Manifold embedding, Matrix algebra, Non-negative matrix factorization, Non-negativity, Nonnegative matrix factorization, Nonnegativity constraints, Performance, T-SNE
% Journal article in Knowledge-Based Systems, vol. 330 (2025); the url field links to the Scopus record for the DOI below.
@article{allaoui_exploring_2025,
  title      = {Exploring non-negativity for improved manifold embedding: Application to {t-SNE}},
  author     = {Allaoui, M. and Hedjam, R. and Bouanane, K. and Allili, M. S. and Kherfi, M. L. and Belhaouari, S. B.},
  url        = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105018098090&doi=10.1016%2Fj.knosys.2025.114547&partnerID=40&md5=237540c38a928146d589b96cd6888547},
  doi        = {10.1016/j.knosys.2025.114547},
  issn       = {0950-7051},
  year       = {2025},
  date       = {2025-01-01},
  journal    = {Knowledge-Based Systems},
  volume     = {330},
  abstract   = {Drawing inspiration from Non-negative Matrix Factorization (NMF), this paper explores the potential of incorporating non-negativity constraints into embedding techniques, with a focus on t-SNE as an application. Specifically, we investigate the following questions: Can enforcing non-negativity in the embedding space enhance interpretability and improve the quality of embedded data? By prioritizing non-negativity, can embedding methods achieve better performance and more meaningful representations? Additionally, does enforcing non-negativity in the embedded space help preserve both the local and global structure of data in the manifold, leading to more accurate and interpretable embeddings? In this work, we could show both objectively and subjectively how enforcing t-SNE to leverage the non-negativity of the data addresses the raised questions. To achieve this, we introduced a novel approach to transforming the additive update rule of the gradient descent used by t-SNE to a multiplicative counterpart to enforce the non-negativity in the embedded space. However, grappling with full non-negativity in the gradient descent formula presents challenges, prompting our focus solely on the $(y_i - y_j)$ term, resulting in a semi-non-negative t-SNE algorithm, shortly named SN-tSNE. Nevertheless, experimental findings substantiate the significant impact of the proposed update rule on the performance and efficacy of the SN-tSNE algorithm. Furthermore, additional experiments are performed to compare SN-tSNE with its precursor t-SNE, as well as the competitive embedding technique UMAP, alongside other relevant embedding and dimensionality reduction models like NMF. The source code of SN-tSNE is available on GitHub (https://github.com/M-Allaoui/SN-tSNE.git). © 2025},
  keywords   = {Dimensionality reduction, Embedding technique, Embeddings, Gradient methods, Gradient-descent, Manifold embedding, Matrix algebra, Non-negative matrix factorization, Non-negativity, Nonnegative matrix factorization, Nonnegativity constraints, Performance, T-SNE},
  pubstate   = {published},
  tppubtype  = {article}
}
Souza, J. V.; Amamou, H.; Chen, R.; Salari, E.; Gubelmann, R.; Niklaus, C.; Serpa, T.; Lima, M. M. F.; Pinto, P. T.; Kshirsagar, S.; Davoust, A.; Handschuh, S.; Avila, A. R.
Cross-Lingual Keyword Extraction for Pesticide Terminology in Brazilian Portuguese and English Article de journal
Dans: Journal of the Brazilian Computer Society, vol. 31, no 1, p. 973–990, 2025, ISSN: 0104-6500.
Résumé | Liens | BibTeX | Étiquettes: Agriculture, BERT embedding, BERT embeddings, Cross-lingual, Embeddings, extraction, Food consumption, Keywords extraction, Labelings, Low resource languages, Multilingual extraction, Pesticides, Technical terms, Terminology, Word alignment
% Journal article in the Journal of the Brazilian Computer Society, vol. 31, no. 1 (2025); the url field links to the Scopus record for the DOI below.
% NOTE(review): the citation key says "de_souza" but the first author is listed as "Souza" - confirm the surname particle against the publisher record.
@article{de_souza_cross-lingual_2025,
  title      = {Cross-Lingual Keyword Extraction for Pesticide Terminology in {Brazilian} {Portuguese} and {English}},
  author     = {Souza, J. V. and Amamou, H. and Chen, R. and Salari, E. and Gubelmann, R. and Niklaus, C. and Serpa, T. and Lima, M. M. F. and Pinto, P. T. and Kshirsagar, S. and Davoust, A. and Handschuh, S. and Avila, A. R.},
  url        = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-105019700300&doi=10.5753%2Fjbcs.2025.5815&partnerID=40&md5=85ee75baf4550666a307310cd04d1c83},
  doi        = {10.5753/jbcs.2025.5815},
  issn       = {0104-6500},
  year       = {2025},
  date       = {2025-01-01},
  journal    = {Journal of the Brazilian Computer Society},
  volume     = {31},
  number     = {1},
  pages      = {973--990},
  abstract   = {Agriculture plays a crucial role in Brazil’s economy. As the country intensifies its activities in the sector, the use of pesticides also increases. Hence, the risks associated with pesticide-laden food consumption have become a concern for chemistry researchers. An issue affecting regulatory standardization of pesticides in Brazil is the difficulty in translating pesticide names, particularly from English. For example, the word malathion can be translated from English to Portuguese as malatiom or malatião, resulting in inconsistent labeling. This issue extends to the broader problem of translating highly technical terms between languages, in particular for low-resource languages. In this work, we investigate terminological variation in the chemistry of organophosphorus pesticides. Our goal is to study strategies for domain-specific multilingual keyword extraction. To that end, two corpora were built based on pesticide-related scientific documents in Brazilian Portuguese and English, which led to a total of 84 and 210 texts, respectively, representing the low- and high-resource languages in this study. We then assessed 6 methods for keyword extraction: Simple Maths, TF-IDF, YAKE, TextRank, MultipartiteRank, and KeyBERT. We relied on a multilingual contextual BERT embedding to retrieve corresponding pesticide names in the target language. Finetuning was also explored to improve the multilingual representation further. Moreover, we evaluated the use of large language models (LLMs) combined with the recent retrieval-augmented generation (RAG) framework. As a result, we found that the contextual approach, combined with fine-tuning, provided the best results, contributing to enhancing Pesticide Terminology Extraction in a multilingual scenario. © 2025, Brazilian Computing Society. All rights reserved.},
  keywords   = {Agriculture, BERT embedding, BERT embeddings, Cross-lingual, Embeddings, extraction, Food consumption, Keywords extraction, Labelings, Low resource languages, Multilingual extraction, Pesticides, Technical terms, Terminology, Word alignment},
  pubstate   = {published},
  tppubtype  = {article}
}



