% Conference paper presented at FUZZ-IEEE 2022.
% The citation key is kept unchanged so existing \cite commands keep resolving.
% Event details live in eventtitle/eventdate and the funding/copyright text in
% annote, so they are not printed as part of the rendered reference.
@inproceedings{645a48e1158742d48a4649bb5a0d0416,
  author     = {Wang, W. W. and Han, Y. C. and Bromuri, S. and Dumontier, M.},
  title      = {Semantic Correlation Graph Embedding},
  booktitle  = {2022 {IEEE} International Conference on Fuzzy Systems ({FUZZ-IEEE})},
  series     = {IEEE International Fuzzy Systems Conference Proceedings},
  publisher  = {IEEE},
  address    = {United States},
  year       = {2022},
  eventtitle = {{IEEE} International Conference on Fuzzy Systems, {FUZZ-IEEE} 2022},
  eventdate  = {2022-07-18/2022-07-23},
  isbn       = {9781665467100},
  doi        = {10.1109/FUZZ-IEEE55066.2022.9882620},
  language   = {English},
  keywords   = {Categorical data, Logistic regression, Knowledge graph, Graph embedding, TransE},
  abstract   = {Many data sets include categorical features in the form of nominal and ordinal features. However, most machine learning algorithms cannot deal with categorical features directly because they require numerical input features. Categorical embeddings are an effective approach to converting categorical features into numerical vectors. This work proposes a novel embedding approach, called Semantic Correlation Graph Embedding, to create embeddings from knowledge graphs. The approach constructs a semantic correlation graph of triplets among the categorical features to learn numerical embeddings. Our approach aims to uncover relationships taking place in categorical data in terms of low-level knowledge and semantics that may help group the features of the data sets under semantic entities. Three distinct embedding models are proposed according to how the graph is constructed. The results are evaluated with two public data sets. They show that the learned embeddings produce a statistically significant improvement in the performance of the classification tasks in terms of AUC, F1 score, precision, and recall.},
  annote     = {Funding Information: This research was supported by the Province of Limburg, The Netherlands, under grant number SAS-2020-03117. Publisher Copyright: {\textcopyright} 2022 IEEE.},
}