% Conference paper in the proceedings of the 19th Australasian Conference on
% Data Mining (AusDM), published by Springer in the series
% "Communications in Computer and Information Science".
% Typed as @inproceedings: `title` is the paper, `booktitle` the proceedings
% volume, `author` the paper's authors and `editor` the volume editors.
@inproceedings{38dd73ba24bb4777aa599f898c443494,
  author    = {Thilakeswaran, Diviya and McManis, Simon and Wang, X. Rosalind},
  title     = {{Chameleon}: a {Python} workflow toolkit for feature selection},
  editor    = {Xu, Yue and Wang, Rosalind and Lord, Anton and Boo, Yee Ling and Nayak, Richi and Zhao, Yanchang and Williams, Graham},
  booktitle = {Data Mining: 19th Australasian Conference on Data Mining, {AusDM}, Brisbane, {QLD}, Australia, December 14--15, 2021, Proceedings},
  series    = {Communications in Computer and Information Science},
  publisher = {Springer},
  pages     = {121--135},
  year      = {2021},
  month     = dec,
  doi       = {10.1007/978-981-16-8531-6_9},
  isbn      = {9789811685309},
  language  = {English},
  keywords  = {Biological data, Classification, Feature selection},
  abstract  = {When considering classification problems in relation to high-dimensional data sets, such as biological data sets, the need for effective methods of dimensionality reduction by feature selection becomes apparent. Feature selection has been shown to significantly decrease computational cost and allow for classification models that are more easily interpretable. We present Chameleon, a Python-based toolkit that integrates all steps in a feature selection evaluation pipeline -- from splitting data for cross-validation, to visualisation of classification results using various metrics. We implemented in Chameleon six existing feature selection methods, six common classification methods, and the classification results are evaluated using two different metrics. We also implemented an ensemble method which selects only common features from the different methods evaluated. Experimental results using four different data sets suggest that the common features method achieves improved or similar classification performance, compared to the individual feature selection algorithms, using smaller and thus more computationally efficient subsets of features.},
}