Recursive feature elimination with cross-validation
A recursive feature elimination example with automatic tuning of the number of features selected with cross-validation.
Optimal number of features : 3
/home/circleci/project/sklearn/utils/deprecation.py:103: FutureWarning: The `grid_scores_` attribute is deprecated in version 1.0 in favor of `cv_results_` and will be removed in version 1.2.
  warnings.warn(msg, category=FutureWarning)
# Example script: recursive feature elimination (RFE) where the number of
# features to keep is chosen automatically by cross-validation, followed by a
# plot of the cross-validation score against the number of selected features.
print(__doc__)

import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

# Build a classification task using 3 informative features
X, y = make_classification(
    n_samples=1000,
    n_features=25,
    n_informative=3,
    n_redundant=2,
    n_repeated=0,
    n_classes=8,
    n_clusters_per_class=1,
    random_state=0,
)

# Create the RFE object and compute a cross-validated score.
svc = SVC(kernel="linear")

# The "accuracy" scoring shows the proportion of correct classifications
min_features_to_select = 1  # Minimum number of features to consider
rfecv = RFECV(
    estimator=svc,
    step=1,
    cv=StratifiedKFold(2),
    scoring="accuracy",
    min_features_to_select=min_features_to_select,
)
rfecv.fit(X, y)

print(f"Optimal number of features : {rfecv.n_features_}")

# `grid_scores_` is deprecated since scikit-learn 1.0 and scheduled for
# removal in 1.2; the cross-validation scores are read from `cv_results_`
# instead, using the mean test score for each candidate number of features.
mean_test_scores = rfecv.cv_results_["mean_test_score"]

# Plot number of features VS. cross-validation scores
# The x axis starts at `min_features_to_select` and has one tick per score.
n_features_tested = range(
    min_features_to_select,
    len(mean_test_scores) + min_features_to_select,
)
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (accuracy)")
plt.plot(n_features_tested, mean_test_scores)
plt.show()
Total running time of the script: ( 0 minutes 4.822 seconds)