This is documentation for an old release of Scikit-learn (version 1.1). Try the latest stable release (version 1.6) or development (unstable) versions.
Note
Click here to download the full example code or to run this example in your browser via Binder
Bisecting K-Means and Regular K-Means Performance Comparison¶
This example shows differences between Regular K-Means algorithm and Bisecting K-Means.
While K-Means clusterings are different when with increasing n_clusters, Bisecting K-Means clustering build on top of the previous ones.
This difference can visually be observed.

import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import BisectingKMeans, KMeans
print(__doc__)
# Generate sample data
n_samples = 1000
random_state = 0
X, _ = make_blobs(n_samples=n_samples, centers=2, random_state=random_state)
# Number of cluster centers for KMeans and BisectingKMeans
n_clusters_list = [2, 3, 4, 5]
# Algorithms to compare
clustering_algorithms = {
"Bisecting K-Means": BisectingKMeans,
"K-Means": KMeans,
}
# Make subplots for each variant
fig, axs = plt.subplots(
len(clustering_algorithms), len(n_clusters_list), figsize=(15, 5)
)
axs = axs.T
for i, (algorithm_name, Algorithm) in enumerate(clustering_algorithms.items()):
for j, n_clusters in enumerate(n_clusters_list):
algo = Algorithm(n_clusters=n_clusters, random_state=random_state)
algo.fit(X)
centers = algo.cluster_centers_
axs[j, i].scatter(X[:, 0], X[:, 1], s=10, c=algo.labels_)
axs[j, i].scatter(centers[:, 0], centers[:, 1], c="r", s=20)
axs[j, i].set_title(f"{algorithm_name} : {n_clusters} clusters")
# Hide x labels and tick labels for top plots and y ticks for right plots.
for ax in axs.flat:
ax.label_outer()
ax.set_xticks([])
ax.set_yticks([])
plt.show()
Total running time of the script: ( 0 minutes 0.344 seconds)