クラスタリング(k-means)のお勉強
k-meansでクラスタリング(教師なし)
# Study of k-means clustering (unsupervised learning) with scikit-learn:
# cluster 11 samples described by three binary (qualitative) attributes.
import numpy as np
from sklearn.cluster import KMeans

# Quantitative data: (size-ish) measurements per sample.
# BUG FIX: the original script assigned both datasets to the same name
# `features`, so this quantitative array was dead code, silently replaced
# by the qualitative one below.  Keep it under a distinct name so the
# intent is visible; the clustering below still uses the qualitative data,
# exactly as the original effectively did.
quantitative_features = np.array([
    [20, 95, 190],
    [52, 103, 103],
    [50, 70, 280],
    [65, 65, 210],
    [84, 84, 96],
    [20, 50, 140],
    [28, 49, 176],
    [40, 75, 80],
    [2, 25, 25],
    [2, 30, 50],
    [14, 68, 132],
])

# Qualitative (binary) data: size, weight, stiffness flags per sample.
features = np.array([
    [1, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
    [1, 0, 1],
    [1, 1, 1],
    [1, 1, 1],
    [1, 1, 1],
    [1, 0, 1],
    [0, 0, 0],
    [0, 0, 0],
    [1, 1, 1],
])

# Fit k-means with 2 clusters; fixed random_state keeps runs reproducible.
kmeans_model = KMeans(n_clusters=2, random_state=10).fit(features)
labels = kmeans_model.labels_

# Print each sample's assigned cluster, its feature vector, and the
# vector's sum (a quick scalar summary of the sample).
for label, feature in zip(labels, features):
    print(label, feature, feature.sum())
可視化
# Visualize k-means clustering (3 clusters) of 3-D size data
# (width, height, depth) as a 3-D scatter plot with cluster centroids.
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# width, height, depth per sample
features = np.array([
    [20, 95, 190],
    [52, 103, 103],
    [50, 70, 280],
    [65, 65, 210],
    [84, 84, 96],
    [20, 50, 140],
    [28, 49, 176],
    [40, 75, 80],
    [2, 25, 25],
    [2, 30, 50],
    [14, 68, 132],
])

km = KMeans(n_clusters=3, random_state=10)
kmeans_model = km.fit(features)
labels = kmeans_model.labels_

X = features
y_km = labels
names = ["class1", "class2", "class3"]

# Print each sample's cluster, feature vector, and feature sum.
for label, feature in zip(labels, features):
    print(label, feature, feature.sum())

# Predicted data plot.
# BUG FIX: `ax = Axes3D(fig)` stopped auto-attaching the axes to the figure
# in Matplotlib >= 3.4 (behavior deprecated in 3.4, removed in 3.6), which
# yields a blank window.  The supported way is add_subplot(projection="3d").
fig = plt.figure()
ax = fig.add_subplot(projection="3d")

# One scatter per cluster (yellow/red/blue crosses), same as the original's
# three hand-written calls, but driven by a loop to avoid duplication.
handles = []
for cluster_idx, color in enumerate(("y", "r", "b")):
    pts = X[y_km == cluster_idx]
    handles.append(
        ax.scatter3D(pts[:, 0], pts[:, 1], pts[:, 2], c=color, marker="x")
    )

# Cluster centroids as light-green squares.
handles.append(
    ax.scatter3D(
        km.cluster_centers_[:, 0],
        km.cluster_centers_[:, 1],
        km.cluster_centers_[:, 2],
        c="lightgreen",
        marker="s",
    )
)

plt.legend(
    handles,
    (names[0], names[1], names[2], "Centroid"),
    scatterpoints=1,
    loc="upper left",
    ncol=3,
    fontsize=8,
)
plt.show()
参考:
データの種類 (質的データ、量的データ) - ナンバーズ予想で学ぶ統計学
scikit-learn による最も基本的なクラスタリング分析 - Qiita
Python 機械学習 Scikit-learnによるクラスタリング分析(k-means法)の実践 – colab 学習ログ