在Python中，可以使用NumPy并结合scikit-learn库来实现Self-Organizing Maps (SOM)聚类算法（SOM本身用NumPy手动实现，scikit-learn的KMeans用于对SOM节点做进一步聚类）。下面是一个示例代码：
from sklearn.cluster import KMeans
import numpy as np
class SOM:
    """Minimal Self-Organizing Map followed by a KMeans grouping of its nodes.

    The map is a 2-D grid of weight vectors. Training repeatedly picks a
    random sample, finds its best matching unit (BMU) and pulls every node
    inside a shrinking square neighbourhood of the BMU toward the sample.
    Prediction maps each sample to its BMU and reports the KMeans cluster
    that the BMU's weight vector belongs to.

    Args:
        n_clusters: Number of final clusters produced by KMeans in `predict`.
        grid_size: `(rows, cols)` of the node grid.
        learning_rate: Constant step size used when moving node weights.
        max_iterations: Number of single-sample training steps run by `fit`.
    """

    def __init__(self, n_clusters, grid_size, learning_rate=0.2, max_iterations=100):
        self.n_clusters = n_clusters
        self.grid_size = grid_size
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations

    def fit(self, X):
        """Train the map on `X`, an array-like of shape (n_samples, n_features).

        Raises:
            ValueError: If `X` is not 2-D or contains no samples.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_features)")
        # Initialise the weight grid with uniform random values in [0, 1).
        self.weights = np.random.rand(self.grid_size[0], self.grid_size[1], X.shape[1])
        for iteration in range(self.max_iterations):
            # Pick one random sample per training step.
            sample = X[np.random.choice(X.shape[0])]
            bmu = self.find_best_matching_unit(sample)
            self.update_weights(sample, bmu, iteration)

    def find_best_matching_unit(self, sample):
        """Return the `(row, col)` grid index of the node closest to `sample`."""
        # Euclidean distance from every node's weight vector to the sample.
        distances = np.linalg.norm(self.weights - sample, axis=2)
        return np.unravel_index(np.argmin(distances), distances.shape)

    def update_weights(self, sample, bmu, iteration):
        """Move every node in the square neighbourhood of `bmu` toward `sample`.

        The neighbourhood covers all nodes whose row and column offsets from
        the BMU are at most the current radius, clipped to the grid bounds.
        """
        radius = self.calculate_radius(iteration)
        # The radius is a float; grid offsets are integers, so only the
        # integer part matters. Using the float directly would make the
        # bounds non-integer and unusable as indices.
        reach = int(np.floor(radius))
        center = np.asarray(bmu, dtype=int)
        start = np.maximum(0, center - reach)
        end = np.minimum(np.asarray(self.grid_size, dtype=int), center + reach + 1)
        # Each node is updated independently, so one sliced update is
        # equivalent to looping over the neighbourhood cell by cell.
        region = self.weights[start[0]:end[0], start[1]:end[1]]
        region += self.learning_rate * (sample - region)

    def calculate_radius(self, iteration):
        """Return the neighbourhood radius for the given training step.

        The radius starts at half the largest grid side and decays
        exponentially toward 1 over `max_iterations` steps.
        """
        initial_radius = np.max(self.grid_size) / 2
        if initial_radius <= 1:
            # log(initial_radius) would be zero or negative here, giving a
            # division by zero or a growing radius; there is nothing to
            # decay, so keep the radius constant.
            return float(initial_radius)
        time_constant = self.max_iterations / np.log(initial_radius)
        return float(initial_radius * np.exp(-iteration / time_constant))

    def predict(self, X):
        """Return one cluster label per sample in `X`.

        Each sample is assigned to its BMU; the BMU's weight vector is then
        looked up in a KMeans clustering of all node weights. KMeans is
        refitted on every call, so label numbering may differ between calls.

        Returns:
            Array of shape (n_samples,) with values in `[0, n_clusters)`.
        """
        X = np.asarray(X, dtype=float)
        # Flatten the grid so every node has a single index.
        codebook = self.weights.reshape(-1, self.weights.shape[2])
        # Distance from every sample to every node: (n_samples, n_nodes).
        distances = np.linalg.norm(X[:, np.newaxis, :] - codebook[np.newaxis, :, :], axis=2)
        # Flat BMU index per sample (argmin across ALL nodes, not one grid axis).
        bmu_indices = np.argmin(distances, axis=1)
        # Group the node weight vectors into the final clusters.
        kmeans = KMeans(n_clusters=self.n_clusters)
        kmeans.fit(codebook)
        node_labels = kmeans.predict(codebook)
        # A sample inherits the cluster of its best matching node.
        return node_labels[bmu_indices]
# Example usage
# Create a SOM model with three final clusters, then train it on the iris dataset and predict labels
from sklearn.datasets import load_iris
iris = load_iris()
X = iris.data
som = SOM(n_clusters=3, grid_size=(10, 10))
som.fit(X)
labels = som.predict(X)
print(labels)
上述代码实现了一个简单的SOM聚类算法，使用iris数据集进行了训练和预测。首先，定义了一个SOM类，该类包含了聚类的基本操作，如初始化权重矩阵、计算最近的聚类中心、更新邻近的权重矩阵等。然后，使用fit方法对SOM模型进行训练，使用predict方法对样本进行聚类预测。最后，使用KMeans算法对聚类中心进行进一步的聚类，将样本分配到最终的聚类中心。