# -*- coding: UTF-8 -*-
# Author: qq_44193969
# Title: K-nearest-neighbours (KNN) classification
# Description: KNN is a supervised classifier, not a clustering method: a
#   sample is given the label that dominates among its nearest labelled
#   neighbours ("one takes on the colour of one's company").
import numpy as np


def generate_data(class1_num, class2_num, train_ratio=0.7, seed=2021):
    """Build a shuffled two-class 2-D Gaussian data set and split it.

    Class 0 is drawn from N(2, 1) on the first feature and N(3, 1) on the
    second; class 1 from N(6, 2) and N(8, 2) (mean, standard deviation).

    Args:
        class1_num: Number of samples with label 0.
        class2_num: Number of samples with label 1.
        train_ratio: Fraction of all samples placed in the training split;
            the split index is ``int(total * train_ratio)``.
        seed: Seed of the private random generator, so repeated calls with
            the same arguments return identical data.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``. The ``x_*`` arrays have
        shape ``(n, 2)``; the ``y_*`` arrays hold the integer labels 0 / 1.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            'train_ratio must be within [0, 1], got {}'.format(train_ratio))

    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by the module-level functions, but leaves the process-wide
    # generator untouched for the caller.
    rng = np.random.RandomState(seed)

    # The draw order below is significant: changing it changes the data.
    x1_1 = rng.normal(loc=2, scale=1.0, size=class1_num)
    x2_1 = rng.normal(loc=3, scale=1.0, size=class1_num)
    x1_2 = rng.normal(loc=6, scale=2.0, size=class2_num)
    x2_2 = rng.normal(loc=8, scale=2.0, size=class2_num)

    x = np.column_stack((
        np.concatenate((x1_1, x1_2)),
        np.concatenate((x2_1, x2_2)),
    ))
    # Integer arrays keep the label dtype integral even when a class is empty.
    y = np.concatenate((
        np.zeros(class1_num, dtype=int),
        np.ones(class2_num, dtype=int),
    ))

    data_size_all = class1_num + class2_num
    shuffled_index = rng.permutation(data_size_all)
    x = x[shuffled_index]
    y = y[shuffled_index]

    split_index = int(data_size_all * train_ratio)
    return (x[:split_index], y[:split_index],
            x[split_index:], y[split_index:])


def show_data(x_train, y_train, x_test, y_test):
    """Scatter-plot the training split, then the test split, coloured by label.

    Two figures are shown one after the other, each through its own
    ``plt.show()`` call.
    """
    # Imported lazily so that generating data does not require matplotlib
    # (or a usable plotting backend) unless a plot is actually requested.
    import matplotlib.pyplot as plt

    for features, labels in ((x_train, y_train), (x_test, y_test)):
        plt.scatter(features[:, 0], features[:, 1], c=labels, marker='.')
        plt.show()


def train_and_predict(is_show=False):
    """Fit a 2-nearest-neighbour classifier and report its test accuracy.

    The data comes from ``generate_data(300, 500)``.

    Args:
        is_show: When true, plot the training and test data before fitting.

    Returns:
        The accuracy on the test split as a ``float``; it is also printed.
    """
    # Imported lazily so the data helpers stay importable without scikit-learn.
    from sklearn.neighbors import KNeighborsClassifier

    x_train, y_train, x_test, y_test = generate_data(300, 500)
    if is_show:
        show_data(x_train, y_train, x_test, y_test)

    # NOTE(review): with an even k the two neighbours can disagree; an odd k
    # would rule out such ties. Kept at 2 to preserve the existing results.
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(x_train, y_train)

    # One batched prediction instead of one predict() call per test sample.
    predictions = neigh.predict(x_test)
    acc = float(np.mean(predictions == y_test))
    print('准确率为: {}'.format(acc))
    return acc


if __name__ == '__main__':
    train_and_predict()