# -*- coding: UTF-8 -*-
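# Author: qq_44193969
# Title: Supervised classification with k-nearest neighbours (KNN)
# Description: KNN classification demo (not clustering: the training data is
#              labelled). A sample takes on the label of its nearest
#              neighbours -- "near vermilion one turns red, near ink one
#              turns black".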

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier


def generate_data(class1_num, class2_num, train_ratio=0.7, seed=2021):
    """Generate a shuffled two-class 2-D Gaussian dataset and split it.

    Class 0 is drawn from normals centred at (2, 3) with scale 1.0 on each
    feature; class 1 is drawn from normals centred at (6, 8) with scale 2.0.
    The samples are shuffled and the first ``train_ratio`` fraction
    (truncated to an integer count) becomes the training set.

    The random numbers come from a private ``np.random.RandomState(seed)``,
    so calling this function does not reseed NumPy's global generator. For
    the default seed the values are the same as those produced by
    ``np.random.seed(2021)`` followed by the module-level ``np.random``
    functions.

    Args:
        class1_num: Number of samples with label 0.
        class2_num: Number of samples with label 1.
        train_ratio: Fraction of all samples placed in the training set;
            must lie in ``[0, 1]``.
        seed: Seed handed to ``np.random.RandomState``.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test)``. The ``x`` arrays have
        two columns (one per feature); the ``y`` arrays hold integer labels
        0 or 1.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(
            'train_ratio must be within [0, 1], got {}'.format(train_ratio))

    rng = np.random.RandomState(seed)

    # The draw order (class-0 features, then class-1 features, then the
    # permutation) is kept fixed so a given seed always yields the same data.
    x1_1 = rng.normal(loc=2, scale=1.0, size=class1_num)
    x2_1 = rng.normal(loc=3, scale=1.0, size=class1_num)
    x1_2 = rng.normal(loc=6, scale=2.0, size=class2_num)
    x2_2 = rng.normal(loc=8, scale=2.0, size=class2_num)

    x = np.column_stack((
        np.concatenate((x1_1, x1_2), axis=0),
        np.concatenate((x2_1, x2_2), axis=0),
    ))
    # Explicit integer dtype: concatenating an empty Python list would
    # otherwise promote the labels to float when one class has no samples.
    y = np.concatenate((
        np.zeros(class1_num, dtype=int),
        np.ones(class2_num, dtype=int),
    ), axis=0)

    data_size_all = class1_num + class2_num
    shuffled_index = rng.permutation(data_size_all)
    x = x[shuffled_index]
    y = y[shuffled_index]

    split_index = int(data_size_all * train_ratio)
    return x[:split_index], y[:split_index], x[split_index:], y[split_index:]


def show_data(x_train, y_train, x_test, y_test):
    """Display the training set and then the test set as scatter plots.

    Each split gets its own figure: the first feature column is plotted on
    the x-axis, the second on the y-axis, and points are coloured by label.
    """
    for points, labels in ((x_train, y_train), (x_test, y_test)):
        first_feature = points[:, 0]
        second_feature = points[:, 1]
        plt.scatter(first_feature, second_feature, c=labels, marker='.')
        plt.show()


def train_and_predict(is_show=False):
    """Fit a 2-nearest-neighbour classifier on synthetic data and report accuracy.

    The data comes from ``generate_data(300, 500)``. The accuracy on the test
    split is printed and also returned.

    Args:
        is_show: When true, plot the training and test sets before fitting.

    Returns:
        The fraction of test samples whose predicted label equals the true
        label.
    """
    x_train, y_train, x_test, y_test = generate_data(300, 500)
    if is_show:
        show_data(x_train, y_train, x_test, y_test)

    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(x_train, y_train)

    # Predict the whole test set in one call rather than once per sample.
    predictions = neigh.predict(x_test)
    acc_count = int(np.count_nonzero(predictions == y_test))
    acc = acc_count / len(x_test)
    print('准确率为: {}'.format(acc))
    return acc


# Script entry point: run the demo with its default arguments (is_show
# defaults to False, so no plots are displayed).
if __name__ == '__main__':
    train_and_predict()