# Example code:
import numpy as np
import matplotlib.pyplot as plt
# Training samples: one row per sample, two feature values per row.
raw_data_x = [
    [3.3935, 2.3312],
    [3.1101, 1.7815],
    [1.3438, 3.3684],
    [3.5823, 4.6792],
    [2.2804, 2.8670],
    [7.4234, 4.6965],
    [5.7451, 3.5340],
    [9.1722, 2.5111],
    [7.7928, 3.4241],
    [7.9398, 0.7916],
]
# Class label of each row above, in row order: the first five samples are
# class 0 and the last five are class 1.
raw_data_y = [0] * 5 + [1] * 5

# Training-set features (x_train) and labels (y_train) as NumPy arrays.
x_train, y_train = np.array(raw_data_x), np.array(raw_data_y)
# 1) Scatter-plot the training samples together with the new sample.
# Each class gets its own colour: class 0 in green, class 1 in red.
# Transposing the selected rows yields (first feature, second feature),
# which unpack into the two positional arguments of scatter.
plt.scatter(*x_train[y_train == 0].T, color='g')
plt.scatter(*x_train[y_train == 1].T, color='r')

# The new sample whose class is to be predicted.
x = np.array([8.0936, 3.3657])

# Draw the new sample in blue in the same feature space as the training set.
plt.scatter(*x, color='b')
plt.show()
# 2) In feature space, compute the Euclidean distance from every training
#    sample to the new sample.
from math import sqrt

# math.sqrt takes the square root of the summed squared differences.
# The iteration variable is deliberately NOT named x_train: a plain
# `for x_train in x_train:` loop rebinds x_train to the last row and
# destroys the training matrix for any code that runs afterwards.
distances = [sqrt(np.sum((x - sample) ** 2)) for sample in x_train]
# 3) Find the k training samples nearest to the new sample and predict the
#    new sample's class by majority vote.
from collections import Counter

# Number of neighbours that take part in the vote.
k = 6
# Indices of the training samples, ordered from nearest to farthest.
nearest = np.argsort(distances)
# Labels of the k nearest training samples.
topK_y = list(y_train[nearest[:k]])
# Tally how many of those k neighbours belong to each class.
votes = Counter(topK_y)
# The prediction is the label with the highest tally; the count is unused.
predict_y, _ = votes.most_common(1)[0]