基于make_moons数据集建立k近邻分类器

发布日期：2019-10-22
难度：一般
类别：分类与预测、k近邻
标签：Python、scikit-learn、k近邻、make_moons

1. 问题描述

使用k近邻分类模型对make_moons数据集进行分类预测。

2. 程序实现

# Generate the make_moons dataset and split it into training and test sets
import sklearn.datasets
from sklearn.model_selection import train_test_split

# Seed the generator as well as the split: with noise=0.3 and no seed the
# samples differ on every run, so the predictions and scores printed below
# could never be reproduced.
X, y = sklearn.datasets.make_moons(n_samples=100, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build the KNN model (3 neighbours) and print its predictions for the test set
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=3)
clf.fit(X_train, y_train)
result = clf.predict(X_test)
print(result)

[1 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1]

# Evaluate the fitted classifier
from sklearn.metrics import classification_report

# Accuracy on the training set, then on the test set
train_score = clf.score(X_train, y_train)
test_score = clf.score(X_test, y_test)
print(train_score)
print(test_score)

# Number of test samples whose predicted label matches the true label
print((result == y_test).sum())

# Text report comparing the true test labels with the predictions
print(classification_report(y_test, result))

0.9142857142857143
0.9
27
              precision    recall  f1-score   support

           0       0.94      0.88      0.91        17
           1       0.86      0.92      0.89        13

    accuracy                           0.90        30
   macro avg       0.90      0.90      0.90        30
weighted avg       0.90      0.90      0.90        30

# 2-D visualisation: predicted class regions with the samples drawn on top
from matplotlib.colors import ListedColormap
import matplotlib.pyplot as plt
import numpy as np

# Light colours fill the predicted regions; dark colours mark the samples
cmap_light = ListedColormap(['#FFAAAA', '#AFEEEE'])
cmap_dark = ListedColormap(['#FF0000', '#000080'])

# Plot extent: the range of each feature padded by 1 on both sides
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1

# Regular grid over that extent with a step of 0.1 along each axis
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.1),
    np.arange(y_min, y_max, 0.1),
)

# Predict a class for every grid node, then restore the grid layout
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = clf.predict(grid_points).reshape(xx.shape)

plt.figure()
plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_dark)
plt.show()