对于真实数据来说，往往先使用所有特征进行训练及预测，再经过降维处理之后进行可视化绘图展示。下面使用 SVC 分类模型对乳腺癌数据集进行代码实现。
# Load the breast cancer dataset, train an SVC on all features and evaluate it
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report

cancer = load_breast_cancer()

# Hold out 30% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=42,
)

# Build the SVC model with its default hyperparameters (fit returns the estimator)
clf = svm.SVC().fit(X_train, y_train)

# Evaluate: score on the training split, then on the held-out split
train_score = clf.score(X_train, y_train)
test_score = clf.score(X_test, y_test)
print(train_score, test_score, sep="\n")

# Predict the test labels and print the per-class classification report
result = clf.predict(X_test)
print(classification_report(y_test, result))
# Reduce the test features to two dimensions and plot them by true class label
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

# Dimensionality reduction: project the test samples onto two principal components
pca = PCA(n_components=2)
newData = pca.fit_transform(X_test)

# Group the projected points by their true label in y_test
# (type1_* holds the class-0 points, type2_* holds the class-1 points).
type1_x = [point[0] for point, label in zip(newData, y_test) if label == 0]
type1_y = [point[1] for point, label in zip(newData, y_test) if label == 0]
type2_x = [point[0] for point, label in zip(newData, y_test) if label == 1]
type2_y = [point[1] for point, label in zip(newData, y_test) if label == 1]

# Plot both classes.
# load_breast_cancer encodes target 0 as malignant and target 1 as benign
# (see cancer.target_names), so class 0 must carry the 'malignant' legend entry.
plt.figure()
plt.scatter(type1_x, type1_y, c='#000080', label='malignant')
plt.scatter(type2_x, type2_y, c='#FF0000', label='benign')
plt.legend()
plt.show()