# 下面将在 make_moons 数据集上对比单个决策树、Bagging、AdaBoost 和 RandomForest 四种模型的预测效果和可视化图。
import sklearn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap
# Generate the two-moons dataset and hold out 30% of the samples for testing.
X, y = make_moons(n_samples=300, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build the four classifiers being compared: a single decision tree,
# Bagging, AdaBoost and a random forest (all seeded for reproducibility).
clf_tree = DecisionTreeClassifier(
    max_depth=None, min_samples_split=2, random_state=1
)
clf_bagging = BaggingClassifier(n_estimators=100, random_state=1)
clf_adaboost = AdaBoostClassifier(n_estimators=100, random_state=1)
clf_randomforest = RandomForestClassifier(n_estimators=100, random_state=1)

# scikit-learn's fit() trains in place and returns the estimator itself, so
# the names above keep referring to the (now fitted) models.
for _clf in (clf_tree, clf_bagging, clf_adaboost, clf_randomforest):
    _clf.fit(X_train, y_train)

# Accuracy evaluation on the held-out test split.
tree_score = clf_tree.score(X_test, y_test)
bagging_score = clf_bagging.score(X_test, y_test)
adaboost_score = clf_adaboost.score(X_test, y_test)
randomforest_score = clf_randomforest.score(X_test, y_test)

# Report the scores in the same order the models were declared.
for _label, _value in (
    ("tree_score", tree_score),
    ("bagging_score", bagging_score),
    ("adaboost_score", adaboost_score),
    ("randomforest_score", randomforest_score),
):
    print(f"{_label}:{_value}")
#接下来对4种模型的可视化效果进行比较
#定义效果图绘制函数
def plot_hyperplane(clf, X, y, h=0.02, title='hyperplan'):
    """Draw a classifier's predicted regions with the samples on top.

    Everything is drawn through the ``pyplot`` state interface, i.e. onto
    whatever axes are current; creating the figure and calling
    ``plt.show()`` is left to the caller.

    Args:
        clf: Object exposing ``predict``; it is called once on an array with
            one row per grid point and two columns (x, y).
        X: 2-D array of samples. Columns 0 and 1 are used as the plot
            coordinates.
        y: Per-sample values used to colour the scatter points.
        h: Step of the evaluation grid along both axes.
        title: Title placed on the axes.
    """
    # Pad the data range by one unit on every side so no sample sits on the
    # border of the plot.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Fix the title and limits, and hide the tick marks.
    plt.title(title)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    # Predict every grid point, then fold the flat result back to grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    # Two-colour maps: light shades for the background regions, saturated
    # shades for the sample points.
    cmap_light = ListedColormap(['#FFAAAA', '#AFEEEE'])
    cmap_dark = ListedColormap(['#FF0000', '#000080'])
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_dark)
# Draw the visualisation: open a new figure, paint the single decision
# tree's predicted regions on a 0.05-step grid, then display it.
plt.figure()
plot_hyperplane(clf_tree, X, y, title='tree', h=0.05)
plt.show()