基于make_moons数据集建立并比较集成分类器效果

  • 发布日期:2019-10-22
  • 难度:较难
  • 类别:分类与预测、集成分类器
  • 标签:Python、scikit-learn、Bagging、Boosting、随机森林、make_moons

1. 问题描述

下面将在make_moons数据集上对比单个决策树、Bagging、AdaBoost和随机森林(RandomForest)四种算法的预测准确率和分类可视化效果。

2. 程序实现

In [1]:
import sklearn
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from matplotlib.colors import ListedColormap

# Generate the two-moons data set and hold out 30% of it as a test set.
X, y = make_moons(n_samples=300, noise=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Build and fit the four models: a single decision tree, Bagging,
# AdaBoost and a random forest. ``fit`` returns the estimator itself,
# so construction and training are chained.
clf_tree = DecisionTreeClassifier(
    max_depth=None, min_samples_split=2, random_state=1
).fit(X_train, y_train)
clf_bagging = BaggingClassifier(
    n_estimators=100, random_state=1
).fit(X_train, y_train)
clf_adaboost = AdaBoostClassifier(
    n_estimators=100, random_state=1
).fit(X_train, y_train)
clf_randomforest = RandomForestClassifier(
    n_estimators=100, random_state=1
).fit(X_train, y_train)

# Evaluate the accuracy of every model on the held-out test set.
tree_score = clf_tree.score(X_test, y_test)
bagging_score = clf_bagging.score(X_test, y_test)
adaboost_score = clf_adaboost.score(X_test, y_test)
randomforest_score = clf_randomforest.score(X_test, y_test)

print("tree_score:%s" % tree_score)
print("bagging_score:%s" % bagging_score)
print("adaboost_score:%s" % adaboost_score)
print("randomforest_score:%s" % randomforest_score)
tree_score:0.8444444444444444
bagging_score:0.8888888888888888
adaboost_score:0.9111111111111111
randomforest_score:0.8777777777777778
In [2]:
#接下来对4种模型的可视化效果进行比较
#定义效果图绘制函数
def plot_hyperplane(clf, X, y, h=0.02, title='hyperplan'):
    """Draw the class regions predicted by ``clf`` with the samples on top.

    The function draws on the current matplotlib figure; it neither
    creates a figure nor calls ``plt.show()``.

    Args:
        clf: Fitted classifier whose ``predict`` accepts an array with
            two columns (one row per grid point).
        X: Array indexed as ``X[:, 0]`` / ``X[:, 1]``; the two columns
            are used as the horizontal and vertical coordinates.
        y: Values used to colour the scattered samples.
        h: Step of the evaluation grid along both axes.
        title: Title placed on the plot.
    """
    # Both colormaps hold exactly two colours (light for the regions,
    # dark for the samples) -- presumably meant for binary labels.
    region_colors = ListedColormap(['#FFAAAA', '#AFEEEE'])
    sample_colors = ListedColormap(['#FF0000', '#000080'])

    # Build a grid covering the data range padded by one unit per side.
    first, second = X[:, 0], X[:, 1]
    grid_x = np.arange(first.min() - 1, first.max() + 1, h)
    grid_y = np.arange(second.min() - 1, second.max() + 1, h)
    xx, yy = np.meshgrid(grid_x, grid_y)

    plt.title(title)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    # Hide the tick marks on both axes.
    plt.xticks(())
    plt.yticks(())

    # Predict a label for every grid point, then restore the grid shape
    # so the predictions can be drawn as a coloured mesh.
    grid_points = np.column_stack((xx.ravel(), yy.ravel()))
    Z = clf.predict(grid_points).reshape(xx.shape)

    plt.pcolormesh(xx, yy, Z, cmap=region_colors)
    plt.scatter(first, second, c=y, cmap=sample_colors)
# Draw the visualization for the single decision tree over the full
# data set, using a coarser grid step (0.05) than the function default.
plt.figure()
plot_hyperplane(clf_tree, X, y, title='tree', h=0.05)
plt.show()