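"""Multidimensional data visualization in Python, using the iris dataset as a worked example.

Four visualization methods are demonstrated: Andrews curves, parallel
coordinates, principal component analysis (PCA), and multidimensional
scaling (MDS).
"""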
import pandas as pd
import matplotlib.pyplot as plt
# Load the iris dataset from 'iris.csv' (relative path, so it is resolved
# against the current working directory).
data = pd.read_csv(filepath_or_buffer='iris.csv')
# Print the first five rows as a quick check of the loaded columns.
print(data.head(n=5))
from pandas.plotting import andrews_curves, parallel_coordinates

# Andrews curves of the dataset, grouped by the class label in 'Name'.
andrews_curves(frame=data, class_column='Name')
plt.show()

# Parallel-coordinates plot of the same data, again grouped by 'Name'.
parallel_coordinates(frame=data, class_column='Name')
plt.show()
from sklearn import decomposition

# Principal component analysis: reduce the feature columns to two components
# so that the result can be drawn as a 2-D scatter plot.
PCA = decomposition.PCA(n_components=2)
# Every column except the last is used as a feature; the last column is
# assumed to be the 'Name' class label (TODO confirm against iris.csv).
# `.iloc` is used for the positional slice because `DataFrame.ix` was removed
# in pandas 1.0.
X = PCA.fit_transform(data.iloc[:, :-1].values)

# Plotting: pair the 2-D coordinates with the class label of each row.
pos = pd.DataFrame(X, columns=['X', 'Y'])
pos['Name'] = data['Name']

# Draw one scatter series per species on a shared Axes. The first call gets
# ax=None (a new Axes is created); later calls reuse the Axes it returned.
ax = None
for species, color, label in (
    ('Iris-virginica', 'blue', 'virginica'),
    ('Iris-setosa', 'green', 'setosa'),
    ('Iris-versicolor', 'red', 'versicolor'),
):
    # `.loc` with a boolean mask replaces the removed `.ix` indexer.
    ax = pos.loc[pos['Name'] == species].plot(
        kind='scatter', x='X', y='Y', color=color, label=label, ax=ax)
plt.show()
from sklearn import manifold
from sklearn.metrics import euclidean_distances

# Pairwise Euclidean distances between all samples, computed over every
# column except the last (assumed to be the 'Name' label -- TODO confirm).
# Despite its name, `similarities` holds distances, i.e. dissimilarities,
# which is what MDS is given below via dissimilarity="precomputed".
# `.iloc` is used because `DataFrame.ix` was removed in pandas 1.0.
similarities = euclidean_distances(data.iloc[:, :-1].values)

# Multidimensional scaling down to two dimensions for visualization.
# NOTE(review): no random_state is passed, so the embedding is presumably
# not reproducible from run to run -- confirm whether that matters here.
mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, dissimilarity="precomputed", n_jobs=1)
X = mds.fit(similarities).embedding_

# Plotting: pair the 2-D coordinates with the class label of each row.
pos = pd.DataFrame(X, columns=['X', 'Y'])
pos['Name'] = data['Name']

# Draw one scatter series per species on a shared Axes. The first call gets
# ax=None (a new Axes is created); later calls reuse the Axes it returned.
ax = None
for species, color, label in (
    ('Iris-virginica', 'blue', 'virginica'),
    ('Iris-setosa', 'green', 'setosa'),
    ('Iris-versicolor', 'red', 'versicolor'),
):
    # `.loc` with a boolean mask replaces the removed `.ix` indexer.
    ax = pos.loc[pos['Name'] == species].plot(
        kind='scatter', x='X', y='Y', color=color, label=label, ax=ax)
plt.show()