主成分分析

  • 发布日期:2019-11-27
  • 难度:简单
  • 类别:数据预处理、数据规约
  • 标签:Python、数据规约、维规约、主成分分析

1. 问题描述

使用Python第三方库sklearn自带的digits数据集,实现主成分分析(PCA)降维。

2. 程序实现

In [1]:
from sklearn import datasets,decomposition
# Load the bundled handwritten-digits dataset and take its feature matrix.
digits = datasets.load_digits()
data = digits.data

# n_components sets either the number of principal components to keep or an
# explained-variance threshold:
#   - an integer no smaller than 1 is treated as the number of components;
#   - a float between 0 and 1 is treated as the explained-variance threshold.
# fit() returns the estimator itself, so construction and fitting are chained.
pca = decomposition.PCA(n_components=6).fit(data)

# Eigenvectors (principal axes), one row per retained component.
print(pca.components_)
[[-3.26665006e-18 -1.73094652e-02 -2.23428835e-01 -1.35913305e-01
  -3.30323104e-02 -9.66340845e-02 -8.32943754e-03  2.26900093e-03
  -3.20516500e-04 -1.19308905e-01 -2.44451675e-01  1.48512746e-01
  -4.67319400e-02 -2.17740744e-01 -1.48136771e-02  4.47779527e-03
  -4.94136411e-05 -7.95419378e-02  8.33951458e-02  2.15915343e-01
  -1.72126800e-01 -1.63712098e-01  2.86444452e-02  4.23251806e-03
   9.85488574e-05  6.42319144e-02  2.54093315e-01 -3.56771034e-02
  -2.09462569e-01 -4.31311426e-02  5.13118687e-02  2.13422732e-04
   0.00000000e+00  1.59950883e-01  3.68690775e-01  1.64406827e-01
   8.52007913e-02  3.72982855e-02  2.15866979e-02  0.00000000e+00
   1.28865584e-03  1.06945286e-01  3.03067457e-01  2.47813039e-01
   2.09637296e-01  1.22325217e-02 -3.69458497e-02  1.61485028e-03
   6.93023538e-04 -8.35144252e-03 -5.58598987e-02  9.30534164e-02
   1.07387720e-01 -1.37734565e-01 -6.32879465e-02  9.61671145e-04
   9.55079098e-06 -1.40786840e-02 -2.35675488e-01 -1.41225588e-01
  -9.15964625e-03 -8.94184678e-02 -3.65977108e-02 -1.14684952e-02]
 [ 2.71533481e-18 -1.01064564e-02 -4.90849194e-02 -9.43337397e-03
  -5.36015521e-02 -1.17755314e-01 -6.21281804e-02 -7.93574600e-03
  -1.63216226e-04 -2.10167031e-02  6.03485689e-02 -5.33769772e-03
  -9.19769233e-02 -5.19210523e-02 -5.89354697e-02 -3.33283411e-03
  -4.22872038e-05  3.62458528e-02  1.98257337e-01 -4.86386542e-02
  -2.25574897e-01 -4.50542081e-03  2.67696744e-02 -2.08735604e-04
  -5.66233930e-05  7.71235120e-02  1.88447106e-01 -1.37952515e-01
  -2.61042776e-01  4.98350654e-02  6.51113798e-02  4.03200429e-05
  -0.00000000e+00  8.81559928e-02  8.71737558e-02 -2.70860182e-01
  -2.85291802e-01  1.66461586e-01  1.27860546e-01 -0.00000000e+00
   2.89440253e-04  5.08304885e-02  1.30274465e-01 -2.68906462e-01
  -3.01575536e-01  2.40259061e-01  2.17555552e-01  1.32726080e-03
   2.86743001e-04  1.05548299e-02  1.53370697e-01 -1.19535175e-01
  -9.72508010e-02  2.85869534e-01  1.48776445e-01  5.42290596e-04
  -3.34028056e-05 -1.00791164e-02 -7.02724063e-02  1.71108133e-02
   1.94296408e-01  1.76697120e-01  1.94547071e-02 -6.69693932e-03]
 [-1.80513093e-17  1.83420722e-02  1.26475543e-01  1.32154803e-01
  -1.34016052e-01 -2.64938820e-01 -1.16643751e-01 -1.68423443e-02
   3.93971153e-04  7.94371428e-02  8.74387369e-02  6.55909589e-02
  -7.89789909e-04 -2.80102010e-01 -1.59704543e-01 -1.51616157e-02
   2.12613362e-04  4.24412648e-02  4.72719595e-03  7.38147837e-02
   9.65404304e-02 -3.07581107e-01 -1.48011911e-01 -6.33142427e-03
   5.01584032e-05 -4.41792032e-02 -7.25665149e-02  7.13385180e-02
  -8.05254540e-03 -3.53007953e-01 -1.48327644e-01 -2.18498564e-04
  -0.00000000e+00 -5.45021589e-02 -3.36020107e-02  1.41335254e-01
   1.50096894e-02 -2.57973886e-01 -9.38408394e-02 -0.00000000e+00
  -3.19977124e-04 -2.41498932e-02  1.21723763e-01  1.71362109e-01
  -8.57777755e-02 -1.44627529e-01  7.01061987e-02  2.92247625e-03
  -1.29047609e-04  1.86136089e-02  2.34990813e-01  1.69857754e-01
  -5.39961732e-02  8.48975805e-02  2.22787078e-01  2.18621438e-02
  -1.39805150e-05  1.75833095e-02  1.23158213e-01  9.83532212e-02
   1.38485238e-01  2.32084162e-01  1.67026561e-01  3.48043825e-02]
 [-1.19362374e-17  2.00129020e-02  1.77426949e-01  1.95680552e-01
   3.20885093e-02  9.51350135e-02  7.18349961e-02  9.20554937e-03
   5.95996537e-05  7.48786883e-02  3.07557236e-01  3.70451264e-02
  -1.53699638e-01  2.71635282e-02  4.54364540e-02  2.41658096e-03
   1.12278199e-04  1.23902363e-01  2.19811816e-01 -2.75643737e-01
  -2.78363547e-01 -5.73540128e-02  2.24793892e-02 -1.50781024e-03
   8.94450536e-05  9.83628599e-02  1.16326624e-01 -1.92185443e-01
  -2.15997013e-01 -8.19871090e-02  5.52831539e-02 -5.87383511e-05
  -0.00000000e+00  5.23188387e-02  9.79142349e-02  1.04807734e-02
  -1.32055018e-01 -1.60843097e-01  9.95689874e-03 -0.00000000e+00
  -4.99096596e-04  6.27123271e-03  5.21400601e-02  5.89659046e-02
  -2.56251949e-02 -1.94298895e-01 -8.43566334e-02 -2.28967783e-03
  -4.06732541e-04  1.06453680e-02  1.07330499e-01  1.32071950e-01
  -3.17390358e-02 -2.25620645e-01 -1.39554537e-01 -1.59986723e-02
  -1.20693552e-05  2.10070204e-02  2.03251712e-01  2.08231525e-01
  -2.24549289e-01 -3.07658397e-01 -9.93888634e-02 -2.44735044e-02]
 [ 6.46504626e-18  1.40786114e-02  8.28242618e-02 -6.56025309e-02
  -1.53939434e-02  1.66985324e-01  9.48940058e-02  4.39552881e-03
   9.69002229e-05  5.23098177e-02  4.44325843e-02 -4.57337097e-03
  -1.33895528e-01 -2.15381269e-01  3.54117663e-02  9.41726299e-03
   1.81472886e-04  9.86917297e-02  2.18718716e-01  1.46339999e-01
  -1.31926877e-01 -3.62162262e-01 -1.77400844e-02  5.91936333e-03
   1.57241377e-04  1.12919096e-01  3.14997023e-01  3.07685090e-01
   2.76348802e-02 -1.68647629e-01 -9.77589775e-02  1.76630542e-04
  -0.00000000e+00 -3.89482671e-02 -7.45777210e-02  4.64774769e-03
   7.28950082e-02  1.68422053e-02 -1.05783956e-01 -0.00000000e+00
  -4.06227929e-04 -9.92263891e-02 -3.99399271e-01 -2.30079941e-01
   1.02602689e-01  1.19983892e-01 -4.12243114e-02 -5.31522133e-04
  -7.50791339e-04 -2.54619399e-02 -2.60688906e-01 -1.95896271e-01
   1.23534572e-01  4.59257555e-02 -1.76779408e-02  7.18162031e-03
  -9.38847052e-05  1.24603791e-02  7.08989547e-02  2.21501239e-02
   5.05662669e-02 -3.56149828e-02 -1.59328289e-02  2.24344633e-02]
 [ 5.28721877e-17 -9.63018133e-03 -8.09649937e-02  1.27379169e-02
  -1.47997704e-02  3.13843479e-02  5.30251936e-02  1.36719902e-02
  -2.53209701e-04 -1.24582708e-02 -4.98057599e-02 -9.44312964e-02
  -2.84582396e-01 -1.36729436e-01  4.22464464e-02  8.35709178e-03
  -2.92413067e-05 -3.25591485e-02 -1.38306573e-01 -3.02268083e-01
  -2.53827362e-01 -1.16081514e-01 -3.73980951e-03  3.57631281e-03
  -4.49612505e-05 -3.82816157e-02 -1.04855291e-01 -1.62743564e-01
   2.75191174e-02  5.07755296e-02 -1.28975187e-02  2.48463761e-06
  -0.00000000e+00 -3.39493031e-02  3.39386744e-02  1.92609384e-01
   2.84534053e-01  2.54836484e-01  5.25117250e-02 -0.00000000e+00
  -9.22023375e-04 -6.43575455e-02 -1.34059427e-01  5.09320407e-02
  -1.00688738e-01 -9.81046062e-02  1.82231469e-01  5.87748680e-03
  -1.41146592e-03 -5.74881980e-02 -2.38110565e-01 -2.36132088e-02
  -3.87831303e-01 -1.77012144e-01  2.65567004e-01  1.80068858e-02
  -9.47230313e-05 -1.16184286e-02 -7.07407804e-02 -2.07455397e-02
  -1.06991248e-01  1.64481637e-01  1.43993251e-01  1.50514027e-02]]
In [2]:
print(pca.explained_variance_ratio_)     # share of variance explained by each retained component
[0.14890594 0.13618771 0.11794594 0.08409979 0.05782415 0.0491691 ]
In [3]:
print(pca.transform(data))    # the data after dimensionality reduction
# 1797 reduced vectors in total (one per sample in the digits dataset), each with 6 components
[[-1.25946643e+00  2.12748834e+01 -9.46305466e+00  1.30141908e+01
  -7.12882133e+00 -7.44069492e+00]
 [ 7.95761128e+00 -2.07686989e+01  4.43950613e+00 -1.48936656e+01
   5.89624018e+00 -6.48540527e+00]
 [ 6.99192297e+00 -9.95598632e+00  2.95855806e+00 -1.22883036e+01
  -1.81260163e+01 -4.50779429e+00]
 ...
 [ 1.08012837e+01 -6.96025220e+00  5.59955458e+00 -7.22773463e+00
  -3.45965040e+00 -1.23750835e+01]
 [-4.87210007e+00  1.24239535e+01 -1.01708664e+01 -6.66884777e+00
   3.56905959e+00 -3.88459402e-03]
 [-3.44389627e-01  6.36554920e+00  1.07737085e+01 -7.72621323e+00
  -3.31061271e+00 -3.04912125e+00]]
In [ ]: