scipy中的hierarchy聚类方法应用案例

  • 发布日期:2019-10-25
  • 难度:中等
  • 类别:聚类分析、hierarchy聚类方法应用案例
  • 标签:Python、scipy.cluster.hierarchy

1. 问题描述

如下程序是层次聚类算法的一个实例。所用数据集为教材中16种不同品牌饮料的数据集。最终,所有饮料被很好地聚为3类。

2. 程序实现

In [1]:
import pandas as pd
#用于进行层次聚类、画层次聚类图的工具包
from scipy.cluster import hierarchy
from scipy import cluster
import matplotlib.pyplot as plt
#用于主成分分析降维的包
from sklearn import decomposition as skldec
# Load the drink data set; "drink.xlsx" is resolved relative to the current
# working directory, so the file must be placed next to this notebook/script.
df = pd.read_excel("drink.xlsx")

# Build the linkage matrix with Ward's method on Euclidean distances.
Z = hierarchy.linkage(df, method='ward', metric='euclidean')

# Draw the dendrogram, labelling each leaf with its row index.
# The index is converted to a plain list: some older scipy releases evaluate
# `labels` in a boolean context, which raises ValueError for a pandas Index.
hierarchy.dendrogram(Z, labels=df.index.tolist())

# Cut the tree at height 0.8 to obtain one cluster id per observation.
# cut_tree returns a column per requested height, so flatten the single
# column into a 1-D array of labels.
label = hierarchy.cut_tree(Z, height=0.8)
label = label.ravel()

plt.show()
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-1-65f18eb901f9> in <module>
      6 #用于主成分分析降维的包
      7 from sklearn import decomposition as skldec
----> 8 df = pd.read_excel("drink.xlsx")
      9 #开始画层次聚类树状图
     10 Z = hierarchy.linkage(df, method ='ward',metric='euclidean')

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    176                 else:
    177                     kwargs[new_arg_name] = new_arg_value
--> 178             return func(*args, **kwargs)
    179         return wrapper
    180     return _deprecate_kwarg

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    176                 else:
    177                     kwargs[new_arg_name] = new_arg_value
--> 178             return func(*args, **kwargs)
    179         return wrapper
    180     return _deprecate_kwarg

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/io/excel.py in read_excel(io, sheet_name, header, names, index_col, usecols, squeeze, dtype, engine, converters, true_values, false_values, skiprows, nrows, na_values, parse_dates, date_parser, thousands, comment, skipfooter, convert_float, **kwds)
    305 
    306     if not isinstance(io, ExcelFile):
--> 307         io = ExcelFile(io, engine=engine)
    308 
    309     return io.parse(

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/io/excel.py in __init__(self, io, **kwds)
    392             self.book = xlrd.open_workbook(file_contents=data)
    393         elif isinstance(self._io, compat.string_types):
--> 394             self.book = xlrd.open_workbook(self._io)
    395         else:
    396             raise ValueError('Must explicitly set engine if not passing in'

/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/xlrd/__init__.py in open_workbook(filename, logfile, verbosity, use_mmap, file_contents, encoding_override, formatting_info, on_demand, ragged_rows)
    109     else:
    110         filename = os.path.expanduser(filename)
--> 111         with open(filename, "rb") as f:
    112             peek = f.read(peeksz)
    113     if peek == b"PK\x03\x04": # a ZIP file

FileNotFoundError: [Errno 2] No such file or directory: 'drink.xlsx'