mooc机器学习第六天-K近邻,决策树,朴素贝叶斯分类器简单尝试

1.下面的代码是上一篇理论中的小例子

from sklearn.neighbors import KNeighborsClassifier # K近邻分类器
from sklearn.datasets import load_iris  # 鸢尾花数据
from sklearn.tree import DecisionTreeClassifier  #决策树分类器
from sklearn.model_selection import cross_val_score #交叉验证值函数
from sklearn.naive_bayes import GaussianNB #朴素贝叶斯分类器
import  numpy as np #科学计算库


# The small examples below appear in the same order as the imports above.


# Toy one-feature training set: four samples with two class labels.
X = [[0], [1], [2], [3]]
y = [0, 0, 1, 1]

# Fit a 3-nearest-neighbour classifier on the toy data, then classify one
# query point and print the predicted label.
neigh = KNeighborsClassifier(n_neighbors=3).fit(X, y)
print("+++++K近邻+++++")
knn_prediction = neigh.predict([[1.2]])
print(knn_prediction)



# Evaluate a decision tree on the iris dataset with 10-fold cross-validation
# and print the per-fold scores.
clf = DecisionTreeClassifier()
iris = load_iris()
fold_scores = cross_val_score(clf, iris.data, iris.target, cv=10)
print("+++++交叉验证+++++")
print(fold_scores)

# Fit the same estimator object on the toy X / y data defined earlier in the
# script and classify a single query point.
print("+++++决策树+++++")
clf.fit(X, y)
tree_prediction = clf.predict([[2.2]])
print(tree_prediction)



# Two-feature toy data: the first three samples carry label 1, the last
# three carry label 2.
A = np.array([
    [-1, -1],
    [-2, -1],
    [-3, -2],
    [2, 1],
    [1, 1],
    [3, 2],
])
B = np.array([1, 1, 1, 2, 2, 2])

# Gaussian naive Bayes with no explicit class priors passed in.
clf1 = GaussianNB(priors=None)
clf1.fit(A, B)
r = clf1.predict([[-0.8, -1]])
print("+++++朴素毕贝叶斯+++++")
print(r)

2.结果

  

 

+++++K近邻+++++
[0]
+++++交叉验证+++++
[ 1.          0.93333333  1.          0.93333333  0.93333333  0.86666667
  0.93333333  0.93333333  1.          1.        ]
+++++决策树+++++
[1]
+++++朴素毕贝叶斯+++++
[1]

 

 

3. 利用 MOOC 提供的 feature 数据进行实践

 

import numpy as np
import pandas as pd

from sklearn.metrics import classification_report  # precision / recall / F1 report
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier

try:
    # Current scikit-learn module layout.
    from sklearn.impute import SimpleImputer  # missing-value imputation
    from sklearn.model_selection import train_test_split  # shuffle / split data
except ImportError:
    # Legacy scikit-learn releases that still ship the old module layout.
    from sklearn.preprocessing import Imputer  # data preprocessing
    from sklearn.cross_validation import train_test_split  # shuffle / split data
    SimpleImputer = Imputer


def load_datasets(feature_path, lable_path):
    """Load feature and label files and stack them into NumPy arrays.

    Args:
        feature_path: Iterable of paths to comma-separated feature files
            without a header row. A "?" entry is read as a missing value.
            Each file must have 41 columns, otherwise the final
            concatenation raises ValueError.
        lable_path: Iterable of paths to label files without a header row,
            read with ``pandas.read_table`` defaults. (The parameter name
            keeps its original spelling so keyword callers keep working.)

    Returns:
        A ``(feature, lable)`` tuple. ``feature`` is a 2-D array with 41
        columns holding the rows of all feature files in order, with missing
        values replaced by the column mean of the file they came from.
        ``lable`` is the flattened 1-D array of all label rows in order.
    """
    # Seed both lists with empty float arrays so that empty path lists still
    # return (0, 41) / (0,) results and the stacked arrays keep the float
    # dtype of the seed.
    feature_parts = [np.empty((0, 41))]
    lable_parts = [np.empty((0, 1))]

    # Read each feature file and fill its missing values per column.
    for path in feature_path:
        df = pd.read_csv(path, na_values="?", header=None)
        imp = SimpleImputer(missing_values=np.nan, strategy="mean")
        feature_parts.append(imp.fit_transform(df))

    for path in lable_path:
        df = pd.read_table(path, header=None)
        lable_parts.append(df.values)

    # Concatenate once at the end instead of re-copying the growing array
    # for every file.
    feature = np.concatenate(feature_parts)
    lable = np.ravel(np.concatenate(lable_parts))
    return feature, lable


if __name__ == '__main__':
    # Concrete data locations: one feature file and one label file for each
    # of the directories A through E.
    featurepaths = [
        '/A/A.feature',
        '/B/B.feature',
        '/C/C.feature',
        '/D/D.feature',
        '/E/E.feature',
    ]
    labelPaths = [
        '/A/A.label',
        '/B/B.label',
        '/C/C.label',
        '/D/D.label',
        '/E/E.label',
    ]

    # Read the data: the first four datasets are used for training and the
    # fifth is held out for testing.
    x_train, y_train = load_datasets(featurepaths[:4], labelPaths[:4])
    x_test, y_test = load_datasets(featurepaths[4:], labelPaths[4:])

    # Shuffle the training data. The earlier approach of calling
    # train_test_split with test_size=0.0 is rejected by current scikit-learn,
    # so apply one shared random permutation to rows and labels instead.
    order = np.random.permutation(len(x_train))
    x_train, y_train = x_train[order], y_train[order]

    # Build the three classifiers, then train each one and predict on the
    # held-out data.
    classifiers = [
        ('knn', KNeighborsClassifier()),
        ('DT', DecisionTreeClassifier()),
        ('Bayes', GaussianNB()),
    ]
    answers = {}
    for name, model in classifiers:
        print('Start training ' + name)
        model.fit(x_train, y_train)
        print('Training done')
        answers[name] = model.predict(x_test)
        print('Prediction done')

    # Show the results: classification_report builds a text report with the
    # main classification metrics (precision / recall / F1).
    for name, _ in classifiers:
        print('\n\nThe classification report for ' + name + ':')
        print(classification_report(y_test, answers[name]))

  

 

 

相关推荐