最简单的机器学习流程 基于sklearn
最简单的机器学习流程
1读取数据
import pandas as pd

# Load the dataset into a DataFrame. The original referenced ``pd.read_csv``
# without calling it, which bound the function itself to ``data`` and made
# the later ``data.iloc`` access fail.
# TODO: replace the placeholder path with the real CSV location.
data = pd.read_csv("data.csv")
2切分数据与标签
# Separate the feature columns from the label column.
# NOTE(review): assumes the label is the last column and every other column
# is a feature - adjust both ``iloc`` selectors to the real CSV layout.
datax = data.iloc[:, :-1]
datay = data.iloc[:, -1]
3划分数据集
from sklearn.model_selection import train_test_split

# Hold out part of the samples for the final evaluation.
# ``test_size=0.25`` is the fraction scikit-learn uses when no size is given;
# a fixed ``random_state`` makes the shuffle reproducible. Both values are
# placeholders - tune them for the actual dataset.
x_train, x_test, y_train, y_test = train_test_split(
    datax, datay, test_size=0.25, random_state=42
)
4无量纲化
from sklearn.preprocessing import StandardScaler

# Standardise the features. The scaler statistics are learned from the
# training split only and then reused on the test split, so no information
# from the held-out samples leaks into preprocessing.
transfer = StandardScaler()
x_train_new = transfer.fit_transform(x_train)
x_test_new = transfer.transform(x_test)
5预估器流程(svm举例)
from sklearn.svm import SVC

# Support-vector classifier with an RBF kernel. The class is ``SVC`` and its
# regularisation parameter is the upper-case ``C``; further constructor
# arguments can be appended here as needed.
estimator = SVC(C=1.0, kernel='rbf', gamma='auto')
6网格搜索,交叉验证暴力求解超参数
import numpy as np
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameter values, spaced evenly on a base-2 log scale.
c_range = np.logspace(-5, 15, 11, base=2)
gamma_range = np.logspace(-15, 3, 13, base=2)
# Brute-force search over C and gamma. The keys must match the SVC
# constructor parameter names exactly (``C`` is upper-case).
param_grid = [{'kernel': ['rbf'], 'C': c_range, 'gamma': gamma_range}]
grid = GridSearchCV(estimator, param_grid, cv=5, n_jobs=-1)
# ``fit`` returns the fitted search object itself, so from here on
# ``estimator`` is the GridSearchCV wrapper. Train on the standardised
# features so the scaling step is actually used.
estimator = grid.fit(x_train_new, y_train)

# 7. Model evaluation
# The test features must go through the same scaling as the training data.
y_predict = estimator.predict(x_test_new)
print(y_test == y_predict)
score = estimator.score(x_test_new, y_test)
print(score)