""" 基于鸢尾花的不同分类器的效果比对: step1: 准备数据; 提取数据的特征向量X,Y 将Y数据采用LabelEncoder转化为数值型数据; step2: 将提取的特征向量X,Y进行拆分(训练集与测试集) step3: 构建不同分类器并设置参数,例如:KNN,RF,SVM,deng; step4: 循环遍历model列表: 设置时间戳; 每循环一个模型,进行模型训练,模型验证或测试打分并将结果进行记录; step5:plot(画图) """ import time import warnings import sys import matplotlib.pyplot as plt import numpy as np import pandas as pd import matplotlib.pylab as mpl from sklearn.preprocessing import LabelEncoder ###数据清洗 from sklearn.model_selection import train_test_split from sklearn.svm import SVC, SVR from sklearn.neighbors import KNeighborsClassifier ###KNN from sklearn.ensemble import RandomForestClassifier ###随机森林 from sklearn.linear_model import LogisticRegression ###逻辑回归器 # 溢出警告设置为0 warnings.filterwarnings('ignore') # 防止中文乱码 mpl.rcParams['font.sans-serif'] = [u'simHei'] mpl.rcParams['axes.unicode_minus'] = 'False' # 数据路径 path = 'iris.data' names = ['A', 'B', 'C', 'D', 'cls'] # 创建读取数据对象 data = pd.read_csv(filepath_or_buffer=path, header=None, names=names) print(data) # print(data['cls'].values) # sys.exit() # 读取特征向量X, Y X = data.iloc[:, :2] # Y = data.iloc[:, :-1] Y = data['cls'] # 将字符型数据Y转成数值型 数据清洗或调用LabelEncoder class_label = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2} # 对目标属性做一个类别的转换,将字符串的数据转换为从0开始的int值 ### 方式一: LabelEncoder方法 # label_encoder = LabelEncoder() # Y = label_encoder.fit_transform(Y) # print(Y) ### 方式二:采用map一一印射关系+匿名函数 data['cls'] = list(map(lambda cls: class_label[cls], data['cls'].values)) print(data['cls'].values) # 数据拆分训练集和测试集 x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=10) # 构建不同分类器 svc = SVC(C=0.2, kernel='rbf', decision_function_shape='ovr') knn = KNeighborsClassifier(n_neighbors=5) log = LogisticRegression() rand = RandomForestClassifier(n_estimators=150, max_depth=3) models = np.array([svc, knn, log, rand]) # 创建新列表记录数据 T = [] TRAIN_SORCE = [] TEST_SORCE = [] # 遍历不同分类器进行训练、打分和时间花销,并记录方便后续画图 for i in models: N = time.clock() i.fit(x_train, y_train) M = time.clock() 
T.append(M-N) TRAIN_SORCE.append(i.score(x_train, y_train)) TEST_SORCE.append(i.score(x_test, y_test)) # 画图 plt.figure(num=1) plt.plot(['svc01', 'knn02', 'log03', 'rand04'], TRAIN_SORCE, 'r-', linewidth='3', label='TRAIN_SORCE') plt.plot(['svc01', 'knn02', 'log03', 'rand04'], TEST_SORCE, 'b-o', linewidth='3', label='TEST_SORCE') # 设置Y轴尺度 plt.ylim(0.5, 1.2) plt.figure(num=2) plt.plot(['svc01', 'knn02', 'log03', 'rand04'], T, 'g-o', linewidth='3', label='time') plt.show() E:\myprogram\anaconda\envs\python3.6\python.exe E:/XXX/L-SVM/_differ-classifiies.py A B C D cls 0 5.1 3.5 1.4 0.2 Iris-setosa 1 4.9 3.0 1.4 0.2 Iris-setosa 2 4.7 3.2 1.3 0.2 Iris-setosa 3 4.6 3.1 1.5 0.2 Iris-setosa 4 5.0 3.6 1.4 0.2 Iris-setosa .. ... ... ... ... ... 145 6.7 3.0 5.2 2.3 Iris-virginica 146 6.3 2.5 5.0 1.9 Iris-virginica 147 6.5 3.0 5.2 2.0 Iris-virginica 148 6.2 3.4 5.4 2.3 Iris-virginica 149 5.9 3.0 5.1 1.8 Iris-virginica [150 rows x 5 columns] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] Process finished with exit code 0
文章来源地址:https://www.toymoban.com/news/detail-650090.html
文章来源:https://www.toymoban.com/news/detail-650090.html
到了这里,关于不同分类器对数据的处理的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!