Python机器学习实验二:2.编写代码,实现对iris数据集的KNN算法分类及预测并改进
2、改进模型,要求:
(1)数据集划分采用10折交叉验证;
(2)寻找最优的n_neighbors值(在5-10之间);
(3)使用新的模型预测未知种类的鸢尾花。
待预测未知数据:
X1=[[1.5 , 3 , 5.8 , 2.2], [6.2 , 2.9 , 4.3 , 1.3]]
点个👍吧
# Imports: iris dataset loader, KNN classifier, and train/test split + cross-validation helpers
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_score
from sklearn.model_selection import cross_val_score
# Tune n_neighbors for a KNN classifier on the iris dataset with 10-fold
# cross-validation, then evaluate the tuned model on a held-out test set and
# classify two unlabeled flowers.

# Load the iris dataset: x is the feature matrix, y the class labels.
iris = load_iris()
x = iris.data
y = iris.target

# Hold out 20% of the samples for the final evaluation. A fixed random_state
# makes the run reproducible, and stratify=y keeps the class proportions
# equal in the training and test parts.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

# Brute-force search over every candidate n_neighbors in 5..10 (inclusive).
candidate_ks = range(5, 11)
knn_scores_res = []  # mean 10-fold CV score, one entry per candidate
for i in candidate_ks:
    knn = KNeighborsClassifier(n_neighbors=i)
    # cross_val_score clones and fits the estimator on every fold itself, so
    # no prior fit() is needed. With an integer cv and a classifier the folds
    # are built without shuffling, so a single call is deterministic and
    # repeating it would only reproduce the same numbers.
    knn_scores = cross_val_score(knn, x_train, y_train, cv=10)
    print(knn_scores)
    knn_scores_res.append(knn_scores.mean())
print(knn_scores_res)

# Pick the best candidate. Comparing (score, k) tuples resolves ties in
# favour of the largest k, which is what the original last-match-wins loop
# ended up using.
best_score, k = max(zip(knn_scores_res, candidate_ks))
print("最大的准确率为:", best_score)
print("达到最好训练模型的n_neighbors为:%d" % k)

# Refit on the whole training split with the selected k and report the mean
# accuracy on the held-out test split (the message says "accuracy", so use
# the classifier's accuracy score rather than per-class precision).
knn = KNeighborsClassifier(n_neighbors=k)
knn = knn.fit(x_train, y_train)
s = knn.score(x_test, y_test)
print("改进后的模型的准确率为:", s)

# Classify the two unlabeled flowers given in the assignment.
outputLabel = knn.predict([[1.5, 3, 5.8, 2.2], [6.2, 2.9, 4.3, 1.3]])
print("改进后的算法的预测结果为:", outputLabel)
点个👍吧
点个👍吧