1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
| # 导入所需的库
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# Configuration: target size passed to cv2.resize for every loaded image,
# so each flattened sample has IMAGE_SIZE[0] * IMAGE_SIZE[1] features.
IMAGE_SIZE = (28, 28)
def load_data(data_path):
    """Load grayscale ``.bmp`` images from the per-class sub-folders of *data_path*.

    Every immediate sub-directory whose name parses as an integer is treated
    as one class, and that integer becomes the label of each image inside it.
    Sub-directories with non-integer names are skipped with a warning. Each
    image is read as grayscale, resized to ``IMAGE_SIZE``, converted to
    ``float32``, divided by 255 and flattened into a 1-D feature vector.

    Args:
        data_path: Directory that contains one sub-folder per class.

    Returns:
        A pair ``(images, labels)`` of NumPy arrays: one flattened image per
        row of ``images`` and the matching integer label in ``labels``.
        Both are empty 1-D arrays when *data_path* does not exist, is not a
        directory, contains no sub-folders, or yields no readable image.
    """
    images = []
    labels = []

    # Sort the class folders by name so labels are visited in a stable order.
    try:
        class_folders = sorted(
            d for d in os.listdir(data_path)
            if os.path.isdir(os.path.join(data_path, d))
        )
    except (FileNotFoundError, NotADirectoryError):
        # A path that points at a regular file is reported the same way as a
        # missing directory instead of crashing the caller.
        print(f"找不到指定目录{data_path}")
        return np.array([]), np.array([])

    if not class_folders:
        print(f"警告: 在目录 {data_path} 中未找到任何子文件夹作为类别。")
        return np.array([]), np.array([])

    for label_name in class_folders:
        print(f"正在加载标签为{label_name}的图像")

        # The folder name is the label; it has to be an integer.
        try:
            label_int = int(label_name)
        except ValueError:
            print(f"警告: 文件夹名 '{label_name}' 无法转换为整数,跳过该文件夹。")
            continue

        folder_path = os.path.join(data_path, label_name)

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # sample order (and any seeded split made from it) reproducible.
        for image_name in sorted(os.listdir(folder_path)):
            # Only .bmp images are used; the check ignores letter case so
            # files named "*.BMP" are not silently dropped.
            if not image_name.lower().endswith(".bmp"):
                continue

            image_path = os.path.join(folder_path, image_name)

            # IMREAD_GRAYSCALE loads the image as a single-channel array.
            img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals an unreadable or corrupt file by returning
                # None rather than raising; skip it instead of letting
                # cv2.resize fail on it.
                print(f"警告: 无法读取图像 {image_path},已跳过。")
                continue

            img_resized = cv2.resize(img, IMAGE_SIZE)
            # Scale pixel values from 0-255 down to 0-1.
            img_normalized = img_resized.astype("float32") / 255.0
            # Flatten the 2-D pixel matrix into one feature vector.
            images.append(img_normalized.flatten())
            labels.append(label_int)

    return np.array(images), np.array(labels)
data_path = "../data/data/"

# Load every image together with its integer label.
X, y = load_data(data_path)
if X.size == 0:
    print("没有加载到任何图像,程序退出。")
    # Stop here: an empty result is a 1-D array, so the X.shape[1] access
    # below would raise IndexError and there is nothing to train on anyway.
    raise SystemExit(1)

print(f"\n已加载 {X.shape[0]} 张图像,每张图像展平后有 {X.shape[1]} 个特征。")
print(f"已加载 {y.shape[0]} 个标签。")

# Hold out 20% of the samples for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"训练集大小: {len(X_train)}")
print(f"测试集大小: {len(X_test)}")

# Search for the best k: try k = 1..24 and record the accuracy of each.
# KNN cannot query more neighbours than there are training samples, so the
# upper bound is capped for very small datasets.
max_k = min(24, len(X_train))
k_values = range(1, max_k + 1)
accuracies = []
for k in k_values:
    # Distance-weighted KNN with the current number of neighbours.
    knn = KNeighborsClassifier(n_neighbors=k, weights="distance")
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"k={k}, 准确率是:{accuracy}")
    accuracies.append(accuracy)

# Plot accuracy as a function of k.
plt.figure(figsize=(10, 6))
plt.plot(k_values, accuracies, marker='o')
plt.title('K值与准确率')
plt.xlabel('K值')
plt.ylabel('准确率')
plt.xticks(k_values)
plt.grid(True)
plt.show()

# Report the k with the highest accuracy (the first one wins on ties).
# NOTE: k is chosen on the same split that is used for reporting, so the
# printed accuracy is optimistic; use a separate validation split or
# cross-validation when an unbiased estimate is needed.
best_k_index = np.argmax(accuracies)
best_k = k_values[best_k_index]
best_accuracy = accuracies[best_k_index]
print(f"\n最佳K值: {best_k}, 对应的准确率: {best_accuracy:.2f}")
|