前言
一个简单的手势识别,过程很简单,主要用到了opencv
和sklearn
和tkinter
三个库,下面我将会展示整个项目的代码和简要说明,并且下面将会是完整的已经全部集成在三个.py
文件的代码,你只需要将三个文件分别执行就可以训练出自己的手势识别模型
项目思想:
- 通过颜色寻找图像中手的轮廓
- 由轮廓得到一串傅里叶描述子作为一个样本
- 利用多个样本构成的数据集,在使用
SVM
支持向量机完成分类工作
01 环境配置
python版本:3.7
requirements.txt
包文件内容如下:
certifi @ file:///C:/b/abs_85o_6fm0se/croot/certifi_1671487778835/work/certifi
et-xmlfile==1.1.0
imageio==2.28.1
joblib==1.2.0
networkx==2.6.3
numpy==1.21.6
opencv-contrib-python==4.7.0.72
opencv-python==4.7.0.72
openpyxl==3.1.2
packaging==23.1
pandas==1.3.5
Pillow==9.5.0
python-dateutil==2.8.2
pytz==2023.3
PyWavelets==1.3.0
scikit-image==0.19.3
scikit-learn==1.0.2
scipy==1.7.3
six==1.16.0
threadpoolctl==3.1.0
tifffile==2021.11.2
wincertstore==0.2
通过终端命令:pip install -r requirements.txt -i https://pypi.douban.com/simple
安装所需模块
02 第一个py文件:获取图像数据
我们要对每一个手势获取一些图片作为样本,因为本身是基于轮廓的所以我们会将轮廓图像也展示出来便于参考截取。
- 导入模块:
from tkinter import *
from skimage import io, transform
import threading
import cv2
import warnings
import time
import os
import numpy as np
warnings.simplefilter('ignore')
- 添加通过皮肤检测获取手部图像的函数
def binaryMask(frame, x0, y0, width, height):
    """Mark the gesture box on *frame* and return the cleaned skin mask.

    Returns (frame_with_box, denoised_roi). cv2.rectangle draws on *frame*
    in place, so the green box edges also fall inside the sliced ROI.
    """
    boxed = cv2.rectangle(frame, (x0, y0), (x0 + width, y0 + height), (0, 255, 0))
    hand_region = frame[y0:y0 + height, x0:x0 + width]
    skin = skinMask(hand_region)                      # keep skin-coloured pixels only
    kernel = np.ones((5, 5), np.uint8)
    # Morphological open (erode then dilate) to drop speckle noise.
    opened = cv2.dilate(cv2.erode(skin, kernel), kernel)
    return boxed, opened
def skinMask(roi):
    """Return *roi* with non-skin pixels blacked out.

    Works in YCrCb space: Otsu-thresholds a blurred Cr channel and uses the
    resulting binary image as a mask over the original BGR ROI.
    """
    ycrcb = cv2.cvtColor(roi, cv2.COLOR_BGR2YCR_CB)
    cr_channel = cv2.split(ycrcb)[1]                   # Cr carries most skin information
    blurred = cv2.GaussianBlur(cr_channel, (5, 5), 0)  # smooth before thresholding
    _, mask = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.bitwise_and(roi, roi, mask=mask)
- 添加获取傅里叶描述子的函数,就是我们通过处理好的手部图像轮廓获取数据样本的部分:
# Number of low-frequency Fourier coefficients kept per contour (feature length).
MIN_DESCRIPTOR = 32  # surprisingly enough, 2 descriptors are already enough
## Compute the Fourier descriptor.
def fourierDesciptor(res):
    """Extract the contour image and truncated Fourier descriptor of *res*.

    res: BGR skin-masked ROI. Returns (contour_image, descriptor): a uint8
    canvas with the largest contour drawn, and the complex truncated FFT of
    that contour's boundary sequence.
    """
    gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
    # Laplacian edge response, rectified to 8 bit for the contour search.
    edges = cv2.convertScaleAbs(cv2.Laplacian(gray, cv2.CV_16S, ksize=3))
    all_contours = find_contours(edges)          # sorted by area, largest first
    boundary = all_contours[0][:, 0, :]          # (N, 2) points of the biggest contour only
    canvas = np.ones(edges.shape, np.uint8)      # near-black background
    outline = cv2.drawContours(canvas, all_contours[0], -1, (255, 255, 255), 1)
    # Encode each point (x, y) as x + iy and transform the boundary sequence.
    signal = boundary[:, 0] + 1j * boundary[:, 1]
    descriptor = truncate_descriptor(np.fft.fft(signal))
    return outline, descriptor
def find_contours(Laplacian):
    """Return the external contours of the edge image, biggest area first."""
    found, _hierarchy = cv2.findContours(Laplacian, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # Sort by enclosed area so index 0 is always the dominant (hand) contour.
    return sorted(found, key=cv2.contourArea, reverse=True)
# Truncate a Fourier descriptor to its lowest-frequency coefficients.
def truncate_descriptor(fourier_result, min_descriptor=32):
    """Keep only the `min_descriptor` lowest-frequency Fourier coefficients.

    Parameters
    ----------
    fourier_result : np.ndarray
        Complex FFT of a contour boundary sequence.
    min_descriptor : int, optional
        Number of coefficients to keep (default 32, matching MIN_DESCRIPTOR).

    Returns
    -------
    np.ndarray
        Truncated descriptor in standard (ifftshift-ed) FFT ordering.
    """
    # Center the zero-frequency term so the lowest frequencies are contiguous.
    shifted = np.fft.fftshift(fourier_result)
    center_index = len(shifted) // 2
    half = min_descriptor // 2
    # Clamp the lower bound: for short contours (< min_descriptor points) the
    # original `center - half` went negative and the slice silently returned
    # only a wrapped tail of the spectrum; clamping keeps every coefficient.
    low = max(center_index - half, 0)
    high = center_index + half
    descriptors_in_use = shifted[low:high]
    # Undo the shift so downstream code sees normal FFT ordering.
    return np.fft.ifftshift(descriptors_in_use)
## Rebuild a contour image from (truncated) Fourier descriptors.
def reconstruct(img, descirptor_in_use):
    """Inverse-FFT the descriptor back to boundary points and draw them.

    *img* is only used for its shape. Returns the drawn uint8 canvas.
    """
    # Back to the spatial domain: each complex sample is one boundary point.
    samples = np.fft.ifft(descirptor_in_use)
    # (N, 1, 2) float array of (x, y) points — the layout drawContours expects.
    points = np.column_stack((samples.real, samples.imag))[:, np.newaxis, :]
    # Shift into the positive quadrant, then scale to the image height.
    if points.min() < 0:
        points -= points.min()
    points *= img.shape[0] / points.max()
    points = points.astype(np.int32, copy=False)
    canvas = np.ones(img.shape, np.uint8)   # near-black canvas
    return cv2.drawContours(canvas, points, -1, (255, 255, 255), 10)
- 添加基于
tkinter
的GUI
的类,类中主要使用多线程来展示相机拍摄到的视频和转换为轮廓图像的视频,以及一些简单的截取和记录功能
# GUI class for collecting gesture image samples from the webcam.
class Application(Frame):
    """Tkinter frame that shows the live camera feed plus the processed hand
    contour, and saves cropped skin-masked images as labelled samples
    named ``<label>_<cnt>.png``.
    """

    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.isstart = False        # True once the camera thread is running
        self.num = 1                # total images captured so far (1-based)
        self.label = 1              # current gesture class id (file-name prefix)
        self.cnt = 1                # index of the next image within the class
        self.single_sum = 20        # default number of images per gesture class
        self.time = time.time()     # timestamp used as output-folder suffix
        self.create_widget()

    def create_widget(self):
        # Build all widgets; absolute placement via place().
        self.button01 = Button(master=self.master, text='开启摄像头', command=self.video)
        self.button01.place(x=180, y=470+10)
        self.button001 = Button(master=self.master, text='关闭程序', command=self.over)
        self.button001.place(x=270, y=470+10)
        self.label01 = Label(master=self.master)   # left pane: raw camera frame
        self.label01.place(x=30, y=50)
        self.button02 = Button(master=self.master, text='截取图像', command=self.cut_image)
        self.button02.place(x=680, y=470+10)
        self.v1 = StringVar(self.master, '保存为:1_1.png')
        self.label_cut = Label(master=self.master, textvariable=self.v1)
        self.label_cut.place(x=750, y=470+10)
        self.label02 = Label(master=self.master)   # right pane: contour image
        self.label02.place(x=630, y=50)
        self.label_num = Label(master=self.master, text='单类图片数:')
        self.label_num.place(x=750, y=470+30)
        self.v2 = StringVar(self.master, f"{self.single_sum}")
        self.entry = Entry(master=self.master, textvariable=self.v2)
        self.entry.place(x=820, y=470+30)

    def cut_image(self):
        # Save the current skin-masked ROI as <label>_<cnt>.png, then advance counters.
        if self.isstart:
            if not os.path.exists(f'./images-{int(self.v2.get())}_{int(self.time)}'):
                os.mkdir(f'./images-{int(self.v2.get())}_{int(self.time)}')
            io.imsave(f'./images-{int(self.v2.get())}_{int(self.time)}/{self.label}_{self.cnt}.png',
                      self.res[:, :, ::-1])   # BGR -> RGB before saving
            self.num += 1
            if int(self.v2.get()) != 1:
                # Move to the next class after every v2 images.
                if self.num % int(self.v2.get()) == 1:
                    self.label += 1
                    self.cnt = 1
                else:
                    self.cnt += 1
            else:
                self.label += 1
                self.cnt = 1
            self.v1.set(f'保存为:{self.label}_{self.cnt}.png')
            # Stop automatically once 10 classes' worth of images are saved.
            if self.num-1 == int(self.v2.get()) * 10:
                self.over()

    def over(self):
        # Release the camera (if started) and close the window.
        if self.isstart:
            self.cap.release()
            cv2.destroyAllWindows()  # close all OpenCV windows
        self.master.destroy()

    def video(self):
        # Start the capture loop in a daemon thread (only once).
        if not self.isstart:
            self.isstart = True
            t1 = threading.Thread(target=self.open_video)
            t1.setDaemon(True)   # NOTE: deprecated alias for `t1.daemon = True`
            t1.start()

    def open_video(self):
        # Camera loop: grab frames, refresh raw + contour panes via temp GIF files.
        if not os.path.exists(f'./cache_image'):
            os.mkdir(f'./cache_image')
        self.width, self.height = 400, 400  # capture-box size
        self.cap = cv2.VideoCapture(0)      # open default camera
        while True:
            ret, frame = self.cap.read()
            self.frame = cv2.flip(frame, 1)  # mirrored copy for a natural preview
            # NOTE(review): binaryMask receives the un-flipped `frame` while
            # self.frame holds the flipped one — confirm this is intended.
            frame1, self.res = binaryMask(frame, (frame.shape[1]-self.width)//2,
                                          (frame.shape[0]-self.height)//2,
                                          self.width, self.height)
            io.imsave('./cache_image/abc.gif', (transform.resize(frame1[:, :, ::-1], (
                400, 400 / frame1.shape[0] * frame1.shape[1], frame1.shape[2])) * 255).astype('uint8'))
            self.video_1 = PhotoImage(file='./cache_image/abc.gif')
            self.label01.config(image=self.video_1)
            temp = fourierDesciptor(self.res)
            out_line = temp[0]   # contour image; descriptor (temp[1]) is unused here
            io.imsave('./cache_image/out_line.gif', (transform.resize(out_line[:, :], (
                400, 400 / out_line.shape[0] * out_line.shape[1])) * 255).astype('uint8'))
            self.video_2 = PhotoImage(file='./cache_image/out_line.gif')
            self.label02.config(image=self.video_2)
            abc1 = None
            abc2 = None
            abc1 = self.video_1   # keep refs so Tk doesn't GC the images (prevents flicker)
            abc2 = self.video_2   # keep refs so Tk doesn't GC the images (prevents flicker)
- 执行代码
# Entry point: build the main window and hand control to the Tk event loop.
if __name__ == '__main__':
    root = Tk()
    root.geometry("1100x550")   # window size in pixels (width x height)
    root.title('不就是获取点图片数据吗')
    app = Application(root)
    root.mainloop()
我们先来查看一下运行效果图:
可以看出我准备收集每一类20张图片,它会自动将截取图片数目加一,从1_1
一直到10_20
才能截取完毕,截取时注意背景中不要出现与皮肤相近的颜色,获取的数据会自动保存到当前文件夹下自动创建的 images-单类图片数_时间戳
目录内。
03 第二个py文件:处理数据,并训练模型
这里我们处理数据还是需要用到刚才的傅里叶描述子的函数,为了不让你翻回去,我在下面又重新写了一遍。
- 导入所需模块
from tkinter import *
import tkinter.filedialog
import pandas as pd
from skimage import io
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import cv2
import warnings
import time
import numpy as np
import random
import pickle
warnings.simplefilter('ignore')
- 添加获取傅里叶描述子的函数
# Number of low-frequency Fourier coefficients kept per contour (feature length).
MIN_DESCRIPTOR = 32  # surprisingly enough, 2 descriptors are already enough
## Compute the Fourier descriptor.
def fourierDesciptor(res):
    """Compute the truncated Fourier descriptor of the largest contour in *res*.

    res: BGR skin-masked ROI. Returns a complex vector of the kept
    low-frequency Fourier coefficients of the contour boundary.
    """
    gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
    # Laplacian edge response, rectified to 8 bit for the contour search.
    edges = cv2.convertScaleAbs(cv2.Laplacian(gray, cv2.CV_16S, ksize=3))
    all_contours = find_contours(edges)      # sorted by area, largest first
    boundary = all_contours[0][:, 0, :]      # (N, 2) points of the biggest contour only
    # Drawn contour image (kept for parity with the capture script; unused here).
    canvas = np.ones(edges.shape, np.uint8)
    cv2.drawContours(canvas, all_contours[0], -1, (255, 255, 255), 1)
    # Encode each point (x, y) as x + iy and transform the boundary sequence.
    signal = boundary[:, 0] + 1j * boundary[:, 1]
    return truncate_descriptor(np.fft.fft(signal))
def find_contours(Laplacian):
    """Return the external contours of the edge image, biggest area first."""
    found, _hierarchy = cv2.findContours(Laplacian, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # Largest enclosed area first, so callers can take index 0 as the hand.
    return sorted(found, key=cv2.contourArea, reverse=True)
# Truncate a Fourier descriptor to its lowest-frequency coefficients.
def truncate_descriptor(fourier_result, min_descriptor=32):
    """Keep only the `min_descriptor` lowest-frequency Fourier coefficients.

    Parameters
    ----------
    fourier_result : np.ndarray
        Complex FFT of a contour boundary sequence.
    min_descriptor : int, optional
        Number of coefficients to keep (default 32, matching MIN_DESCRIPTOR).

    Returns
    -------
    np.ndarray
        Truncated descriptor in standard (ifftshift-ed) FFT ordering.
    """
    # Center the zero-frequency term so the lowest frequencies are contiguous.
    shifted = np.fft.fftshift(fourier_result)
    center_index = len(shifted) // 2
    half = min_descriptor // 2
    # Clamp the lower bound: for short contours (< min_descriptor points) the
    # original `center - half` went negative and the slice silently returned
    # only a wrapped tail of the spectrum; clamping keeps every coefficient.
    low = max(center_index - half, 0)
    high = center_index + half
    descriptors_in_use = shifted[low:high]
    # Undo the shift so downstream code sees normal FFT ordering.
    return np.fft.ifftshift(descriptors_in_use)
## Rebuild a contour image from (truncated) Fourier descriptors.
def reconstruct(img, descirptor_in_use):
    """Inverse-FFT the descriptor back to boundary points and draw them.

    *img* is only used for its shape. Returns the drawn uint8 canvas.
    """
    # Back to the spatial domain: each complex sample is one boundary point.
    samples = np.fft.ifft(descirptor_in_use)
    # (N, 1, 2) float array of (x, y) points — the layout drawContours expects.
    points = np.column_stack((samples.real, samples.imag))[:, np.newaxis, :]
    # Shift into the positive quadrant, then scale to the image height.
    if points.min() < 0:
        points -= points.min()
    points *= img.shape[0] / points.max()
    points = points.astype(np.int32, copy=False)
    canvas = np.ones(img.shape, np.uint8)   # near-black canvas
    return cv2.drawContours(canvas, points, -1, (255, 255, 255), 1)
- 添加基于
tkinter
的GUI
的类,类中通过一些cv2
中的模块对图像数据进行了旋转和翻转形式的数据增强操作,数据量乘了个25
,然后通过傅里叶描述子获取每一个图片的特征数据,最后导入SVM
模型进行训练得出模型
# GUI class for augmenting the data set, extracting features and training the SVM.
class Application(Frame):
    """Three-step training GUI: (1) augment raw gesture images 25x by random
    rotation + horizontal flip, (2) write per-image Fourier-descriptor feature
    files, (3) grid-search an SVM over the features and save the best model.
    """

    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        # Defaults (the paths are filled in through the directory-picker buttons).
        self.num = 20                  # images per class in the RAW data set
        self.more_data_path = ''       # augmented-image output directory
        self.origin_data_path = ''     # raw-image input directory
        self.save_features_path = ''   # feature-file output directory
        self.SVR_model_path = ''       # trained-model output directory
        self.train_percent = 4 / 5     # train/test split ratio
        self.time = time.time()        # suffix for the saved model file name
        self.create_widget()

    def create_widget(self):
        # Build all widgets; absolute placement via place().
        # Pickers for the raw-data and augmented-data directories.
        self.button_select_origin = Button(master=self.master, text='原数据路径:', command=self.button_select_origin)
        self.button_select_origin.place(x=50, y=60)
        self.v_select_origin = StringVar(self.master, self.origin_data_path)
        self.entry_select_origin = Entry(master=self.master, textvariable=self.v_select_origin)
        self.entry_select_origin.place(x=130, y=60)
        self.button_select_to = Button(master=self.master, text='新数据路径:', command=self.button_select_to)
        self.button_select_to.place(x=50, y=90)
        self.v_select_to = StringVar(self.master, self.more_data_path)
        self.entry_select_to = Entry(master=self.master, textvariable=self.v_select_to)
        self.entry_select_to.place(x=130, y=90)
        # Images-per-class input.
        self.label_num = Label(master=self.master, text='单个类图片个数:')
        self.label_num.place(x=10, y=1)
        self.v1 = StringVar(self.master, f"{self.num}")
        self.entry = Entry(master=self.master, textvariable=self.v1)
        self.entry.place(x=110, y=1)
        self.button01 = Button(master=self.master, text='1.扩充数据', command=self.get_more_data)
        self.button01.place(x=50, y=25)
        self.button02 = Button(master=self.master, text='关闭程序', command=self.over)
        self.button02.place(x=920, y=25)
        # Picker for the feature-file directory.
        # NOTE(review): assigning the Button over the bound-method attribute name
        # shadows the method on the instance; the command still works because it
        # was bound before the assignment — confirm this is intended.
        self.button_select_feature = Button(master=self.master, text='特征数据保存路径:',
                                            command=self.button_select_feature)
        self.button_select_feature.place(x=280, y=60)
        self.v_select_feature = StringVar(self.master, self.save_features_path)
        self.entry_select_feature = Entry(master=self.master, textvariable=self.v_select_feature)
        self.entry_select_feature.place(x=395, y=60)
        self.button03 = Button(master=self.master, text='2.获取特征数据', command=self.get_feature_data)
        self.button03.place(x=280, y=25)
        # Picker for the model output directory.
        self.button_select_model = Button(master=self.master, text='模型保存路径:',
                                          command=self.button_select_model)
        self.button_select_model.place(x=550, y=60)
        self.v_select_model = StringVar(self.master, self.SVR_model_path)
        self.entry_select_model = Entry(master=self.master, textvariable=self.v_select_model)
        self.entry_select_model.place(x=640, y=60)
        self.button04 = Button(master=self.master, text='3.训练模型', command=self.train_model)
        self.button04.place(x=550, y=25)
        # Red status label updated via access_bar().
        self.v_label_thing = StringVar(self.master, '状态')
        self.label_thing = Label(master=self.master, textvariable=self.v_label_thing, fg='red')
        self.label_thing.place(x=450, y=120)

    def button_select_model(self):
        # Ask the user for the model output directory.
        self.v_select_model.set(tkinter.filedialog.askdirectory())

    def button_select_feature(self):
        # Ask the user for the feature-file directory.
        self.v_select_feature.set(tkinter.filedialog.askdirectory())

    def button_select_to(self):
        # Ask the user for the augmented-data directory.
        self.v_select_to.set(tkinter.filedialog.askdirectory())

    def button_select_origin(self):
        # Ask the user for the raw-data directory.
        self.v_select_origin.set(tkinter.filedialog.askdirectory())

    def access_bar(self, thing):
        # Show a status message in the red status label.
        self.v_label_thing.set(thing)

    def train_model(self):
        """Load feature files, grid-search an SVM, save and evaluate the best model."""
        # Only run when all four paths have been chosen.
        if self.entry_select_to.get().strip() and self.v_select_feature.get().strip() and self.entry_select_origin.get().strip() and self.v_select_model.get().strip():
            # Split the per-class sample indices into train/test.
            data_index = np.array(range(1, int(self.v1.get()) * 25 + 1))
            train_index = data_index[:int(int(self.v1.get()) * 25 * self.train_percent) + 1]
            test_index = data_index[int(int(self.v1.get()) * 25 * self.train_percent) + 1:]
            # NOTE(review): train_index/test_index are numpy views into data_index,
            # so this in-place shuffle DOES randomize their contents even though it
            # happens after the slicing — confirm this ordering is intended.
            np.random.shuffle(data_index)
            # Read one feature file to discover the feature count.
            fp = open(self.v_select_feature.get() + '/' + f'1_1.txt', 'r')
            cols = len(fp.read().strip().split(' '))
            fp.close()
            print(f"一共有{cols}个特征")
            df_train = pd.DataFrame(columns=range(cols + 1))
            df_test = pd.DataFrame(columns=range(cols + 1))
            # Load feature vectors; the appended last column is the class label (1..10).
            for i in range(1, 11):
                for j in range(train_index.shape[0]):
                    fp = open(self.v_select_feature.get() + '/' + f'{i}_{train_index[j]}.txt', 'r')
                    lst = fp.read().strip().split(' ')
                    fp.close()
                    lst = list(map(int, lst))
                    lst.append(i)  # append the class label
                    # NOTE(review): the row index uses a fixed stride of 10, so rows
                    # overwrite each other whenever a class contributes more than 10
                    # samples (train_index.shape[0] > 10) — verify this is intended.
                    df_train.loc[(i - 1) * 10 + j] = np.array(lst, dtype='float64')
            for i in range(1, 11):
                for j in range(test_index.shape[0]):
                    fp = open(self.v_select_feature.get() + '/' + f'{i}_{test_index[j]}.txt', 'r')
                    lst = fp.read().strip().split(' ')
                    fp.close()
                    lst = list(map(int, lst))
                    lst.append(i)  # append the class label
                    df_test.loc[(i - 1) * 10 + j] = np.array(lst, dtype='float64')

            def tran_SVM():
                # Grid-search an SVC over kernel/C/gamma with 5-fold cross-validation.
                svc = SVC()
                parameters = {'kernel': ('linear', 'rbf'),
                              'C': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19],
                              'gamma': [0.00001, 0.0001, 0.001, 0.1, 1, 10, 100, 1000]}
                clf = GridSearchCV(svc, parameters, cv=5, n_jobs=8)
                clf.fit(df_train.iloc[:, :-1], df_train.iloc[:, -1])
                print(clf.return_train_score)
                print(clf.best_params_)  # best parameter combination found
                best_model = clf.best_estimator_
                print("SVM Model save...")
                save_path = self.v_select_model.get() + f"/svm_model_{int(self.time)}.m"
                fp = open(save_path, 'wb')
                pickle.dump(best_model, fp)  # persist the best model
                fp.close()

            def test_SVM(clf):
                # Evaluate the reloaded model on the held-out rows.
                valTest = clf.predict(df_test.iloc[:, :-1])
                errorCount = np.sum(valTest != df_test.iloc[:, -1])  # misclassification count
                print("总共错了%d个数据\n错误率为%.2f%%" % (errorCount, errorCount / df_test.shape[0] * 100))

            # Train, reload the saved model from disk, then evaluate it.
            tran_SVM()
            fp = open(self.v_select_model.get() + f"/svm_model_{int(self.time)}.m", 'rb')
            clf = pickle.load(fp)
            fp.close()
            test_SVM(clf)
            self.access_bar("自动训练完成")

    def get_feature_data(self):
        # Step 2: write one normalized Fourier-descriptor text file per image.
        if self.entry_select_to.get().strip() and self.v_select_feature.get().strip() and self.entry_select_origin.get().strip():
            for i in range(1, 11):
                for j in range(1, int(self.v1.get()) * 25 + 1):
                    # RGB -> BGR so the cv2-based descriptor pipeline sees BGR input.
                    roi = io.imread(self.entry_select_to.get() + '/' + str(i) + '_' + str(j) + '.png')[:, :, ::-1]
                    descirptor_in_use = abs(fourierDesciptor(roi))  # coefficient magnitudes
                    fd_name = self.v_select_feature.get() + '/' + str(i) + '_' + str(j) + '.txt'
                    with open(fd_name, 'w', encoding='utf-8') as f:
                        # Normalize each magnitude by the first kept coefficient.
                        temp = descirptor_in_use[1]
                        for k in range(1, len(descirptor_in_use)):
                            x_record = int(100 * descirptor_in_use[k] / temp)
                            f.write(str(x_record))
                            f.write(' ')
                        f.write('\n')
            self.access_bar("获取特征完成")

    def get_more_data(self):
        # Step 1: augment each raw image 25x (original + 12 rotations + 12 flips).
        if self.entry_select_to.get().strip() and self.entry_select_origin.get().strip():
            def rotate(image, scale=0.9):
                # Rotate around the image center by a random angle, slightly zoomed out.
                angle = random.randrange(-90, 90)
                w = image.shape[1]
                h = image.shape[0]
                M = cv2.getRotationMatrix2D((w / 2, h / 2), angle, scale)
                image = cv2.warpAffine(image, M, (w, h))
                return image
            for i in range(1, 11):
                cnt = int(self.v1.get()) + 1  # index for newly generated images
                for j in range(1, int(self.v1.get()) + 1):
                    roi = io.imread(self.v_select_origin.get() + '/' + str(i) + '_' + str(j) + '.png')[:, :, ::-1]
                    # Copy the original image into the augmented set first.
                    io.imsave(self.v_select_to.get() + '/' + str(i) + '_' + str(j) + '.png', roi[:, :, ::-1])
                    for k in range(12):
                        img_rotation = rotate(roi)  # random rotation
                        io.imsave(self.v_select_to.get() + '/' + str(i) + '_' + str(cnt) + '.png', img_rotation[:, :, ::-1])
                        cnt += 1
                        img_flip = cv2.flip(img_rotation, 1)  # horizontal flip of the rotation
                        io.imsave(self.v_select_to.get() + '/' + str(i) + '_' + str(cnt) + '.png', img_flip[:, :, ::-1])
                        cnt += 1
            self.access_bar("数据扩充完成")

    def over(self):
        # Close the window.
        self.master.destroy()
- 执行代码
# Entry point: build the main window and hand control to the Tk event loop.
if __name__ == '__main__':
    root = Tk()
    root.geometry("1000x200")   # window size in pixels (width x height)
    root.title('不就是训练模型吗')
    app = Application(root)
    root.mainloop()
查看运行效果:
我们需要在左上角填入与获取数据时设置的单类图片数相同的值,然后从左到右依次执行,执行每一步操作之前还要注意将下面的路径选择完成,运行过程中全部状态都会在下面显示,只要不报错就不用急,有可能是数据较多需要等待一下。
04 第三个py文件:使用模型进行预测
最后就是使用上一步训练出来的模型进行预测的操作,所以在获取数据时使用的傅里叶描述子函数和皮肤检测函数都要重新拿过来使用
- 导入模块
from tkinter import *
import tkinter.filedialog
import numpy as np
from skimage import io, transform
import threading
import pickle
import cv2
import os
import warnings
warnings.simplefilter('ignore')
- 添加傅里叶描述子函数和皮肤检测函数,功能和之前一样
# Number of low-frequency Fourier coefficients kept per contour (feature length).
MIN_DESCRIPTOR = 32  # surprisingly enough, 2 descriptors are already enough
## Compute the Fourier descriptor.
def fourierDesciptor(res):
    """Extract the contour image and truncated Fourier descriptor of *res*.

    res: BGR skin-masked ROI. Returns (contour_image, descriptor): a uint8
    canvas with the largest contour drawn, and the complex truncated FFT of
    that contour's boundary sequence.
    """
    gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)
    # Laplacian edge response, rectified to 8 bit for the contour search.
    edges = cv2.convertScaleAbs(cv2.Laplacian(gray, cv2.CV_16S, ksize=3))
    all_contours = find_contours(edges)          # sorted by area, largest first
    boundary = all_contours[0][:, 0, :]          # (N, 2) points of the biggest contour only
    canvas = np.ones(edges.shape, np.uint8)      # near-black background
    outline = cv2.drawContours(canvas, all_contours[0], -1, (255, 255, 255), 1)
    # Encode each point (x, y) as x + iy and transform the boundary sequence.
    signal = boundary[:, 0] + 1j * boundary[:, 1]
    descriptor = truncate_descriptor(np.fft.fft(signal))
    return outline, descriptor
def find_contours(Laplacian):
    """Return the external contours of the edge image, biggest area first."""
    found, _hierarchy = cv2.findContours(Laplacian, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # Sort by enclosed area so index 0 is always the dominant (hand) contour.
    return sorted(found, key=cv2.contourArea, reverse=True)
# Truncate a Fourier descriptor to its lowest-frequency coefficients.
def truncate_descriptor(fourier_result, min_descriptor=32):
    """Keep only the `min_descriptor` lowest-frequency Fourier coefficients.

    Parameters
    ----------
    fourier_result : np.ndarray
        Complex FFT of a contour boundary sequence.
    min_descriptor : int, optional
        Number of coefficients to keep (default 32, matching MIN_DESCRIPTOR).

    Returns
    -------
    np.ndarray
        Truncated descriptor in standard (ifftshift-ed) FFT ordering.
    """
    # Center the zero-frequency term so the lowest frequencies are contiguous.
    shifted = np.fft.fftshift(fourier_result)
    center_index = len(shifted) // 2
    half = min_descriptor // 2
    # Clamp the lower bound: for short contours (< min_descriptor points) the
    # original `center - half` went negative and the slice silently returned
    # only a wrapped tail of the spectrum; clamping keeps every coefficient.
    low = max(center_index - half, 0)
    high = center_index + half
    descriptors_in_use = shifted[low:high]
    # Undo the shift so downstream code sees normal FFT ordering.
    return np.fft.ifftshift(descriptors_in_use)
## Rebuild a contour image from (truncated) Fourier descriptors.
def reconstruct(img, descirptor_in_use):
    """Inverse-FFT the descriptor back to boundary points and draw them.

    *img* is only used for its shape. Returns the drawn uint8 canvas.
    """
    # Back to the spatial domain: each complex sample is one boundary point.
    samples = np.fft.ifft(descirptor_in_use)
    # (N, 1, 2) float array of (x, y) points — the layout drawContours expects.
    points = np.column_stack((samples.real, samples.imag))[:, np.newaxis, :]
    # Shift into the positive quadrant, then scale to the image height.
    if points.min() < 0:
        points -= points.min()
    points *= img.shape[0] / points.max()
    points = points.astype(np.int32, copy=False)
    canvas = np.ones(img.shape, np.uint8)   # near-black canvas
    return cv2.drawContours(canvas, points, -1, (255, 255, 255), 10)
def binaryMask(frame, x0, y0, width, height):
    """Mark the gesture box on *frame* and return the cleaned skin mask.

    Returns (frame_with_box, denoised_roi). cv2.rectangle draws on *frame*
    in place, so the green box edges also fall inside the sliced ROI.
    """
    boxed = cv2.rectangle(frame, (x0, y0), (x0 + width, y0 + height), (0, 255, 0))
    hand_region = frame[y0:y0 + height, x0:x0 + width]
    skin = skinMask(hand_region)                      # keep skin-coloured pixels only
    kernel = np.ones((5, 5), np.uint8)
    # Morphological open (erode then dilate) to drop speckle noise.
    opened = cv2.dilate(cv2.erode(skin, kernel), kernel)
    return boxed, opened
def skinMask(roi):
    """Return *roi* with non-skin pixels blacked out.

    Works in YCrCb space: Otsu-thresholds a blurred Cr channel and uses the
    resulting binary image as a mask over the original BGR ROI.
    """
    ycrcb = cv2.cvtColor(roi, cv2.COLOR_BGR2YCR_CB)
    cr_channel = cv2.split(ycrcb)[1]                   # Cr carries most skin information
    blurred = cv2.GaussianBlur(cr_channel, (5, 5), 0)  # smooth before thresholding
    _, mask = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    return cv2.bitwise_and(roi, roi, mask=mask)
- 添加基于
tkinter
的GUI
的类,类中主要使用多线程来展示相机拍摄到的视频,通过截取图像进行预测
# GUI class for live prediction with a trained SVM model.
class Application(Frame):
    """Shows the live camera feed; on demand, crops the hand box, extracts
    the Fourier descriptor and predicts the gesture with the loaded SVM.
    The user can mark each prediction correct/wrong to track accuracy.
    """

    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.isstart = False   # True once the camera thread is running
        self.true = 0          # user-confirmed correct predictions
        self.fail = 0          # user-confirmed wrong predictions
        self.new = False       # a fresh prediction awaiting user judgement?
        self.cut = False       # at least one snapshot has been taken?
        self.create_widget()

    def create_widget(self):
        # Build all widgets; absolute placement via place().
        self.button01 = Button(master=self.master, text='开启摄像头', command=self.video)
        self.button01.place(x=180, y=470 + 10)
        self.button001 = Button(master=self.master, text='关闭程序', command=self.over)
        self.button001.place(x=270, y=470 + 10)
        # NOTE(review): assigning the Button over the bound-method attribute name
        # shadows the method on the instance; the command still works because it
        # was bound before the assignment — confirm this is intended.
        self.button_select_model = Button(master=self.master, text='选择模型', command=self.button_select_model)
        self.button_select_model.place(x=500, y=480)
        self.label01 = Label(master=self.master)   # left pane: live camera feed
        self.label01.place(x=30, y=50)
        self.button02 = Button(master=self.master, text='截取图像', command=self.image)
        self.button02.place(x=680, y=470 + 10)
        self.button03 = Button(master=self.master, text='原始图像', command=self.change_origin)
        self.button03.place(x=680 + 70, y=470 + 10)
        self.button04 = Button(master=self.master, text='轮廓图像', command=self.change_outline)
        self.button04.place(x=680 + 140, y=470 + 10)
        self.label00 = Label(master=self.master)   # right pane: snapshot / contour
        self.label00.place(x=630, y=50)
        self.v1 = StringVar(self.master, "预测结果")
        self.label02 = Label(master=self.master, textvariable=self.v1)
        self.label02.place(x=680 + 210, y=470 + 10)
        self.v2 = StringVar(self.master, "正确率")
        self.label03 = Label(master=self.master, textvariable=self.v2)
        self.label03.place(x=780 + 280, y=470 + 10)
        self.button05 = Button(master=self.master, text='正确', command=self.judge_true)
        self.button05.place(x=850 + 140, y=450 + 10)
        self.button06 = Button(master=self.master, text='错误', command=self.judge_fail)
        self.button06.place(x=850 + 140, y=480 + 10)

    def button_select_model(self):
        # Load a pickled SVM model chosen via a file dialog.
        fp = open(tkinter.filedialog.askopenfilename(), 'rb')
        self.model = pickle.load(fp)
        fp.close()

    def over(self):
        # Release the camera (if started) and close the window.
        if self.isstart:
            self.cap.release()
            cv2.destroyAllWindows()  # close all OpenCV windows
        self.master.destroy()

    def change_origin(self):
        # Show the raw snapshot in the right pane.
        if self.isstart and self.cut:
            self.label00.config(image=self.img)

    def change_outline(self):
        # Show the contour image of the snapshot in the right pane.
        if self.isstart and self.cut:
            self.label00.config(image=self.outline)

    def judge_true(self):
        # User marks the latest prediction as correct (counted at most once).
        if self.isstart and self.new:
            self.new = False
            self.true += 1
            self.v2.set(f"正确率:{self.true / (self.true + self.fail):.2%}")

    def judge_fail(self):
        # User marks the latest prediction as wrong (counted at most once).
        if self.isstart and self.new:
            self.new = False
            self.fail += 1
            self.v2.set(f"正确率:{self.true / (self.true + self.fail):.2%}")

    def image(self):
        """Snapshot the current frame, run the descriptor pipeline and predict."""
        if self.isstart:
            if not os.path.exists(f'./cache_image'):
                os.mkdir(f'./cache_image')
            self.new = True
            self.cut = True
            frame1, res = binaryMask(self.frame, (self.frame.shape[1] - self.width) // 2,
                                     (self.frame.shape[0] - self.height) // 2, self.width,
                                     self.height)  # crop + skin-mask the gesture box
            io.imsave('./cache_image/img.gif', (transform.resize(frame1[:, :, ::-1], (
                400, 400 / frame1.shape[0] * frame1.shape[1], frame1.shape[2])) * 255).astype('uint8'))
            self.img = PhotoImage(file='./cache_image/img.gif')
            self.label00.config(image=self.img)
            temp = fourierDesciptor(res)
            out_line = temp[0]  # contour image for display
            io.imsave('./cache_image/out_line.gif', (transform.resize(out_line[:, :], (
                400, 400 / out_line.shape[0] * out_line.shape[1])) * 255).astype('uint8'))
            self.outline = PhotoImage(file='./cache_image/out_line.gif')
            # Normalize descriptor magnitudes the same way as at training time.
            descirptor_in_use = abs(temp[1])
            temp = descirptor_in_use[1]
            X_test = []
            for k in range(1, len(descirptor_in_use)):
                x_record = int(100 * descirptor_in_use[k] / temp)
                X_test.append(x_record)
            X_test = np.array(X_test)
            pred = self.model.predict(X_test.reshape(1, -1))  # assumes a model was loaded
            self.v1.set(f"预测结果:{int(pred[0])}")

    def video(self):
        # Start the capture loop in a daemon thread (only once).
        if not self.isstart:
            self.isstart = True
            t1 = threading.Thread(target=self.open_video)
            t1.setDaemon(True)   # NOTE: deprecated alias for `t1.daemon = True`
            t1.start()

    def open_video(self):
        # Camera loop: draw the capture box and refresh the left pane.
        # NOTE(review): unlike image(), this loop does not create ./cache_image
        # first — imsave may fail if no snapshot has ever been taken; verify.
        self.width, self.height = 400, 400  # capture-box size
        self.cap = cv2.VideoCapture(0)      # open default camera
        while True:
            ret, frame = self.cap.read()
            self.frame = cv2.flip(frame, 1)  # mirrored copy for a natural preview
            frame1 = cv2.rectangle(self.frame,
                                   ((frame.shape[1] - self.width) // 2, (frame.shape[0] - self.height) // 2), (
                                       (frame.shape[1] - self.width) // 2 + self.width,
                                       (frame.shape[0] - self.height) // 2 + self.height),
                                   (0, 255, 0))
            io.imsave('./cache_image/abc.gif', (transform.resize(frame1[:, :, ::-1], (
                400, 400 / frame1.shape[0] * frame1.shape[1], frame1.shape[2])) * 255).astype('uint8'))
            self.video_ = PhotoImage(file='./cache_image/abc.gif')
            self.label01.config(image=self.video_)
            abc = None
            abc = self.video_   # keep a ref so Tk doesn't GC the image (prevents flicker)
- 执行程序:
# Entry point: build the main window and hand control to the Tk event loop.
if __name__ == '__main__':
    root = Tk()
    root.geometry("1200x550")   # window size in pixels (width x height)
    root.title('不就是一个手势识别吗')
    app = Application(root)
    root.mainloop()
查看执行效果:
预测之前需要先选择训练好的模型,然后可以看出我截取的这张图片成功预测出了结果,你还可以点击右侧的正确与错误进行统计准确率。
05 结尾
整个项目就结束了,上面的这么完整人性化的项目,你懂得。文章来源:https://www.toymoban.com/news/detail-765381.html
参考文章:http://t.csdn.cn/4kXbQ
参考文章:http://t.csdn.cn/YIgvZ文章来源地址https://www.toymoban.com/news/detail-765381.html
到了这里,关于课程设计——基于opencv的手势识别【真】完整项目的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!