I have been working on a hand detector for some time, with the goal of detecting hand gestures from pictures/video/webcam. I implemented the project in Python 2.7 using OpenCV and sklearn.
Finally, I got the hand detector to the point where it detects the hand using the following technique:
Hand detection from a picture works well, but my problems start when I try to recognize the correct gesture class. I do the classification by training on pictures with algorithms such as SVM, KNN and RandomForest, and that is the technique I don't want to change. I have 6 different gestures (6 classes), a training set of about 100 pictures/class and a test set of about 10 pictures/class. The training and test images are produced with the same technique described above and converted to grayscale .bmp images. After that I resize the pictures to the same size and build a .pkl dataset in the same style as MNIST. Then I trained the models with the following features:
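To make the preprocessing concrete, here is a minimal sketch of how a dataset in that MNIST-like format could be built. The folder layout and file names are my own assumptions, but the output matches what generateClassifiers.py below loads: a pickled (samples, labels) pair of flattened 150x100 grayscale images.

import os
import pickle
import cv2
import numpy as np

samples, labels = [], []
for label in range(6):                      # one folder per gesture class
    folder = "gestures/%d" % label          # hypothetical folder layout
    for name in os.listdir(folder):
        img = cv2.imread(os.path.join(folder, name), cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (100, 150))   # same fixed size for every sample
        samples.append(img.flatten())       # flatten, like the MNIST format
        labels.append(label)

with open('handdetection.pkl', 'wb') as f:
    pickle.dump((np.array(samples), np.array(labels)), f)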
After training I got very good model results (around 0.95-0.97 for the best models' predictions), and the confusion matrices also look good, so I think the models have learned properly.
The problem: the classifier classifies, but most of the time incorrectly. At first I thought I should increase the dataset size, but then I noticed that some people manage to recognize gestures with only 1 pic/class, so now I think I am doing something wrong. My models should also work, because the technique is the same as for the MNIST handwritten digits, where almost every digit gets classified correctly by my models. The problem could also lie in the HOG parameters, since HOG is not very familiar to me. Also, my dataset pictures are cropped so that there is no space around the hand posture, which could influence the results. If anyone can see where I am failing, I would be very grateful.
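On the HOG side, one thing worth checking is the sheer feature dimensionality these parameters produce. A small sketch to verify it (assuming an older skimage version that still spells the flag visualise, as in the scripts below):

import numpy as np
from skimage.feature import hog

dummy = np.zeros((150, 100))  # same size as the dataset images
fd = hog(dummy, orientations=9, pixels_per_cell=(5, 5),
         cells_per_block=(2, 2), visualise=False)
# 30x20 cells -> 29x19 blocks, each with 2*2 cells * 9 orientations = 36 values
print(len(fd))  # 19836 features

Roughly 20,000 features against only about 100 images per class is a regime where overfitting is easy, regardless of the classifier.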
EDIT 1: I have included the detectHand and generateClassifiers files here, because the cloud service did not work out. You have to take test pictures of your hand together with your face, and adjust the HSV parameters to get the hand thresholded.
detectHand.py
import cv2
import numpy as np
import argparse as ap
from sklearn.externals import joblib
from skimage.feature import hog
def callback(x):
    pass

parser = ap.ArgumentParser()
parser.add_argument("-c", "--classiferPath", help="Path to Classifier File", required="True")
parser.add_argument("-i", "--image", help="Path to Image", required="True")
args = vars(parser.parse_args())
# Load the classifier
clf, pp = joblib.load(args["classiferPath"])
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')
cv2.namedWindow('HSV')
# create trackbars for color change
cv2.createTrackbar('MinH','HSV',0,255, callback) # Adjust your hand to get thresholded with HSV adjuster
cv2.createTrackbar('MaxH','HSV',25,255, callback)
cv2.createTrackbar('MinS','HSV',86,255, callback)
cv2.createTrackbar('MaxS','HSV',180,255, callback)
cv2.createTrackbar('MinV','HSV',131,255, callback)
cv2.createTrackbar('MaxV','HSV',255,255, callback)
while True:
    # read and resize image
    im = cv2.imread(args["image"])
    im = cv2.resize(im, (960, 540))
    # get current position of six trackbars
    MinH = cv2.getTrackbarPos('MinH', 'HSV')
    MaxH = cv2.getTrackbarPos('MaxH', 'HSV')
    MinS = cv2.getTrackbarPos('MinS', 'HSV')
    MaxS = cv2.getTrackbarPos('MaxS', 'HSV')
    MinV = cv2.getTrackbarPos('MinV', 'HSV')
    MaxV = cv2.getTrackbarPos('MaxV', 'HSV')
    blur = cv2.blur(im, (3, 3))
    # convert BGR to HSV and threshold to the selected skin-color range
    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
    lower = np.array([MinH, MinS, MinV])
    upper = np.array([MaxH, MaxS, MaxV])
    mask2 = cv2.inRange(hsv, lower, upper)
    # Kernel matrices for morphological transformation
    kernel_square = np.ones((11, 11), np.uint8)
    kernel_ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    # Perform morphological transformations to filter out background noise:
    # dilation grows the skin-colored area, erosion then removes small specks
    dilation = cv2.dilate(mask2, kernel_ellipse, iterations=1)
    erosion = cv2.erode(dilation, kernel_square, iterations=1)
    filtered = cv2.medianBlur(erosion, 5)
    ret, thresh = cv2.threshold(filtered, 127, 255, 0)
    # detect faces in the picture and mask them out
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)
    faces = face_cascade.detectMultiScale(gray, 1.3, 3, minSize=(20, 20), flags=cv2.CASCADE_SCALE_IMAGE)
    for (x, y, w, h) in faces:
        # black out each detected face so it is not picked up as the hand
        cv2.rectangle(thresh, (x, y), (x + w, y + h), (0, 0, 0), cv2.FILLED)
    im2, contours, hier = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    max_area = 100
    ci = 0
    for i in range(len(contours)):
        cnt = contours[i]
        area = cv2.contourArea(cnt)
        if area > max_area:
            max_area = area
            ci = i
    # Largest area contour
    im1 = im.copy()
    cnts = contours[ci]
    rect = cv2.boundingRect(cnts)
    x1, y1, w1, h1 = rect
    # center of palm
    maxdistance = 0
    pt = (0, 0)
    for index_y in range(int(y1 + 0.25 * h1), int(y1 + 0.8 * h1)):
        for index_x in range(int(x1 + 0.3 * w1), int(x1 + 0.9 * w1)):
            distance = cv2.pointPolygonTest(cnts, (index_x, index_y), True)
            if distance > maxdistance:
                maxdistance = distance
                pt = (index_x, index_y)
    cv2.circle(im1, pt, int(maxdistance), (255, 0, 0), 2)
    cv2.rectangle(im1, (x1, y1), (x1 + w1, pt[1] + int(maxdistance)), (0, 0, 255), 3)
    cropped_image = thresh[y1:pt[1] + int(maxdistance), x1:x1 + w1]
    #edged = cv2.Canny(cropped_image, 100, 200)
    roi = cv2.resize(cropped_image, (100, 150), interpolation=cv2.INTER_AREA)
    # Calculate the HOG features
    roi_hog_fd = hog(roi, orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=False)
    roi_hog_fd = pp.transform(np.array([roi_hog_fd], 'float64'))
    nbr = clf.predict(roi_hog_fd)
    cv2.putText(im1, str(nbr[0]), (x1, y1), cv2.FONT_HERSHEY_DUPLEX, 2, (0, 255, 255), 3)
    cv2.imshow('Output', im1)
    cv2.imshow('Hand', cropped_image)
    cv2.imshow('roi', roi)
    c = cv2.waitKey(5)
    if c == 27:
        break

cv2.destroyAllWindows()
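For reference, running the detector would look something like this (the file names are just examples; any of the .pkl models written by generateClassifiers.py below works): python detectHand.py -c model4svm.pkl -i testpicture.jpg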
generateClassifiers.py
#!/usr/bin/python
# Import the modules
from sklearn.externals import joblib
import pickle
from skimage.feature import hog
from sklearn import preprocessing
import numpy as np
from collections import Counter
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
import cv2
def ModelRandomQuessing(hog_features, labels, pp):
    model = "RandomQuessing"
    clf = DummyClassifier()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model1randomquessing.pkl", compress=3)
    return (model, clf)

def ModelLinearSVM(hog_features, labels, pp):
    model = "LinearSVM"
    clf = SGDClassifier(n_jobs=-1)
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model2linearsvm.pkl", compress=3)
    return (model, clf)

def ModelKNN(hog_features, labels, pp):
    model = "KNearestNeighbors"
    clf = KNeighborsClassifier(n_jobs=-1)
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model3knn.pkl", compress=3)
    return (model, clf)

def ModelSVM(hog_features, labels, pp):
    model = "SupportVectorMachine"
    clf = SVC(kernel="rbf")
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model4svm.pkl", compress=3)
    return (model, clf)

def ModelDecisionTree(hog_features, labels, pp):
    model = "DecisionTree"
    clf = DecisionTreeClassifier()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model5decisiontree.pkl", compress=3)
    return (model, clf)

def ModelRandomForest(hog_features, labels, pp):
    model = "RandomForest"
    clf = RandomForestClassifier()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model6randomforest.pkl", compress=3)
    return (model, clf)

def ModelAdaboost(hog_features, labels, pp):
    model = "Adaboost"
    clf = AdaBoostClassifier()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model7adaboost.pkl", compress=3)
    return (model, clf)

def ModelGaussianNB(hog_features, labels, pp):
    model = "GaussianNaiveBayes"
    clf = GaussianNB()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model8gaussiannb.pkl", compress=3)
    return (model, clf)

def ModelLDA(hog_features, labels, pp):
    model = "LinearDiscriminantAnalysis"
    clf = LinearDiscriminantAnalysis()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model9lda.pkl", compress=3)
    return (model, clf)

def ModelQDA(hog_features, labels, pp):
    model = "QuadraticDiscriminantAnalysis"
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model10qda.pkl", compress=3)
    return (model, clf)

def ModelLogisticRegression(hog_features, labels, pp):
    model = "LogisticRegression"
    clf = LogisticRegression(n_jobs=-1)
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model11logisticregression.pkl", compress=3)
    return (model, clf)

def ModelMLP(hog_features, labels, pp):
    model = "MultilayerPerceptron"
    clf = MLPClassifier(activation='relu', hidden_layer_sizes=(200, 200), solver='lbfgs', alpha=10, verbose=True)
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model12mlp.pkl", compress=3)
    return (model, clf)

def ModelBestKNN(hog_features, labels, pp):
    model = "BestKNearestNeighbors"
    clf = KNeighborsClassifier(n_jobs=-1, weights='distance', n_neighbors=4)
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model13bestknn.pkl", compress=3)
    return (model, clf)

def ModelBestSVM(hog_features, labels, pp):
    model = "BestSupportVectorMachine"
    clf = SVC(kernel='rbf', cache_size=2000, C=10.0, gamma='auto', class_weight='balanced')
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model14bestsvm.pkl", compress=3)
    return (model, clf)

def ModelBestRandomForest(hog_features, labels, pp):
    model = "BestRandomForest"
    clf = RandomForestClassifier(n_jobs=-1, n_estimators=500, max_features='auto')
    clf.fit(hog_features, labels)
    joblib.dump((clf, pp), "model15bestrf.pkl", compress=3)
    return (model, clf)

def accuracy(modelclf, X_test, Y_test):
    model, clf = modelclf
    predicted = clf.predict(X_test)
    print("Classification report for classifier %s:\n%s\n"
          % (model, classification_report(Y_test, predicted)))
    print("Confusion matrix:\n%s" % confusion_matrix(Y_test, predicted))
if __name__ == '__main__':
    # Load the dataset
    with open('handdetection.pkl', 'rb') as f:
        data = pickle.load(f)
    # Extract the features and labels
    X = data[0]
    Y = data[1]
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1)
    # Extract the hog features
    list_X_train = []
    for trainsample in X_train:
        fd, hog_image = hog(trainsample.reshape((150, 100)), orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=True)
        list_X_train.append(fd)
    X_train = np.array(list_X_train, 'float64')
    # Normalize the features
    pp = preprocessing.StandardScaler().fit(X_train)
    X_train = pp.transform(X_train)
    # Same for the test set
    list_X_test = []
    for testsample in X_test:
        fd = hog(testsample.reshape((150, 100)), orientations=9, pixels_per_cell=(5, 5), cells_per_block=(2, 2), visualise=False)
        list_X_test.append(fd)
    X_test = np.array(list_X_test, 'float64')
    # reuse the scaler fitted on the training data; fitting a second scaler
    # on the test set would not match what detectHand.py applies at runtime
    X_test = pp.transform(X_test)
    print("Count of samples per class in dataset", Counter(Y_train))
    #accuracy(ModelRandomQuessing(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelLinearSVM(X_train, Y_train, pp), X_test, Y_test)
    accuracy(ModelKNN(X_train, Y_train, pp), X_test, Y_test)
    accuracy(ModelSVM(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelDecisionTree(X_train, Y_train, pp), X_test, Y_test)
    accuracy(ModelRandomForest(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelAdaboost(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelGaussianNB(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelLDA(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelQDA(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelLogisticRegression(X_train, Y_train, pp), X_test, Y_test)
    accuracy(ModelMLP(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelBestKNN(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelBestSVM(X_train, Y_train, pp), X_test, Y_test)
    #accuracy(ModelBestRandomForest(X_train, Y_train, pp), X_test, Y_test)
    # show the HOG visualization of the last training sample
    while True:
        cv2.imshow('hog', hog_image)
        c = cv2.waitKey(5)
        if c == 27:
            break
    cv2.destroyAllWindows()
I will try Dropbox once more for the face haar-cascade and the handdetection.pkl dataset. Put all of these files in the same folder.
Haar cascade: https://www.dropbox.com/s/zdc096drhbr1sx3/haarcascade_frontalface_default.xml?dl=0
Dataset: https://www.dropbox.com/s/pieywxg8rl8rsw4/handdetection.pkl?dl=0
Answer 0 (score: 0)
"After training I got very good model results (around 0.95-0.97 for the best models' predictions), and the confusion matrices also look good, so I think the models have learned properly.

The problem: the classifier classifies, but most of the time incorrectly."
This can indicate overfitting, if no validation set was held out and used to tune the parameters of whichever model is finally used for prediction. If that is the case, something like grid search cross-validation may help.
You can tell that a model is overfitting when performance on the validation set starts to suffer while performance on the training set is still improving.
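A minimal sketch of what grid search cross-validation could look like for the SVM here, reusing the variable names from generateClassifiers.py above (the parameter grid values are just examples):

from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# search C/gamma on the training set only; cv=5 keeps a held-out fold
# for validation in every round, which guards against overfitting
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': ['auto', 0.001, 0.01]}
search = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=5, n_jobs=-1)
search.fit(X_train, Y_train)
print(search.best_params_, search.best_score_)
# only touch the test set once, with the chosen model
print(search.score(X_test, Y_test))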
Answer 1 (score: 0)
I found the solution, so I am answering my own question. The keyword is "Hu Moments". My training dataset was so small that training an SVM classifier on HOG features extracted from binary images simply would not work. So instead I computed the Hu Moments for every picture in the dataset and trained the SVM on the moments, and that worked very well. Performance improved further by taking the signed logarithm of the absolute values of the Hu Moments. After that I used the SVM again, and it also worked on webcam pictures!
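For reference, a minimal sketch of that feature extraction as I understand it (the exact log-transform convention is an assumption; binary_img stands for a thresholded hand mask such as cropped_image in detectHand.py):

import cv2
import numpy as np

def hu_features(binary_img):
    # the 7 Hu Moments of the binary hand silhouette
    hu = cv2.HuMoments(cv2.moments(binary_img)).flatten()
    # signed log transform: compresses the huge dynamic range of the raw
    # moments while preserving their sign (epsilon avoids log(0))
    return -np.sign(hu) * np.log10(np.abs(hu) + 1e-30)

The resulting 7-element vectors then take the place of the ~20k-dimensional HOG features as the SVM input, which is far better matched to a dataset of ~100 images per class.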