我需要在 scikit-learn 中使用 boosting(提升)级联训练对一些图像进行分类,并且希望基于 HoG 特征进行分类。
我的代码改编自this example。
下面这部分代码是我唯一真正改动过的地方:
import sys
from scipy import misc, ndimage
from skimage import data, io, filter, color, exposure
from skimage.viewer import ImageViewer
from skimage.feature import hog
from skimage.transform import resize
import matplotlib.pyplot as plt
from sklearn.datasets import make_gaussian_quantiles
from sklearn.ensemble import AdaBoostClassifier
from sklearn.externals.six.moves import xrange
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
import pylab as pl
from sklearn.externals.six.moves import zip
# Load the sample image file names, one per line.  The with-statement closes
# the file handle as soon as the names have been read (the original left it
# open for the lifetime of the script).
with open("PATH_TO_SAMPLES_LIST\\samples.txt",'r') as f:
    out = f.read().splitlines()
import numpy as np
### THIS IS THE MAIN CHANGES I MADE TO THE CODE
### THE CHANGES ARE ONLY IN ORDER TO GET HOG FEATURES OUT OF IMAGES TO PASS ON TO THE CLASSIFIERS

# Directory holding the resized sample images.  It never changes, so it is
# set once here rather than on every loop iteration.
filepath = "C:\\work_asaaki\\caltech\\cars_brad\\resized\\"

imgs = []      # grayscale images, in the order they are listed in `out`
tmp_hogs = []  # one HoG feature vector per image, same order as `imgs`

# I've omitted the code where I populate an array called "labels", but it's
# just a 1D array with 528 elements, either 1 or 0
for fname in out:
    readfile = filepath + fname
    curr_img = color.rgb2gray(io.imread(readfile))
    imgs.append(curr_img)
    # NOTE(review): hog_image (the rendered visualisation) is not used
    # anywhere in the visible script; presumably visualise=False would avoid
    # computing it -- confirm against the installed skimage version.
    fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(8, 8),
                        cells_per_block=(1, 1), visualise=True, normalise=True)
    tmp_hogs.append(fd)

# Stack the per-image descriptors into one (n_images, n_features) matrix.
img_hogs = np.array(tmp_hogs, dtype=float)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(img_hogs.shape)

# The first n_split samples are used for training, the remainder for testing.
n_split = 508
X_train, X_test = np.array(img_hogs[:n_split]), np.array(img_hogs[n_split:])
y_train, y_test = np.array(labels[:n_split]), np.array(labels[n_split:])

# Fail loudly when the training labels hold only one class: nothing can be
# learned from such a set, and the error plots drawn later come out empty.
if np.unique(y_train).size < 2:
    raise ValueError(
        "y_train contains a single class; the training set needs both "
        "positive (1) and negative (0) samples")
其余代码来自链接上的示例:
#### THE CODE BELOW IS TAKEN DIRECTLY FROM THE EXAMPLE
# Two boosted ensembles of depth-2 decision trees, up to 600 estimators each.
# bdt_real keeps the library's default boosting algorithm (it is labelled
# 'SAMME.R' in the plots further down); bdt_discrete explicitly requests
# "SAMME" and uses a larger learning rate.
bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2), n_estimators=600, learning_rate=1)
bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2), n_estimators=600,
    learning_rate=1.5, algorithm="SAMME")

# Fit both ensembles on the same training split, real first, then discrete.
for booster in (bdt_real, bdt_discrete):
    booster.fit(X_train, y_train)
# Test error after each boosting stage.  Each ensemble is evaluated on its
# own: pairing the two staged_predict() generators with zip() stops at the
# shorter one, so if the ensembles ended up with different numbers of trees
# an error list would be shorter than its n_trees_* value and the plots
# below would receive x/y sequences of different lengths.
real_test_errors = [
    1. - accuracy_score(y_test, real_test_predict)
    for real_test_predict in bdt_real.staged_predict(X_test)]
discrete_test_errors = [
    1. - accuracy_score(y_test, discrete_test_predict)
    for discrete_test_predict in bdt_discrete.staged_predict(X_test)]

# Number of trees each ensemble actually contains.
n_trees_discrete = len(bdt_discrete)
n_trees_real = len(bdt_real)

# Boosting might terminate early but the following arrays are always
# n_estimators long. We crop them to the actual number of trees here:
discrete_estimator_errors = bdt_discrete.estimator_errors_[:n_trees_discrete]
real_estimator_errors = bdt_real.estimator_errors_[:n_trees_real]
discrete_estimator_weights = bdt_discrete.estimator_weights_[:n_trees_discrete]
# Draw three side-by-side panels: test error, per-estimator error, and
# per-estimator weight, each against the number of trees.
plt.figure(figsize=(15, 5))

# Margin added around the data when the default y-window has to be widened.
y_pad = 0.05

# --- Panel 1: test error of both ensembles -------------------------------
plt.subplot(131)
plt.plot(xrange(1, n_trees_discrete + 1),
         discrete_test_errors, c='black', label='SAMME')
plt.plot(xrange(1, n_trees_real + 1),
         real_test_errors, c='black',
         linestyle='dashed', label='SAMME.R')
plt.legend()
# The window (0.18, 0.62) was tuned to the example's dataset.  Keep it as
# the default but widen it whenever the data falls outside, so the curves
# are never clipped out of view.
shown_test_errors = (
    (list(discrete_test_errors) + list(real_test_errors)) or [0.18, 0.62])
plt.ylim(min(0.18, min(shown_test_errors) - y_pad),
         max(0.62, max(shown_test_errors) + y_pad))
plt.ylabel('Test Error')
plt.xlabel('Number of Trees')

# Debug output; single-argument print(...) behaves the same on Python 2
# and Python 3.
print("n trees")
print(n_trees_discrete)
print("discrete_test_errors")
print(bdt_discrete.estimator_errors_.shape)

# --- Panel 2: training error of each individual estimator ----------------
plt.subplot(132)
plt.plot(xrange(1, n_trees_discrete + 1), discrete_estimator_errors,
         "b", label='SAMME', alpha=.5)
plt.plot(xrange(1, n_trees_real + 1), real_estimator_errors,
         "r", label='SAMME.R', alpha=.5)
plt.legend()
plt.ylabel('Error')
plt.xlabel('Number of Trees')
# Same idea as above: start from the example's limits (.2 up to 1.2 * max)
# and widen them when the estimator errors lie outside that range.
error_floor = min(real_estimator_errors.min(),
                  discrete_estimator_errors.min())
error_ceiling = max(real_estimator_errors.max(),
                    discrete_estimator_errors.max())
plt.ylim((min(.2, error_floor - y_pad),
          max(error_ceiling * 1.2, error_ceiling + y_pad)))
plt.xlim((-20, n_trees_discrete + 20))

print("plotting...")

# --- Panel 3: weight of each estimator in the SAMME ensemble -------------
plt.subplot(133)
plt.plot(xrange(1, n_trees_discrete + 1), discrete_estimator_weights,
         "b", label='SAMME')
plt.legend()
plt.ylabel('Weight')
plt.xlabel('Number of Trees')
plt.ylim((0, discrete_estimator_weights.max() * 1.2))
plt.xlim((-20, n_trees_discrete + 20))

# prevent overlapping y-axis labels
plt.subplots_adjust(wspace=0.25)
plt.show()
我的问题是:这是基于 HoG 特征对图像进行分类的正确方法吗?
共有 528 张图片,最初的尺寸是 240x360。
但当我打印出 img_hogs 的形状时,得到的是:
(528L, 2640L)
有人告诉我,之所以没有曲线可以绘制,是因为分类(boosting)提前终止了,原因是特征数比图像数还多。所以我把图像缩小到了 20x30。
现在当我打印 img_hogs 的形状时,得到的是:
(528L, 48L)
但我仍然得不到任何结果。两种情况下都只画出了坐标轴,图中没有任何曲线。
答案 0 :(得分:1)
我本想删除这个问题,但考虑到其他刚接触分类的新手可能会遇到同样的问题,所以还是自己来回答一下。
问题出在一个非常愚蠢的错误上——我的训练标签数组 labels 中没有负样本,它完全由同一个类别的标签组成。