我是机器学习的新手,想请教一下如何用数据集训练模型。我的数据集是一个文件夹,里面存放着字母图像。下面是我正在编写的代码,目前卡在训练(train)这一步。
import numpy as np
from sklearn.svm import LinearSVC
import os
import cv2
import joblib
# Train a linear SVM on a folder of letter images.
#
# Expected layout: Dataset_folder contains one sub-folder per class, and each
# sub-folder contains the images of that class. The integer label of a class
# is the index of its folder in the sorted folder list.
Dataset_folder = 'Dataset/'

# os.listdir returns entries in arbitrary order, so sort the class folders to
# make the folder -> label mapping reproducible. Entries that are not
# directories are skipped because they cannot hold class images.
directory = sorted(
    name for name in os.listdir(Dataset_folder)
    if os.path.isdir(os.path.join(Dataset_folder, name))
)
folders = directory
print (folders)

# Build samples and labels together, so there is always exactly one label per
# image no matter how many images each class folder holds.
trainset = []
train_label = []
for label, folder in enumerate(folders):
    folder_path = os.path.join(Dataset_folder, folder)
    for f in sorted(os.listdir(folder_path)):
        # Flag 0 loads the image as single-channel grayscale.
        img = cv2.imread(os.path.join(folder_path, f), 0)
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None rather than raising; skip it instead of crashing in resize.
            continue
        # Every sample must have the same size so that all flattened feature
        # vectors have the same length.
        img = cv2.resize(img, (36, 36))
        trainset.append(img)
        train_label.append(label)
print ('done')

if not trainset:
    raise ValueError(
        "No readable images found under {!r}".format(Dataset_folder)
    )

# Flatten each 36x36 image into one row; the row count is taken from the data
# instead of being hard-coded, so it always matches len(train_label).
trainset = np.reshape(trainset, (len(trainset), -1))

# Create a linear SVM object
clf = LinearSVC()

# Perform the training
clf.fit(trainset, train_label)
print("Training finished successfully")