Question

这是一个字母识别任务，共有284张图像和19个类别。我想使用朴素贝叶斯分类器。首先，我必须将每张图像转换为特征向量；为了减少多余的信息，我应该使用一些特征选择代码，例如裁剪图像以去除多余的黑色边框。但我在Python方面经验不足。

如何裁剪掉图像中的黑色空白区域，以减小csv文件的大小？（因为列数比预期的要多！）另外，如何将所有图像调整为相同的大小？

from PIL import Image, ImageChops
from resize import trim
import numpy as np
import cv2
import os
import csv

#Useful function
def createFileList(myDir, format='.jpg'):
    """Return the paths of all files under *myDir* whose names end with *format*.

    The directory tree is walked bottom-up (sub-directories are visited
    before their parents), and each matching file name is joined with the
    directory it was found in. The directory being scanned is printed
    before the walk starts.

    Args:
        myDir: Root directory to search recursively.
        format: File-name suffix to match (case-sensitive), e.g. ``'.jpg'``.

    Returns:
        A list of path strings, in the order ``os.walk`` produced them.
    """
    print(myDir)
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(myDir, topdown=False)
        for entry in entries
        if entry.endswith(format)
    ]

# Convert every image found under 'image_ocr' to greyscale and append its
# pixel values to trainData.csv, one row per image.
myFileList = createFileList('image_ocr')

# Open the CSV once for the whole run instead of once per image.
# newline='' is what the csv module expects of its file object; without it
# extra blank rows appear on platforms that translate '\n' into '\r\n'.
with open("trainData.csv", 'a', newline='') as f:
    writer = csv.writer(f)
    for file in myFileList:
        # The context manager releases the image's file handle as soon as
        # the greyscale copy has been made.
        with Image.open(file) as img_file:
            # Make image greyscale ('L' = single 8-bit channel)
            img_grey = img_file.convert('L')

        # Flatten the pixels row by row into a 1-D vector. The builtin int
        # replaces the np.int alias, which was removed in NumPy 1.24.
        # NOTE: the row length equals width * height of this image, so
        # images of different sizes yield rows of different lengths.
        value = np.asarray(img_grey, dtype=int).flatten()
        writer.writerow(value)

使用python进行特征选择

0 个答案: