我在网上找到了这个脚本,想用它对图像进行分类,但我对 Python 并不熟练。该脚本根据文件名读取标签:所有图像的文件名都以 cat 或 dog 开头。我想改为建立两个训练文件夹,让训练程序根据文件夹名称(而不是文件名)来识别图像的类别。
import cv2 # working with, mainly resizing, images
import numpy as np # dealing with arrays
import os # dealing with directories
from random import shuffle # mixing up our currently ordered data that might lead our network astray in training.
from tqdm import tqdm # a nice pretty percentage bar for tasks. Thanks to viewer Daniel Bühler for this suggestion
# Directory whose files are read by create_train_data().
TRAIN_DIR = '/home/connor/Desktop/TensorFlow/SRproj/training_images/train'
# Directory whose files are read by process_test_data().
TEST_DIR = '/home/connor/Desktop/TensorFlow/SRproj/training_images/test1'
# Every image is resized to IMG_SIZE x IMG_SIZE pixels.
IMG_SIZE = 50
# Only used here to build MODEL_NAME; presumably the learning rate -- confirm against the training code.
LR = 1e-3
# Name string built from LR and a fixed tag; presumably used when saving the model (not shown here).
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')
def label_img(img):
    """Return the one-hot class label encoded in an image file name.

    The class is taken from the third dot-separated field counted from the
    end of the name, so names are expected to look like
    ``<class>.<index>.<ext>`` (for example ``cat.0.jpg``).

    Args:
        img: File name of the image (a bare name, not a full path).

    Returns:
        ``[1, 0]`` for a cat image, ``[0, 1]`` for a dog image.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields,
            or if its class field is neither ``cat`` nor ``dog``.
    """
    fields = img.split('.')
    if len(fields) < 3:
        raise ValueError(
            "cannot read a class label from file name {!r}".format(img))
    word_label = fields[-3]
    # conversion to one-hot array [cat,dog]
    # [much cat, no dog]
    if word_label == 'cat':
        return [1, 0]
    # [no cat, very doggo]
    if word_label == 'dog':
        return [0, 1]
    # Fail loudly: falling through would hand None to the caller as a label.
    raise ValueError(
        "unknown class label {!r} in file name {!r}".format(word_label, img))
def create_train_data():
    """Load, label, shuffle and cache the images found in TRAIN_DIR.

    Every file listed in TRAIN_DIR is labelled with label_img(), read as a
    grayscale image and resized to IMG_SIZE x IMG_SIZE. The resulting
    ``[image, label]`` pairs are shuffled and written to ``train_data.npy``
    in the current working directory as an object array of shape ``(N, 2)``;
    reload it with ``np.load('train_data.npy', allow_pickle=True)``.

    Files that OpenCV cannot read are reported and skipped.

    Returns:
        list: ``[image_array, label_array]`` pairs in shuffled order.
    """
    training_data = []
    for name in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(name)
        path = os.path.join(TRAIN_DIR, name)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; passing that on to cv2.resize would abort the run.
            tqdm.write('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)
    # Each row pairs a 2-D image with a 1-D label, i.e. the rows are ragged.
    # Recent NumPy versions refuse to turn such a list into an array
    # implicitly, so fill an explicit object array before saving.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (image, label) in enumerate(training_data):
        packed[row, 0] = image
        packed[row, 1] = label
    np.save('train_data.npy', packed)
    return training_data
def process_test_data():
    """Load, shuffle and cache the images found in TEST_DIR.

    Every file listed in TEST_DIR is read as a grayscale image and resized
    to IMG_SIZE x IMG_SIZE. It is paired with the part of its file name that
    precedes the first dot. The ``[image, img_num]`` pairs are shuffled and
    written to ``test_data.npy`` in the current working directory as an
    object array of shape ``(N, 2)``; reload it with
    ``np.load('test_data.npy', allow_pickle=True)``.

    Files that OpenCV cannot read are reported and skipped.

    Returns:
        list: ``[image_array, img_num]`` pairs in shuffled order, where
        ``img_num`` is a string.
    """
    testing_data = []
    for name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, name)
        img_num = name.split('.')[0]
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals an unreadable file by returning None rather
            # than raising; passing that on to cv2.resize would abort the run.
            tqdm.write('skipping unreadable image: {}'.format(path))
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(image), img_num])
    shuffle(testing_data)
    # Each row pairs a 2-D image with a string, i.e. the rows are ragged.
    # Recent NumPy versions refuse to turn such a list into an array
    # implicitly, so fill an explicit object array before saving.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (image, img_num) in enumerate(testing_data):
        packed[row, 0] = image
        packed[row, 1] = img_num
    np.save('test_data.npy', packed)
    return testing_data
# Build the training set; create_train_data() also writes it to train_data.npy.
train_data = create_train_data()
答案 0 :(得分:1)
假设您的文件夹名称是:
# One directory per class; the directory name ("cat" / "dog") is what label_img() reads as the label.
TRAIN_DIRS = ['/path/to/dataset/train/cat', '/path/to/dataset/train/dog']
您只需修改 label_img() 函数,让它从图像所在的父目录名(而不是文件名)中解析出图像的类别:
def label_img(img):
    """Return the one-hot class label of an image from its parent directory.

    Args:
        img: Path of the image including its class directory, e.g.
            ``/path/to/dataset/train/cat/img1.png``. A bare file name
            carries no directory and is rejected.

    Returns:
        ``[1, 0]`` when the parent directory is named ``cat``,
        ``[0, 1]`` when it is named ``dog``.

    Raises:
        ValueError: If the parent directory name is neither ``cat`` nor
            ``dog``.
    """
    # Extract parent directory, supposedly the class label, from full path
    # (e.g. "/path/to/dataset/train/cat/img1.png" --> "cat")
    word_label = os.path.basename(os.path.dirname(img))
    # conversion to one-hot array [cat,dog]
    # [much cat, no dog]
    if word_label == 'cat':
        return [1, 0]
    # [no cat, very doggo]
    if word_label == 'dog':
        return [0, 1]
    # Fail loudly: falling through would hand None to the caller as a label.
    raise ValueError(
        "unknown class directory {!r} for image {!r}".format(word_label, img))
您还需要修改遍历文件夹和图片的方式。例如,把下面这一行:
for img in tqdm(os.listdir(TRAIN_DIR)): # ...
替换为:
# "class_dir" instead of "dir" so the builtin dir() is not shadowed.
for class_dir in tqdm(TRAIN_DIRS):
    for img in tqdm(os.listdir(class_dir)):
        # os.listdir() yields bare file names. label_img() reads the class
        # from the parent directory, so it must be given the full path.
        path = os.path.join(class_dir, img)
        label = label_img(path)
        # ... (read, resize and append the image exactly as before)