我正在尝试用 TensorFlow 识别最多 5 位数字的验证码。
下面的代码用于读取并处理文件夹中的图像:
# Captcha image geometry (the flattened image length is IMAGE_HEIGHT * IMAGE_WIDTH)
# and the number of character slots encoded per captcha label.
IMAGE_WIDTH, IMAGE_HEIGHT = 160, 60
CAPTCHA_LENGTH = 5

# Global constants describing the captcha data set.
NUM_CLASSES = 10 + 26  # per-character classes: 10 digits plus 26 lowercase letters
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 1000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 100

# Directories that hold the training and evaluation images.
training_folder = "train"
testing_folder = "validation"
def load_training_dataset():
    """Load every entry of ``training_folder`` through ``load_dataset``.

    Returns:
        Whatever ``load_dataset`` returns for the range covering all
        directory entries (an ``(X, Y)`` pair).
    """
    return load_dataset(training_folder, 0, len(os.listdir(training_folder)))
def load_testing_dataset():
    """Load every entry of ``testing_folder`` through ``load_dataset``.

    Returns:
        Whatever ``load_dataset`` returns for the range covering all
        directory entries (an ``(X, Y)`` pair).
    """
    return load_dataset(testing_folder, 0, len(os.listdir(testing_folder)))
def normalize_data(X):
    """Standardize each column of ``X`` to zero mean and (roughly) unit variance.

    Statistics are taken along axis 0, i.e. over the rows that are passed in.
    A small constant is added to the standard deviation so that constant
    columns do not cause a division by zero.

    Args:
        X: 2-D NumPy array, one example per row.

    Returns:
        A new array of the same shape with the standardized values.
    """
    centered = X - X.mean(axis=0)
    scale = X.std(axis=0) + 0.00001
    return centered / scale
def training_dataset_length():
    """Return the number of directory entries found in ``training_folder``."""
    entries = os.listdir(training_folder)
    return len(entries)
def load_dataset(folder, fromPos, toPos):
    """Load a slice of the images in ``folder`` together with their labels.

    File names are expected to look like ``<index>_<captcha text>.<ext>``;
    the text between the first underscore and the following dot is the label.

    Args:
        folder: Directory containing the captcha images.
        fromPos: Index (inclusive) of the first file of the slice.
        toPos: Index (exclusive) of the last file of the slice.

    Returns:
        A tuple ``(X, Y)``: ``X`` holds one flattened grayscale image per row
        (normalized with ``normalize_data``), ``Y`` the matching label vector
        produced by ``label_util.words_to_vec``. Both have one row per file
        actually present in the requested slice.
    """
    # os.listdir returns entries in arbitrary order; sort so that consecutive
    # slices taken by separate calls always partition the same sequence.
    batch_files = sorted(os.listdir(folder))[fromPos:toPos]
    # Size the arrays by the files actually found: a slice reaching past the
    # end of the directory must not leave all-zero phantom examples behind.
    n_examples = len(batch_files)
    X = np.zeros([n_examples, IMAGE_HEIGHT * IMAGE_WIDTH])
    Y = np.zeros([n_examples, CAPTCHA_LENGTH * NUM_CLASSES])
    for i, filename in enumerate(batch_files):
        # Plain string concatenation would drop the path separator
        # ("train" + "1_42.png" -> "train1_42.png").
        path = os.path.join(folder, filename)
        img = imread(path, flatten=True)
        captcha_text = filename.split('_')[1].split('.')[0]
        X[i, :] = img.flatten()
        Y[i, :] = label_util.words_to_vec(captcha_text)
    if n_examples:
        # NOTE(review): statistics are computed per call, so every batch is
        # normalized with its own mean/std - confirm this is intended.
        X = normalize_data(X)
    return X, Y
def tf_load_dataset(folder):
    """Build a TensorFlow queue-based reader for the JPEG files in ``folder``.

    Args:
        folder: Directory that is searched for ``*.jpg`` files.

    Returns:
        A tuple ``(image, Y)`` where ``image`` is the decoded-image tensor
        produced by the reader and ``Y`` is a zero-filled label array with
        one row per matching file.
        NOTE(review): ``Y`` is only allocated here, never filled in - the
        original function stopped at this point; confirm the intended use.
    """
    import glob  # local import: only this function needs it

    pattern = folder + "/*.jpg"
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once(pattern))
    image_reader = tf.WholeFileReader()
    _, image_file = image_reader.read(filename_queue)
    image = tf.image.decode_jpeg(image_file)
    # A TensorFlow queue has no len(); count the matching files on disk instead.
    num_files = len(glob.glob(pattern))
    Y = np.zeros([num_files, 5 * NUM_CLASSES])
    return image, Y
我的图片文件名形如 "index_imagetext.png",因此我按如下方式处理标签。对于长度小于 5 的验证码,我会用一个额外的字符把它补齐。实际上我并不需要 36 个类别,因为我的验证码只包含数字,11 个类别(10 个数字加 1 个填充字符)对我来说就足够了;但当我尝试把类别数减少到 11 时,程序报出了关于标签数组形状的错误。我想我还必须修改函数 "char_to_vec_pos(char)",但我不明白它的工作原理。
# Each char in the word can either be a digit 0-9 or a letter a-z,
# giving a total of 36 possible characters.
CHAR_VOCAB_SIZE = 10 + 26
# Number of character slots in one encoded word.
WORD_SIZE = 5
def char_to_vec_pos(char):
    """Return the position of ``char`` inside one per-character one-hot group.

    Digits '0'-'9' map to 0-9 and lowercase letters 'a'-'z' map to 10-35.

    Args:
        char: A single-character string.

    Returns:
        The integer position in the range 0-35.

    Raises:
        ValueError: If ``char`` is neither a digit nor a lowercase a-z letter.
    """
    code = ord(char)
    if ord('0') <= code <= ord('9'):
        return code - ord('0')
    if ord('a') <= code <= ord('z'):
        # Letters are placed right after the ten digit positions.
        return 10 + (code - ord('a'))
    raise ValueError('Wrong character {}'.format(char))
def words_to_vec(word):
    """Encode ``word`` as a concatenation of per-character one-hot groups.

    Words shorter than ``WORD_SIZE`` are right-padded with the letter "b"
    so that every encoded word has at least ``WORD_SIZE`` character groups.
    This includes the empty string, which the previous if-chain left
    unpadded and therefore encoded as a zero-length vector.

    Args:
        word: String made of digits and lowercase letters.

    Returns:
        1-D NumPy array of length ``len(padded word) * CHAR_VOCAB_SIZE`` with
        exactly one 1 per character group.

    Raises:
        ValueError: Propagated from ``char_to_vec_pos`` for unsupported
            characters.
    """
    # NOTE(review): "b" is also a regular vocabulary letter, so padding is
    # only unambiguous when real captchas never contain "b" (e.g. digits only).
    word = word.ljust(WORD_SIZE, "b")
    vec = np.zeros(len(word) * CHAR_VOCAB_SIZE)
    for i, char in enumerate(word):
        # Group i occupies the slice [i * CHAR_VOCAB_SIZE, (i + 1) * CHAR_VOCAB_SIZE).
        vec[i * CHAR_VOCAB_SIZE + char_to_vec_pos(char)] = 1
    return vec
def vec_to_word(vector):
    """Decode a flat one-hot label vector back into its string.

    Every non-zero entry contributes one character; its position modulo
    ``CHAR_VOCAB_SIZE`` selects a digit (0-9) or a lowercase letter (10-35).

    Args:
        vector: 1-D NumPy array as produced by ``words_to_vec``.

    Returns:
        The decoded string, one character per non-zero entry, in index order.

    Raises:
        ValueError: If a position maps outside the 0-35 range.
    """
    chars = []
    for flat_idx in vector.nonzero()[0]:
        slot = flat_idx % CHAR_VOCAB_SIZE
        if slot > 35:
            raise ValueError("Incorrect character code")
        if slot < 10:  # 0-9
            code = ord('0') + slot
        else:  # a-z
            code = ord('a') + (slot - 10)
        chars.append(chr(code))
    return "".join(chars)
def prediction_to_word(prediction_vector):
    """Turn a matrix of per-character scores into the predicted string.

    The highest-scoring column of every row is converted to a one-hot row,
    the result is flattened to ``WORD_SIZE * CHAR_VOCAB_SIZE`` entries and
    decoded with ``vec_to_word``.

    Args:
        prediction_vector: 2-D NumPy array, one row of scores per character.

    Returns:
        The decoded string.
    """
    row_ids = np.arange(len(prediction_vector))
    best_cols = prediction_vector.argmax(1)
    one_hot = np.zeros_like(prediction_vector)
    one_hot[row_ids, best_cols] = 1
    flat = np.reshape(one_hot, WORD_SIZE * CHAR_VOCAB_SIZE)
    return vec_to_word(flat)
def compare_predictions(predictions, labels):
    """Print a table of true versus predicted words.

    Args:
        predictions: Sequence of per-example score matrices accepted by
            ``prediction_to_word``.
        labels: Sequence of flat one-hot label vectors accepted by
            ``vec_to_word``; must have the same length as ``predictions``.

    Raises:
        AssertionError: If the two sequences differ in length.
    """
    # The closing parenthesis used to sit after ``len(labels)``, so the
    # lengths were never compared (and plain lists raised a TypeError).
    assert len(predictions) == len(labels)
    print("True | Predicted")
    for prediction, label in zip(predictions, labels):
        predicted_word = prediction_to_word(prediction)
        true_word = vec_to_word(label)
        print("{:7s}|{:10s}".format(true_word, predicted_word))
最后,是我的网络结构和评估代码:
# Load the complete training set once.
# NOTE(review): the visible training loop re-reads every batch from disk via
# image_reader.load_dataset and does not use train_X / train_Y - confirm
# whether this up-front load is still needed.
train_X, train_Y = image_reader.load_training_dataset()
# One flattened grayscale image per row.
X_input = tf.placeholder(
    tf.float32, [None, image_reader.IMAGE_HEIGHT * image_reader.IMAGE_WIDTH])
# The loader flattens each image row by row (height-major), so the tensor has
# to be restored as [batch, height, width, channels]. Reshaping to
# [-1, 160, 60, 1] would reinterpret the pixels and scramble the picture.
# The element count after three 2x2 pools is unchanged (8 * 20 == 20 * 8).
X = tf.reshape(
    X_input,
    shape=[-1, image_reader.IMAGE_HEIGHT, image_reader.IMAGE_WIDTH, 1])
# One-hot labels: 5 character slots of NUM_CLASSES entries each.
Y_ = tf.placeholder(tf.float32, [None, 5 * image_reader.NUM_CLASSES])
learning_rate = 0.001
def create_fully_connected_weight(shape):
    """Create a weight variable of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def create_conv_weight(patch_height, patch_width, input_channel, output_channel):
    """Create a convolution kernel variable.

    The kernel has shape ``[patch_height, patch_width, input_channel,
    output_channel]`` and is initialized from a truncated normal with
    standard deviation 0.1.
    """
    kernel_shape = [patch_height, patch_width, input_channel, output_channel]
    return tf.Variable(tf.truncated_normal(shape=kernel_shape, stddev=0.1))
def create_bias(shape):
    """Create a bias variable of ``shape`` initialized to 0.1 times a random normal sample."""
    return tf.Variable(0.1 * tf.random_normal(shape=shape))
def create_strides(batch_step, height_step, width_step, channel_step):
    """Pack the four per-dimension step sizes into a list, in argument order."""
    strides = [batch_step, height_step]
    strides.extend((width_step, channel_step))
    return strides
def create_conv_layer(input, W, strides, padding='SAME'):
    """Apply ``tf.nn.conv2d`` to ``input`` with kernel ``W``, ``strides`` and ``padding``."""
    convolved = tf.nn.conv2d(input, W, strides, padding)
    return convolved
def apply_max_pool(x, ksize, strides, padding='SAME'):
    """Apply ``tf.nn.max_pool`` to ``x`` with window ``ksize``, ``strides`` and ``padding``."""
    pooled = tf.nn.max_pool(x, ksize, strides, padding)
    return pooled
# Network definition: three conv/pool/dropout stages followed by two fully
# connected layers. Variables are created in a fixed order; keep that order,
# since auto-generated variable names (and thus saved checkpoints) depend on it.

# Dropout keep-probability, supplied through feed_dict when the graph is run.
keep_prob = tf.placeholder(tf.float32)
# Stage 1: 5x5 convolution, 1 input channel -> 32 feature maps, stride 1.
W1 = create_conv_weight(5, 5, 1, 32)
print("W1 shape:", W1.get_shape())
B1 = create_bias([32])
strides1 = create_strides(1, 1, 1, 1)
Y1 = tf.nn.relu(create_conv_layer(X, W1, strides1, padding="SAME") + B1)
# 2x2 max-pool with stride 2.
Y1 = apply_max_pool(Y1, [1, 2, 2, 1], [1, 2, 2, 1])
Y1 = tf.nn.dropout(Y1, keep_prob=keep_prob)
print(Y1)
# Stage 2: 5x5 convolution, 32 -> 64 feature maps, stride 1.
W2 = create_conv_weight(5, 5, 32, 64)
print("W2 shape:", W2.get_shape())
B2 = create_bias([64])
strides2 = create_strides(1, 1, 1, 1)
Y2 = tf.nn.relu(create_conv_layer(Y1, W2, strides2, padding="SAME") + B2)
Y2 = apply_max_pool(Y2, [1, 2, 2, 1], [1, 2, 2, 1])
Y2 = tf.nn.dropout(Y2, keep_prob=keep_prob)
print(Y2)
# Stage 3: 5x5 convolution, 64 -> 128 feature maps, stride 1.
W3 = create_conv_weight(5, 5, 64, 128)
print("W3 shape:", W3.get_shape())
B3 = create_bias([128])
strides3 = create_strides(1, 1, 1, 1)
Y3 = tf.nn.relu(create_conv_layer(Y2, W3, strides3, padding="SAME") + B3)
Y3 = apply_max_pool(Y3, [1, 2, 2, 1], [1, 2, 2, 1])
Y3 = tf.nn.dropout(Y3, keep_prob=keep_prob)
print(Y3)
# keep_prob = tf.placeholder(tf.float32)
# Flatten for the dense layers. The 20 * 8 * 128 size presumably corresponds
# to a 160-by-60 input halved (rounding up) by each of the three pools, with
# 128 feature maps - TODO confirm against the printed shape of Y3.
Y3 = tf.reshape(Y3, [-1, 20 * 8 * 128])
# Fully connected hidden layer with 1024 units.
W4 = create_fully_connected_weight([20 * 8 * 128, 1024])
print("W4 shape:", W4.get_shape())
B4 = create_bias([1024])
Y4 = tf.nn.relu(tf.matmul(Y3, W4) + B4)
Y4 = tf.nn.dropout(Y4, keep_prob=keep_prob)
print(Y4)
# Output layer: one logit per (character slot, class) pair, 5 * NUM_CLASSES in total.
W5 = create_fully_connected_weight([1024, 5 * image_reader.NUM_CLASSES])
print("W5 shape:", W5.get_shape())
B5 = create_bias([5 * image_reader.NUM_CLASSES])
print(B5)
Ylogits = tf.matmul(Y4, W5) + B5
# print(Ylogits)
# print(Y_.shape)
# Loss: element-wise sigmoid cross-entropy between logits and one-hot labels,
# averaged over every element.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
loss = tf.reduce_mean(cross_entropy)
train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)
# prediction: view logits and labels as [batch, character slot, class] so the
# arg-max over the last axis picks one class per character slot.
predictions = tf.reshape(Ylogits, [-1, 5, image_reader.NUM_CLASSES])
Ytrue = tf.reshape(Y_, [-1, 5, image_reader.NUM_CLASSES])
correct_prediction = tf.equal(tf.argmax(predictions, 2), tf.argmax(Ytrue, 2))
# Define the accuracy: fraction of character slots predicted correctly
# (per character, not per whole captcha).
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# initialize
# tf.initialize_all_variables() is deprecated; this is its replacement.
init = tf.global_variables_initializer()
saver = tf.train.Saver()
sess = tf.Session()
sess.run(init)
# n_classes = image_reader.NUM_CLASSES
batch_size = 250
n_epochs = 15
# Floor division already yields an int; a trailing partial batch is dropped.
n_batches_train = image_reader.training_dataset_length() // batch_size
print("number of batches: %d" % (n_batches_train))
def all_batches_run_train(n_batches, data=None, labels=None):
    """Run one training pass over ``n_batches`` consecutive batches.

    Each batch of ``batch_size`` examples is read from the training folder
    with ``image_reader.load_dataset`` and fed to one optimizer step with a
    dropout keep-probability of 0.75.

    Args:
        n_batches: Number of batches to train on.
        data: Unused; kept so existing callers keep working.
        labels: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(mean_loss, mean_accuracy)``, both weighted by the number
        of samples in each batch.

    Raises:
        ZeroDivisionError: If no samples were processed (e.g. ``n_batches``
            is 0).
    """
    sum_all_batches_loss = 0
    sum_all_batches_acc = 0
    sum_n_samples = 0
    for b in range(n_batches):
        offset = b * batch_size
        batch_data, batch_labels = image_reader.load_dataset(
            image_reader.training_folder, offset, offset + batch_size)
        n_samples = batch_data.shape[0]
        # Feed the labels loaded for this batch; the previous code referenced
        # an undefined name (Y_train) here.
        feed_dict = {X_input: batch_data, Y_: batch_labels, keep_prob: 0.75}
        _, loss_value, a = sess.run([train_step, loss, accuracy], feed_dict=feed_dict)
        sum_all_batches_loss += loss_value * n_samples
        sum_all_batches_acc += a * n_samples
        sum_n_samples += n_samples
        if n_samples != batch_size:
            print('n_samples =%d' % n_samples)
    print("sum of samples trained %d" % (sum_n_samples))
    return (sum_all_batches_loss / sum_n_samples, sum_all_batches_acc / sum_n_samples)
def test_and_evaluate(data=None, labels=None):
    """Evaluate the network on ``data`` and print a prediction report.

    Runs the graph with dropout disabled (keep probability 1), prints the
    true/predicted word table and the per-character correctness matrix.

    Args:
        data: Array of flattened images, one per row.
        labels: Array of one-hot labels with the same number of rows.

    Returns:
        The list ``[predictions, correct_prediction, accuracy]`` as evaluated
        by the session.

    Raises:
        AssertionError: If ``data`` and ``labels`` differ in row count.
    """
    assert (data.shape[0] == labels.shape[0])
    outputs = sess.run(
        [predictions, correct_prediction, accuracy],
        feed_dict={X_input: data, Y_: labels, keep_prob: 1})
    predicted_scores = outputs[0]
    label_util.compare_predictions(predicted_scores, labels)
    print("printing correct predictions")
    print(outputs[1])
    return outputs
# Per-epoch training history used for the plots below.
train_ac = []
train_loss = []
test_ac = []
for epoch in range(1, n_epochs + 1):
    # NOTE(review): batches are read in the same order every epoch; the
    # permutation that used to be computed here was never applied.
    mean_loss_per_sample_train, accuracy_per_sample_train = all_batches_run_train(n_batches_train)
    print("loss after epoch %d = %f: " % (epoch, mean_loss_per_sample_train))
    print("train accuracy after epoch %d = %f: " % (epoch, accuracy_per_sample_train))
    print("-----------------------------------\n")
    train_ac.append(accuracy_per_sample_train)
    train_loss.append(mean_loss_per_sample_train)
print('done training')
save_path = saver.save(sess, "./model.ckpt")
plt.title("Training Accuracy over epochs")
plt.plot(train_ac, label="Training Accuracy")
plt.xlabel("epoch")
plt.legend(loc=4)
plt.grid(True)
plt.show()
plt.title("Training loss over epochs")
plt.plot(train_loss, label="Training Loss")
plt.xlabel("epoch")
plt.grid(True)
plt.show()
# The evaluation data was never loaded before being used; read it here.
test_X, test_Y = image_reader.load_testing_dataset()
test_results = test_and_evaluate(data=test_X, labels=test_Y)
print('done testing')
print("Test Accuracy " + str(test_results[2]))