如何提高验证码识别的准确率(目前准确率很低)

时间:2017-05-18 09:16:29

标签: python machine-learning tensorflow neural-network conv-neural-network

我正在尝试使用 TensorFlow 识别最多 5 位数字的验证码。

读取和处理文件夹中的图像

# Image dimensions; load_dataset allocates IMAGE_HEIGHT * IMAGE_WIDTH
# features per flattened image.
IMAGE_WIDTH = 160
IMAGE_HEIGHT = 60
# Presumably the number of character slots per captcha label -- confirm.
CAPTCHA_LENGTH = 5

# Global constants describing the captcha data set.
# Size of the per-character one-hot block in a label vector.
NUM_CLASSES = 36
# NOTE(review): the two NUM_EXAMPLES_* values are not referenced by any code
# visible in this file.
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 1000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 100
# Directories listed with os.listdir to find training / evaluation images.
training_folder = "train"
testing_folder = "validation"


def load_training_dataset():
    """Load every entry of ``training_folder`` in a single call.

    Returns:
        Whatever ``load_dataset`` returns for the range covering all
        directory entries of ``training_folder``.
    """
    entry_count = len(os.listdir(training_folder))
    first, last = 0, entry_count
    return load_dataset(training_folder, first, last)


def load_testing_dataset():
    """Load every entry of ``testing_folder`` in a single call.

    Returns:
        Whatever ``load_dataset`` returns for the range covering all
        directory entries of ``testing_folder``.
    """
    entry_count = len(os.listdir(testing_folder))
    first, last = 0, entry_count
    return load_dataset(testing_folder, first, last)


def normalize_data(X):
    """Standardize each column of *X* to zero mean and (near) unit variance.

    The mean and standard deviation are computed along axis 0, i.e. across
    the rows of *X*. A small constant is added to the standard deviation so
    that constant columns do not divide by zero.

    Args:
        X: Array with one sample per row.

    Returns:
        A new array of the same shape; *X* itself is not modified.
    """
    centered = X - X.mean(axis=0)
    scale = X.std(axis=0) + 0.00001
    return centered / scale


def training_dataset_length():
    """Return the number of directory entries in ``training_folder``."""
    entries = os.listdir(training_folder)
    return len(entries)


def load_dataset(folder, fromPos, toPos):
    """Load a slice of the images in *folder* as features and encoded labels.

    File names are expected to look like ``<index>_<text>.<ext>``: the part
    between the first underscore and the following dot is taken as the
    captcha text and encoded with ``label_util.words_to_vec``.

    Args:
        folder: Directory containing the image files.
        fromPos: Index of the first directory entry to load (inclusive).
        toPos: Index one past the last directory entry to load (exclusive).

    Returns:
        A tuple ``(X, Y)``. ``X`` holds one flattened image per row,
        standardized with ``normalize_data`` using the statistics of the
        rows loaded by this call. ``Y`` holds the matching label vector per
        row. Both have as many rows as files were actually selected.
    """
    # os.listdir returns entries in arbitrary order; sort so that successive
    # calls with different offsets slice the same stable sequence.
    selected = sorted(os.listdir(folder))[fromPos:toPos]

    # Size the arrays by the files actually selected so that a range reaching
    # past the end of the directory does not leave all-zero rows behind.
    X = np.zeros([len(selected), IMAGE_HEIGHT * IMAGE_WIDTH])
    Y = np.zeros([len(selected), CAPTCHA_LENGTH * NUM_CLASSES])

    for i, filename in enumerate(selected):
        # Join with a path separator: plain concatenation would turn
        # "train" + "0_123.png" into the non-existent "train0_123.png".
        path = os.path.join(folder, filename)
        img = imread(path, flatten=True)

        captcha_text = filename.split('_')[1].split('.')[0]

        X[i, :] = img.flatten()
        Y[i, :] = label_util.words_to_vec(captcha_text)

    X = normalize_data(X)
    return X, Y


def tf_load_dataset(folder):
    """Start building a TensorFlow queue-based reader for the JPEGs in *folder*.

    NOTE(review): the function as shown ends after allocating ``Y`` and has
    no return statement; it looks unfinished.
    """
    # Queue of file names matching "<folder>/*.jpg".
    filename_queue = tf.train.string_input_producer(
        tf.train.match_filenames_once(folder + "/*.jpg"))

    # Read one whole file from the queue and decode it as a JPEG image.
    image_reader = tf.WholeFileReader()
    _, image_file = image_reader.read(filename_queue)
    image = tf.image.decode_jpeg(image_file)

    # NOTE(review): len() is applied to the queue object returned by
    # string_input_producer; that object likely does not support len() --
    # confirm before relying on this line.
    Y = np.zeros([len(filename_queue), 5 * NUM_CLASSES])

我的图片文件名形如 "index_imagetext.png",因此我按如下方式处理标签。对于长度小于 5 的验证码,我会用一个额外的字符(b)进行填充。实际上我并不需要 36 个类别,因为我的验证码只包含数字,11 个类别(10 个数字加 1 个填充字符)对我来说就足够了。但当我尝试把类别数减少到 11 时,程序报出了关于标签数组形状的错误。我想我还必须修改函数 "char_to_vec_pos(char)",但我不明白它的工作原理。

CHAR_VOCAB_SIZE = 36  # Each char in the word can either be a digit 0-9 or a letter a-z giving a total of 36 possible characters.
# Number of character slots in an encoded word; prediction_to_word reshapes
# its one-hot matrix to WORD_SIZE * CHAR_VOCAB_SIZE entries.
WORD_SIZE = 5


def char_to_vec_pos(char):
    """Map a single character to its index inside a per-character one-hot block.

    Digits '0'-'9' map to 0-9 and lowercase letters 'a'-'z' map to 10-35.

    Args:
        char: A one-character string.

    Returns:
        The integer index of *char*.

    Raises:
        ValueError: If *char* is neither a digit nor a lowercase ASCII letter.
    """
    code = ord(char)
    if ord('0') <= code <= ord('9'):
        return code - ord('0')
    if ord('a') <= code <= ord('z'):
        return code - ord('a') + 10
    raise ValueError('Wrong character {}'.format(char))


def words_to_vec(word):
    """Encode *word* as a flat vector of concatenated one-hot blocks.

    Words shorter than ``WORD_SIZE`` (including the empty string) are
    right-padded with the character 'b' so that every encoded word has at
    least ``WORD_SIZE`` character slots. Note that the padding character is
    encoded exactly like a real letter 'b'.

    Args:
        word: String made of digits and lowercase letters.

    Returns:
        A 1-D numpy array of length ``len(padded_word) * CHAR_VOCAB_SIZE``
        with a single 1 in each character's block.

    Raises:
        ValueError: If a character is rejected by ``char_to_vec_pos``.
    """
    # ljust leaves words that already have WORD_SIZE or more characters as is.
    word = word.ljust(WORD_SIZE, "b")
    vec = np.zeros(len(word) * CHAR_VOCAB_SIZE)

    for i, char in enumerate(word):
        vec[i * CHAR_VOCAB_SIZE + char_to_vec_pos(char)] = 1
    return vec


def vec_to_word(vector):
    """Decode a flat one-hot word vector back into its string.

    Every non-zero position of *vector* contributes one character, in
    ascending position order; the character is chosen from the position's
    offset within its ``CHAR_VOCAB_SIZE``-wide block.

    Args:
        vector: 1-D numpy array as produced by ``words_to_vec``.

    Returns:
        The decoded string.

    Raises:
        ValueError: If an offset falls outside the 0-35 range.
    """
    hot_positions = vector.nonzero()[0]
    chars = []

    for position in hot_positions:
        slot = position % CHAR_VOCAB_SIZE

        if slot < 10:  # digits 0-9
            base, shift = ord('0'), 0
        elif slot <= 35:  # letters a-z
            base, shift = ord('a'), 10
        else:
            raise ValueError("Incorrect character code")

        chars.append(chr((slot - shift) + base))

    return "".join(chars)


def prediction_to_word(prediction_vector):
    """Turn a matrix of per-character scores into the predicted word.

    For each row the highest-scoring column is selected, the selection is
    written as a one-hot matrix, flattened to ``WORD_SIZE * CHAR_VOCAB_SIZE``
    entries and decoded with ``vec_to_word``.

    Args:
        prediction_vector: 2-D numpy array, one row of scores per character.

    Returns:
        The decoded string.
    """
    winners = prediction_vector.argmax(axis=1)
    one_hot = np.zeros_like(prediction_vector)
    one_hot[np.arange(len(prediction_vector)), winners] = 1
    flat = one_hot.reshape(WORD_SIZE * CHAR_VOCAB_SIZE)
    return vec_to_word(flat)


def compare_predictions(predictions, labels):
    """Print the true and the predicted word of every sample side by side.

    Args:
        predictions: Sequence of per-sample score matrices accepted by
            ``prediction_to_word``.
        labels: Sequence of per-sample label vectors accepted by
            ``vec_to_word``; must have the same length as *predictions*.

    Raises:
        AssertionError: If *predictions* and *labels* differ in length.
    """
    # Compare the two lengths. The closing parenthesis has to follow
    # `predictions`; len(predictions == len(labels)) measures the result of
    # an element-wise comparison and never checks the lengths.
    assert len(predictions) == len(labels)
    print("True   | Predicted")
    for prediction, label in zip(predictions, labels):
        predicted_word = prediction_to_word(prediction)
        true_word = vec_to_word(label)
        print("{:7s}|{:10s}".format(true_word, predicted_word))

最后,我的网络结构和评估结构

# Loads the complete training set once up front.
train_X, train_Y = image_reader.load_training_dataset()

# One flattened grayscale image per row (160 * 60 = 9600 pixels).
X_input = tf.placeholder(tf.float32, [None, 160 * 60])
# The loader flattens each image row by row from a (height=60, width=160)
# array, so the 4-D view must be [batch, height, width, channels].
# Reshaping to [-1, 160, 60, 1] would reinterpret every 60 consecutive pixels
# as one image row and scramble the spatial layout the convolutions rely on.
# The flattened size after three 2x2 poolings is 8 * 20 * 128 either way.
X = tf.reshape(X_input, shape=[-1, 60, 160, 1])
# Target labels: 5 character slots, each a NUM_CLASSES-wide one-hot block.
Y_ = tf.placeholder(tf.float32, [None, 5 * image_reader.NUM_CLASSES])

learning_rate = 0.001


def create_fully_connected_weight(shape):
    """Create a weight variable of *shape* drawn from a truncated normal.

    The initial values use a standard deviation of 0.1.
    """
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def create_conv_weight(patch_height, patch_width, input_channel, output_channel):
    """Create a convolution kernel variable drawn from a truncated normal.

    The kernel shape is
    ``[patch_height, patch_width, input_channel, output_channel]`` and the
    initial values use a standard deviation of 0.1.
    """
    kernel_shape = [patch_height, patch_width, input_channel, output_channel]
    return tf.Variable(tf.truncated_normal(shape=kernel_shape, stddev=0.1))


def create_bias(shape):
    """Create a bias variable of *shape* from normal samples scaled by 0.1."""
    return tf.Variable(0.1 * tf.random_normal(shape=shape))


def create_strides(batch_step, height_step, width_step, channel_step):
    """Pack the four per-dimension step sizes into a strides list.

    Returns:
        ``[batch_step, height_step, width_step, channel_step]`` as a new list.
    """
    steps = (batch_step, height_step, width_step, channel_step)
    return list(steps)


def create_conv_layer(input, W, strides, padding='SAME'):
    """Apply a 2-D convolution of *input* with kernel *W*.

    Thin wrapper that forwards its arguments positionally to
    ``tf.nn.conv2d``.
    """
    convolved = tf.nn.conv2d(input, W, strides, padding)
    return convolved


def apply_max_pool(x, ksize, strides, padding='SAME'):
    """Apply max pooling to *x*.

    Thin wrapper that forwards its arguments positionally to
    ``tf.nn.max_pool``.
    """
    pooled = tf.nn.max_pool(x, ksize, strides, padding)
    return pooled


# Dropout keep probability, fed at run time; the same placeholder is shared
# by every dropout layer in this script.
keep_prob = tf.placeholder(tf.float32)

# Convolution block 1: 5x5 kernels, 1 -> 32 channels, ReLU, then 2x2 max
# pooling with stride 2 (halves both spatial dimensions) and dropout.
W1 = create_conv_weight(5, 5, 1, 32)
print("W1 shape:", W1.get_shape())
B1 = create_bias([32])
strides1 = create_strides(1, 1, 1, 1)
Y1 = tf.nn.relu(create_conv_layer(X, W1, strides1, padding="SAME") + B1)
Y1 = apply_max_pool(Y1, [1, 2, 2, 1], [1, 2, 2, 1])
Y1 = tf.nn.dropout(Y1, keep_prob=keep_prob)
print(Y1)

# Convolution block 2: same structure, 32 -> 64 channels.
W2 = create_conv_weight(5, 5, 32, 64)
print("W2 shape:", W2.get_shape())
B2 = create_bias([64])
strides2 = create_strides(1, 1, 1, 1)
Y2 = tf.nn.relu(create_conv_layer(Y1, W2, strides2, padding="SAME") + B2)
Y2 = apply_max_pool(Y2, [1, 2, 2, 1], [1, 2, 2, 1])
Y2 = tf.nn.dropout(Y2, keep_prob=keep_prob)
print(Y2)

# Convolution block 3: same structure, 64 -> 128 channels.
W3 = create_conv_weight(5, 5, 64, 128)
print("W3 shape:", W3.get_shape())
B3 = create_bias([128])
strides3 = create_strides(1, 1, 1, 1)
Y3 = tf.nn.relu(create_conv_layer(Y2, W3, strides3, padding="SAME") + B3)
Y3 = apply_max_pool(Y3, [1, 2, 2, 1], [1, 2, 2, 1])
Y3 = tf.nn.dropout(Y3, keep_prob=keep_prob)
print(Y3)

# keep_prob = tf.placeholder(tf.float32)

# Flatten the last convolution output. Three stride-2 poolings reduce the
# 160-pixel side to 20 and the 60-pixel side to 8 (SAME padding rounds 15 / 2
# up), with the 128 channels produced by W3.
Y3 = tf.reshape(Y3, [-1, 20 * 8 * 128])

# Hidden fully connected layer with 1024 units, ReLU and dropout.
W4 = create_fully_connected_weight([20 * 8 * 128, 1024])
print("W4 shape:", W4.get_shape())
B4 = create_bias([1024])
Y4 = tf.nn.relu(tf.matmul(Y3, W4) + B4)
Y4 = tf.nn.dropout(Y4, keep_prob=keep_prob)
print(Y4)

# Output layer: one logit per (character slot, class) pair, no activation.
W5 = create_fully_connected_weight([1024, 5 * image_reader.NUM_CLASSES])
print("W5 shape:", W5.get_shape())
B5 = create_bias([5 * image_reader.NUM_CLASSES])
print(B5)
Ylogits = tf.matmul(Y4, W5) + B5

# print(Ylogits)
# print(Y_.shape)

# Element-wise sigmoid cross-entropy: each of the 5 * NUM_CLASSES outputs is
# scored as an independent binary target, then averaged over all elements.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
loss = tf.reduce_mean(cross_entropy)

train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# prediction
# View logits and labels as [batch, 5 slots, NUM_CLASSES] and compare the
# arg-max class of every slot.
predictions = tf.reshape(Ylogits, [-1, 5, image_reader.NUM_CLASSES])
Ytrue = tf.reshape(Y_, [-1, 5, image_reader.NUM_CLASSES])
correct_prediction = tf.equal(tf.argmax(predictions, 2), tf.argmax(Ytrue, 2))

# Define the accuracy
# The mean is taken over every character slot, so this is the fraction of
# correctly predicted characters, not of fully correct captchas.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initialize all variables. tf.global_variables_initializer is the
# replacement for the deprecated tf.initialize_all_variables.
init = tf.global_variables_initializer()

# Saver used to write the trained variables to a checkpoint.
saver = tf.train.Saver()

sess = tf.Session()
sess.run(init)

# Training hyperparameters.
batch_size = 250
n_epochs = 15
# Floor division already yields an int; a trailing partial batch is dropped.
n_batches_train = image_reader.training_dataset_length() // batch_size
print("number of batches: %d" % (n_batches_train))


def all_batches_run_train(n_batches, data=None, labels=None):
    """Run one training pass over *n_batches* consecutive batches.

    Every batch is read from ``image_reader.training_folder`` with
    ``image_reader.load_dataset`` and used for one optimizer step with a
    dropout keep probability of 0.75.

    Args:
        n_batches: Number of batches of ``batch_size`` samples to train on.
        data: Unused; kept so existing callers keep working.
        labels: Unused; kept so existing callers keep working.

    Returns:
        A tuple ``(mean_loss, mean_accuracy)``, both weighted by the number
        of samples in each batch.

    Raises:
        ZeroDivisionError: If no sample was processed (e.g. ``n_batches`` is 0).
    """
    sum_all_batches_loss = 0
    sum_all_batches_acc = 0
    sum_n_samples = 0

    for b in range(n_batches):
        offset = b * batch_size
        batch_data, batch_labels = image_reader.load_dataset(
            image_reader.training_folder, offset, offset + batch_size)

        n_samples = batch_data.shape[0]

        # Feed the labels loaded together with this batch; the name Y_train
        # is not defined anywhere and raised a NameError on the first step.
        feed_dict = {X_input: batch_data, Y_: batch_labels, keep_prob: 0.75}
        _, loss_value, a = sess.run([train_step, loss, accuracy], feed_dict=feed_dict)

        # Weight by batch size so a short batch does not skew the averages.
        sum_all_batches_loss += loss_value * n_samples
        sum_all_batches_acc += a * n_samples
        sum_n_samples += n_samples
        if n_samples != batch_size:
            print('n_samples =%d' % n_samples)

    print("sum of samples trained %d" % (sum_n_samples))
    return (sum_all_batches_loss / sum_n_samples, sum_all_batches_acc / sum_n_samples)


def test_and_evaluate(data=None, labels=None):
    """Evaluate the network on *data* and print predictions next to labels.

    The graph is run with a dropout keep probability of 1.

    Args:
        data: Array of flattened images, one per row.
        labels: Array of label vectors with the same number of rows as *data*.

    Returns:
        The list returned by ``sess.run`` for
        ``[predictions, correct_prediction, accuracy]``.

    Raises:
        AssertionError: If *data* and *labels* have different row counts.
    """
    assert (data.shape[0] == labels.shape[0])
    test_results = sess.run(
        [predictions, correct_prediction, accuracy],
        feed_dict={X_input: data, Y_: labels, keep_prob: 1},
    )
    predicted, correct = test_results[0], test_results[1]
    label_util.compare_predictions(predicted, labels)
    print("printing correct predictions")
    print(correct)
    return test_results


# 1-based epoch counter used only for the progress messages.
i = 1

# Per-epoch history for the plots drawn after training.
train_ac = []
train_loss = []
test_ac = []
for e in range(n_epochs):
    # NOTE: batches are read from disk in directory-listing order on every
    # epoch; no shuffling is applied.
    mean_loss_per_sample_train, accuracy_per_sample_train = all_batches_run_train(n_batches_train)
    print("loss after epoch %d = %f: " % (i, mean_loss_per_sample_train))
    print("train accuracy after epoch %d = %f: " % (i, accuracy_per_sample_train))
    print("-----------------------------------\n")
    i = i + 1
    train_ac.append(accuracy_per_sample_train)
    train_loss.append(mean_loss_per_sample_train)


print('done training')
save_path = saver.save(sess, "./model.ckpt")
plt.title("Training Accuracy over epochs")
plt.plot(train_ac, label="Training Accuracy")
plt.xlabel("epoch")
plt.legend(loc=4)
plt.grid(True)
plt.show()

plt.title("Training loss over epochs")
plt.plot(train_loss, label="Training Loss")
plt.xlabel("epoch")
plt.grid(True)
plt.show()

# Load the held-out set before evaluating; test_X / test_Y are not defined
# anywhere earlier in the script.
test_X, test_Y = image_reader.load_testing_dataset()
test_results = test_and_evaluate(data=test_X, labels=test_Y)

print('done testing')
print("Test Accuracy " + str(test_results[2]))

训练结束后,我最多只能得到 15% 的准确率,而我参考的教程能达到约 90%–95%。我的情况与教程的唯一区别是我的验证码长度不固定:最多有 5 位数字,我认为用 "b" 填充空位应该是可行的。

0 个答案:

没有答案