我正在尝试训练一个模型来识别文本,参考了 this link。完整代码如下:
from keras import backend as K
from keras.layers import Input, add, concatenate, Activation
from keras.layers import Conv2D, MaxPooling2D, Reshape, Dense, GRU
from keras.models import Model
import numpy as np
# Input parameters. With channels_last data the image is fed width-first, so
# the (down-sampled) width axis becomes the time axis of the recurrent layers.
img_w = 128
img_h = 64

# Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
minibatch_size = 32
unique_tokens = 28  # size of the softmax output (number of label classes)

# The position of the single channel axis depends on the backend setting.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_w, img_h)
else:
    input_shape = (img_w, img_h, 1)

act = 'relu'
input_data = Input(name='Brahma.jpg', shape=input_shape, dtype='float32')

# Two conv + max-pool stages; each pooling halves both spatial dimensions.
inner = Conv2D(conv_filters, kernel_size, padding='same',
               activation=act, kernel_initializer='he_normal',
               name='conv1')(input_data)
inner1 = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner)
inner2 = Conv2D(conv_filters, kernel_size, padding='same',
                activation=act, kernel_initializer='he_normal',
                name='conv2')(inner1)
inner3 = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner2)

# Collapse the feature maps into a (time_steps, features) sequence for the
# RNN; both poolings together shrink each spatial axis by pool_size ** 2.
# NOTE(review): this axis order matches the channels_last layout; confirm the
# reshape is still meaningful when the backend uses channels_first.
conv_to_rnn_dims = (img_w // (pool_size ** 2),
                    (img_h // (pool_size ** 2)) * conv_filters)
inner4 = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner3)

# cuts down input size going into RNN:
inner5 = Dense(time_dense_size, activation=act, name='dense1')(inner4)

# Two layers of bidirectional GRUs
# GRU seems to work as well, if not better than LSTM:
gru_1 = GRU(rnn_size, return_sequences=True,
            kernel_initializer='he_normal', name='gru1')(inner5)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru1_b')(inner5)
gru1_merged = add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True,
            kernel_initializer='he_normal', name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru2_b')(gru1_merged)

# transforms RNN output to character activations:
inner6 = Dense(unique_tokens, kernel_initializer='he_normal',
               name='dense2')(concatenate([gru_2, gru_2b]))
y_pred = Activation('softmax', name='softmax')(inner6)
# Bind the model to a name: decoding needs concrete predictions, and only
# model.predict() can turn input images into them.
model = Model(inputs=input_data, outputs=y_pred)


def decode_top_paths(images, top_paths=3):
    """Beam-search decode the first image of a batch into its best label paths.

    ``K.ctc_decode`` requires a rank-1 ``input_length`` holding one sequence
    length per sample. The symbolic ``y_pred`` tensor has no known batch
    size, so building the lengths from ``y_pred.shape`` produces a scalar and
    fails with "Shape must be rank 1 but is rank 0". The lengths are therefore
    derived from the concrete array returned by ``model.predict``.

    Args:
        images: float32 array shaped ``(batch,) + input_shape``.
        top_paths: number of decoded paths to return; also used as the beam
            width, as in the original call.

    Returns:
        A list with ``top_paths`` entries, each the decoded label array of the
        first sample in the batch (Keras reports blank labels as ``-1``).
    """
    probs = model.predict(images)
    batch_size, time_steps = probs.shape[:2]
    # Every sample uses the full softmax sequence, so all lengths are equal.
    input_length = np.full(batch_size, time_steps, dtype='int32')
    # Run the beam search once; it already yields all requested paths.
    decoded, _ = K.ctc_decode(probs, input_length=input_length, greedy=False,
                              beam_width=top_paths, top_paths=top_paths)
    return [K.get_value(path)[0] for path in decoded]


top_paths = 3
# TODO: replace this all-zero placeholder batch with the real, preprocessed
# image(s) (and load trained weights) before relying on the decoded labels.
sample_images = np.zeros((1,) + input_shape, dtype='float32')
results = decode_top_paths(sample_images, top_paths)
导致错误的代码:
# NOTE: this is the failing call, kept verbatim. y_pred is the model's
# symbolic output tensor, so one of its dimensions is still unknown (the `?`
# in the reported input shape). The input_length built from y_pred.shape
# reaches CTCBeamSearchDecoder as rank 0 (the `[]` in the error), while the
# op requires a rank-1 vector.
# The same K.ctc_decode call is also repeated on every loop iteration.
top_paths = 3
results = []
for i in range(top_paths):
lables = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0])*y_pred.shape[1], greedy=False, beam_width=top_paths, top_paths=top_paths)[0][i])[0]
results.append(lables)
错误:
ValueError: Shape must be rank 1 but is rank 0 for 'CTCBeamSearchDecoder' (op: 'CTCBeamSearchDecoder') with input shapes: [32,?,28], [].
跟踪:
InvalidArgumentError Traceback (most recent call last)
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1609 try:
-> 1610 c_op = c_api.TF_FinishOperation(op_desc)
1611 except errors.InvalidArgumentError as e:
InvalidArgumentError: Shape must be rank 1 but is rank 0 for 'CTCBeamSearchDecoder' (op: 'CTCBeamSearchDecoder') with input shapes: [32,?,28], [].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-6-9dac4ef11991> in <module>
2 results = []
3 for i in range(top_paths):
----> 4 lables = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0])*y_pred.shape[1], greedy=False, beam_width=top_paths, top_paths=top_paths)[0][i])[0]
5 results.append(lables)
E:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in ctc_decode(y_pred, input_length, greedy, beam_width, top_paths, merge_repeated)
4531 inputs=y_pred,
4532 sequence_length=input_length, beam_width=beam_width,
-> 4533 top_paths=top_paths, merge_repeated=merge_repeated)
4534
4535 decoded_dense = []
E:\Anaconda3\lib\site-packages\tensorflow_core\python\ops\ctc_ops.py in ctc_beam_search_decoder(inputs, sequence_length, beam_width, top_paths, merge_repeated)
310 beam_width=beam_width,
311 top_paths=top_paths,
--> 312 merge_repeated=merge_repeated))
313
314 return ([
E:\Anaconda3\lib\site-packages\tensorflow_core\python\ops\gen_ctc_ops.py in ctc_beam_search_decoder(inputs, sequence_length, beam_width, top_paths, merge_repeated, name)
98 sequence_length=sequence_length,
99 beam_width=beam_width, top_paths=top_paths,
--> 100 merge_repeated=merge_repeated, name=name)
101 _result = _op.outputs[:]
102 _inputs_flat = _op.inputs
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
791 op = g.create_op(op_type_name, inputs, dtypes=None, name=scope,
792 input_types=input_types, attrs=attr_protos,
--> 793 op_def=op_def)
794 return output_structure, op_def.is_stateful, op
795
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in create_op(***failed resolving arguments***)
546 return super(FuncGraph, self)._create_op_internal( # pylint: disable=protected-access
547 op_type, inputs, dtypes, input_types, name, attrs, op_def,
--> 548 compute_device)
549
550 def capture(self, tensor, name=None):
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_op_internal(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_device)
3427 input_types=input_types,
3428 original_op=self._default_original_op,
-> 3429 op_def=op_def)
3430 self._create_op_helper(ret, compute_device=compute_device)
3431 return ret
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1771 op_def, inputs, node_def.attr)
1772 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1773 control_input_ops)
1774 # pylint: enable=protected-access
1775
E:\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1611 except errors.InvalidArgumentError as e:
1612 # Convert to ValueError for backwards compatibility.
-> 1613 raise ValueError(str(e))
1614
1615 return c_op
ValueError: Shape must be rank 1 but is rank 0 for 'CTCBeamSearchDecoder' (op: 'CTCBeamSearchDecoder') with input shapes: [32,?,28], [].
Python 版本:3.7;Keras 版本:2.3.1;TensorFlow 版本:2.0.0
我尝试了 this link 中的所有建议,还试了很多其他办法,但似乎都不起作用!
任何帮助将不胜感激。
谢谢。