问题陈述
我有 3 个类别(A、B 和 C)。
我有 6 个特征:
train_x = [[ 6.442 6.338 7.027 8.789 10.009 12.566]
[ 6.338 7.027 5.338 10.009 8.122 11.217]
[ 7.027 5.338 5.335 8.122 5.537 6.408]
[ 5.338 5.335 5.659 5.537 5.241 7.043]]
这些特征表示由 3 个类别组成的、长度为 5 个字符的字符串模式(例如 AABBC 等)。
假设每个 5 字符的字符串模式按如下方式进行独热(one-hot)编码:
train_z = [[0. 0. 1. 0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0.]
[0. 0. 1. 1. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 1.]]
我的实现
我已经使用顺序模型实现了上述问题,如下所示:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import sys
import time
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
import numpy as np
# <editor-fold desc="handle GPU">
# Ask TensorFlow to enable memory growth on the first visible GPU.
try:
    physical_devices = tf.config.list_physical_devices("GPU")
    first_gpu = physical_devices[0]  # raises IndexError when the list is empty
    tf.config.experimental.set_memory_growth(first_gpu, True)
except Exception:
    # Best effort only: every failure is reported with the same message and
    # the script carries on.
    print("GPU not found!")
# </editor-fold>
# Layout of the whitespace-separated input file (0-based word indices).
# load_data_k reads the class letter from word CLASS_INDEX, the pattern
# string from word CLASS_INDEX + 1, and parses every word from
# FEATURE_START_INDEX to the end of the line as a float feature.
CLASS_INDEX = 4
FEATURE_START_INDEX = 6
# Directories and files.
# NOTE(review): OUTPUT_PATH, TRAINING_PROGRESS_FILE and MODEL_FILE are not
# referenced by the code visible in this file.
OUTPUT_PATH = r"./"
INPUT_PATH = r"./"
INPUT_DATA_FILE = "dist-5.dat"
TRAINING_PROGRESS_FILE = "training.txt"
MODEL_FILE = "model.h5"
# Classification size.
# CLASSES_COUNT matches the three letters handled by encode().
# FEATURES_COUNT is the model's input width, OUTPUTS_COUNT its output width;
# the custom loss slices those 15 outputs into 5 groups of 3.
CLASSES_COUNT = 3
FEATURES_COUNT = 6
OUTPUTS_COUNT = 15
# Network parameters: unit counts of the two hidden Dense layers.
LAYER_1_NEURON_COUNT = 128
LAYER_2_NEURON_COUNT = 128
# Training parameters.
LEARNING_RATE = 0.01
EPOCHS = 1000 # 500
BATCH_SIZE = 10
# Passed to load_data_k as top_n_lines (how many lines of the file to read).
NO_OF_INPUT_LINES = 10000
# Passed to load_data_k as validation_part: the first
# int(rows * VALIDATION_PART) rows become the training set and the remaining
# rows the validation set.
VALIDATION_PART = 0.5
# NOTE(review): not referenced by the code visible in this file.
MODEL_SAVE_FREQUENCY = 10
# <editor-fold desc="encoding()">
# <editor-fold desc="def encode(letter)">
def encode(letter: str):
    """Return the one-hot vector for a single class letter.

    Args:
        letter: One of ``'A'``, ``'B'`` or ``'C'``.

    Returns:
        A new 3-element list of floats with 1.0 at the position of the class
        (A -> index 0, B -> index 1, C -> index 2) and 0.0 elsewhere.

    Raises:
        ValueError: If ``letter`` is not one of the three class letters.
            Previously the function fell through and returned ``None``,
            which only failed later, far from the bad input.
    """
    # Built on every call so each caller receives its own list objects.
    one_hot_by_class = {
        "A": [1.0, 0.0, 0.0],
        "B": [0.0, 1.0, 0.0],
        "C": [0.0, 0.0, 1.0],
    }
    if letter not in one_hot_by_class:
        raise ValueError(f"unknown class letter: {letter!r}")
    return one_hot_by_class[letter]
# </editor-fold>
# <editor-fold desc="encode_string()">
def encode_string_1(pattern_str: str):
    """Encode a string of class letters as one flat one-hot vector.

    Every character is passed through ``encode`` and the resulting vectors
    are laid end to end in string order, so the result is a single flat list
    of floats (three per character with the ``encode`` defined in this file).
    An empty string gives an empty list.
    """
    return [bit for letter in pattern_str for bit in encode(letter)]
def encode_string_2(pattern_str: str):
    """Encode a string of class letters as a list of one-hot vectors.

    Unlike ``encode_string_1`` the per-character vectors are kept separate:
    the result holds one ``encode`` result per character, in string order.
    An empty string gives an empty list.
    """
    return [encode(letter) for letter in pattern_str]
# </editor-fold>
# <editor-fold desc="def load_data()">
def load_data_k(fname: str, class_index: int, feature_start_index: int, **selection):
    """Load a whitespace-separated data file and split it into two tensor sets.

    Each non-blank line is split into words.  The word at ``class_index`` is
    one-hot encoded with ``encode``, the word at ``class_index + 1`` is
    encoded with ``encode_string_1``, and every word from
    ``feature_start_index`` to the end of the line is parsed as a float.

    Args:
        fname: Path of the data file.
        class_index: 0-based index of the word holding the class letter; the
            pattern string is read from the word right after it.
        feature_start_index: 0-based index of the first feature word.
        **selection: Optional settings:
            ``top_n_lines`` -- use only the first N lines of the file (all of
            them if the file is shorter).
            ``random_n_lines`` -- use N lines drawn at random without
            replacement (all of them, in random order, if the file is
            shorter).  Ignored when ``top_n_lines`` is given.
            ``validation_part`` -- fraction that controls the split, default
            1.0.  Note the naming: the first ``int(rows * validation_part)``
            rows form the first (training) set and the remaining rows form
            the validation set.  Above 0.9999 no split happens and both sets
            contain every row.

    Returns:
        ``(tx, ty, tz, vx, vy, vz)`` as float32 tensors: features, class
        vectors and pattern vectors of the first set, followed by the same
        three for the validation set.

    Raises:
        IndexError: If a non-blank line has too few words.
        ValueError: If a feature word is not a number.
    """
    from itertools import islice  # local import keeps this block self-contained

    # "with" closes the file even if reading fails; it used to stay open.
    with open(fname) as file:
        if "top_n_lines" in selection:
            # islice stops quietly at end of file, whereas next() inside a
            # comprehension raised StopIteration on files shorter than N.
            line_limit = max(int(selection["top_n_lines"]), 0)
            lines = list(islice(file, line_limit))
        elif "random_n_lines" in selection:
            all_lines = file.readlines()
            # Never request more lines than exist; random.sample would raise.
            sample_size = min(int(selection["random_n_lines"]), len(all_lines))
            lines = random.sample(all_lines, sample_size)
        else:
            lines = file.readlines()

    data_x, data_y, data_z = [], [], []
    for line in lines:
        row = line.split()
        if not row:
            # A blank line (e.g. at the end of the file) carries no sample.
            continue
        data_x.append([float(word) for word in row[feature_start_index:]])
        data_y.append(encode(row[class_index]))
        data_z.append(encode_string_1(row[class_index + 1]))

    given_fraction = selection.get("validation_part", 1.0)
    if given_fraction > 0.9999:
        # No split requested: the validation set is the complete data set.
        valid_x, valid_y, valid_z = data_x, data_y, data_z
    else:
        n = int(len(data_x) * given_fraction)
        valid_x, valid_y, valid_z = data_x[n:], data_y[n:], data_z[n:]
        data_x, data_y, data_z = data_x[:n], data_y[:n], data_z[:n]

    tx, ty, tz, vx, vy, vz = (
        tf.convert_to_tensor(part, np.float32)
        for part in (data_x, data_y, data_z, valid_x, valid_y, valid_z)
    )
    return tx, ty, tz, vx, vy, vz
# </editor-fold>
# </editor-fold>
# <editor-fold desc="def create_model()">
def create_model(n_hidden_1, n_hidden_2, num_outputs, num_features):
    """Build an uncompiled fully connected network with two hidden layers.

    Args:
        n_hidden_1: Number of units in the first hidden (ReLU) layer.
        n_hidden_2: Number of units in the second hidden (ReLU) layer.
        num_outputs: Number of units in the output layer.
        num_features: Number of input features per sample.

    Returns:
        A ``tf.keras.Sequential`` model.  The output layer has no activation,
        so the model produces raw scores.
    """
    dense = tf.keras.layers.Dense
    return tf.keras.Sequential([
        tf.keras.Input(shape=(num_features,)),
        dense(n_hidden_1, activation="relu"),
        dense(n_hidden_2, activation="relu"),
        dense(num_outputs),
    ])
# </editor-fold>
# custom loss to take into the dependency between the 3 bits
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per group of output columns.

    The columns of ``y_true`` (labels) and ``y_pred`` (logits) are cut into
    the groups 0-2, 3-5, 6-8, 9-11 and 12-onwards.  Softmax cross-entropy is
    computed for each group separately and the five per-sample results are
    added together.
    """
    # Consecutive pairs are the column slices; None leaves the last one open.
    bounds = (0, 3, 6, 9, 12, None)
    group_losses = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, lo:hi], y_pred[:, lo:hi])
        for lo, hi in zip(bounds, bounds[1:])
    ]
    total = group_losses[0]
    for group_loss in group_losses[1:]:
        total = total + group_loss
    return total
if __name__ == "__main__":
    # Data file: the first command-line argument if one is given, otherwise
    # the default file.  The argument used to be read but then ignored.
    if len(sys.argv) > 1:
        arg_str = sys.argv[1]
    else:
        arg_str = os.path.join(INPUT_PATH, INPUT_DATA_FILE)

    # Load the training and validation tensors from disk.
    train_x, train_y, train_z, validate_x, validate_y, validate_z = load_data_k(
        arg_str,
        class_index=CLASS_INDEX,
        feature_start_index=FEATURE_START_INDEX,
        top_n_lines=NO_OF_INPUT_LINES,
        validation_part=VALIDATION_PART,
    )
    print("z = " + str(train_z))

    # Adam optimizer for the network.
    opt_function = keras.optimizers.Adam(learning_rate=LEARNING_RATE)

    # Sequential network: FEATURES_COUNT inputs -> OUTPUTS_COUNT raw scores.
    model = create_model(
        LAYER_1_NEURON_COUNT,
        LAYER_2_NEURON_COUNT,
        OUTPUTS_COUNT,
        FEATURES_COUNT,
    )

    def per_character_accuracy(y_true, y_pred):
        """Return, per sample, the fraction of characters predicted correctly.

        Targets and predictions are reshaped into groups of CLASSES_COUNT
        columns and the argmax of each group is compared.
        """
        group_shape = (-1, OUTPUTS_COUNT // CLASSES_COUNT, CLASSES_COUNT)
        true_classes = tf.argmax(tf.reshape(y_true, group_shape), axis=-1)
        predicted_classes = tf.argmax(tf.reshape(y_pred, group_shape), axis=-1)
        hits = tf.cast(tf.equal(true_classes, predicted_classes), tf.float32)
        return tf.reduce_mean(hits, axis=-1)

    # The target is five one-hot groups side by side.  The generic
    # "accuracy" string makes Keras pick an accuracy from the output shape,
    # which here compares one argmax over all 15 columns and so does not
    # reflect how many characters are right.  Use the grouped metric instead.
    model.compile(
        optimizer=opt_function,
        loss=loss,
        metrics=[per_character_accuracy],
    )
    model.fit(train_x, train_z, epochs=EPOCHS, batch_size=BATCH_SIZE)
问题
这个源代码的问题是,模型没有收敛,即准确率没有随着 epoch 的增加而增加。
问题
如何使用顺序模型来解决这个问题?
答案 0 :(得分:0)
当网络只有单个输入和单个输出时,才使用顺序(Sequential)模型。在当前设置中,您实际上需要多个输出层,因为每 3 个连续的输出值构成相互关联的一组。不过,这种约束也可以通过损失函数来强制实现。
import numpy as np
import tensorflow as tf
# Random stand-in inputs: 1000 samples with 6 features each.
inp = tf.random.uniform(shape=(1000, 6))
# Random stand-in targets: five independently drawn class indices per sample,
# each one-hot encoded over 3 classes, then joined into one 15-column tensor.
out1, out2, out3, out4, out5 = [
    tf.one_hot(tf.random.uniform(shape=(1000,), dtype=tf.int32, maxval=3), depth=3)
    for _ in range(5)
]
out = tf.concat([out1, out2, out3, out4, out5], axis=1)
# A simple sequential model, written as a layer list: 6 inputs, two hidden
# ReLU layers of 20 units, 15 outputs with no activation.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(6,)),
    tf.keras.layers.Dense(20, activation="relu"),
    tf.keras.layers.Dense(20, activation="relu"),
    tf.keras.layers.Dense(15),
])
# custom loss to take into the dependency between the 3 bits
def loss(y_true, y_pred):
    """Sum of five softmax cross-entropies, one per group of output columns.

    The columns of ``y_true`` (labels) and ``y_pred`` (logits) are cut into
    the groups 0-2, 3-5, 6-8, 9-11 and 12-onwards.  Softmax cross-entropy is
    computed for each group separately and the five per-sample results are
    added together.
    """
    # Consecutive pairs are the column slices; None leaves the last one open.
    bounds = (0, 3, 6, 9, 12, None)
    group_losses = [
        tf.nn.softmax_cross_entropy_with_logits(y_true[:, lo:hi], y_pred[:, lo:hi])
        for lo, hi in zip(bounds, bounds[1:])
    ]
    total = group_losses[0]
    for group_loss in group_losses[1:]:
        total = total + group_loss
    return total
# SGD with the library's default settings.
opt_function = tf.keras.optimizers.SGD()
model.compile(loss=loss, optimizer=opt_function)
# Train on the random data in batches of 10 samples.
model.fit(x=inp, y=out, batch_size=10)
在评估网络时也需要采用同样的思路:您需要分别对每组 3 位取 argmax(共 5 次),这样才能得到由 5 个类别组成的序列作为输出。
答案 1 :(得分:0)
我认为这就是问题所在。
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))
...
loss=['categorical_crossentropy'] * 5
>>> Shapes (10, 3) and (10, 15) are incompatible
您真的不应该像那样改动损失函数,而应该尝试修正模型的输出。使用 Sequential API 创建的模型是只有单个输入和单个输出的较简单模型。如果您想把函数式(Functional)API 模型改写成这种更简单的结构,就应该把多个输入/输出合并为一个输入/输出。这意味着您还应该在 one-hot 编码之后把标签合并起来。
> 警告:tensorflow:AutoGraph 无法转换该函数,将按原样运行它。报告此问题时,请将详细级别设置为 10(在 Linux 上:`export AUTOGRAPH_VERBOSITY=10`)并附上完整的输出。
> 原因:模块 'gast' 没有属性 'Index'
> 要消除此警告,请使用 @tf.autograph.experimental.do_not_convert 装饰该函数。
此警告不会使您的模型无法训练,因此您可以忽略它。 如果它没有训练,那么您可能应该开始调整超参数!
答案 2 :(得分:0)
在给出我的解决方案之前,我要先提醒您:它并不严谨,因为这种思路本身有问题,不过如果您的数据集非常大,它也许能奏效。您想做的是把每组 3 个值当作一个多分类(multi-class)问题,同时把各个字符当作一个多标签(multi-label)问题,这是做不到的。使用顺序模型时,您无法这样拆分问题;但如果您有一个大型数据集,可以把整个问题当作一个多标签(multi-label)问题来处理。在这种情况下,某一组 3 个值中可能会出现 2 个被激活的标签,因此您必须进行某种后处理,例如:把每组中 sigmoid 值最高的标签设为激活状态。