基于this的帖子,我试图创建另一个模型,在该模型中添加分类变量和连续变量。 请在下面找到代码:
from __future__ import print_function
import pandas as pd;
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import LabelEncoder
if __name__ == '__main__':
# 1 categorical input feature and a binary output
df = pd.DataFrame({'cat2': np.array(['o', 'm', 'm', 'c', 'c', 'c', 'o', 'm', 'm', 'm']),
'num1': np.random.rand(10),
'label': np.array([0, 0, 1, 1, 0, 0, 1, 0, 1, 1])})
encoder = LabelEncoder()
encoder.fit(df.cat2.values)
X1 = encoder.transform(df.cat2.values).reshape(-1,1)
X2 = np.array(df.num1.values).reshape(-1,1)
# X = np.concatenate((X1,X2), axis=1)
Y = np.zeros((len(df), 2))
Y[np.arange(len(df)), df.label.values] = 1
# Neural net parameters
training_epochs = 5
learning_rate = 1e-3
cardinality = len(np.unique(X))
embedding_size = 2
input_X_size = 1
n_labels = len(np.unique(Y))
n_hidden = 10
# Placeholders for input, output
cat2 = tf.placeholder(tf.int32, [None], name='cat2')
x = tf.placeholder(tf.float32, [None, 1], name="input_x")
y = tf.placeholder(tf.float32, [None, 2], name="input_y")
embed_matrix = tf.Variable(
tf.random_uniform([cardinality, embedding_size], -1.0, 1.0),
name="embed_matrix"
)
embed = tf.nn.embedding_lookup(embed_matrix, cat2)
inputs_with_embed = tf.concat([x, embedding_aggregated], axis=2, name="inputs_with_embed")
# Neural network weights
h = tf.get_variable(name='h2', shape=[inputs_with_embed, n_hidden],
initializer=tf.contrib.layers.xavier_initializer())
W_out = tf.get_variable(name='out_w', shape=[n_hidden, n_labels],
initializer=tf.contrib.layers.xavier_initializer())
# Neural network operations
#embedded_chars = tf.nn.embedding_lookup(embeddings, x)
layer_1 = tf.matmul(inputs_with_embed,h)
layer_1 = tf.nn.relu(layer_1)
out_layer = tf.matmul(layer_1, W_out)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out_layer, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
for epoch in range(training_epochs):
avg_cost = 0.
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([optimizer, cost],
feed_dict={x: X2,cat2:X1, y: Y})
print("Optimization Finished!")
但是出现以下错误。看来我没有串联连续变量并正确嵌入。但是我不知道如何解决它。
请有人指导我。
ValueError: Shape must be at least rank 3 but is rank 2 for 'inputs_with_embed_2' (op: 'ConcatV2') with input shapes: [?,1], [?,2], [] and with computed input tensors: input[2] = <2>.
谢谢!
答案 0 :(得分:1)
如果用embedding_agregated
表示embed
(可能是错字)
错误是您的情况下没有axis=2
,应该是axis=1
inputs_with_embed = tf.concat([x, embed], axis=1, name="inputs_with_embed")
embed
的形状为[None,embedding_dimension],而x
的形状为[None,1]
它们都是2D张量,因此您可以访问axis = 0或axis = 1(在0而不是1处索引),因此要使input_with_embed
的形状为[None,embedding_dimension + 1],您需要连上axis=1