I want to get predictions from a tf.estimator.DNNLinearCombinedClassifier model.
My training code:
m = tf.estimator.DNNLinearCombinedClassifier(
    model_dir=model_dir,
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_activation_fn=tf.nn.relu,
    n_classes=train_label.shape[1],
    dnn_hidden_units=[200, 150, 100, 50],
    dnn_optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    linear_optimizer=tf.train.AdamOptimizer(learning_rate=0.01),
    config=run_config)
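For context, wide_columns and deep_columns are not defined in the snippet above. A minimal sketch of how they might be built with tf.feature_column, assuming the CONTINUOUS_COLUMNS / CATEGORICAL_COLUMNS lists used later in input_fn_v2 (the bucket size is a placeholder of my own):

# Sketch only: the real column definitions are not shown in the question.
deep_columns = [tf.feature_column.numeric_column(k) for k in CONTINUOUS_COLUMNS]
wide_columns = [tf.feature_column.categorical_column_with_hash_bucket(k, hash_bucket_size=1000)
                for k in CATEGORICAL_COLUMNS]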
def input_fn(data_file, num_epochs, shuffle, batch_size, Eval):
    def parse_csv(line):  # `line` is a single CSV record handed over by dataset.map()
        print('Parsing', data_file)
        if Eval == "train":
            columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS)
            features = dict(zip(_CSV_COLUMNS, columns))
            labels = features.pop('target')
        elif Eval == "test":
            columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS[:-1])
            features = dict(zip(_CSV_COLUMNS[:-1], columns))
            # Dummy labels; Estimator.predict() only uses the features.
            labels = tf.constant([1], dtype=tf.int64, shape=(10,))
            # labels = None
        return features, labels

    dataset = tf.data.TextLineDataset(data_file).skip(1)  # skip the CSV header
    if shuffle:
        dataset = dataset.shuffle(buffer_size=10000)
    dataset = dataset.map(parse_csv, num_parallel_calls=10)
    if Eval == "train":
        dataset = dataset.repeat(num_epochs)
        dataset = dataset.batch(batch_size, drop_remainder=True)
    else:
        dataset = dataset.batch(100, drop_remainder=True)

    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels
def train_input_fn():
    return input_fn(data_file="./Total_Data.csv",
                    num_epochs=5,
                    shuffle=True,
                    batch_size=1000,
                    Eval="train")
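The actual train/evaluate calls are not shown in the question; presumably something along these lines (a sketch assuming the standard Estimator API, with train_input_fn reused for evaluation):

# Sketch only: the real training/evaluation driver code is not shown in the question.
m.train(input_fn=train_input_fn)
eval_metrics = m.evaluate(input_fn=train_input_fn)  # or a separate eval input_fn
print(eval_metrics)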
def input_fn_v2(data):
    # Alternative: build the feature dict straight from a pandas DataFrame.
    continuous_cols = {k: tf.constant(data[k].values) for k in CONTINUOUS_COLUMNS}
    categorical_cols = {
        k: tf.SparseTensor(indices=[[i, 0] for i in range(data[k].size)],
                           values=data[k].astype(str).values,
                           dense_shape=[data[k].size, 1])
        for k in CATEGORICAL_COLUMNS}
    feature_cols = {**continuous_cols, **categorical_cols}
    # labels = tf.constant(np.array([[1]] * data.shape[0]).reshape(-1, 1), dtype=tf.int64)
    labels = None
    return feature_cols, labels
def test_input_fn():
    # one = input_fn_v2(data=test_)  # DataFrame-based variant, currently unused
    two = input_fn(data_file="./Test_Data.csv",
                   num_epochs=1,
                   shuffle=False,
                   batch_size=100,
                   Eval="test")
    return two
Please help me; the following never finishes:
test_pred = m.predict(input_fn=test_input_fn)
test_pred = list(test_pred)[0]
Training and evaluation both work. But when I try to predict on the test data, it never finishes. I think the tf.data pipeline is the problem, so how can this be fixed? The data is large, though, so deployment is critical.
Train shape: (90000, 333); test shape: (13915, 333).
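Note that predict() returns a generator, so list(test_pred) forces it to run over the entire test set before [0] is taken. For reference, a minimal sketch of a prediction-only pipeline, assuming labels can simply be omitted at predict time and that keeping the final partial batch is acceptable (predict_input_fn below is my own sketch, not code from the question):

def predict_input_fn():
    def parse_csv(line):
        # Test_Data.csv has no target column, so drop the last default.
        columns = tf.decode_csv(line, record_defaults=_CSV_COLUMN_DEFAULTS[:-1])
        return dict(zip(_CSV_COLUMNS[:-1], columns))  # features only; predict() needs no labels

    dataset = tf.data.TextLineDataset("./Test_Data.csv").skip(1)
    dataset = dataset.map(parse_csv, num_parallel_calls=10)
    dataset = dataset.batch(100)  # no drop_remainder, so the last 15 rows are not silently dropped
    return dataset.make_one_shot_iterator().get_next()

test_pred = m.predict(input_fn=predict_input_fn)
first = next(test_pred)  # pull a single prediction lazily instead of materializing all of them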