tf.estimator.DNNLinearCombinedClassifier永远预测吗?为什么这样做呢?

时间:2019-07-19 12:28:26

标签: tensorflow

我想预测tf.estimator.DNNLinearCombinedClassifier模型。

我的培训代码

    m = tf.estimator.DNNLinearCombinedClassifier(
model_dir=model_dir,
linear_feature_columns=wide_columns,
dnn_feature_columns=deep_columns,
dnn_activation_fn=tf.nn.relu,
n_classes = train_label.shape[1]  , 
dnn_hidden_units=[200 ,  150 , 100 , 50],
dnn_optimizer=tf.train.AdamOptimizer(learning_rate = 0.01) , 
linear_optimizer=tf.train.AdamOptimizer(learning_rate = 0.01),
config=run_config) 

def input_fn(data_file, num_epochs, shuffle, batch_size , Eval ):
def parse_csv(data_file ):
    print('Parsing', data_file)
    if Eval == "train" :
        print(Eval)
        columns = tf.decode_csv(data_file , record_defaults=_CSV_COLUMN_DEFAULTS)
        features = dict(zip(_CSV_COLUMNS, columns))
        labels = features.pop('target')
    elif Eval == "test" :
        print(Eval)
        columns = tf.decode_csv(data_file , record_defaults=_CSV_COLUMN_DEFAULTS[:-1])
        features = dict(zip(_CSV_COLUMNS[:-1] , columns))
        labels = tf.constant([1] , dtype = tf.int64 , shape = (10,) ) # , name ="target" 
        #labels = None
    return features, labels

dataset = tf.data.TextLineDataset(data_file).skip(1)
# .skip(1)
if shuffle :
    dataset = dataset.shuffle(buffer_size=10000) # _NUM_EXAMPLES['train']
dataset = dataset.map(parse_csv, num_parallel_calls=10)
if Eval == "train" :
    print(Eval)
    dataset = dataset.repeat(num_epochs)
    dataset = dataset.batch(batch_size , drop_remainder= True)
else :
    print(Eval)
    dataset = dataset.batch(100 , drop_remainder= True)
iterator = dataset.make_one_shot_iterator()
batch_features, batch_labels = iterator.get_next()
return batch_features , batch_labels 
def train_input_fn():
    return input_fn(data_file = "./Total_Data.csv" ,
                    num_epochs= 5 ,
                    shuffle= True ,
                    batch_size= 1000,
                    Eval = "train"
                   )

def input_fn_v2(data):
continuous_cols = {k: tf.constant(data[k].values)
                   for k in CONTINUOUS_COLUMNS}
categorical_cols = {k: tf.SparseTensor( indices=[[i, 0] for i in range(data[k].size)], values=data[k].astype(str).values , 
                                       dense_shape=[data[k].size, 1] ) for k in CATEGORICAL_COLUMNS}

feature_cols = {**continuous_cols , **categorical_cols}
#labels = tf.constant(np.array([[1]] * data.shape[0] ).reshape(-1,1) , dtype = tf.int64 ) # , name ="target" 
labels = None
return feature_cols , labels

def test_input_fn():
    #one = input_fn_v2(data = test_)    
    two = input_fn(data_file = "./Test_Data.csv" ,
                   num_epochs= 1 ,
                   shuffle= False ,
                   batch_size= 100 ,
                   Eval = "test")
    return one

帮助我 这还没有结束。

test_pred = m.predict(input_fn=test_input_fn )
test_pred = list(test_pred)[0] 

这既是学习,也是评估。 但是,当我尝试预测测试数据时,它并没有结束。 我认为“ tf.dataset”是问题所在,那么我们该如何解决呢? 但是,由于数据量很大,因此部署至关重要。

火车形状:90000,333 测试形状:13915、333

0 个答案:

没有答案