更新后的帖子:
我调查了最初的建议,以找到问题的根源。 最初的问题是几个要素列产生了属性错误(“无名”或“ get_sparse_tensors”)
这是“ tuple”对象没有属性“ name”的示例代码:
metro = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_identity("metro",94)),
tf.feature_column.indicator_column(metro),
“地铁”列中的数据如下所示:
-"(not set)"
-"Abilene-Sweetwater TX"
-"Albany-Schenectady-Troy NY"
-"Atlanta GA"
-...
这是_get_sparse_tensor错误的示例代码:
browser = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_identity("browser",54))
tf.feature_column.indicator_column(browser),
我可以通过将categorical_column_with_identity替换为categorical_column_with_vocabulary_list来解决这两个错误
metro = tf.feature_column.categorical_column_with_vocabulary_list('metro',
vocabulary_list=['(not set)','Abilene-Sweetwater TX','Albany-Schenectady-Troy NY','Atlanta GA'])
由于使用tf.feature_column.categorical_column_with_identity比编写长词汇表要快得多,所以我很想知道为什么会出现此错误?
这是MCVE:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import pandas as pd
import argparse
import tensorflow as tf
# specify col.names.
names = [
'browser',
'metro',
'transactionRevenue'
]
# specify dtypes.
dtypes = {
'browser': str,
'metro': str,
'transactionRevenue': np.float32
}
df = pd.read_csv('dropped_train.csv', names=names, dtype=dtypes, n a_values='?',encoding ="ISO-8859-1")
def load_data(y_name="transactionRevenue", train_fraction=0.7, seed=None):
# Load the raw data columns.
data = df
# Shuffle the data
np.random.seed(seed)
# Split the data into train/test subsets.
x_train = data.sample(frac=train_fraction, random_state=seed)
x_test = data.drop(x_train.index)
# Extract the label from the features DataFrame.
y_train = x_train.pop(y_name)
y_test = x_test.pop(y_name)
return (x_train, y_train), (x_test, y_test)
load_data()
def features_columns():
metro = tf.feature_column.indicator_column (tf.feature_column.categorical_column_with_identity("metro",94)),
browser = tf.feature_column.indicator_column(tf.feature_column.categorical_column_with_identity("browser",54))
feature_columns = [
tf.feature_column.indicator_column(browser),
tf.feature_column.indicator_column(metro)
]
return feature_columns
features_columns()
log_dir = ("C:\\…\\gs sales\\model")
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=500, type=int, help='batch size')
parser.add_argument('--train_steps', default=10000, type=int, help='number of training steps')
parser.add_argument('--norm_factor', default=10., type=float, help='normalization factor')
def main(argv):
"""Builds, trains, and evaluates the model."""
args = parser.parse_args(argv[1:])
(train_x, train_y), (test_x, test_y) = load_data()
train_y /= args.norm_factor
test_y /= args.norm_factor
# Build the training dataset.
training_input_fn = tf.estimator.inputs.pandas_input_fn(x=train_x, y=train_y, batch_size=64,
shuffle=True, num_epochs=None)
# Build the Estimator.
model = tf.estimator.DNNRegressor(hidden_units=[50,30,10], feature_columns=features_columns(),
model_dir=log_dir)
# Train the model.
model.train(input_fn=training_input_fn, steps=args.train_steps)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run(main=main)