I made some modifications to the iris dataset: I added two columns, country and forest, so that I could put some categorical features on the data and try to implement TensorFlow's wide and deep neural net, as in the code below:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
import tensorflow as tf
import pandas as pd
import numpy as np
import tempfile
tf.logging.set_verbosity(tf.logging.ERROR)
IRIS_TRAINING="iris_training.csv"
IRIS_TEST="iris_test.csv"
start=datetime.datetime.now()
tf.reset_default_graph()
#categorical features
country=tf.contrib.layers.sparse_column_with_keys(column_name='country',keys=[0,1,2,3])
forest=tf.contrib.layers.sparse_column_with_keys(column_name='forest',keys=[0,1,2,3,4,5])
#continuous features
sepal_length=tf.contrib.layers.real_valued_column('sepal_length')
sepal_width=tf.contrib.layers.real_valued_column('sepal_width')
petal_length=tf.contrib.layers.real_valued_column('petal_length')
petal_width=tf.contrib.layers.real_valued_column('petal_width')
#wide and deep columns
wide_columns=[forest,country,
              tf.contrib.layers.crossed_column([forest,country],hash_bucket_size=int(1e4))]
deep_columns=[tf.contrib.layers.embedding_column(country,dimension=8),
              tf.contrib.layers.embedding_column(forest,dimension=8),
              sepal_length,sepal_width,petal_length,petal_width]
COLUMNS=['sepal_length','sepal_width','petal_length','petal_width','label']
LABEL_COLUMN=['label']
CATEGORICAL_COLUMNS=['country','forest']
CONTINUOUS_COLUMNS=['sepal_length','sepal_width','petal_length','petal_width']
#Load datasets
print("loading dataset")
training_set=pd.read_csv(IRIS_TRAINING,names=COLUMNS,skipinitialspace=True,skiprows=1)
np.random.seed(42)
training_set['country']=np.random.choice(range(1,4),training_set.shape[0])
training_set['forest']=np.random.choice(range(0,6),training_set.shape[0])
training_set['label']=training_set['label'].astype(int)
test_set=pd.read_csv(IRIS_TEST,names=COLUMNS,skipinitialspace=True,skiprows=1)
test_set['country']=np.random.choice(range(0,4),test_set.shape[0])
test_set['forest']=np.random.choice(range(0,6),test_set.shape[0])
test_set['label']=test_set['label'].astype(int)
print("finished loading dataset")
print(training_set.head())
print(test_set.head())
print((datetime.datetime.now()-start).seconds)
#generating input for the classifier
def input_fn(df):
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
    categorical_cols = {k: tf.SparseTensor(
        indices=[[i, 0] for i in range(df[k].size)],
        values=df[k].values,
        shape=[df[k].size, 1]) for k in CATEGORICAL_COLUMNS}
    feature_cols = dict(continuous_cols.items() | categorical_cols.items())
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label
#Config and initiate model
print("Initializing model")
validation_metrics = {
    "accuracy":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_accuracy,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES),
    "precision":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_precision,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES),
    "recall":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_recall,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES)
}
#validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( test_set.data, test_set.target, every_n_steps=50,metrics=validation_metrics)
def train_input_fn():
    return input_fn(training_set)
def eval_input_fn():
    return input_fn(test_set)
model_dir=tempfile.mkdtemp()
m=tf.contrib.learn.DNNLinearCombinedClassifier(
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100,50],
    n_classes=3,
    model_dir=model_dir,
    config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
print("Finished Model Init")
print((datetime.datetime.now()-start).seconds)
#fit the model
print("Train the model the model")
m.fit(input_fn=train_input_fn,steps=60)
print("Finished finiting the model")
print((datetime.datetime.now()-start).seconds)
#Evaluate Classifier
print("Evaluating model")
results=m.evaluate(input_fn=eval_input_fn,steps=1)
for key in sorted(results):
    print("%s: %s" % (key, results[key]))
#accuracy_score=classifier.evaluate(x=test_set.data,y=test_set.target)["accuracy"]
#print('Accuracy score:{0:f}'.format(accuracy_score))
print("Finished Evaluating the model")
print((datetime.datetime.now()-start).seconds)
But the TensorFlow error I get is not obvious to me:
Traceback (most recent call last):
File "iris_wide_deep.py", line 97, in <module>
m.fit(input_fn=train_input_fn,steps=60)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py", line 711, in fit
max_steps=max_steps)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 191, in new_func
return func(*args, **kwargs)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 355, in fit
max_steps=max_steps)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 699, in _train_model
train_ops = self._get_train_ops(features, labels)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1052, in _get_train_ops
return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1019, in _call_model_fn
params=self.params)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py", line 443, in _dnn_linear_combined_model_fn
scope=scope)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 247, in input_from_feature_columns
default_name='input_from_feature_columns')
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 159, in _input_from_feature_columns
transformed_tensor = transformer.transform(column)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 869, in transform
feature_column.insert_transformed_feature(self._columns_to_tensors)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column.py", line 916, in insert_transformed_feature
self.sparse_id_column.insert_transformed_feature(columns_to_tensors)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column.py", line 592, in insert_transformed_feature
name="lookup")
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/lookup/lookup_ops.py", line 622, in string_to_index
keys = ops.convert_to_tensor(mapping, dtypes.string)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 669, in convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 176, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 165, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 367, in make_tensor_proto
_AssertCompatible(values, dtype)
File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 302, in _AssertCompatible
(dtype.name, repr(mismatch), type(mismatch).__name__))
TypeError: Expected string, got 0 of type 'int' instead.
I would appreciate some guidance here, since I tried to replicate the dataset types from the given example as closely as I could.
Answer 0 (score: 0)
From this answer, I found out that when numbers are used as the values of a categorical feature (country), they are inferred as numeric types on this line:
categorical_cols = {k: tf.SparseTensor( indices=[[i, 0] for i in range(df[k].size)], values=df[k].values, shape=[df[k].size, 1]) for k in CATEGORICAL_COLUMNS}
Since TensorFlow stores its tensors in numpy arrays, it tries to infer the data type before storing them.
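A small illustration of that inference (not from the original post, just standard pandas/numpy behavior):
import numpy as np
import pandas as pd
# An integer-coded column is inferred as a numeric dtype...
df = pd.DataFrame({'country': [0, 1, 2, 3]})
print(df['country'].values.dtype)              # int64
# ...while casting to str yields an object array of Python strings,
# which is what the string lookup op expects
print(df['country'].astype(str).values.dtype)  # object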
Since I did not specify their type after reading them from the CSV file, they were handed to the SparseTensor object as-is, and the SparseTensor in turn checks the data type.
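For reference, here is a minimal sketch of one way around it, assuming the same contrib.layers API as in the question: declare the keys as strings and cast the DataFrame columns to match, so the string lookup receives strings on both sides.
# Sketch of the fix: string keys instead of integer keys
country=tf.contrib.layers.sparse_column_with_keys(
    column_name='country', keys=['0','1','2','3'])
forest=tf.contrib.layers.sparse_column_with_keys(
    column_name='forest', keys=['0','1','2','3','4','5'])
# Cast the randomly generated integer codes to strings so the
# SparseTensor values match the string keys above
for df in (training_set, test_set):
    df['country'] = df['country'].astype(str)
    df['forest'] = df['forest'].astype(str)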
I hope this helps.