Unable to run the wide and deep model on the iris dataset

Asked: 2017-01-03 12:12:38

Tags: python-3.x tensorflow neural-network

I made a few modifications to the iris dataset: I added two columns, country and forest, so that I could attach some categorical features to the data and try out TensorFlow's wide and deep neural net, as shown in the code below:

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime

import tensorflow as tf
import pandas as pd
import numpy as np
import tempfile
tf.logging.set_verbosity(tf.logging.ERROR)

IRIS_TRAINING="iris_training.csv"
IRIS_TEST="iris_test.csv"

start=datetime.datetime.now()
tf.reset_default_graph()
#categorical features
country=tf.contrib.layers.sparse_column_with_keys(column_name='country',keys=[0,1,2,3])
forest=tf.contrib.layers.sparse_column_with_keys(column_name='forest',keys=[0,1,2,3,4,5])
#continuous features
sepal_length=tf.contrib.layers.real_valued_column('sepal_length')
sepal_width=tf.contrib.layers.real_valued_column('sepal_width')
petal_length=tf.contrib.layers.real_valued_column('petal_length')
petal_width=tf.contrib.layers.real_valued_column('petal_width')
#wide columns
wide_columns=[
    forest,
    country,
    tf.contrib.layers.crossed_column([forest,country],hash_bucket_size=int(1e4))]
#deep columns
deep_columns=[
    tf.contrib.layers.embedding_column(country,dimension=8),
    tf.contrib.layers.embedding_column(forest,dimension=8),
    sepal_length,
    sepal_width,
    petal_length,
    petal_width]
COLUMNS=['sepal_length','sepal_width','petal_length','petal_width','label']
LABEL_COLUMN=['label']
CATEGORICAL_COLUMNS=['country','forest']
CONTINUOUS_COLUMNS=['sepal_length','sepal_width','petal_length','petal_width']
#Load datasets
print("loading dataset")
training_set=pd.read_csv(IRIS_TRAINING,names=COLUMNS,skipinitialspace=True,skiprows=1)
np.random.seed(42)
training_set['country']=np.random.choice(range(1,4),training_set.shape[0])
training_set['forest']=np.random.choice(range(0,6),training_set.shape[0])
training_set['label']=training_set['label'].astype(int)
test_set=pd.read_csv(IRIS_TEST,names=COLUMNS,skipinitialspace=True,skiprows=1)
test_set['country']=np.random.choice(range(0,4),test_set.shape[0])
test_set['forest']=np.random.choice(range(0,6),test_set.shape[0])
test_set['label']=test_set['label'].astype(int)
print("finished loading dataset")
print(training_set.head())
print(test_set.head())
print((datetime.datetime.now()-start).seconds)

#generating input for the classifier

def input_fn(df):
    continuous_cols = {k: tf.constant(df[k].values) for k in CONTINUOUS_COLUMNS}
    # categorical features become sparse tensors with one entry per row
    categorical_cols = {
        k: tf.SparseTensor(
            indices=[[i, 0] for i in range(df[k].size)],
            values=df[k].values,
            shape=[df[k].size, 1])
        for k in CATEGORICAL_COLUMNS}
    feature_cols = dict(continuous_cols.items() | categorical_cols.items())
    label = tf.constant(df[LABEL_COLUMN].values)
    return feature_cols, label




#Config and initiate model
print("Initializing model")
validation_metrics = {
    "accuracy":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_accuracy,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES),
    "precision":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_precision,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES),
    "recall":
        tf.contrib.learn.metric_spec.MetricSpec(
            metric_fn=tf.contrib.metrics.streaming_recall,
            prediction_key=tf.contrib.learn.prediction_key.PredictionKey.CLASSES)
}

#validation_monitor = tf.contrib.learn.monitors.ValidationMonitor( test_set.data, test_set.target, every_n_steps=50,metrics=validation_metrics)

def train_input_fn():
    return input_fn(training_set)


def eval_input_fn():
    return input_fn(test_set)


model_dir=tempfile.mkdtemp()
m=tf.contrib.learn.DNNLinearCombinedClassifier(
    linear_feature_columns=wide_columns,
    dnn_feature_columns=deep_columns,
    dnn_hidden_units=[100,50],
    n_classes=3,
    model_dir=model_dir,
    config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
print("Finished Model Init")
print((datetime.datetime.now()-start).seconds)

#fit the model 
print("Train the model the model")
m.fit(input_fn=train_input_fn,steps=60)
print("Finished finiting the model")
print((datetime.datetime.now()-start).seconds)


#Evaluate Classifier
print("Evaluating model")
results=m.evaluate(input_fn=eval_input_fn,steps=1)
for key in sorted(results):
    print("%s : %s"+(key,results[key]))
#accuracy_score=classifier.evaluate(x=test_set.data,y=test_set.target)["accuracy"]

#print('Accuracy score:{0:f}'.format(accuracy_score))
print("Finished Evaluating the model")
print((datetime.datetime.now()-start).seconds)

However, I get the following error from TensorFlow, which is not obvious to me:

Traceback (most recent call last):
  File "iris_wide_deep.py", line 97, in <module>
    m.fit(input_fn=train_input_fn,steps=60)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py", line 711, in fit
    max_steps=max_steps)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/util/deprecation.py", line 191, in new_func
    return func(*args, **kwargs)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 355, in fit
    max_steps=max_steps)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 699, in _train_model
    train_ops = self._get_train_ops(features, labels)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1052, in _get_train_ops
    return self._call_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/estimator.py", line 1019, in _call_model_fn
    params=self.params)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py", line 443, in _dnn_linear_combined_model_fn
    scope=scope)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 247, in input_from_feature_columns
    default_name='input_from_feature_columns')
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 159, in _input_from_feature_columns
    transformed_tensor = transformer.transform(column)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column_ops.py", line 869, in transform
    feature_column.insert_transformed_feature(self._columns_to_tensors)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column.py", line 916, in insert_transformed_feature
    self.sparse_id_column.insert_transformed_feature(columns_to_tensors)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/feature_column.py", line 592, in insert_transformed_feature
    name="lookup")
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/contrib/lookup/lookup_ops.py", line 622, in string_to_index
    keys = ops.convert_to_tensor(mapping, dtypes.string)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 669, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 176, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/constant_op.py", line 165, in constant
    tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 367, in make_tensor_proto
    _AssertCompatible(values, dtype)
  File "/home/eliethesaiyan/anaconda3/envs/tensorflow/lib/python3.5/site-packages/tensorflow/python/framework/tensor_util.py", line 302, in _AssertCompatible
    (dtype.name, repr(mismatch), type(mismatch).__name__))
TypeError: Expected string, got 0 of type 'int' instead.

I would appreciate some guidance here, since I tried to replicate the data types from the given example as closely as I could.

1 Answer:

Answer 0 (score: 0):

From this answer, I found out that when numbers are used as the values of a categorical feature (country), their type is inferred as numeric on this line:

categorical_cols = {
    k: tf.SparseTensor(
        indices=[[i, 0] for i in range(df[k].size)],
        values=df[k].values,
        shape=[df[k].size, 1])
    for k in CATEGORICAL_COLUMNS}

Since TensorFlow stores its tensors in numpy arrays, it tries to infer the data type before storing the values.
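To make that inference concrete, here is a minimal sketch; the names below are stand-ins for the country column generated above:

import numpy as np
import tensorflow as tf

# numpy types the randomly generated column as int64 ...
country_values = np.random.choice(range(4), 5)
print(country_values.dtype)  # int64

# ... so the SparseTensor built from it carries integer values, not the
# strings that a string-keyed sparse column ends up comparing against
country_sparse = tf.SparseTensor(
    indices=[[i, 0] for i in range(country_values.size)],
    values=country_values,
    shape=[country_values.size, 1])
print(country_sparse.values.dtype)  # <dtype: 'int64'>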

Since I did not specify their type after reading them from the CSV files, the columns kept their inferred integer type when handed to the SparseTensor, which in turn checks the data type.
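One way out (a sketch, not tested against this exact setup) is to keep the categorical data as strings on both sides, so that the keys of the sparse columns and the values of the SparseTensor agree on dtype:

#declare the sparse columns with string keys ...
country = tf.contrib.layers.sparse_column_with_keys(
    column_name='country', keys=['0', '1', '2', '3'])
forest = tf.contrib.layers.sparse_column_with_keys(
    column_name='forest', keys=['0', '1', '2', '3', '4', '5'])

# ... and cast the dataframe columns to str right after loading the CSVs
for df in (training_set, test_set):
    df['country'] = df['country'].astype(str)
    df['forest'] = df['forest'].astype(str)

Alternatively, tf.contrib.layers.sparse_column_with_integerized_feature is meant for categorical columns that are already integers, which would avoid the casts.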

I hope this helps.