我正在尽力实现带有张量流的DNN到目前为止我已经能够下载文件并解析它们以便Pandas数据帧中的所有数据都是非数字的但是我试图将我的数据放在tensorflow DNN中而且我不确定如何继续前进。任何帮助将不胜感激
#==============================================================================
#
# Import statements
#
#==============================================================================
import tensorflow as tf
import pandas as pd
from pandas import Series, DataFrame
from sklearn.preprocessing import LabelEncoder
import tempfile
Columns = ['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']
#==============================================================================
#
# read data from given file and return a pandas dataset
#
#==============================================================================
def readData(FileName):
titanic_df = pd.read_csv(FileName)
titanic_df = titanic_df.dropna()
return titanic_df
def processData(data):
gender = LabelEncoder()
gender.fit(['male', 'female'])
data["Sex"]= gender.transform(data["Sex"])
Embarked = LabelEncoder()
Embarked.fit(['S', 'C','Q'])
data["Embarked"] = Embarked.transform(data["Embarked"])
return data
def input_fn(data):
Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
label = tf.constant(data['Survived'].values)
return dict(Continuous_cols.items()),label
def train_input_fn(train_data):
return input_fn(train_data)
def eval_input_fn(test_data):
return input_fn(test_data)
def main():
test_data = readData('test.csv')
train_data = readData('train.csv')
Col_To_Drop =['Name','Ticket','Cabin','Fare']
for col in Col_To_Drop:
test_data = test_data.drop(col,1)
train_data = train_data.drop(col,1)
test_data = processData(test_data)
train_data = processData(train_data)
Columns = list(train_data.columns.values)
print(Columns)
print(test_data.info())
print(train_data.info())
print(train_data.head())
model_dir = tempfile.mkdtemp()
features = []
for c in Columns:
features.append(tf.contrib.layers.real_valued_column(str(c)))
m = tf.contrib.learn.LinearClassifier(feature_columns=features,
model_dir=model_dir)
m.fit(input_fn=lambda: train_input_fn(train_data), steps=200)
results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
for key in sorted(results):
print("%s: %s" % (key, results[key]))
if __name__ == "__main__":
main()
------------------------输出--------------------- < / p>
NOTE: The Python console is going to be REMOVED in Spyder 3.2. Please start to migrate your work to the IPython console instead.
['PassengerId', 'Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked']
<class 'pandas.core.frame.DataFrame'>
Int64Index: 87 entries, 12 to 414
Data columns (total 7 columns):
PassengerId 87 non-null int64
Pclass 87 non-null int64
Sex 87 non-null int64
Age 87 non-null float64
SibSp 87 non-null int64
Parch 87 non-null int64
Embarked 87 non-null int64
dtypes: float64(1), int64(6)
memory usage: 5.4 KB
None
<class 'pandas.core.frame.DataFrame'>
Int64Index: 183 entries, 1 to 889
Data columns (total 8 columns):
PassengerId 183 non-null int64
Survived 183 non-null int64
Pclass 183 non-null int64
Sex 183 non-null int64
Age 183 non-null float64
SibSp 183 non-null int64
Parch 183 non-null int64
Embarked 183 non-null int64
dtypes: float64(1), int64(7)
memory usage: 12.9 KB
None
PassengerId Survived Pclass Sex Age SibSp Parch Embarked
1 2 1 1 0 38.0 1 0 0
3 4 1 1 0 35.0 1 0 2
6 7 0 1 1 54.0 0 0 2
10 11 1 3 0 4.0 1 1 2
11 12 1 1 0 58.0 0 0 2
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:Rank of input Tensor (1) should be the same as output_rank (2) for column. Will attempt to expand dims. It is highly recommended that you resize your input, as this behavior may change.
WARNING:tensorflow:From C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\head.py:615: scalar_summary (from tensorflow.python.ops.logging_ops) is deprecated and will be removed after 2016-11-30.
Instructions for updating:
Please switch to tf.summary.scalar. Note that tf.summary.scalar uses the node name instead of the tag. This means that TensorFlow will automatically de-duplicate summary names based on the scope they are created in. Also, passing a tensor or list of tags to a scalar summary op is no longer supported.
up CPU computations.
e are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.227623: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use FMA instructions, but these are available on your machine and could speede\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX2 instructions, but thes98062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use AVX instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.227119: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\cor_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.226681: W d:\nwani\l\tensorflow_14and could speed up CPU computations.
2017-07-23 17:48:29.225877: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE4.1 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.226233: W d:\nwani\l\tensorflowe_guard.cc:45] The TensorFlow library wasn't compiled to use SSE3 instructions, but these are available on your machine -07-23 17:48:29.224819: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE2 instructions, but these are available on your machine and could speed up CPU computations.
2017-07-23 17:48:29.225278: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0\tensorflow\core\platform\cpu_featur\tensorflow\core\platform\cpu_feature_guard.cc:45] The TensorFlow library wasn't compiled to use SSE instructions, but these are available on your machine and could speed up CPU computations.
20172017-07-23 17:48:29.224365: W d:\nwani\l\tensorflow_1498062690615\work\tensorflow-1.1.0
Traceback (most recent call last):
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2393, in get_loc
return self._getitem_column(key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2069, in _getitem_column
return self._get_item_cache(key)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1534, in _get_item_cache
values = self._data.get(item)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\internals.py", line 3590, in get
loc = self.items.get_loc(item)
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py", line 2395, in get_loc
return self._engine.get_loc(self._maybe_cast_indexer(key))
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)
File "pandas\_libs\index.pyx", line 154, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)
File "pandas\_libs\hashtable_class_helper.pxi", line 1207, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)
File "pandas\_libs\hashtable_class_helper.pxi", line 1215, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)
KeyError: 'Survived'
return self._engine.get_loc(key)
File "pandas\_libs\index.pyx", line 132, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)
File "pandas\_libs\index.pyx", line 154, in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)
File "pandas\_libs\hashtable_class_helper.pxi", line 1207, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)
File "pandas\_libs\hashtable_class_helper.pxi", line 1215, in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)
KeyError: 'Survived'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 90, in <module>
main()
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 83, in main
results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 281, in new_func
return func(*args, **kwargs)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 518, in evaluate
log_progress=log_progress)
File "C:\ProgramData\Anaconda3\lib\site-packages\tensorflow\contrib\learn\python\learn\estimators\estimator.py", line 801, in _evaluate_model
features, labels = input_fn()
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 83, in <lambda>
results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 49, in eval_input_fn
return input_fn(test_data)
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 40, in input_fn
Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
File "C:\Users\jamil\Desktop\Code\Python\TensorFlow\titanic\titanic.py", line 40, in <dictcomp>
Continuous_cols = {k: tf.constant(data[k].values) for k in Columns}
File "C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py", line 2062, in __getitem__
答案 0 :(得分:0)
问题来自以下几点:
m.fit(input_fn=train_input_fn(train_data), steps=200)
results = m.evaluate(input_fn=eval_input_fn(test_data), steps=1)
input_fn
参数应该是函数,不带参数并返回元组,而您调用train_input_fn()
/ eval_input_fn()
函数和传递他们的返回值(一个元组)。
解决此问题的最简单方法是将train_input_fn(train_data)
和eval_input_fn(test_data)
包装在lambda
表达式中,如下所示:
m.fit(input_fn=lambda: train_input_fn(train_data), steps=200)
results = m.evaluate(input_fn=lambda: eval_input_fn(test_data), steps=1)
表达式lambda: train_input_fn(train_data)
是一个不带参数的函数,当被调用时,它返回train_input_fn(train_data)
的值,这是一个元组。