我正在遵循Google的tensorflow示例,但是具有不同的数据集,并且我收到了以上错误消息。
这是添加额外列之前的原始数据集:http://www.exploredata.net/Downloads/Baseball-Data-Set
我正在anaconda3 powershell提示符下运行所有这些操作。
我已确保未跳过任何行并添加了打印语句来查找问题。似乎是在def my_input_fn中。 代码:
import numpy as np
import scipy
from sklearn import datasets, metrics
import csv
import pandas as pd
import math
from matplotlib import cm, gridspec, pyplot as plt
from IPython import display
import tensorflow as tf
from tensorflow.python.data import Dataset
tf.logging.set_verbosity(tf.logging.ERROR)
pd.options.display.max_rows = 10
pd.options.display.float_format = '{:.3f}'.format
MLB2008_df = pd.read_csv(r'C:\Users\Alex\Downloads\MLB2008.csv', sep = ',', engine = 'python', header = None)
MLB2008_df.columns = MLB2008_df.iloc[0]
mlb = MLB2008_df.drop([0])
mlb['OBP'] = mlb['OBP'].astype(float)
mlb['SLG'] = mlb['SLG'].astype(float)
mlb['OPS'] = mlb['OBP'] + mlb['SLG']
mlb['SALARY'] = mlb['SALARY'].astype(float)
mlb = mlb.reindex(np.random.permutation(mlb.index))
type(mlb['SALARY'])
mlb['SALARY'] /= 1000.0
print(mlb)
mlb.describe()
#define feature
my_feature = mlb['OPS']
feature_columns = [tf.feature_column.numeric_column('OPS')]
#define label
targets = mlb['SALARY']
#gradient descent
myoptimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.0000001)
myoptimizer = tf.contrib.estimator.clip_gradients_by_norm(myoptimizer, 5.0)
#Configure model
linear_regressor = tf.estimator.LinearRegressor(feature_columns = feature_columns, optimizer = myoptimizer)
print ('here1')
def my_input_fn(features, targets, batch_size = 1, shuffle = True, num_epochs = None):
'''Trains a linear regression model of one feature.
Args:
features: pandas dataframe of features
targets: pandas dataframe of targets
batch_size: size of batches to be passed to the model
shuffle: True or False, whether to shuffle the data
num_epochs: number of epochs for which data should be repeated. None = repeat indefinitely
Returns:
Tuple of (features, labels) for next data batch
'''
#convert pandas data into a dict of np arrays
features = {key:np.array(value) for key, value in dict(features).items()}
#construct a dataset, and configure batching/repeating
ds = Dataset.from_tensor_slices((features,targets))
ds = ds.batch(batch_size).repeat(num_epochs)
#shuffle data if specified
if shuffle:
ds = ds.shuffle(buffer_size = 10000)
#return the next batch of data
features, labels = ds.make_one_shot_iterator().get_next()
return features, labels
_ = linear_regressor.train(
input_fn = lambda:my_input_fn(my_feature, targets),
steps = 100
)
ValueError:仅对等级> = 1支持对张量进行分批处理 这是错误,解决任何帮助将不胜感激!