在大型数据集上运行ML模型时,出现“ ZeroDivisionError:整数除法或取零的模”错误。由于它是高级API,因此很难调试。我尝试更改和删除pandas_input_fn和DNNRegressor函数中具有的所有可选参数。
# Drop unneccessary columns, rows with NaNs
merged_data = pd.merge(lc_data, macro_data, left_on='issue_d', right_on='DATE')
del merged_data['DATE'], merged_data['issue_d'], merged_data['Unnamed: 7'], merged_data['Unnamed: 8']
merged_data[merged_data.dti.apply(lambda x: type(x) == float)]
merged_data.dropna(inplace=True)
# pop the column for the target y value
y = merged_data.pop('loan_status')
y.loc[y != "Charged Off"] = 1
y.loc[y == "Charged Off"] = 0
# some stiff with feature columns
# some stiff with feature columns
# some stiff with feature columns
input_layer = tf.estimator.inputs.pandas_input_fn(x=merged_data, y=y, num_epochs=2,
shuffle=False, batch_size=10000)
estimator = tf.estimator.DNNRegressor(feature_columns=feature_columns, hidden_units=[25],
loss_reduction=tf.losses.Reduction.MEAN,
optimizer=tf.train.AdamOptimizer(learning_rate=0.01,
beta1=0.9,beta2=0.999))
estimator.train(input_fn=input_layer)
它适用于只有91行的小型数据集
ZeroDivisionErrorTraceback (most recent call last)
<ipython-input-184-058823ef4272> in <module>()
100 # # estimator = tf.estimator.LinearRegressor(feature_columns=feature_columns)
101
--> 102 estimator.train(input_fn=input_layer)
103 # # estimator.train(input_fn=input_layer, steps=?)
104
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in train(self, input_fn, hooks, steps, max_steps, saving_listeners)
374
375 saving_listeners = _check_listeners_type(saving_listeners)
--> 376 loss = self._train_model(input_fn, hooks, saving_listeners)
377 logging.info('Loss for final step: %s.', loss)
378 return self
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in _train_model(self, input_fn, hooks, saving_listeners)
1143 return self._train_model_distributed(input_fn, hooks, saving_listeners)
1144 else:
-> 1145 return self._train_model_default(input_fn, hooks, saving_listeners)
1146
1147 def _train_model_default(self, input_fn, hooks, saving_listeners):
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in _train_model_default(self, input_fn, hooks, saving_listeners)
1165 features, labels, input_hooks = (
1166 self._get_features_and_labels_from_input_fn(
-> 1167 input_fn, model_fn_lib.ModeKeys.TRAIN))
1168 worker_hooks.extend(input_hooks)
1169 estimator_spec = self._call_model_fn(
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in _get_features_and_labels_from_input_fn(self, input_fn, mode)
1009 lambda: self._call_input_fn(input_fn, mode))
1010 else:
-> 1011 result = self._call_input_fn(input_fn, mode)
1012
1013 return estimator_util.parse_input_fn_result(result)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/estimator.pyc in _call_input_fn(self, input_fn, mode)
1098 kwargs['config'] = self.config
1099 with ops.device('/cpu:0'):
-> 1100 return input_fn(**kwargs)
1101
1102 def _call_model_fn(self, features, labels, mode, config):
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/inputs/pandas_io.pyc in input_fn()
137 num_threads=num_threads,
138 enqueue_size=batch_size,
--> 139 num_epochs=num_epochs)
140 if num_epochs is None:
141 features = queue.dequeue_many(batch_size)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/inputs/queues/feeding_functions.pyc in _enqueue_data(data, capacity, shuffle, min_after_dequeue, num_threads, seed, name, enqueue_size, num_epochs, pad_value)
482 random_start=shuffle,
483 seed=seed_i,
--> 484 num_epochs=num_epochs))
485 else:
486 feed_fns.append(
/usr/local/lib/python2.7/dist-packages/tensorflow/python/estimator/inputs/queues/feeding_functions.pyc in __init__(self, placeholders, dataframe, batch_size, random_start, seed, num_epochs)
261 random.seed(seed)
262 self._trav = random.randrange(self._max) if random_start else 0
--> 263 self._epoch_end = (self._trav - 1) % self._max
264
265 def __call__(self):
ZeroDivisionError: integer division or modulo by zero