# Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import shutil
# Read data
df = pd.read_csv('sample.csv')
# Separate label from dataset
X = df.drop(['label'], axis=1).values
y = df[['label']].values
# Split into train and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Convert to dataframe
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
y_train = pd.DataFrame(y_train)
y_test = pd.DataFrame(y_test)
# Concatenate for writing into csv
train = pd.concat([X_train, y_train], axis=1)
valid = pd.concat([X_valid, y_valid], axis=1)
# Write into csv file
train.to_csv('train.csv', header=False, index=False)
valid.to_csv('valid.csv', header=False, index=False)
# Specify structure for tensorflow input
CSV_COLUMNS = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6', 'col7', 'col8']
LABEL_COLUMN = 'label'
DEFAULTS = [['none'], ['none'], ['none'], ['none'], ['none'], ['0'], [0], [0]]
# Function for reading input file and creating dataset
def read_dataset(filename, mode, batch_size = 512):
def _input_fn():
def decode_csv(value_column):
columns = tf.decode_csv(value_column, record_defaults=DEFAULTS)
features = dict(zip(CSV_COLUMNS, columns))
label = features.pop(LABEL_COLUMN)
return features, label
# Create list of files that match pattern
file_list = tf.gfile.Glob(filename)
# Create dataset from file list
dataset = tf.data.TextLineDataset(file_list).map(decode_csv)
if mode==tf.estimator.ModeKeys.TRAIN:
num_epochs = None # indefinitely
dataset = dataset.shuffle(buffer_size = 10 * batch_size)
else:
num_epochs = 1 # end-of-input after this
dataset = dataset.repeat(num_epochs).batch(batch_size)
return dataset.make_one_shot_iterator().get_next()
return _input_fn
# Input feature columns
INPUT_COLUMNS = [
tf.feature_column.categorical_column_with_vocabulary_list('col1', vocabulary_list=['1', '2', '3', '4']),
tf.feature_column.categorical_column_with_vocabulary_list('col2', vocabulary_list = [ '1', '2', '3', '4', '5', '6']),
tf.feature_column.categorical_column_with_vocabulary_list('col3', vocabulary_list = ['1', '2', '3', '4', '5', '6', '7', '8', '9']),
tf.feature_column.categorical_column_with_vocabulary_list('col4', vocabulary_list = [ '1', '2', '3', '4', '5', '6', '7', '8', '9', '10']),
tf.feature_column.categorical_column_with_vocabulary_list('col5', vocabulary_list = [ '0', '1', '2', '3', '4', '5']),
tf.feature_column.categorical_column_with_vocabulary_list('col6', vocabulary_list=['0', '1']),
tf.feature_column.numeric_column('col7'),
tf.feature_column.numeric_column('col8')
]
def add_more_features(feats):
# for future reference
return(feats)
feature_cols = add_more_features(INPUT_COLUMNS)
# Serving function
def serving_input_fn():
feature_placeholders = {
'col1': tf.placeholder(tf.string, [None]),
'col2': tf.placeholder(tf.string, [None]),
'col3': tf.placeholder(tf.string, [None]),
'col4': tf.placeholder(tf.string, [None]),
'col5': tf.placeholder(tf.string, [None]),
'col6': tf.placeholder(tf.string, [None]),
'col7': tf.placeholder(tf.int64, [None]),
'col8': tf.placeholder(tf.int64, [None])
}
features = {
key: tf.expand_dims(tensor, -1)
for key, tensor in feature_placeholders.items()
}
return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)
# Train and evaluate function
def train_and_evaluate(output_dir, num_train_steps):
estimator = tf.estimator.LinearClassifier(
model_dir=output_dir,
feature_columns=feature_cols)
train_spec = tf.estimator.TrainSpec(
input_fn = read_dataset('train.csv', mode = tf.estimator.ModeKeys.TRAIN),
max_steps=num_train_steps)
exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
eval_spec = tf.estimator.EvalSpec(
input_fn = read_dataset('valid.csv', mode = tf.estimator.ModeKeys.EVAL),
steps = None,
start_delay_secs = 1,
throttle_secs = 10,
exporters = exporter)
tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
# Log level and cleanup
tf.logging.set_verbosity(tf.logging.INFO)
OUTDIR = 'sample_dir'
shutil.rmtree(OUTDIR, ignore_errors=True)
# Run training and evaluation
train_and_evaluate(OUTDIR, num_train_steps=1)
答案 0 :(得分:0)
从seq = 1的事件中选择Count(*)