import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib as plt
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import functools
COLUMNS = ["Alter", "Gender", "BMI", "Fever", "Nausea", "Fatigue",
"WBC", "RBC", "HGB", "Plat", "AST1", "ALT1", "ALT4", "ALT12", "ALT24", "ALT36", "ALT48", "ALT24w",
"RNABase", "RNA4", "Baseline", "Endstage"]
feature_name = COLUMNS[:-1]
LABEL_NAME = 'Endstage'
batch_size = 32
def get_dataset(file_path, **kwargs):
train_dataset = tf.data.experimental.make_csv_dataset(
file_path,
batch_size = batch_size,
label_name = LABEL_NAME,
ignore_errors=True,
num_epochs=5,
**kwargs
)
return train_dataset
train_data = get_dataset("HCVnew.csv")
def show_batch(dataset):
for batch, label in dataset.take(1):
for key, value in batch.items():
print("{:20s}: {}".format(key,value.numpy()))
SELECT_COLUMNS = ["Alter", "Gender", "BMI", "Fever", "Nausea", "Fatigue",
"WBC", "RBC", "HGB", "Plat", "AST1", "ALT1", "ALT4", "ALT12", "ALT24", "ALT36", "ALT48", "ALT24w",
"RNABase", "RNA4", "Baseline", "Endstage"]
DEFAULTS = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
uni_data = get_dataset("HCVnew.csv", select_columns = SELECT_COLUMNS, column_defaults = DEFAULTS)
def pack(features, label):
return tf.stack(list(features.values()), axis=-1), label
packed_dataset = uni_data.map(pack)
"""
for features, labels in packed_dataset.take(1):
print(features.numpy())
print()
print(labels.numpy())
"""
class PackNumericFeatures(object):
def __init__(self, names):
self.names = names
def __call__(self, features, labels):
numeric_features = [features.pop(name) for name in self.names]
numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
numeric_features = tf.stack(numeric_features, axis=-1)
features['numeric'] = numeric_features
return features, labels
NUMERIC_FEATURES = ["Alter", "Gender", "BMI", "Fever", "Nausea", "Fatigue",
"WBC", "RBC", "HGB", "Plat", "AST1", "ALT1", "ALT4", "ALT12", "ALT24", "ALT36", "ALT48", "ALT24w",
"RNABase", "RNA4", "Baseline"]
packed_train_data = train_data.map(
PackNumericFeatures(NUMERIC_FEATURES))
#show_batch(packed_train_data)
desc = pd.read_csv("HCVnew.csv")[NUMERIC_FEATURES].describe()
desc
MEAN = np.array(desc.T['mean'])
STD = np.array(desc.T['std'])
def normalize_numeric_data(data, mean, std):
# Center the data
return (data-mean)/std
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)
numeric_column = tf.feature_column.numeric_column('numeric', normalizer_fn=normalizer, shape=[len(NUMERIC_FEATURES)])
numeric_columns = [numeric_column]
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
#preprocessing_layer = tf.keras.layers.DenseFeatures(numeric_columns)
model = tf.keras.Sequential([
numeric_layer,
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(1),
])
model.compile(
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
optimizer='adam',
metrics=['accuracy'])
train_data = packed_train_data.shuffle(500)
model.fit(train_data, epochs=20)
我的神经网络的准确度为25%,这是非常糟糕的。我的训练数据包含1200个样本,但在一到两个纪元之后,准确度仍保持25%,我尝试更改批次大小和纪元数量,但无济于事。标签数量为4(1、2、3、4)。 如果有人知道我可以改善的地方,请告诉我。 非常感谢您的帮助!
答案 0 :(得分:0)
标签数量为4(1、2、3、4)
如果您有4个标签,那么这不是二进制问题,因此您需要使网络适应多标签分类:
如果您使用这样的密集层:
tf.keras.layers.Dense(1)
它将具有默认参数,并且在这种情况下具有线性激活,这不是您在二进制或多标签分类中想要的。