我正在尝试使用CNN对表格数据进行分类。为此,我将数据与基本图像进行卷积。我在这里了解了这种方法:https://www.researchgate.net/publication/341117286_A_novel_method_for_classification_of_tabular_data_using_convolutional_neural_networks
我的数据集有1200行(因此有1200张图像)和25个特征。这是我对数据进行卷积的代码:
import pandas as pd
from sklearn.preprocessing import StandardScaler, PowerTransformer, MinMaxScaler
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img, save_img
import csv
# Load the tabular dataset and scale the features for image construction.
Dataset = pd.read_csv("....csv", header=0)
feature_columns = ['...']
x = Dataset[feature_columns]  # feature matrix (25 columns)
y = Dataset.Classifier        # class labels (0, 1, 2)

# Scale every feature into (0.1, 0.9) so that multiplying with the base
# image never zeroes a pixel out completely and never fully saturates it.
# fit_transform returns a plain numpy array, so no .to_numpy() is needed.
sc = MinMaxScaler(feature_range=(0.1, 0.9))
x = sc.fit_transform(x)
# Build one grayscale image per data row by modulating a base image with the
# row's scaled feature values, then write an index CSV of the saved images.

# Label id -> class-folder name used in the index CSV.
CLASS_NAMES = {0: 'reuse', 1: 'recycling', 2: 'disposal'}

# The base image is identical for every row: load it ONCE instead of once
# per iteration (the original reloaded it 1200 times inside the loop).
base_img = load_img(".../Unbenannt.png", grayscale=True)
base_array = img_to_array(base_img)

# Rescales pixel values back into the displayable 0..255 range.
# One instance is enough; fit_transform refits it on every call anyway.
sc_2 = MinMaxScaler(feature_range=(0, 255))

image_records = []  # renamed from `dict`, which shadowed the builtin
for i in range(len(y)):
    # Unknown labels fall back to "" (same as the original if-chain).
    class_name = CLASS_NAMES.get(int(y[i]), "")

    # Arrange the 25 features as a 5x5 grid and tile it to 250x250 so it
    # matches the base image, then append a trailing channel axis.
    x_new = x[i].reshape(5, 5)
    x_new = np.tile(x_new, (50, 50))
    x_new = x_new.reshape(x_new.shape + (1,))

    # Element-wise modulation of the base image by the feature pattern.
    new_array_new = np.multiply(base_array, x_new)

    # NOTE(review): fitting the scaler on each image ROW normalises every
    # row independently, which flattens contrast between rows; scaling the
    # whole 250x250 image at once would likely preserve more of the pattern.
    for j in range(250):
        new_array_new[j] = sc_2.fit_transform(new_array_new[j])

    # `i` equals the running counter the original kept in `num_0`.
    name = '%s_%s' % (y[i], i) + '.png'
    save_img('.../Image/%s' % name, new_array_new)
    image_records.append({'No': i, 'Name': name, 'Class': class_name})

# Persist the image index. newline='' prevents blank rows on Windows
# (required by the csv module's documented usage of open()).
csv_columns = ['No', 'Name', 'Class']
csv_file = 'Images.csv'
try:
    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
        writer.writeheader()
        writer.writerows(image_records)
except IOError:
    print("I/O error")
这是基本图像 base image
这是三个不同标签的结果
结果并不令人满意。我该怎么做才能改善图像?虽然可以看到不同的图案,但1200张图像几乎每一张都是如此,因此显然无法据此实现真正的分类。如果我尝试用这些图像进行分类,CNN的准确率在第2个Epoch就达到约50%,并且之后保持不变。这是CNN的代码:
import tensorflow as tf
from tensorboard.plugins.hparams import api as hp
import pandas as pd
from sklearn.preprocessing import StandardScaler, PowerTransformer
from keras.utils import to_categorical
from keras import Sequential
from keras.layers import Dense, Dropout, Conv1D, LSTM, MaxPooling1D, Flatten, BatchNormalization, Activation, Conv2D, MaxPooling2D
import numpy as np
from sklearn.model_selection import cross_val_score, RepeatedKFold, KFold, cross_validate, StratifiedKFold, RepeatedStratifiedKFold
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from sklearn.model_selection import cross_val_score, RepeatedKFold, KFold, cross_validate, StratifiedKFold, RepeatedStratifiedKFold, train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
from keras.losses import binary_crossentropy, categorical_crossentropy
from keras.optimizers import RMSprop, SGD
# Image index produced by the generation script (columns: No, Name, Class).
Dataset = pd.read_csv('.../Images.csv', header=0)
# Number of training epochs per cross-validation fold.
num_epochs = 30
def create_new_model():
    """Build and compile a fresh VGG-style CNN for 3-class grayscale images.

    A new, unfitted model must be created for every cross-validation fold so
    that trained weights never leak between folds.

    Returns:
        A compiled keras ``Sequential`` model expecting ``(256, 256, 1)``
        inputs and producing a 3-way softmax.
    """
    model = Sequential()
    # Block 1: two 3x3 convolutions, 32 filters each, then 2x2 pooling.
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(256, 256, 1)))
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # Block 2: 64 filters.
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # Block 3: 128 filters.
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(0.2))
    # Classification head.
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.2))
    model.add(Dense(3, activation='softmax'))
    # `lr` is deprecated/removed in modern Keras; `learning_rate` is the
    # supported keyword.
    opt = SGD(learning_rate=0.001, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# Evaluate the CNN with repeated stratified k-fold cross-validation.
y = Dataset['Class']
print(y.head())
y = y.to_numpy()

rkfold = RepeatedStratifiedKFold(n_splits=4, n_repeats=1)

# Augmentation belongs on the TRAINING folds only. The original applied the
# same augmenting generator to the evaluation data, so the measured accuracy
# reflected distorted images rather than the real ones.
train_idg = ImageDataGenerator(width_shift_range=0.1,
                               height_shift_range=0.1,
                               zoom_range=0.3,
                               fill_mode='nearest',
                               horizontal_flip=True,
                               rescale=1. / 255)
test_idg = ImageDataGenerator(rescale=1. / 255)

image_dir = '/content/Image'
Accuracy = []

for train_index, test_index in rkfold.split(np.zeros(len(y)), y):
    training_data = Dataset.iloc[train_index]
    test_data = Dataset.iloc[test_index]
    train_data_generator = train_idg.flow_from_dataframe(
        training_data, directory=image_dir, x_col="Name", y_col='Class',
        class_mode="categorical", shuffle=True, batch_size=10,
        color_mode="grayscale")
    # No shuffling needed for evaluation.
    test_data_generator = test_idg.flow_from_dataframe(
        test_data, directory=image_dir, x_col="Name", y_col='Class',
        class_mode="categorical", shuffle=False, batch_size=10,
        color_mode="grayscale")

    # Fresh model per fold so weights never carry over between folds.
    model = create_new_model()
    # BUG FIX: the original passed the undefined name `nb_epoch` (NameError);
    # the epoch count is the module-level `num_epochs`.
    history = model.fit(train_data_generator, epochs=num_epochs)
    score = model.evaluate(test_data_generator)
    Accuracy.append(score[1])

# score[1] is a fraction in [0, 1]; scale by 100 so the "%" in the format
# string is truthful (the original printed e.g. "0.50%" for 50% accuracy).
print("Accuracy: %.2f%% (+/- %.2f%%)" % (np.mean(Accuracy) * 100,
                                         np.std(Accuracy) * 100))