I am running the code below on several Windows and Ubuntu machines. Even across repeated runs, every Windows machine reaches roughly 60% test accuracy, while every Ubuntu machine reaches only ~20%. I use exactly the same Python and library versions on all machines:
Python 3.6.5 (Anaconda on Ubuntu, Python shell on Windows)
pandas 0.23.4
TensorFlow 1.6.0
Keras 2.2.4
What causes this difference in output, and how can I make the code OS-independent?
# importing the csv, glob and pandas modules
import csv
import glob
import pandas as pd
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
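# note: the imports below come from standalone Keras (2.2.4) while the model
# itself is built with tf.keras; mixing the two packages is a possible source
# of version-dependent behaviour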
from keras.preprocessing.text import Tokenizer
from keras import models
from keras import layers
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.normalization import BatchNormalization
# Helper libraries
import numpy as np
import matplotlib.pyplot as plt
#specify a filename pattern and pass it to glob
train_csv_files = glob.glob('train_data/rain_*.csv')
test_csv_files = glob.glob('test_data/rain_*.csv')
predict_csv_files = glob.glob('predict_data/rain_*.csv')
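# glob.glob returns matches in arbitrary, filesystem-dependent order, so the
# file order can differ between Windows and Ubuntu; sorting is a cheap way to
# rule that out (assuming the filenames sort chronologically):
train_csv_files = sorted(train_csv_files)
test_csv_files = sorted(test_csv_files)
predict_csv_files = sorted(predict_csv_files)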
#print(train_csv_files) would show the list of matched filenames
#initialize the empty lists that we will append dataframes and each day's vector to
list_data1 = []
train_data = []
train_l = []
train_rainlabel = []
list_data2 = []
test_data = []
test_l = []
test_rainlabel = []
list_data3 = []
predict_data = []
predict_label = []
predict_label_confidence = []
# read each globbed training file; the result is a list of 90 dataframes (one per year, 1901-1990), each a 357x122 matrix
for filename in train_csv_files:
    data = pd.read_csv(filename, header=None)
    list_data1.append(data)
#print(list_data1) would show the list of dataframes for 1901-1990
for df in list_data1:
    for i in range(0, 122):
        col = df.iloc[:, i]
        train_data.append(col)
#train_data is now a list of every column of every dataframe for 1901-1990: a 357x1 vector stored 122*90 times
#training labels for samples 1-10980
tlabel = pd.read_csv('train_label/rain_label_1901_1990.csv', header=None)
for filename in test_csv_files:
    data = pd.read_csv(filename, header=None)
    list_data2.append(data)
#print(list_data2) would show the list of dataframes for 1991-2000: a 357x122 matrix stored 10 times (years)
for df in list_data2:
    for i in range(0, 122):
        col = df.iloc[:, i]
        test_data.append(col)
#test_data is now a list of every column of every dataframe for 1991-2000: a 357x1 vector stored 122*10 times
#testing labels for samples 1-1220
ttlabel = pd.read_csv('test_label/rain_label_1991_2000.csv', header=None)
#Pattern Names
class_names = ['One', 'Two', 'Three', 'Four', 'Five','Six', 'Seven', 'Eight', 'Nine', 'Ten']
for i in range(0, 10980):
    train_rainlabel.insert(i, tlabel[i])
for i in range(0, 1220):
    test_rainlabel.insert(i, ttlabel[i])
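# a vectorized equivalent of the two loops above would be
# train_rainlabel = tlabel.values.ravel() (and likewise for ttlabel),
# assuming each label CSV stores one label per column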
# read each globbed prediction file; the result is a list of 7 dataframes (one per year, 2001-2007), each a 357x122 matrix
for filename in predict_csv_files:
    data = pd.read_csv(filename, header=None)
    list_data3.append(data)
#print(list_data3) would show the list of dataframes for 2001-2007
for df in list_data3:
    for i in range(0, 122):
        col = df.iloc[:, i]
        predict_data.append(col)
#predict_data is now a list of every column of every dataframe for 2001-2007: a 357x1 vector stored 122*7 times
# Set random seed
np.random.seed(0)
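# np.random.seed only seeds NumPy; the weight initialization below runs
# through TensorFlow's own RNG. A fuller seeding sketch for TF 1.x (even this
# may not make runs bit-identical, but it rules seeding differences out):
import random
random.seed(0)
tf.set_random_seed(0)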
number_of_features = 357
#Explore the data
train_rain=np.array(train_data)
print(train_rain.shape)
train_rainlabel=np.array(train_rainlabel)
print(len(train_rainlabel))
print(train_rainlabel)
test_rain=np.array(test_data)
print(test_rain.shape)
test_rainlabel=np.array(test_rainlabel)
print(len(test_rainlabel))
print(test_rainlabel)
predict_rain = np.array(predict_data)
print(predict_rain.shape)
model = keras.Sequential([
    # 1st layer - visible input
    keras.layers.Dropout(0.1, input_shape=(number_of_features,)),  # also tried: keras.layers.Flatten(input_shape=(357,)), InputLayer()
    keras.layers.Dense(300, kernel_regularizer=keras.regularizers.l2(0.001), activation=tf.nn.relu),
    keras.layers.BatchNormalization(),  # all arguments were left at their defaults
    # 2nd layer - hidden
    keras.layers.Dropout(0.2),
    keras.layers.Dense(150, kernel_regularizer=keras.regularizers.l2(0.001), activation=tf.nn.relu),
    keras.layers.BatchNormalization(),
    # 3rd layer - hidden
    keras.layers.Dropout(0.2),
    keras.layers.Dense(75, kernel_regularizer=keras.regularizers.l2(0.001), activation=tf.nn.relu),
    keras.layers.BatchNormalization(),
    # 4th layer - hidden (currently disabled)
    #keras.layers.Dropout(0.2),
    #keras.layers.Dense(40, kernel_regularizer=keras.regularizers.l2(0.001), activation=tf.nn.relu),
    #keras.layers.BatchNormalization(),
    # 5th layer - hidden (currently disabled)
    #keras.layers.Dropout(0.2),
    #keras.layers.Dense(20, kernel_regularizer=keras.regularizers.l2(0.001), activation=tf.nn.relu),
    #keras.layers.BatchNormalization(),
    # output layer
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.summary()
print('Weights: ', model.get_weights())
model.compile(optimizer=tf.train.AdamOptimizer(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
callbacks = [EarlyStopping(monitor='val_loss', min_delta=0, patience=2, verbose=0, mode='auto')]
history = model.fit(train_rain, train_rainlabel, epochs=50, callbacks=callbacks, batch_size=100, validation_data=(test_rain, test_rainlabel))
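# (I'm aware that passing the test set as validation_data lets EarlyStopping
#  select on the test data; it is the same on both OSes, so it should not
#  explain the gap)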
test_loss, test_acc = model.evaluate(test_rain, test_rainlabel)
print('Test Accuracy:', test_acc)
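# predict_rain, class_names, predict_label and predict_label_confidence are
# unused so far; roughly how I intend to use them once training is stable
# (a sketch, assuming labels 0-9 map onto class_names in order):
predictions = model.predict(predict_rain)
for p in predictions:
    predict_label.append(class_names[int(np.argmax(p))])
    predict_label_confidence.append(float(np.max(p)))
print(predict_label[:5], predict_label_confidence[:5])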