我正在运行Keras multi_gpu模型。我的模型有2个输入。一个输入由Imagedatagenerator提供,其他输入通过模型内部的函数生成。请看下面的代码:
import numpy as np
import keras
from keras.layers.convolutional import Conv2D
from keras.layers import ReLU,MaxPooling2D,ZeroPadding2D,BatchNormalization,Dense,Dropout, Activation, Flatten, Lambda, Concatenate, Add
from keras.models import Model
from keras.layers import Input
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot
from keras.utils import plot_model
from keras import backend as K
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint
from keras.models import model_from_json
from keras.utils import multi_gpu_model
import pandas as pd
import os
import sys
from tqdm import *
# import skimage
import matplotlib.pyplot as plt
# %matplotlib inline
import cv2
import tensorflow as tf
import multiprocessing
# import pydot
########### Make Log directory #####################################
cwd=os.getcwd()
log_dir = cwd+'/log_dir/Relation_net_logs'
if not os.path.exists(log_dir):
os.makedirs(log_dir)
tensorboard_logsdir = log_dir+"/tensorboard_logdir"
if not os.path.exists(tensorboard_logsdir):
os.makedirs(tensorboard_logsdir)
######### Make Network##############################################
def ConvolutionNetworks(kernel_size=3, stride_size=2):
def conv(model):
model = Conv2D(24, (9, 9), strides=(stride_size, stride_size),activation='relu',input_shape=(100, 100, 3), data_format='channels_last')(model)
model = BatchNormalization()(model)
model = Conv2D(24, (7, 7), strides=(stride_size, stride_size),activation='relu')(model)
model = BatchNormalization()(model)
model = Conv2D(24, (kernel_size, kernel_size), strides=(stride_size, stride_size),activation='relu')(model)
model = BatchNormalization()(model)
model = Conv2D(24, (5, 5), strides=(1, 1),activation='relu')(model)
model = BatchNormalization()(model)
return model
return conv
######### Compute Relations #######
def compute_relations(objects):
def get_top_dim_1(t):
return t[:, 0, :, :]
def get_all_but_top_dim_1(t):
return t[:, 1:, :, :]
def get_top_dim_2(t):
return t[:, 0, :]
def get_all_but_top_dim2(t):
return t[:, 1:, :]
slice_top_dim_1 = Lambda(get_top_dim_1)
slice_all_but_top_dim_1 = Lambda(get_all_but_top_dim_1)
slice_top_dim_2 = Lambda(get_top_dim_2)
slice_all_but_top_dim2 = Lambda(get_all_but_top_dim2)
d = K.int_shape(objects)[2]
features = []
for i in range(d): #This loop extracts top layer of the feature map
features1 = slice_top_dim_1(objects)
objects = slice_all_but_top_dim_1(objects)
for j in range(d): #This loop extract each object from the "top layer" extracted in the previous loop and append it in variable "features"
features2 = slice_top_dim_2(features1)
features1 = slice_all_but_top_dim2(features1)
features.append(features2)
relations = []
concat = Concatenate()
for feature1 in features:
for feature2 in features:
relations.append(concat([feature1, feature2]))
return relations
############## f_theta ############################
def f_theta():
def f(model):
model = Dense(256,activation='relu')(model)
# model = Activation('relu')(model)
model = Dense(256,activation='relu')(model)
# model = Activation('relu')(model)
# model = Dropout(0.5)(model)
model = Dense(256,activation='relu')(model)
# model = Activation('relu')(model)
model = Dense(256,activation='relu')(model)
# model = Activation('relu')(model)
return model
return f
################# Relation module and tag building #########################################
from keras.utils import plot_model
def g_th(layers):
def f(model):
for n in range(len(layers)):
model = layers[n](model)
return model
return f
def stack_layer(layers):
def f(x):
for k in range(len(layers)):
x = layers[k](x)
return x
return f
def g_theta(h_unit=256, layers=4):
r = []
for k in range(layers):
r.append(Dense(h_unit))
r.append(Activation('relu'))
return g_th(r)
def get_MLP():
return g_th()
def RelationNetworks(objects):
g_t = g_theta()
relations = compute_relations(objects)
print("length of relations={}".format(len(relations)))
g_all = []
for r in tqdm(relations):
g_all.append(g_t(r)) #send each relation to g_t and append to a list for easy summation.
print("relation computed")
combined_relation = Add()(g_all)
print("relation combined")
f_out = f_theta()(combined_relation)
print("relation went through f_theta")
return f_out
def build_tag(conv):
d = K.int_shape(conv)[2]
tag = np.zeros((d,d,2))
print("tagging in process")
for i in range(d):
for j in range(d):
tag[i,j,0] = float(int(i%d))/(d-1)*2-1
tag[i,j,1] = float(int(j%d))/(d-1)*2-1
tag = K.variable(tag)
tag = K.expand_dims(tag, axis=0)
batch_size = K.shape(conv)[0]
tag = K.tile(tag, [batch_size,1,1,1])
print("tagging done")
return Input(tensor=tag)
################################# Build Model ###################################################################################
visual_scene = Input((100, 100, 3))
# visual_question = Input((11,))
visual_conv = ConvolutionNetworks()(visual_scene)
tag = build_tag(visual_conv)
visual_conv = Concatenate()([tag, visual_conv])
visual_RN = RelationNetworks(visual_conv)
visual_out = Dense(4, activation='softmax')(visual_RN)
VisualModel = Model(inputs=[tag,visual_scene], outputs=visual_out)
print("model made")
# plot_model(VisualModel, to_file='/home/aakash/Relation_Network/figures/VisualModel1.png')
################################ Create parallel model ###############
# This executes Data Parallelism. Batch is divided equally on all GPUs for computation
try:
parallel_model = multi_gpu_model(VisualModel, cpu_merge=True, cpu_relocation=True,gpus=2)
print("Training using multiple GPUs..")
except:
parallel_model = model
print("Training using single GPU or CPU..")
################################# Training #################################################################################
workers=multiprocessing.cpu_count()-1
batchsize=32
IMG_SIZE=100
train_df_path="/home/aakash/Relation_Network/training_df.pkl"
valid_df_path="/home/aakash/Relation_Network/validation_df.pkl"
image_dir="/home/aakash/Relation_Network/DL_Dataset"
from keras.optimizers import Adam
lr = 1e-4
adam = Adam(lr=lr)
parallel_model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
#Save architecture
NAME = "2_conv_model"
with open(NAME+".json", "w") as json_file:
json_file.write(VisualModel.to_json())
print("model architecture saved as json file")
#create callbacks
# NAME = "{}-conv-{}-nodes-{}-dense-{}".format(conv_layer, layer_size, dense_layer, int(time.time()))
checkpoint = keras.callbacks.ModelCheckpoint(log_dir+'/'+NAME+'.h5', monitor='val_loss',verbose=0, save_best_only=True, save_weights_only=True, mode='auto', period=1)
csv_logger = keras.callbacks.CSVLogger(log_dir+"/"+NAME+".csv", separator=',', append=False)
tensorboard = keras.callbacks.TensorBoard(log_dir=tensorboard_logsdir+'/'+NAME, histogram_freq=0, batch_size=batchsize, write_graph=True, write_grads=False, write_images=False, embeddings_freq=0,
embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None, update_freq='epoch')
training_df=pd.read_pickle(train_df_path)
validation_df=pd.read_pickle(valid_df_path)
datagen=ImageDataGenerator(rescale=1./255)
train_generator=datagen.flow_from_dataframe(dataframe=training_df, directory=image_dir,
x_col="image", y_col="lesion", class_mode="categorical",
target_size=(IMG_SIZE,IMG_SIZE), batch_size=batchsize,shuffle=True)
validation_generator=datagen.flow_from_dataframe(dataframe=validation_df, directory=image_dir,
x_col="image", y_col="lesion", class_mode="categorical",
target_size=(IMG_SIZE,IMG_SIZE), batch_size=batchsize)
parallel_model.fit_generator(generator = train_generator,
steps_per_epoch = (training_df.shape[0])//batchsize,
validation_data = validation_generator,
validation_steps = (validation_df.shape[0])//batchsize,
epochs = 30,verbose=1,callbacks=[checkpoint, csv_logger,tensorboard],
use_multiprocessing=True,workers=workers)
build_tag
函数返回带有张量的输入层(这是我的第二个输入)。
但是当我运行这段代码时,它显示了以下错误
!(https://drive.google.com/file/d/1gGjoO89zwRw_zUQ14sUIrdC7oRKrdVT1/view?usp=sharing)
我将build_tag
函数设置为Lambda层,并且build_tag
返回的值只是值“ tag”而不是输入层,并删除了对模型的“ tag”输入,它开始起作用