There appears to be a data scaling issue with the Python Keras LSTM / GRU layers when training with multi_gpu_model.
When I use a single GPU, the predictions correctly match the sinusoidal data in the script below. See the image labeled "1 GPU".
When I use multiple GPUs, the inverse transforms of both the training and test predictions come back clustered near the low point of the original data. See the image labeled "4 GPUs".
This looks like either a bug, or a case that the multi_gpu_model documentation does not adequately cover.
Is this:
- a bug in multi_gpu_model?
- incomplete multi_gpu_model documentation that should be updated to cover this particular case?
Versions
Keras 2.2.4
Keras-Applications 1.0.6
Keras-Preprocessing 1.0.5
tensorboard 1.12.0
tensorflow-gpu 1.12.0
GPU
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 410.79       Driver Version: 410.79       CUDA Version: 10.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|===============================+======================+======================|
|   0  GeForce GTX 107...  Off  | 00000000:08:00.0 Off |                  N/A |
| 30%   42C    P0    36W / 180W |      0MiB /  8119MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  GeForce GTX 107...  Off  | 00000000:09:00.0 Off |                  N/A |
| 36%   48C    P0    37W / 180W |      0MiB /  8119MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   2  GeForce GTX 107...  Off  | 00000000:41:00.0 Off |                  N/A |
| 34%   44C    P0    34W / 180W |      0MiB /  8119MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   3  GeForce GTX 107...  Off  | 00000000:42:00.0 Off |                  N/A |
| 31%   42C    P0    32W / 180W |      0MiB /  8112MiB |      5%      Default |
+-------------------------------+----------------------+----------------------+
Script (the results are the same when using GRU)
#!/usr/bin/env python3
"""LSTM for sinusoidal data problem with regression framing.
Based on:
https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
"""
# Standard imports
import argparse
import math
# PIP3 imports
import numpy
import matplotlib.pyplot as plt
from pandas import DataFrame
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.utils import multi_gpu_model
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
def main():
    # fix random seed for reproducibility
    numpy.random.seed(7)
    # Get CLI arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gpus',
        help='Number of GPUs to use.',
        type=int, default=1)
    args = parser.parse_args()
    gpus = args.gpus
    # load the dataset
    dataframe = DataFrame(
        [0.00000, 5.99000, 11.92016, 17.73121, 23.36510, 28.76553, 33.87855,
         38.65306, 43.04137, 46.99961, 50.48826, 53.47244, 55.92235, 57.81349,
         59.12698, 59.84970, 59.97442, 59.49989, 58.43086, 56.77801, 54.55785,
         51.79256, 48.50978, 44.74231, 40.52779, 35.90833, 30.93008, 25.64279,
         20.09929, 14.35496, 8.46720, 2.49484, -3.50245, -9.46474, -15.33247,
         -21.04699, -26.55123, -31.79017, -36.71147, -41.26597, -45.40815,
         -49.09663, -52.29455, -54.96996, -57.09612, -58.65181, -59.62146,
         -59.99540, -59.76988, -58.94716, -57.53546, -55.54888, -53.00728,
         -49.93605, -46.36587, -42.33242, -37.87600, -33.04113, -27.87613,
         -22.43260, -16.76493, -10.92975, -4.98536, 1.00883, 6.99295, 12.90720,
         18.69248, 24.29100, 29.64680, 34.70639, 39.41920, 43.73814, 47.62007,
         51.02620, 53.92249, 56.28000, 58.07518, 59.29009, 59.91260, 59.93648,
         59.36149, 58.19339, 56.44383, 54.13031, 51.27593, 47.90923, 44.06383,
         39.77815, 35.09503, 30.06125, 24.72711, 19.14590, 13.37339, 7.46727,
         1.48653, -4.50907, -10.45961, -16.30564, -21.98875, -27.45215,
         -32.64127, -37.50424, -41.99248, -46.06115, -49.66959, -52.78175,
         -55.36653, -57.39810, -58.85617, -59.72618, -59.99941, -59.67316,
         -58.75066, -57.24115, -55.15971, -52.52713, -49.36972, -45.71902,
         -41.61151, -37.08823, -32.19438, -26.97885, -21.49376, -15.79391,
         -9.93625, -3.97931, 2.01738, 7.99392, 13.89059, 19.64847, 25.21002,
         30.51969, 35.52441, 40.17419, 44.42255, 48.22707, 51.54971, 54.35728,
         56.62174, 58.32045, 59.43644, 59.95856, 59.88160, 59.20632, 57.93947,
         56.09370, 53.68747, 50.74481, 47.29512, 43.37288, 39.01727, 34.27181,
         29.18392, 23.80443, 18.18710, 12.38805, 6.46522, 0.47779, -5.51441,
         -11.45151])
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    # create and fit the LSTM network
    with tf.device('/cpu:0'):
        serial_model = Sequential()
        serial_model.add(LSTM(4, input_shape=(1, look_back)))
        serial_model.add(Dense(1))
    if gpus == 1:
        parallel_model = serial_model
    else:
        parallel_model = multi_gpu_model(
            serial_model,
            cpu_relocation=True,
            gpus=gpus)
    parallel_model.compile(
        loss='mean_squared_error', optimizer='adam')
    parallel_model.fit(
        trainX, trainY,
        epochs=100,
        batch_size=int(dataset.size * gpus / 20),
        verbose=2)
    # make predictions
    if gpus == 1:
        trainPredict = parallel_model.predict(trainX)
        testPredict = parallel_model.predict(testX)
    else:
        trainPredict = serial_model.predict(trainX)
        testPredict = serial_model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[
        len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset), label='Complete Data')
    plt.plot(trainPredictPlot, label='Training Data')
    plt.plot(testPredictPlot, label='Prediction Data')
    plt.legend(loc='upper left')
    plt.title('Using {} GPUs'.format(gpus))
    plt.show()
if __name__ == "__main__":
    main()
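As a side note on the batch sizes involved: the script sets batch_size=int(dataset.size * gpus / 20), and the sine wave above has 160 samples. A small sketch of what that works out to (the numbers here are only an illustration, not part of the original script):

# Illustrative arithmetic for the script's batch_size=int(dataset.size * gpus / 20)
dataset_size = 160                                   # samples in the sine wave above
train_samples = int(dataset_size * 0.67) - 1 - 1     # 105 windows after create_dataset()
for gpus in (1, 4):
    batch_size = int(dataset_size * gpus / 20)       # 8 for 1 GPU, 32 for 4 GPUs
    steps_per_epoch = -(-train_samples // batch_size)   # ceiling division
    print(gpus, batch_size, steps_per_epoch)         # -> 1 8 14 and 4 32 4

So the 4-GPU run performs roughly a quarter as many weight updates per epoch as the single-GPU run, which is worth keeping in mind when comparing the two fits after the same 100 epochs.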
I thought this might be related to the Sequential model, but I get the same results after replacing:
# create and fit the LSTM network
with tf.device('/cpu:0'):
    serial_model = Sequential()
    serial_model.add(LSTM(4, input_shape=(1, look_back)))
    serial_model.add(Dense(1))
with:
from keras import Model, Input
# Create layers for model
x_tensor = Input(shape=(1, look_back))
layer_1 = LSTM(4)(x_tensor)
y_tensor = Dense(1)(layer_1)
# Create and fit the LSTM network
with tf.device('/cpu:0'):
    serial_model = Model(inputs=x_tensor, outputs=y_tensor)
I now think this has to do with how multi_gpu_model divides the time-series data across the GPUs; see the sketch after the RMSE numbers below. The RMSE scores are markedly different.
RMSE - 1 GPU
Train Score: 4.49 RMSE
Test Score: 4.79 RMSE
RMSE - 4 GPUs
Train Score: 76.54 RMSE
Test Score: 77.55 RMSE
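For context on that hypothesis: the Keras documentation describes multi_gpu_model as single-machine data parallelism, where each incoming batch is split into gpus equal sub-batches, a model replica runs on each slice, and the outputs are concatenated on the CPU so the loss and gradient are still computed over the whole batch. A minimal numpy-only sketch of that slicing, using a hypothetical batch shaped like trainX (this is not the library's internal code):

import numpy

# Hypothetical batch: 32 windows of the scaled sine data, shaped
# [samples, time steps, features] like trainX in the script above.
batch = numpy.random.rand(32, 1, 1).astype('float32')

gpus = 4
sub_batch = batch.shape[0] // gpus
# Each replica gets one contiguous slice of the batch; the merged output is
# used for a single loss/gradient computation over all 32 samples.
slices = [batch[i * sub_batch:(i + 1) * sub_batch] for i in range(gpus)]
print([s.shape for s in slices])    # four slices of shape (8, 1, 1)

With batch_size=int(dataset.size * gpus / 20) each GPU therefore still sees sub-batches of 8 samples; what differs between the two runs is mainly the number of gradient updates per epoch (see the arithmetic sketch above), while the MinMaxScaler step itself is identical in both cases.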
Answer 0 (score: 0)
The TensorFlow 2.0 documentation mentions that the multi_gpu_model utility is deprecated. The recommended practice for using multiple GPUs is tf.distribute.Strategy. I changed the code a little and it works correctly with two GPUs.
#!/usr/bin/env python3
"""LSTM for sinusoidal data problem with regression framing.
Based on:
https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/
"""
# Standard imports
import argparse
import math
# PIP3 imports
import numpy
import matplotlib.pyplot as plt
import tensorflow as tf
from pandas import DataFrame
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
tf.debugging.set_log_device_placement(True)
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back), 0]
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
def main():
    # fix random seed for reproducibility
    numpy.random.seed(7)
    # Get CLI arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--gpus',
        help='Number of GPUs to use.',
        type=int, default=1)
    args = parser.parse_args()
    gpus = args.gpus
    # load the dataset
    dataframe = DataFrame(
        [0.00000, 5.99000, 11.92016, 17.73121, 23.36510, 28.76553, 33.87855,
         38.65306, 43.04137, 46.99961, 50.48826, 53.47244, 55.92235, 57.81349,
         59.12698, 59.84970, 59.97442, 59.49989, 58.43086, 56.77801, 54.55785,
         51.79256, 48.50978, 44.74231, 40.52779, 35.90833, 30.93008, 25.64279,
         20.09929, 14.35496, 8.46720, 2.49484, -3.50245, -9.46474, -15.33247,
         -21.04699, -26.55123, -31.79017, -36.71147, -41.26597, -45.40815,
         -49.09663, -52.29455, -54.96996, -57.09612, -58.65181, -59.62146,
         -59.99540, -59.76988, -58.94716, -57.53546, -55.54888, -53.00728,
         -49.93605, -46.36587, -42.33242, -37.87600, -33.04113, -27.87613,
         -22.43260, -16.76493, -10.92975, -4.98536, 1.00883, 6.99295, 12.90720,
         18.69248, 24.29100, 29.64680, 34.70639, 39.41920, 43.73814, 47.62007,
         51.02620, 53.92249, 56.28000, 58.07518, 59.29009, 59.91260, 59.93648,
         59.36149, 58.19339, 56.44383, 54.13031, 51.27593, 47.90923, 44.06383,
         39.77815, 35.09503, 30.06125, 24.72711, 19.14590, 13.37339, 7.46727,
         1.48653, -4.50907, -10.45961, -16.30564, -21.98875, -27.45215,
         -32.64127, -37.50424, -41.99248, -46.06115, -49.66959, -52.78175,
         -55.36653, -57.39810, -58.85617, -59.72618, -59.99941, -59.67316,
         -58.75066, -57.24115, -55.15971, -52.52713, -49.36972, -45.71902,
         -41.61151, -37.08823, -32.19438, -26.97885, -21.49376, -15.79391,
         -9.93625, -3.97931, 2.01738, 7.99392, 13.89059, 19.64847, 25.21002,
         30.51969, 35.52441, 40.17419, 44.42255, 48.22707, 51.54971, 54.35728,
         56.62174, 58.32045, 59.43644, 59.95856, 59.88160, 59.20632, 57.93947,
         56.09370, 53.68747, 50.74481, 47.29512, 43.37288, 39.01727, 34.27181,
         29.18392, 23.80443, 18.18710, 12.38805, 6.46522, 0.47779, -5.51441,
         -11.45151])
    dataset = dataframe.values
    dataset = dataset.astype('float32')
    # normalize the dataset
    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(dataset)
    # split into train and test sets
    train_size = int(len(dataset) * 0.67)
    test_size = len(dataset) - train_size
    train, test = dataset[0:train_size, :], dataset[train_size:len(dataset), :]
    # reshape into X=t and Y=t+1
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # reshape input to be [samples, time steps, features]
    trainX = numpy.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = numpy.reshape(testX, (testX.shape[0], 1, testX.shape[1]))
    # create and fit the LSTM network
    if gpus == 1:
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.LSTM(4, input_shape=(1, look_back)))
        model.add(tf.keras.layers.Dense(1))
        model.compile(loss='mean_squared_error', optimizer='adam')
    else:
        strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"])
        print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
        # Define the model
        with strategy.scope():
            model = tf.keras.Sequential()
            model.add(tf.keras.layers.LSTM(4, input_shape=(1, look_back)))
            model.add(tf.keras.layers.Dense(1))
            model.compile(loss='mean_squared_error', optimizer='adam')
            # inputs = tf.keras.layers.Input(shape=(1, look_back))
            # lstm_layer = tf.keras.layers.LSTM(4)(inputs)
            # outputs = tf.keras.layers.Dense(1)(lstm_layer)
            # model = tf.keras.Model(inputs, outputs)
            # model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=100, batch_size=4, verbose=2)
    trainPredict = model.predict(trainX)
    testPredict = model.predict(testX)
    # invert predictions
    trainPredict = scaler.inverse_transform(trainPredict)
    trainY = scaler.inverse_transform([trainY])
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform([testY])
    # calculate root mean squared error
    trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredict[:, 0]))
    print('Train Score: %.2f RMSE' % (trainScore))
    testScore = math.sqrt(mean_squared_error(testY[0], testPredict[:, 0]))
    print('Test Score: %.2f RMSE' % (testScore))
    # shift train predictions for plotting
    trainPredictPlot = numpy.empty_like(dataset)
    trainPredictPlot[:, :] = numpy.nan
    trainPredictPlot[look_back:len(trainPredict)+look_back, :] = trainPredict
    # shift test predictions for plotting
    testPredictPlot = numpy.empty_like(dataset)
    testPredictPlot[:, :] = numpy.nan
    testPredictPlot[
        len(trainPredict)+(look_back*2)+1:len(dataset)-1, :] = testPredict
    # plot baseline and predictions
    plt.plot(scaler.inverse_transform(dataset), label='Complete Data')
    plt.plot(trainPredictPlot, label='Training Data')
    plt.plot(testPredictPlot, label='Prediction Data')
    plt.legend(loc='upper left')
    plt.title('Using {} GPUs'.format(gpus))
    plt.show()
if __name__ == "__main__":
    main()
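One detail worth adding about this approach: with tf.distribute.MirroredStrategy, the batch_size passed to model.fit is the global batch size, which is divided across the replicas. If you want each GPU to see the same per-device batch as the single-GPU run, scale it explicitly. A small sketch, assuming the strategy and model objects from the multi-GPU branch above:

# Keep the per-GPU batch constant as replicas are added (sketch, not part of the answer's script).
per_replica_batch_size = 4
global_batch_size = per_replica_batch_size * strategy.num_replicas_in_sync
model.fit(trainX, trainY, epochs=100, batch_size=global_batch_size, verbose=2)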