I am training an LSTM with the CNTK library on a single CPU or GPU. I don't understand how to change the code so that training runs on multiple GPUs (or CPUs). My current code is:
labels = InputVariable({ numOutputClasses }, DataType::Float, L"labels");
trainingLoss = CrossEntropyWithSoftmax(lstmModel, labels,
L"lossFunction");
prediction = CrossEntropyWithSoftmax(lstmModel, labels,
L"classificationError");
//create learner
paramLearner = AdamLearner(lstmModel->Parameters(),
learningRate,
momentumSchedule,
false);
//create trainer
trainer = CreateTrainer(lstmModel, trainingLoss, prediction, vector<LearnerPtr>({ paramLearner }));
sampleShape = { inputDim };
labelsShape = { numOutputClasses };
classifierOutputVar = lstmModel->Output();
unordered_map<Variable, ValuePtr> argumentsOut;
double trainLossValue;
// run train
for (size_t i = 1; i <= countEpoch; ++i)
{
cout << "Epoch " << i << ":" << countEpoch << endl;
for (int k = 0; k < inputData.size(); ++k)
{
argumentsOut = { { classifierOutputVar, outputValue },
{ prediction, predictionErrorValue } };
featuresValue = Value::Create(sampleShape, inputData.at(k), device);
labelValue = Value::Create(labelsShape, labelsData.at(k), device);
argumentsIn = { { features, featuresValue }, { labels, labelValue } };
trainer->TrainMinibatch(argumentsIn, true, argumentsOut, device);
argumentsIn.clear();
trainLossValue = trainer->PreviousMinibatchLossAverage();
cout << "\tBatch " << k + 1 << ":" << inputData.size() << "\ttrainLossValueBatch: " << trainLossValue << endl;
}
saveModel(path);
}
My own attempt to solve this has failed:
auto sync = MPICommunicator();
auto numWorkers = sync->Workers().size();
auto workerRank = sync->CurrentWorker().m_globalRank;

labels = InputVariable({ numOutputClasses }, DataType::Float, L"labels");
trainingLoss = CrossEntropyWithSoftmax(lstmModel, labels, L"lossFunction");
prediction = ClassificationError(lstmModel, labels, L"classificationError");

paramLearner = FSAdaGradLearner(lstmModel->Parameters(),
                                learningRate,
                                momentumSchedule,
                                false);

// reuse the communicator created above
DistributedLearnerPtr distributedLearner =
    CreateDataParallelDistributedLearner(sync, paramLearner, 0);

trainer = CreateTrainer(lstmModel, trainingLoss, prediction, { distributedLearner });
It is still not clear to me how to actually run this on multiple GPUs (or CPUs). I understand that I need to create a MinibatchSource with CreateCompositeMinibatchSource; what I don't understand is how to build the MinibatchSourceConfig object from my arrays (containers of MFCC sequences).
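For what it's worth, this is the shape I imagine the data-parallel reading/training loop has to take, pieced together from CNTKLibrary.h. It is only a sketch, not working code: the HTK file names (features.scp, labels.mlf, labels.statelist) and the minibatch size are placeholders, it assumes my in-memory MFCC sequences are first written out in HTK format, and it assumes the GetNextMinibatch overload that takes numberOfWorkers/workerRank is the right way to shard the data per MPI worker:

#include "CNTKLibrary.h"
using namespace CNTK;
using namespace std;

// Describe the on-disk MFCC features and labels for the composite reader.
// The scp/mlf/statelist file names are placeholders.
HTKFeatureConfiguration featureConfig(L"features", L"features.scp", inputDim, 0, 0, false);
Deserializer featureDeserializer = HTKFeatureDeserializer({ featureConfig });
Deserializer labelDeserializer = HTKMLFDeserializer(L"labels", L"labels.statelist",
                                                    numOutputClasses, { L"labels.mlf" });

// Build the MinibatchSourceConfig from the deserializers and create the source.
MinibatchSourceConfig sourceConfig({ featureDeserializer, labelDeserializer });
sourceConfig.maxSweeps = countEpoch;        // stop after countEpoch passes over the data
MinibatchSourcePtr minibatchSource = CreateCompositeMinibatchSource(sourceConfig);

StreamInformation featureStreamInfo = minibatchSource->StreamInfo(L"features");
StreamInformation labelStreamInfo = minibatchSource->StreamInfo(L"labels");

// Each MPI worker asks only for its own shard of every global minibatch.
size_t minibatchSizeInSamples = 256;        // arbitrary value for the sketch
while (true)
{
    auto minibatchData = minibatchSource->GetNextMinibatch(
        0 /* no per-minibatch sequence limit */, minibatchSizeInSamples,
        numWorkers, workerRank, device);
    if (minibatchData.empty())              // source exhausted after maxSweeps
        break;

    unordered_map<Variable, MinibatchData> arguments =
        { { features, minibatchData[featureStreamInfo] },
          { labels,   minibatchData[labelStreamInfo] } };

    // The trainer was created with the distributed learner, so (as I understand it)
    // gradient aggregation across workers happens inside TrainMinibatch.
    trainer->TrainMinibatch(arguments, device);

    cout << "trainLossValueBatch: " << trainer->PreviousMinibatchLossAverage() << endl;
}

DistributedCommunicator::Finalize();

Is this the intended pattern, or is there a way to build a MinibatchSourceConfig (or feed TrainMinibatch) directly from in-memory vectors of MFCC sequences without going through the HTK/CTF deserializers?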