Using multiple GPUs in C++ CNTK

Asked: 2019-01-06 07:09:38

Tags: gpu lstm cntk

I am training an LSTM on a single CPU or GPU using the CNTK library. I do not understand how to change the code so that training runs on multiple GPUs (or CPUs). My current code is:

labels = InputVariable({ numOutputClasses }, DataType::Float, L"labels");
trainingLoss = CrossEntropyWithSoftmax(lstmModel, labels, L"lossFunction");
prediction   = ClassificationError(lstmModel, labels, L"classificationError");

// create learner
paramLearner = AdamLearner(lstmModel->Parameters(),
                           learningRate,
                           momentumSchedule,
                           false);

//create trainer
trainer = CreateTrainer(lstmModel, trainingLoss, prediction, vector<LearnerPtr>({ paramLearner }));

sampleShape = { inputDim };
labelsShape = { numOutputClasses };

classifierOutputVar = lstmModel->Output();
unordered_map<Variable, ValuePtr> argumentsOut;
double trainLossValue;

// run train
for (size_t i = 1; i <= countEpoch; ++i)
{
    cout << "Epoch " << i << ":" << countEpoch << endl;

    for (int k = 0; k < inputData.size(); ++k) 
    {
        argumentsOut = { { classifierOutputVar, outputValue },
                         { prediction, predictionErrorValue } };

        featuresValue = Value::Create(sampleShape, inputData.at(k),  device);
        labelValue    = Value::Create(labelsShape, labelsData.at(k), device);
        argumentsIn = { { features, featuresValue }, { labels, labelValue } };

        trainer->TrainMinibatch(argumentsIn, true, argumentsOut, device);
        argumentsIn.clear();

        trainLossValue = trainer->PreviousMinibatchLossAverage();
        cout << "\tBatch " << k + 1 << ":" << inputData.size() << "\ttrainLossValueBatch: " << trainLossValue << endl;
    }

    saveModel(path);        
}
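For context, the device passed to Value::Create and TrainMinibatch above is a single DeviceDescriptor; a minimal sketch of how it is selected (assuming one GPU at index 0 is available) is:

// select one compute device for the whole training run
device = DeviceDescriptor::GPUDevice(0);          // a single, fixed GPU
// device = DeviceDescriptor::CPUDevice();        // or the CPU
// device = DeviceDescriptor::UseDefaultDevice(); // or let CNTK pick a default device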

My own attempt to solve the problem failed:

auto sync = MPICommunicator();

auto numWorkers = sync->Workers().size();
auto workerRank = sync->CurrentWorker().m_globalRank;

labels = InputVariable({ numOutputClasses }, DataType::Float, L"labels");
trainingLoss = CrossEntropyWithSoftmax(lstmModel, labels, L"lossFunction");
prediction = ClassificationError(lstmModel, labels, L"classificationError");

paramLearner = FSAdaGradLearner(lstmModel->Parameters(),
                                learningRate,
                                momentumSchedule,
                                false);

DistributedLearnerPtr distributedLearner =
    CreateDataParallelDistributedLearner(sync, paramLearner, 0);

trainer = CreateTrainer(lstmModel, trainingLoss, prediction, { distributedLearner });
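As far as I understand, with the distributed learner in place each MPI worker (one process per worker, started with mpiexec) should train on its own part of the data while the learner aggregates gradients across workers. A rough, untested sketch of how the in-memory loop above might be adapted for that (reusing inputData, labelsData, features, labels, sampleShape, labelsShape, trainer, numWorkers and workerRank from the code above) is:

for (size_t i = 1; i <= countEpoch; ++i)
{
    // each worker processes every numWorkers-th sequence, starting at its own rank
    for (size_t k = workerRank; k < inputData.size(); k += numWorkers)
    {
        featuresValue = Value::Create(sampleShape, inputData.at(k),  device);
        labelValue    = Value::Create(labelsShape, labelsData.at(k), device);

        // the data-parallel learner aggregates gradients across all workers here
        trainer->TrainMinibatch({ { features, featuresValue }, { labels, labelValue } }, device);
    }

    // note: every worker should see the same number of minibatches per epoch,
    // otherwise the gradient aggregation may stall on the last iteration
    if (workerRank == 0)
        saveModel(path);
}

// assumption: DistributedCommunicator::Finalize() is called once when training is done
DistributedCommunicator::Finalize();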

It is still not clear to me how to actually run the training on multiple GPUs (or CPUs)... I understand that a MinibatchSource has to be created with CreateCompositeMinibatchSource; what is unclear is how to build the MinibatchSourceConfig object from in-memory arrays or containers of MFCC sequences.
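The file-based route I have seen in the CNTK examples builds the MinibatchSourceConfig from deserializers that read from disk rather than from in-memory containers, for example CTFDeserializer (and apparently HTKFeatureDeserializer for HTK/MFCC feature files). A sketch of that route, assuming the sequences were first written to a hypothetical features.ctf file in CNTK text format with stream names "features" and "labels", and a guessed minibatch size of 64, would be:

// build a shared reader from a CTF file (hypothetical file and stream names)
MinibatchSourceConfig config({ CTFDeserializer(L"features.ctf",
                                               { { L"features", inputDim },
                                                 { L"labels",   numOutputClasses } }) });
config.maxSweeps = countEpoch;   // stop the source after countEpoch passes over the data
auto minibatchSource = CreateCompositeMinibatchSource(config);

auto featureStreamInfo = minibatchSource->StreamInfo(L"features");
auto labelStreamInfo   = minibatchSource->StreamInfo(L"labels");

const size_t minibatchSize = 64;   // assumed value
for (;;)
{
    // each worker asks the shared source for its own slice of the next minibatch
    auto minibatchData = minibatchSource->GetNextMinibatch(minibatchSize, numWorkers, workerRank, device);
    if (minibatchData.empty())
        break;   // the source is exhausted after countEpoch sweeps

    trainer->TrainMinibatch({ { features, minibatchData[featureStreamInfo] },
                              { labels,   minibatchData[labelStreamInfo]   } }, device);
}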

0 Answers