可写数据容器的最大数量

时间:2014-03-13 06:25:10

标签: c++-amp

操作系统:Windows 8.1 64位 - 完全更新

IDE: Visual Studio Professional 2013 - 版本12.0.30110.00更新1 - 完全更新

我遇到的情况是:下面的异常并不是在编译期间出现的,而是在运行时出现的。

parallel_for_each调用(17)的entry函数中引用的可写数据容器数超过了所选的加速器限制(8)。

发生这种情况的函数如下所示:

// Runs one training epoch of a 3-layer network on the accelerator.
//
// NOTE(review): the parallel_for_each kernel below captures 17 writable
// array_view objects (3 layers + 3 outputs + 3 error sets + 3 thresholds
// + 3 weight sets + temp buffer + epoch errors). DirectX 11 accelerators
// allow at most 8 writable containers per kernel (64 on DirectX 11.1),
// which is exactly the limit the runtime error in the question reports.
// Marking views read-only where possible, or fusing buffers, is required
// to launch this kernel on a DX11 device.
//
// Parameters:
//   mainAccelView  - accelerator view supplied by the caller
//                    (presumably intended for the kernel launch — the
//                    original code never used it; TODO confirm intent)
//   activatorState - activation-function state forwarded to the helpers
//   trainingState  - training hyper-parameters forwarded to the helpers
//   avLayer1..3    - per-layer weight matrices, read and updated in-place
//   avPredictors   - input rows (read-only)
//   avTargets      - expected outputs (read-only)
//   avErrors       - per-epoch error log; element [epoch] is written
//   epoch          - index of the current epoch
void run_epoch(
    accelerator_view mainAccelView,
    ActivatorState activatorState,
    TrainingState trainingState,
    array_view<double, 2> avLayer1,
    array_view<double, 2> avLayer2,
    array_view<double, 2> avLayer3,
    array_view<const double, 2> avPredictors,
    array_view<const double, 2> avTargets,
    array_view<double> avErrors,
    int epoch
    ){
    // BUG FIX: the original code re-declared `mainAccelView` as a local
    // here, which redefines the formal parameter (MSVC error C2082) and
    // was dead code besides. The caller-supplied view is kept as-is.

    int noOfColumnsPredictors = AmpUtils::get_no_of_columns(avPredictors);
    int noOfRowsPredictors = AmpUtils::get_no_of_rows(avPredictors, noOfColumnsPredictors);

    int noOfColumnsLayer1 = AmpUtils::get_no_of_columns(avLayer1);
    int noOfColumnsLayer2 = AmpUtils::get_no_of_columns(avLayer2);
    int noOfColumnsLayer3 = AmpUtils::get_no_of_columns(avLayer3);

    int noOfRowsLayer1 = AmpUtils::get_no_of_rows(avLayer1, noOfColumnsLayer1);
    int noOfRowsLayer2 = AmpUtils::get_no_of_rows(avLayer2, noOfColumnsLayer2);
    int noOfRowsLayer3 = AmpUtils::get_no_of_rows(avLayer3, noOfColumnsLayer3);

    // Scratch buffers, one row per predictor. These array_views have no
    // backing data source (VS2013 C++ AMP feature: storage is created
    // on the accelerator, nothing is copied in).
    array_view<double, 2> avOutputLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avOutputLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avOutputLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 2> avErrorsLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avErrorsLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avErrorsLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 2> avThresholdLayer1(noOfRowsPredictors, noOfRowsLayer1);
    array_view<double, 2> avThresholdLayer2(noOfRowsPredictors, noOfRowsLayer2);
    array_view<double, 2> avThresholdLayer3(noOfRowsPredictors, noOfRowsLayer3);

    array_view<double, 3> avWeightsLayer1(noOfRowsPredictors, noOfRowsLayer1, (noOfColumnsLayer1 - 1));
    array_view<double, 3> avWeightsLayer2(noOfRowsPredictors, noOfRowsLayer2, (noOfColumnsLayer2 - 1));
    array_view<double, 3> avWeightsLayer3(noOfRowsPredictors, noOfRowsLayer3, (noOfColumnsLayer3 - 1));

    array_view<double, 2> avErrorsTempBuffer(noOfRowsPredictors, noOfRowsLayer3);
    int errorTempBufferSize = avErrorsTempBuffer.extent.size();

    array_view<double> avEpochErrors(noOfRowsPredictors);

    try{
        // One kernel thread per predictor row.
        parallel_for_each(extent<1>(AmpUtils::get_no_of_rows(avPredictors)), [=](index<1> idx) restrict(cpu, amp){
            int predictorRow = idx[0];

            // step 1: forward pass through the three layers
            compute_layer(activatorState, avPredictors[predictorRow], avLayer1, avOutputLayer1, noOfColumnsLayer1, predictorRow);
            compute_layer(activatorState, avPredictors[predictorRow], avLayer2, avOutputLayer2, noOfColumnsLayer2, predictorRow);
            compute_layer(activatorState, avPredictors[predictorRow], avLayer3, avOutputLayer3, noOfColumnsLayer3, predictorRow);

            // step 2: error of the output layer (layer 3): squared error
            // for reporting, derivative-weighted error for backprop.
            for (int column = 0; column < noOfRowsLayer3; column++){
                double neuronError = avTargets[predictorRow][column] - avOutputLayer3[predictorRow][column];
                avErrorsTempBuffer[predictorRow][column] = neuronError * neuronError;
                avErrorsLayer3[predictorRow][column] = neuronError * AmpActivator::derivative2(activatorState, avOutputLayer3[predictorRow][column]);
            }

            // Sum the squared errors of this row for the epoch total.
            double errorSum = 0.0;
            for (int column = 0; column < errorTempBufferSize; column++){
                errorSum += avErrorsTempBuffer[predictorRow][column];
            }

            avEpochErrors[predictorRow] = errorSum;

            // Backpropagate the error into layers 2 and 1.
            calculate_error_layer(activatorState, avErrorsLayer2[predictorRow], avErrorsLayer3, avLayer3, avOutputLayer2[predictorRow], noOfRowsLayer3, noOfRowsLayer3);
            calculate_error_layer(activatorState, avErrorsLayer1[predictorRow], avErrorsLayer2, avLayer2, avOutputLayer1[predictorRow], noOfRowsLayer2, noOfRowsLayer2);

            // step 3: compute weight/threshold updates per layer
            calculate_updates_layer(trainingState, avErrorsLayer1[predictorRow], avPredictors[predictorRow], avThresholdLayer1[predictorRow], avWeightsLayer1[predictorRow], (noOfColumnsLayer1 - 1), noOfRowsLayer1);
            calculate_updates_layer(trainingState, avErrorsLayer2[predictorRow], avPredictors[predictorRow], avThresholdLayer2[predictorRow], avWeightsLayer2[predictorRow], (noOfColumnsLayer2 - 1), noOfRowsLayer2);
            calculate_updates_layer(trainingState, avErrorsLayer3[predictorRow], avPredictors[predictorRow], avThresholdLayer3[predictorRow], avWeightsLayer3[predictorRow], (noOfColumnsLayer3 - 1), noOfRowsLayer3);

            // step 4: apply the updates to the network weights
            // NOTE(review): every thread writes the shared avLayerN views
            // here — looks like a data race across predictor rows; confirm.
            update_layer(avLayer1, avWeightsLayer1[predictorRow], avThresholdLayer1[predictorRow], noOfColumnsLayer1, noOfRowsLayer1);
            update_layer(avLayer2, avWeightsLayer2[predictorRow], avThresholdLayer2[predictorRow], noOfColumnsLayer2, noOfRowsLayer2);
            update_layer(avLayer3, avWeightsLayer3[predictorRow], avThresholdLayer3[predictorRow], noOfColumnsLayer3, noOfRowsLayer3);
        });

        // Copy the per-row error sums back to the host and reduce them.
        avEpochErrors.synchronize();

        double epochErrorsSum = 0.0;
        for (int i = 0; i < (int)avEpochErrors.extent.size(); i++){
            epochErrorsSum += avEpochErrors[i];
        }

        avErrors[epoch] = epochErrorsSum;
    }
    catch (const std::exception& e){
        // BUG FIX: catch by const reference — catching by value slices
        // derived exception types (e.g. concurrency::runtime_exception).
        std::wcout << "Exception Project::run_epoch: " << e.what() << std::endl;
    }
}

根据此MSDN帖子here以及here,自Windows 8以来,可写容器的最大数量应该增加到64个。

我现在的问题是,是否存在不同类型的可写容器,而我仍然只能使用最多8种特定类型?

1 个答案:

答案 0 :(得分:0)

严格来说,这个限制针对的是 UAV(Unordered Access View,无序访问视图)的数量。它与 DirectX 版本相关,而不是与 Windows 版本相关。

  

每个内核允许的可写 array_view / array / texture / writeonly_texture_view 对象数量有限。C++ AMP 对每个内核可使用的可写 array_view / array / texture / writeonly_texture_view 对象的数量有限制。具体来说,每个内核中可写的 array_view + array + texture + writeonly_texture_view 的总数在 DirectX 11 上不应超过 8 个,在 DirectX 11.1 上不应超过 64 个。每个内核允许的只读 array_view / array / texture 对象总数为 128 个;将容器显式指定为只读,可以帮助你避免触及每个内核可写对象数量的上限。

来自Parallel Programming in Native Code

Win8支持DX11.1,并且已经以某种有限的形式反向移植到Win7。看着我的机器它正在运行Windows 8.1,但似乎是使用DX 11而不是11.1驱动程序。 DXDIAG.EXE会告诉您正在使用的内容。您需要确保您的卡支持DX11.1,并且您已安装最新的驱动程序。