Question

我试图将CRF复制为已在Keras中实现但使用TensorFlow作为后端（https://github.com/sadeepj/crfasrnn_keras）的RNN。 Keras前端很好，但是一些后端代码是作为TensorFlow自定义操作编写的。我试图在CNTK中重复此操作，但是我有几个问题。

import cntk as C
from cntk import ops
import copy
import numpy as np

C.device.try_set_default_device(C.device.cpu())

ops.register_native_user_function('HighDimFilterOp', 'Cntk.HighDimFilter-' + C.__version__.rstrip('+'), 'CreateHighDimFilter')

def high_dim_filter(image=None, rgb=None, **kwargs):
inputs = [list(image), list(rgb)];
layer_config = copy.deepcopy(kwargs)
ops.native_user_function('HighDimFilterOp', inputs,    layer_config, 'high_dim_filter')

此代码是对我的用户C ++函数的Python调用。 C ++接口如下：

#include "HighDimFilter.h"

using namespace CNTK;

extern "C"
#ifdef _WIN32
    __declspec(dllexport)
#endif
Function* CreateHighDimFilter(const Variable* operands, size_t /*numOperands*/, const Dictionary* attributes, const wchar_t* name)
{
    printf("Creating HighDimFilter\n");
    return new HighDimFilter({operands[0], operands[1]}, *attributes, name);
}

，自定义函数本身定义为：

#pragma once
#include "CNTKLibrary.h"
#include "modified_permutohedral.h"

using namespace CNTK;

class HighDimFilter final : public Function
{
    bool _bilateral;
    float _theta_alpha;
    float _theta_beta;
    float _theta_gamma;

    enum Input : uint32_t
    {
        SCORES,
        IM_INFO
    };

public:
    HighDimFilter(const std::vector<Variable>& inputs, const Dictionary& attributes, const std::wstring& name = L"HighDimFilter")
        : Function(inputs, attributes, name)
    {
        if (attributes.Contains(L"bilateral"))
            _bilateral = attributes[L"bilateral"].Value<bool>();

        if (_bilateral == false)
        {
            if (attributes.Contains(L"theta_gamma"))
                _theta_gamma = static_cast<float>(attributes[L"theta_gamma"].Value<double>());
        }
        else
        {
            if (attributes.Contains(L"theta_alpha"))
                _theta_alpha = static_cast<float>(attributes[L"theta_alpha"].Value<double>());

            if (attributes.Contains(L"theta_beta"))
                _theta_beta = static_cast<float>(attributes[L"theta_beta"].Value<double>());
        }
    }

private:
    void _compute_spatial_kernel(NDArrayViewPtr& Tensor, const float theta_gamma)
    {
        auto output_kernel = Tensor->WritableDataBuffer<float>();
        auto outputShape = Tensor->Shape();
        //auto channels = outputShape[0];
        auto height = outputShape[1];
        auto width = outputShape[2];
        const auto num_pixels = width * height;
        for (int p = 0; p < num_pixels; ++p)
        {
            output_kernel[2 * p] = static_cast<float>(p % width) / theta_gamma;
            output_kernel[2 * p + 1] = static_cast<float>(p / width) / theta_gamma;
        }
    }

    void _compute_bilateral_kernel(NDArrayViewPtr& Tensor, const NDArrayViewPtr& Image,
                                   const float theta_alpha, const float theta_beta)
    {
        auto output_kernel = Tensor->WritableDataBuffer<float>();
        auto rgb = Image->DataBuffer<float>();
        auto outputShape = Tensor->Shape();
        //auto channels = outputShape[0];
        auto height = outputShape[1];
        auto width = outputShape[2];
        const auto num_pixels = height * width;

        for (int p = 0; p < num_pixels; ++p)
        {
            // Spatial terms
            output_kernel[5 * p] = static_cast<float>(p % width) / theta_alpha;
            output_kernel[5 * p + 1] = static_cast<float>(p / width) / theta_alpha;

            // Color terms
            output_kernel[5 * p + 2] = static_cast<float>(rgb[p] / theta_beta);
            output_kernel[5 * p + 3] = static_cast<float>(rgb[num_pixels + p] / theta_beta);
            output_kernel[5 * p + 4] = static_cast<float>(rgb[2 * num_pixels + p] / theta_beta);
        }
    }

    BackPropStatePtr Forward(const std::vector<ValuePtr>& inputValues,
                             std::unordered_map<Variable, ValuePtr>& outputs,
                             const DeviceDescriptor& computeDevice,
                             const std::unordered_set<Variable>& /*outputsToRetainBackwardStateFor */) override
    {
#if 0
        auto scoresShape = inputValues[Input::SCORES]->Shape();
        auto channels = scoresShape[0];
        auto height = scoresShape[1];
        auto width = scoresShape[2];
        const auto num_pixels = width * height;
        auto &outputValue = outputs[this->Output()];
        if (outputValue == nullptr)
        {
            outputValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, scoresShape, computeDevice));
        }

        if (computeDevice.Type() != DeviceKind::CPU) 
            throw std::runtime_error("HighDimFilter: only CPU evaluation is supported at the moment.");

        ModifiedPermutohedral mp;

        if (_bilateral)
        {
            auto &kernel_vals = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({5, height, width}), computeDevice));
            //float* kernel_vals = new float[5 * num_pixels];
            _compute_bilateral_kernel(kernel_vals->Data(), inputValues[Input::IM_INFO]->Data(),
                                     _theta_alpha, _theta_beta);
            mp.init(kernel_vals->Data(), 5, num_pixels);
            mp.compute(outputValue->Data(), inputValues[Input::SCORES]->Data(), false);
        }
        else
        {
            auto &kernel_vals = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({2, height, width}), computeDevice));
            _compute_spatial_kernel(kernel_vals->Data(), _theta_gamma);
            mp.init(kernel_vals->Data(), 2, num_pixels);
            mp.compute(outputValue->Data(), inputValues[Input::SCORES]->Data(), channels, false);
        }

        return MakeSharedObject<BackPropState>(this->shared_from_this(), computeDevice, std::unordered_map<Variable, ValuePtr>({ {Inputs()[Input::IM_INFO], inputValues[Input::IM_INFO]} }));
#else
        return nullptr;
#endif
    }

    void Backward(const BackPropStatePtr& state,
                  const std::unordered_map<Variable, ValuePtr>& rootGradientValues,
                  std::unordered_map<Variable, ValuePtr>& backPropagatedGradientValuesForInputs) override
    {
#if 0
        auto gradOutputVariable = Inputs()[Input::SCORES];
        auto inputVariable = Inputs()[Input::IM_INFO];
        auto &gradValue = backPropagatedGradientValuesForInputs[gradOutputVariable];
        if (gradValue == nullptr)
            gradValue = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, gradOutputVariable.Shape(), state->Device()));

        auto imageData = state->SavedForwardPropValues().at(inputVariable)->Data();
        auto imageShape = imageData->Shape();
        auto channels = imageShape[0];
        auto height = imageShape[1];
        auto width = imageShape[2];
        const auto num_pixels = width * height;

        if (state->Device().Type() != DeviceKind::CPU)
            throw std::runtime_error("HighDimFilter: only CPU evaluation is supported at the moment.");

        auto rootGradientData = rootGradientValues.at(this->Output())->Data();

        ModifiedPermutohedral mp;

        if (_bilateral)
        {
            auto &kernel_vals = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({5, height, width}), state->Device()));
            //float* kernel_vals = new float[5 * num_pixels];
            _compute_bilateral_kernel(kernel_vals->Data(), imageData,
                                      _theta_alpha, _theta_beta);
            mp.init(kernel_vals->Data(), 5, num_pixels);
            mp.compute(gradValue->Data(), rootGradientData, true);
        }
        else
        {
            auto &kernel_vals = MakeSharedObject<Value>(MakeSharedObject<NDArrayView>(DataType::Float, NDShape({2, height, width}), state->Device()));
            _compute_spatial_kernel(kernel_vals->Data(), _theta_gamma);
            mp.init(kernel_vals->Data(), 2, num_pixels);
            mp.compute(gradValue->Data(), rootGradientData, channels, true);
        }
#endif
        return;
    }

    const std::wstring& OpName() const override
    {
        static const std::wstring opName = L"HighDimFilterOp";
        return opName;
    }

    size_t CurrentVersion() const override
    {
        NOT_IMPLEMENTED;
    }

    void InferOutputs(std::vector<Variable>& /*outputs */) override
    {
        NOT_IMPLEMENTED;
    }

    FunctionPtr Clone(const std::vector<Variable>& /*clonedInputs */) override
    {
        return nullptr;
    }
};

我的python调用看起来像：

bilateral_high_dim_filter = custom_module.high_dim_filter(image=all_ones_flat,
                                                          rgb=rgb_flat,
                                                          bilateral=True,
                                                          theta_alpha=self.theta_alpha,
                                                          theta_beta=self.theta_beta)


high_dim_filter = custom_module.high_dim_filter(image=all_ones_flat,
                                                rgb=rgb_flat,
                                                bilateral=False,
                                                theta_gamma=self.theta_gamma)

问题如下：1）初始化时传递给native_user_function的“操作数”是什么？这些是否仅在初始化时通过（它们是权重和偏差初始化）？在“函数”构造初始化器中如何使用输入操作数？如果我在Python中将它们设置为“ None”，代码将崩溃。 2）您如何向前传播过滤器？只需调用“ forward（）”？向前传播所需的参数呢？ 3）CNTK中是否有类似于TensorFlow的数值梯度计算来检查梯度？

具有CNTK后端的Keras自定义层（CRF作为RNN）

0 个答案: