如何在CoreML自定义层中实现ktf.image.resize_images?

时间:2018-02-16 13:34:02

标签: swift coreml coremltools

我正在尝试在 CoreML 中实现一个自定义层:模型中有一个调用 ktf.image.resize_images 函数的 Lambda 层,我想在 Swift 中用一个自定义层类来实现它。

这是我的 Python 脚本:

def resizeImage(x, size):
    """Resize ``x`` to ``size`` by delegating to ``ktf.image.resize_images``.

    Args:
        x: Value forwarded as the first argument of ``resize_images``.
        size: Value forwarded as the second argument of ``resize_images``.

    Returns:
        Whatever ``ktf.image.resize_images(x, size)`` returns.
    """
    resized = ktf.image.resize_images(x, size)
    return resized

def convert_lambda(layer):
    """Build the Core ML custom-layer parameters for a ``resizeImage`` Lambda.

    Args:
        layer: Object exposing ``function`` and an ``arguments`` mapping that
            contains a ``"size"`` entry.

    Returns:
        A ``NeuralNetwork_pb2.CustomLayerParams`` whose class name is
        ``"resizeImage"`` when ``layer.function`` is ``resizeImage``;
        ``None`` for every other function.
    """
    # Anything that is not the resize Lambda is left unconverted.
    if layer.function != resizeImage:
        return None

    custom = NeuralNetwork_pb2.CustomLayerParams()
    custom.className = "resizeImage"
    custom.description = "Decoder Resizing"

    # Only the first entry of the "size" argument is embedded, and it is
    # stored under the parameter key "scale".
    first_dim = layer.arguments["size"][0].value
    custom.parameters["scale"].intValue = first_dim

    print("LAMBDA CONVERSION = Size embedded to CoreML Model: %d" % first_dim)

    return custom

...

# Decoder: each stage resizes x to the spatial size of the matching encoder
# output (taken in reverse order), convolves, concatenates that encoder output
# on axis 3, and convolves again.
for i in range(decoder_n):
    strides = 1
    reverse_i = decoder_n - i - 1
    is_last_stage = (i == decoder_n - 1)
    skip_connection = encoder_layers[reverse_i]

    # shape[1:3] of the encoder output is what the resize Lambda targets.
    size = skip_connection.shape[1:3]
    if is_last_stage:
        out_channels = 2 ** 5
    else:
        out_channels = 2 ** ((decoder_n - i - 2) // 3 + 5)

    x = Lambda(resizeImage, arguments={'size':size})(x)
    x = Convolution2D(out_channels, kernel_size=(3, 3), activation='relu', strides=strides, padding='same')(x)

    x = concatenate([x, skip_connection], axis=3)

    # The final stage emits `channels` feature maps through a sigmoid;
    # every earlier stage keeps the computed width and uses relu.
    if is_last_stage:
        out_channels = channels
        activation = 'sigmoid'
    else:
        out_channels = 2 ** ((decoder_n - i - 2) // 3 + 5)
        activation = 'relu'
    x = Convolution2D(out_channels, kernel_size=(3, 3), activation=activation, strides=strides, padding='same')(x)

到目前为止,这是Swift类:

import Foundation
import CoreML
import Accelerate
import UIKit

/// Core ML custom layer exposed to Objective-C under the name `resizeImage`.
///
/// NOTE(review): despite its name, this layer does not resample anything.
/// `outputShapes(forInputShapes:)` reports the input shapes unchanged and
/// `evaluate(inputs:outputs:)` multiplies every element by `scale`. If the
/// layer is meant to stand in for an image resize, both methods still need a
/// real implementation (output shape with the target height/width and an
/// interpolation pass); that is a likely cause of incorrect model output.
@objc(resizeImage) class resizeImage: NSObject, MLCustomLayer {

    /// Factor every input element is multiplied by in `evaluate`.
    /// Read from the `"scale"` entry of the layer parameters; 1.0 if absent.
    let scale: Float

    /// Creates the layer from the parameters stored in the model.
    ///
    /// - Parameter parameters: Layer parameters; only `"scale"` is read.
    required init(parameters: [String : Any]) throws {
        // Read the value through NSNumber instead of `as? Float`: the bridging
        // cast to Float can fail when the stored number is not exactly
        // representable as a Float, which would silently select the default.
        if let number = parameters["scale"] as? NSNumber {
            scale = number.floatValue
        } else {
            scale = 1.0
        }

        print(#function, parameters)

        super.init()
    }

    /// Receives the layer weights. This layer only logs them.
    func setWeightData(_ weights: [Data]) throws {
        print(#function, weights)
    }

    /// Reports the output shapes, which are the input shapes unchanged.
    ///
    /// - Parameter inputShapes: Shapes of the layer inputs.
    /// - Returns: `inputShapes` as received.
    func outputShapes(forInputShapes inputShapes: [[NSNumber]]) throws -> [[NSNumber]] {
        print(#function, inputShapes)

        return inputShapes
    }

    /// Writes `input * scale` element by element into the matching output.
    ///
    /// - Parameters:
    ///   - inputs: Source arrays.
    ///   - outputs: Destination arrays, paired with `inputs` by position.
    func evaluate(inputs: [MLMultiArray], outputs: [MLMultiArray]) throws {
        print(#function, inputs.count, outputs.count)

        // zip pairs the buffers by position and stops at the shorter list, so
        // a count mismatch cannot index past the end of `outputs`.
        for (input, output) in zip(inputs, outputs) {
            // Never write past the end of the destination buffer.
            let elementCount = min(input.count, output.count)

            for index in 0..<elementCount {
                let scaled = input[index].floatValue * scale
                output[index] = NSNumber(value: scaled)
            }
        }
    }
}

输出的预测图像编码不正确,有人知道可能是什么原因吗?

enter image description here

0 个答案:

没有答案