如何使用张量初始化内核

时间:2019-12-20 14:38:24

标签: python tensorflow keras

我在keras中创建了一个自定义层,该层仅在输入和内核之间执行点积。但是对于内核,我想使用批处理的平均值作为内核初始化,这意味着取批处理的平均值并生成一个内核,其初始值为该平均值。为此,我创建了一个自定义内核初始化程序,如下所示:

 class Tensor_Init(Initializer):
    """Initializer that generates tensors initialized to a given tensor.
    # Arguments
        Tensor:  the generator tensors.
    """

    def __init__(self, Tensor=None):
        self.Tensor = Tensor

    def __call__(self, shape, dtype=None):


        return tf.Variable(self.Tensor)

    def get_config(self):
        return {'Tensor': self.Tensor}

这是keras中自定义层的调用方法。我只是简单地计算批处理的平均值,并将其与上述初始化程序类一起使用以生成内核。我在自定义图层中使用它的方式如下

 def call(self, inputs):
        data_format = conv_utils.convert_data_format(self.data_format, self.rank + 2)    

        inputs = tf.extract_image_patches(
            inputs,
            ksizes=(1,) + self.kernel_size + (1,),
            strides=(1,) + self.strides + (1,),
            rates=(1,) + self.dilation_rate + (1,),
            padding=self.padding.upper(),
        )

        inputs = K.reshape(inputs,[-1,inputs.get_shape().as_list()[1],inputs.get_shape().as_list()
                                   [2],self.kernel_size[0]*self.kernel_size[1] ,self.output_dim])

        self.kernel = self.add_weight(name='kernel',shape=(),initializer=Tensor_Init(Tensor=tf.reduce_mean(inputs, 0)),trainable=True)


        outputs = (tf.einsum('NHWKC,HWKC->NHWC',inputs,self.kernel)+self.c)**self.p

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, (0, 3, 1, 2))

        return outputs

通常会创建并编译模型,但是我开始训练时遇到此错误

InvalidArgumentError: You must feed a value for placeholder tensor 'conv2d_1_input' with dtype float and shape [?,48,48,3]
     [[node conv2d_1_input (defined at C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\backend\tensorflow_backend.py:736) ]]

Original stack trace for 'conv2d_1_input':
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelapp.py", line 563, in start
    self.io_loop.start()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\platform\asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 438, in run_forever
    self._run_once()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\asyncio\base_events.py", line 1451, in _run_once
    handle._run()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 787, in inner
    self.run()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 378, in dispatch_queue
    yield self.process_one()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 225, in wrapper
    runner = Runner(result, future, yielded)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 714, in __init__
    self.run()
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 272, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\kernelbase.py", line 542, in execute_request
    user_expressions, allow_stdin,
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2855, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2881, in _run_cell
    return runner(coro)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3058, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3249, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-35eda01d200a>", line 75, in <module>
    model = create_vgg16()
  File "<ipython-input-2-35eda01d200a>", line 12, in create_vgg16
    model.add(Conv2D(64, (5, 5), input_shape=(48,48,3),  padding='same'))
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\sequential.py", line 162, in add
    name=layer.name + '_input')
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\input_layer.py", line 178, in Input
    input_tensor=tensor)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\engine\input_layer.py", line 87, in __init__
    name=self.name)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\keras\backend\tensorflow_backend.py", line 736, in placeholder
    shape=shape, ndim=ndim, dtype=dtype, sparse=sparse, name=name)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\keras\backend.py", line 998, in placeholder
    x = array_ops.placeholder(dtype, shape=shape, name=name)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py", line 2143, in placeholder
    return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 7401, in placeholder
    "Placeholder", dtype=dtype, shape=shape, name=name)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 3616, in create_op
    op_def=op_def)
  File "C:\ProgramData\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py", line 2005, in __init__
    self._traceback = tf_stack.extract_stack()



1 个答案:

答案 0 :(得分:0)

我能够通过简单地创建一个零初始化的内核,然后将平均值分配给它,甚至不创建自定义初始化程序,就将批处理的平均值传递给内核。我对自定义图层进行了如下修改


def call(self, inputs):
        data_format = conv_utils.convert_data_format(self.data_format, self.rank + 2)    
        inputs = tf.extract_image_patches(
            inputs,
            ksizes=(1,) + self.kernel_size + (1,),
            strides=(1,) + self.strides + (1,),
            rates=(1,) + self.dilation_rate + (1,),
            padding=self.padding.upper(),
        )

        inputs = K.reshape(inputs,[-1,inputs.get_shape().as_list()[1],inputs.get_shape().as_list()
                                   [2],self.kernel_size[0]*self.kernel_size[1] ,self.output_dim])

        weights = tf.reduce_mean(inputs, 0)

        self.kernel = self.add_weight(name='kernel', 
                                      shape=(weights.get_shape().as_list()[0],weights.get_shape().as_list()
                                   [1],weights.get_shape().as_list()[2],weights.get_shape().as_list()[3]),
                                      initializer='zeros',
                                      trainable=True)

        tf.compat.v1.assign(self.kernel, weights)

        outputs = (tf.einsum('NHWKC,HWKC->NHWC',inputs,self.kernel)+self.c)**self.p

        if self.data_format == 'channels_first':
            outputs = K.permute_dimensions(outputs, (0, 3, 1, 2))

        return outputs