我目前正在尝试在TensorFlow中使用可训练的参数alpha
定义自己的激活函数,但是遇到错误。我遵循的教程中找到了here的定义方法,但是尝试运行它时始终出现以下错误:
InvalidArgumentError: ValueError: setting an array element with a sequence.
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/script_ops.py", line 207, in __call__
ret = func(*args)
File "<ipython-input-29-5aa291759d1a>", line 63, in <lambda>
np_ca_32 = lambda x: np_ca(x).astype(np.float32)
ValueError: setting an array element with a sequence.
我认为问题是由该训练参数alpha
引起的,因为没有它,我可以运行它,但是我希望能够在激活函数中使用这个alpha
。对于如何解决此错误的任何建议或帮助,将不胜感激!
import numpy as np
import tensorflow as tf
from tensorflow.python.framework import ops
alpha = tf.Variable(tf.random_normal((1,1), 0, 1),
trainable=True,name='alpha')
#Activation Function
def custom_act(x):
if x>0:
h1=x
h2=0
else:
x=-x
h1=0
h2=x
return (alpha)*h1+(1-alpha)*h2
#Convert to numpy function
import numpy as np
np_ca = np.vectorize(custom_act)
#Compute gradient
def d_ca(x):
if x>0:
return alpha
else:
return (1-alpha)
np_d_ca = np.vectorize(d_ca)
#Convert to TensorFlow function
import tensorflow as tf
from tensorflow.python.framework import ops
np_d_ca_32 = lambda x: np_d_ca(x).astype(np.float32)
def tf_d_spiky(x,name=None):
with tf.name_scope(name, "d_ca", [x]) as name:
y = tf.py_func(np_d_ca_32,
[x],
[tf.float32],
name=name,
stateful=False)
return y[0]
#Get gradient
def py_func(func, inp, Tout, stateful=True, name=None, grad=None):
# Need to generate a unique name to avoid duplicates:
rnd_name = 'PyFuncGrad' + str(np.random.randint(0, 1E+8))
tf.RegisterGradient(rnd_name)(grad) # see _MySquareGrad for grad
#example
g = tf.get_default_graph()
with g.gradient_override_map({"PyFunc": rnd_name}):
return tf.py_func(func, inp, Tout, stateful=stateful, name=name)
def ca_grad(op, grad):
x = op.inputs[0]
n_gr = tf_d_ca(x)
return grad * n_gr
np_ca_32 = lambda x: np_ca(x).astype(np.float32)
#Final activation function
def tf_ca(x, name=None):
with tf.name_scope(name, "ca", [x]) as name:
y = py_func(np_ca_32,
[x],
[tf.float32],
name=name,
grad=ca_grad) # <-- here's the call to the
#gradient
return y[0]
with tf.Session() as sess:
x = tf.constant([0.2,0.7,1.2,1.7])
y = tf_ca(x)
tf.initialize_all_variables().run()
print(x.eval(), y.eval(), tf.gradients(y, [x])[0].eval())