I use theano.tensor.grad() to get the gradient for a gradient-based iteration that computes the eigenvalues and eigenvectors of X^T X for a matrix X. I also compute the eigenvalues and eigenvectors with numpy.linalg.eigh(). In principle the two methods should produce the same eigen-decomposition, but I get different answers. Here is the code for both methods; for each I print the first 16 eigenvalues.
import sys
import numpy as np
import numpy.linalg as linalg
import theano
import theano.tensor as T
print theano.__version__
print np.version.version
def compute_first_eigen(X, max_iteration, rate):
    sz = 32
    cur_v = T.vector('cur_v')
    xx = T.dmatrix('xx')
    dot_xxv = T.dot(xx, cur_v)
    cost = T.dot(dot_xxv.T, dot_xxv)      # cost = ||X v||^2 = v^T X^T X v
    grad_ = T.grad(cost, cur_v)
    eigen_first = theano.function(
        inputs=[xx, cur_v],
        outputs=grad_,
        allow_input_downcast=True)
    v = np.random.rand(sz*sz)
    t = 1
    while t < max_iteration:
        prev_v = v
        y = v + rate*eigen_first(X, v)    # gradient ascent step
        v = y / linalg.norm(y)            # project back onto the unit sphere
        v_dis = linalg.norm(v - prev_v)
        if v_dis < sys.float_info.epsilon:
            break
        t = t + 1
    xv = np.dot(X, v)
    value_1 = np.dot(xv.T, xv)            # eigenvalue estimate for the converged v
    return value_1, v
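# --- Note (my addition, not part of the original post) ---
# T.grad of dot(Xv, Xv) with respect to v is the analytic gradient 2*X^T*X*v,
# so each pass of the loop above computes y = (I + 2*rate*X^T*X)*v and then
# re-normalizes: effectively a shifted power iteration, which should converge
# to the eigenvector of X^T*X belonging to its largest eigenvalue.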
# set an X for testing.
sz = 32
X = np.random.rand(20, sz*sz)
rate = 0.5

# set up the Theano function for the remaining eigenvectors
v = T.vector('v')
xx = T.dmatrix('x')
xxv = T.dot(xx, v)
coeffs = T.vector('coeffs')
vecs = T.fmatrix('vecs')
components, updates = theano.scan(fn=lambda coeffs, vecs, v: (v.dot(vecs) ** 2) * coeffs,
                                  outputs_info=None,
                                  sequences=[coeffs, vecs],
                                  non_sequences=v)
sum_ = components.sum()
cost = T.dot(xxv.T, xxv) - sum_           # ||X v||^2 minus the deflation term
gv = T.grad(cost, v)
eigen_compute = theano.function(
    inputs=[xx, coeffs, vecs, v],
    outputs=gv,
    updates=updates,
    allow_input_downcast=True
)
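# --- Note (my addition, not part of the original post) ---
# The subtracted scan term implements Hotelling-style deflation: for unit-norm
# v the cost is  v^T X^T X v  -  sum_i lambda_i * (v . v_i)^2,  whose gradient
# is 2*(X^T X - sum_i lambda_i * v_i v_i^T) v.  Gradient ascent on this should
# converge to the next-largest eigenvector, provided the previously found v_i
# are accurate, orthonormal eigenvectors of X^T X.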
val_1, vec_1 = compute_first_eigen(X, 10000, 0.5)
evals = np.array([val_1])
evecs = np.array([vec_1])
print evals
for i in range(1, 16):
    t = 1
    vout = np.random.rand(sz*sz)
    while t < 10000:
        # outputs, vout = eigen_compute(X,evals,evecs)
        prev_v = vout
        y = vout + rate*eigen_compute(X, evals, evecs, vout)
        vout = y / linalg.norm(y)
        v_dis = linalg.norm(vout - prev_v)
        if v_dis < sys.float_info.epsilon:
            break
        t = t + 1
    evecs = np.concatenate((evecs, vout.reshape((1, sz*sz))), axis=0)
    Xv = np.dot(X, vout)
    evals = np.append(evals, np.dot(Xv.T, Xv))
print evals
# numpy Linalg eigh
eigenvals,eigenvecs = linalg.eigh(np.dot(X.T,X))
idx = eigenvals.argsort()[::-1] # decreasing order of eigenvalues
eigenvals = eigenvals[idx]
D = eigenvecs[:, idx]
print eigenvals[:16]
# print sys.float_info
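For reference, here is a small cross-check I would append to the script above (my own addition, not part of the original code); it only compares the two results and measures how close the gradient-method vectors are to being orthonormal eigenvectors of X^T X:

# compare the leading eigenvalue from both methods
print np.allclose(evals[0], eigenvals[0], rtol=1e-3)
# are the vectors from the gradient method orthonormal?
print np.allclose(np.dot(evecs, evecs.T), np.eye(evecs.shape[0]), atol=1e-6)
# residual ||X^T X u - lambda*u|| for each recovered pair
for lam, u in zip(evals, evecs):
    print linalg.norm(np.dot(X.T, np.dot(X, u)) - lam * u)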
The first 16 eigenvalues from each method are shown below. The first block comes from the Theano gradient method, the second from numpy's eigh function.
[ 5185.16126103 107.55438452 103.12505677 97.43512945 96.5613719
93.96313905 93.10809773 92.42390547 89.16485977 86.65039761
84.73367931 83.0397494 81.79609393 78.64115855 76.32032247
75.24690368]
[ 5180.1757114 844.85544447 751.07308305 705.97215297 642.28619825
611.22852835 598.26571675 568.71283447 550.16102596 538.15434055
534.25366614 514.62884927 513.80252085 507.78943946 475.20260796
472.42131454]
I have no idea what is going on here. I tried different numpy versions, but that did not help. Does anyone know why this happens?
BTW, the Python version is 2.7.10, numpy is 1.11.2, and theano is 0.9.0dev3.dev.