下面的两个代码段来自仓库 https://github.com/kang205/DVBPR,该仓库是以下论文的 Python 实现:
《Visually-Aware Fashion Recommendation and Design with Generative Image Models》(基于生成式图像模型的视觉感知时尚推荐与设计)
我正在尝试运行、理解并调整它。据我对论文的理解,他们首先训练了一个孪生(Siamese)CNN 网络(DVBPR main.py 代码)作为偏好预测器;然后训练一个 GAN 网络,用来生成与客户喜欢的图像相似的图像;最后,偏好最大化模块(pm main.py)运行正则化过程,以防止使用偏好预测器的矩阵分解模型过拟合。
我试图跳过 GAN 部分,只用之前训练好的孪生网络为新图像计算偏好得分。这就是我在下面“偏好得分的代码”部分中要做的事情:我从训练好的孪生网络重新加载检查点权重,声明一些变量,最后把用户没有见过的图像和 idx 传给会话(session)。
我不完全了解idx的用途或目的。如果有人能解释我将不胜感激。
我也想弄清楚打印 sess.run 时输出里返回的是什么。它们是我传给 sess.run 的图像所对应的图像张量,以及偏好得分吗?我注意到,如果更改 sess.run 中方括号内各项的顺序,输出的顺序也会随之改变。因此,array([ 99.19335938], dtype=float32) 应该对应的是 idx 的输出。
我是tensorFlow的新手,因此非常感谢所有提示。
孪生网络代码(DVBPR main.py):
import sys
import math
import random
import time
from PIL import Image
import Queue
import numpy as np
import threading
from cStringIO import StringIO
import tensorflow as tf
# File name of the pre-partitioned dataset; it is loaded from the parent directory below.
dataset_name = 'AmazonFashion6ImgPartitioned.npy'
#Hyper-parameters
K = 100 # Latent dimensionality (width of the user factors and of the CNN output)
lambda1 = 0.001 # Weight decay applied to the CNN weight matrices
lambda2 = 1.0 # Regularizer for theta_u
learning_rate = 1e-4 # Step size handed to the Adam optimizer
training_epoch = 20 # Number of passes over the training interactions
batch_size = 128 # Also the fixed batch dimension of the evaluation buffer
dropout = 0.5 # Dropout, probability to keep units
numldprocess=4 # Number of background threads that load training images
# Load the dataset and unpack its six entries.
# user_train / user_validation / user_test are indexed by user id and hold
# lists of records with a 'productid' key; Item[j]['imgs'] holds the encoded
# image bytes of item j; usernum / itemnum are used as the id ranges.
# NOTE(review): unpacking into six names implies a pickled object array;
# newer NumPy releases need np.load(..., allow_pickle=True) for that - confirm
# against the NumPy version in use.
dataset = np.load('../'+dataset_name)
[user_train, user_validation, user_test, Item, usernum, itemnum] = dataset
# Create some wrappers for simplicity
def conv2d(x, W, b, strides=1):
    """Convolve ``x`` with ``W``, add bias ``b`` and apply ReLU.

    Args:
        x: input tensor passed to ``tf.nn.conv2d``.
        W: convolution kernel variable.
        b: bias variable added after the convolution.
        strides: stride used for both spatial dimensions.

    Returns:
        The ReLU-activated feature map ('SAME' padding is used).
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool ``x`` over k-by-k windows with stride k and 'SAME' padding."""
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
def avgpool2d(x, k=2):
    """Average-pool ``x`` over k-by-k windows with stride k and 'SAME' padding."""
    window = [1, k, k, 1]
    step = [1, k, k, 1]
    return tf.nn.avg_pool(x, ksize=window, strides=step, padding='SAME')
# Shapes of the trainable weight tensors, keyed by the variable name that
# Weights() looks up.
weights = dict(
    # Convolution kernels, handed to tf.nn.conv2d by conv2d().
    wc1=[11, 11, 3, 64],
    wc2=[5, 5, 64, 256],
    wc3=[3, 3, 256, 256],
    wc4=[3, 3, 256, 256],
    wc5=[3, 3, 256, 256],
    # Fully connected layers; the first dimension of wd1 is the size CNN()
    # flattens the last pooled feature map to.
    wd1=[7 * 7 * 256, 4096],
    wd2=[4096, 4096],
    wd3=[4096, K]
)

# Shapes of the bias vectors, keyed by the variable name that Biases() looks
# up; each matches the output width of the layer with the same suffix.
biases = dict(
    bc1=[64],
    bc2=[256],
    bc3=[256],
    bc4=[256],
    bc5=[256],
    bd1=[4096],
    bd2=[4096],
    bd3=[K]
)
def Weights(name):
    """Create or fetch the float32 weight variable called ``name``.

    The shape comes from the module-level ``weights`` table and new variables
    are initialised with the Xavier initializer. Whether the variable is
    created or reused is decided by the enclosing variable scope.
    """
    shape = weights[name]
    xavier = tf.contrib.layers.xavier_initializer()
    return tf.get_variable(name, dtype=tf.float32, shape=shape, initializer=xavier)
def Biases(name):
    """Create or fetch the float32 bias variable called ``name``.

    The variable is initialised to zeros with the shape listed in the
    module-level ``biases`` table.
    """
    zero_init = tf.zeros(biases[name])
    return tf.get_variable(name, dtype=tf.float32, initializer=zero_init)
# Create CNN model
def CNN(x, dropout):
    """Build the image-embedding network and return its last layer.

    Args:
        x: image tensor; it is reshaped to [-1, 224, 224, 3].
        dropout: value handed to ``tf.nn.dropout`` after each of the first
            two fully connected layers (the file treats it as the keep
            probability).

    Returns:
        Output of the third fully connected layer: one row per image with
        ``weights['wd3'][1]`` columns.
    """
    # Reshape input picture
    net = tf.reshape(x, shape=[-1, 224, 224, 3])

    # Convolution block 1 (stride 4) followed by pooling.
    # conv2d() already ends in a ReLU; the extra tf.nn.relu calls in this
    # function are kept from the original and do not change the values.
    net = conv2d(net, Weights('wc1'), Biases('bc1'), strides=4)
    net = tf.nn.relu(net)
    net = maxpool2d(net, k=2)

    # Convolution block 2 followed by pooling.
    net = conv2d(net, Weights('wc2'), Biases('bc2'))
    net = tf.nn.relu(net)
    net = maxpool2d(net, k=2)

    # Convolution blocks 3-5; only the last one is pooled.
    net = conv2d(net, Weights('wc3'), Biases('bc3'))
    net = tf.nn.relu(net)
    net = conv2d(net, Weights('wc4'), Biases('bc4'))
    net = tf.nn.relu(net)
    net = conv2d(net, Weights('wc5'), Biases('bc5'))
    net = tf.nn.relu(net)
    net = maxpool2d(net, k=2)

    # Flatten to the input width of the first fully connected layer.
    flat_width = weights['wd1'][0]
    net = tf.reshape(net, [-1, flat_width])

    # Fully connected layers 1 and 2: affine -> ReLU -> dropout.
    net = tf.add(tf.matmul(net, Weights('wd1')), Biases('bd1'))
    net = tf.nn.relu(net)
    net = tf.nn.dropout(net, dropout)
    net = tf.add(tf.matmul(net, Weights('wd2')), Biases('bd2'))
    net = tf.nn.relu(net)
    net = tf.nn.dropout(net, dropout)

    # Final linear projection; no activation.
    embedding = tf.add(tf.matmul(net, Weights('wd3')), Biases('bd3'))
    return embedding
# Rescale pixel values from [0, 255] to [-1, 1] before they enter the CNN.
# NOTE(review): image1, image2, image_test, u, i and j are not defined in the
# visible part of this file; presumably they come from the input queue that
# load_image_async feeds - confirm.
image1=(tf.to_float(image1)-127.5)/127.5
image2=(tf.to_float(image2)-127.5)/127.5
_image_test=(tf.to_float(image_test)-127.5)/127.5
# Flatten the id tensors to shape [batch_size].
u=tf.reshape(u,shape=[batch_size])
i=tf.reshape(i,shape=[batch_size])
j=tf.reshape(j,shape=[batch_size])
# NOTE(review): keep_prob is fed in the training loop, but none of the CNN
# calls below read it; they receive the Python constants dropout / 1.0.
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
#siamese networks
# NOTE(review): indentation was lost in this paste. The lines after the
# `with` are presumably its body, at least up to nn_regularizers (which looks
# the CNN variables up again by name) - confirm where the block ends.
with tf.variable_scope("DVBPR") as scope:
# First tower, built on image1 with the training dropout setting.
result1 = CNN(image1,dropout)
# Every later CNN call reuses the variables created by the first call.
scope.reuse_variables()
# Second tower, built on image2 with the same variables.
result2 = CNN(image2,dropout)
# Evaluation tower: value 1.0 is passed as the dropout argument.
result_test = CNN(_image_test,1.0)
# Sum of the L2 losses of all convolutional and fully connected weight
# tensors; the bias variables are not included.
nn_regularizers = sum(map(tf.nn.l2_loss,[Weights('wd1'), Weights('wd2'), Weights('wd3'), Weights('wc1'), Weights('wc2'), Weights('wc3'), Weights('wc4'), Weights('wc5')]))
# Per-user latent factors of shape [usernum, K], initialised uniformly in [0, 0.01).
thetau = tf.Variable(tf.random_uniform([usernum,K],minval=0,maxval=1)/100)
# Pairwise ranking objective: sum over the batch of
# log(sigmoid(theta_u . (result1 - result2))).
cost_train = tf.reduce_sum(tf.log(tf.sigmoid(tf.reduce_sum(tf.multiply(tf.gather(thetau,u),tf.subtract(result1,result2)),1,keep_dims=True))))
# L2 penalty on the user factors gathered for this batch.
regularizers = tf.nn.l2_loss(tf.gather(thetau,u))
# Subtract both penalties because cost_train is maximised.
cost_train -= lambda1 * nn_regularizers + lambda2 * regularizers
# Adam maximises cost_train by minimising its negative.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(-cost_train)
# Initializing the variables
init = tf.initialize_all_variables()
def AUC(train, test, U, I):
    """Estimate the mean per-user AUC on held-out items using sampling.

    For every user in ``train`` the first record of ``test[u]`` is the
    held-out item. It is compared against 100 randomly drawn item ids;
    draws that are in the user's training records, or equal to the held-out
    item, are skipped.

    Args:
        train: mapping from user id to records with a 'productid' key.
        test: mapping from user id to records; only ``test[u][0]`` is read.
        U: user factor matrix, indexed by user id along the first axis.
        I: item factor matrix, indexed by item id along the first axis.

    Returns:
        The average, over users, of the fraction of compared items that the
        held-out item outscores.
    """
    auc_total = 0
    user_count = 0
    for uid in train:
        held_out = test[uid][0]['productid']
        # Scores of all items for this user.
        scores = np.dot(U[uid, :], I.T)
        user_count += 1

        # Items that must not be used as negatives.
        excluded = set()
        for record in train[uid]:
            excluded.add(record['productid'])
        excluded.add(held_out)

        wins = 0
        compared = 0
        for candidate in random.sample(xrange(itemnum), 100):  # sample
            if candidate in excluded:
                continue
            compared += 1
            if scores[held_out] > scores[candidate]:
                wins += 1
        auc_total += wins / float(compared)
    return auc_total / float(user_count)
def Evaluation(step):
    """Export user and item factors and score them on validation and test.

    Runs every item image through ``result_test`` in batches of
    ``batch_size``, saves ``[U, I]`` to 'UI_<K>_<step>.npy' and computes the
    sampled AUC.

    Args:
        step: training step number; only used in the output file name.

    Returns:
        Tuple ``(validation_auc, test_auc)``.
    """
    print('...')
    U = sess.run(thetau)
    I = np.zeros([itemnum, K], dtype=np.float32)

    # Ceil-divide so that the last, possibly shorter, batch is included.
    num_batches = (itemnum + batch_size - 1) // batch_size
    idx = np.array_split(range(itemnum), num_batches)

    # Pixel values are 0..255, so the staging buffer is unsigned. The
    # original signed int8 buffer wrapped every value above 127.
    # NOTE(review): the preference-score snippet in this file declares
    # image_test as uint8; confirm the training script does the same.
    input_images = np.zeros([batch_size, 224, 224, 3], dtype=np.uint8)

    for batch in idx:
        for slot, item_id in enumerate(batch):
            input_images[slot] = np.uint8(np.asarray(Image.open(StringIO(Item[item_id]['imgs'])).convert('RGB').resize((224, 224))))
        first, last = batch[0], batch[-1]
        # The buffer always holds batch_size images; rows beyond this batch
        # are left over from earlier iterations and are sliced away.
        embedded = sess.run(result_test, feed_dict={image_test: input_images})
        I[first:(last + 1)] = embedded[:(last - first + 1)]

    print('export finised!')
    np.save('UI_' + str(K) + '_' + str(step) + '.npy', [U, I])
    return AUC(user_train, user_validation, U, I), AUC(user_train, user_test, U, I)
def sample(user):
    """Draw one random (user, positive item, negative item) triple.

    Args:
        user: mapping from user id to records with a 'productid' key.

    Returns:
        Tuple ``(u, i, j)``: a random user id, the product id of one of that
        user's records, and a random item id that is in none of them.
    """
    u = random.randrange(usernum)
    history = user[u]
    i = history[random.randrange(len(history))]['productid']

    # Product ids this user already has; they cannot serve as the negative.
    known = set()
    for record in history:
        known.add(record['productid'])

    # Redraw until the item is outside the user's history.
    j = random.randrange(itemnum)
    while j in known:
        j = random.randrange(itemnum)
    return (u, i, j)
def load_image_async():
    """Endlessly sample training triples and enqueue them with their images.

    Meant to be run as a thread target: each iteration draws a triple with
    ``sample(user_train)``, decodes both item images to 224x224 RGB uint8
    arrays and runs ``batch_train_queue_op`` with them. Never returns.
    """
    def decode(item_id):
        # Encoded image bytes -> RGB -> 224x224 -> uint8 array.
        picture = Image.open(StringIO(Item[item_id]['imgs'])).convert('RGB').resize((224,224))
        return np.uint8(np.asarray(picture))

    while True:
        (uuu, iii, jjj) = sample(user_train)
        jpg1 = decode(iii)
        jpg2 = decode(jjj)
        feed = {
            queueu: np.asarray([uuu]),
            queuei: np.asarray([iii]),
            queuej: np.asarray([jjj]),
            queueimage1: jpg1,
            queueimage2: jpg2,
        }
        sess.run(batch_train_queue_op, feed_dict=feed)
# Log file that receives one line per finished epoch.
f = open('DVBPR.log', 'w')

# Create the session and initialise all variables.
config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)
sess = tf.Session(config=config)
sess.run(init)

# Start the background image loaders as daemon threads.
t = [0] * numldprocess
for i in range(numldprocess):
    t[i] = threading.Thread(target=load_image_async)
    t[i].daemon = True
    t[i].start()

# Number of training interactions, i.e. the number of samples in one epoch.
oneiteration = 0
for item in user_train:
    oneiteration += len(user_train[item])

step = 1
# Only variables whose name starts with 'DVBPR' are written to checkpoints.
saver = tf.train.Saver([k for k in tf.global_variables() if k.name.startswith('DVBPR')])
epoch = 0
while step * batch_size <= training_epoch * oneiteration + 1:
    sess.run(optimizer, feed_dict={keep_prob: dropout})
    print('Step#' + str(step) + ' CNN update')
    # Integer division under Python 2: true once another full epoch of
    # samples has been consumed.
    if step * batch_size / oneiteration > epoch:
        epoch += 1
        saver.save(sess, './DVBPR_auc_' + str(K) + '_' + str(step) + '.ckpt')
        auc_valid, auc_test = Evaluation(step)
        # The test AUC is reported first, then the validation AUC.
        epoch_line = 'Epoch #' + str(epoch) + ':' + str(auc_test) + ' ' + str(auc_valid) + '\n'
        print(epoch_line)
        f.write(epoch_line)
        f.flush()
    step += 1
print("Optimization Finished!")
偏好得分的代码(pm main.py):
from cStringIO import StringIO
def loadimg(item):
    """Decode the image of ``Item[item]`` into a 224x224 RGB float64 array.

    The pixel values are rounded with ``np.round``; no rescaling is applied.
    """
    raw_bytes = StringIO(Item[item]['imgs'])
    picture = Image.open(raw_bytes).convert('RGB').resize((224,224))
    pixels = np.array(picture, dtype=np.float64)
    return np.round(pixels)
# Rebuild the inference graph of the trained network and restore its weights
# from a checkpoint.
# NOTE(review): indentation was lost in this paste, so where each `with`
# block ends is not visible here - confirm against the original file.
with tf.device('/cpu:0'):
# creating placeholder variable
# _image_test has form <tf.Tensor 'div_4:0' shape=(64, 224, 224, 3) dtype=float32>
# Placeholder for a batch of 64 RGB images of 224x224 pixels, stored as uint8.
image_test=tf.placeholder(dtype=tf.uint8,shape=[64,224,224,3])
# Rescale pixel values from [0, 255] to [-1, 1] before they enter the CNN.
_image_test=(tf.to_float(image_test)-127.5)/127.5
keep_prob = tf.placeholder(tf.float32) #dropout (keep probability)
# sharing variable by capturing scope and setting reuse
with tf.variable_scope("DVBPR") as scope:
# Build the network on the test batch, passing 1.0 as the dropout argument.
# NOTE(review): if CNN is the function from the training script, it returns
# one row per image with K columns, not a tensor of shape (100, 1) - confirm.
result_test = CNN(_image_test,1.0)
# Per-user latent factors of shape [usernum, D], initialised uniformly in
# [0, 0.01) (the uniform [0, 1) draw is divided by 100).
# NOTE(review): D is not defined in the visible code; presumably it equals K
# from the training script - confirm.
# NOTE(review): the Saver below restores only variables whose name starts
# with 'DVBPR'. If thetau is created outside the variable scope, or is not
# in the checkpoint, it keeps these random initial values and any score
# computed from it is not a trained preference - verify, or load the saved
# user factors explicitly.
thetau = tf.Variable(tf.random_uniform([usernum,D],minval=0,maxval=1)/100)
# Initializing the variables
init = tf.initialize_all_variables()
config = tf.ConfigProto(log_device_placement=False,allow_soft_placement=True)
config.gpu_options.allow_growth = True
# creating session
sess=tf.Session(config=config)
# Run the initialiser first; restore() below then overwrites the variables it covers.
sess.run(init)
# Saver restricted to the variables whose name starts with 'DVBPR'
saver = tf.train.Saver([k for k in tf.global_variables() if k.name.startswith('DVBPR')])
# restore model weights from DVBPR check point
# NOTE(review): DVBPR_ckpt_path is not defined in the visible code.
saver.restore(sess,DVBPR_ckpt_path)
# Decode one catalogue image (item 119) as float32 pixel values in [0, 255].
# It stands in for the generated image and is given a leading batch
# dimension of 1.
tstImg3 = np.round(np.array(Image.open(StringIO(Item[119]['imgs'])).convert('RGB').resize((224, 224)), dtype=np.float32))
rep_gan = tf.reshape(tstImg3, shape=[-1, 224, 224, 3])

# Note: don't have gpu switch to cpu
with tf.device('/cpu:0'):
    # need to figure out how to substitute image of product customer hasn't seen for get_gen
    gan_image = rep_gan
    # shaping image tensor from gan_image
    image = tf.image.resize_nearest_neighbor(images=gan_image, size=[224, 224], align_corners=None, name=None)
    # Every other CNN input in this file (image1, image2, _image_test) is
    # rescaled with (x - 127.5) / 127.5, so the raw 0..255 pixels get the same
    # treatment here. gan_image itself is left untouched for printing.
    image = (image - 127.5) / 127.5
    with tf.variable_scope("DVBPR") as scope:
        # Reuse the restored CNN variables instead of creating new ones.
        scope.reuse_variables()
        # Embedding of the image, one row per image.
        result = CNN(image, 1.0)

_user = 12
# Placeholder for the id of the single user to score.
user = tf.placeholder(dtype=tf.int32, shape=[1])
# idx is the preference score: the dot product between the image embedding
# and the latent factor row of the selected user, one value per image.
# NOTE(review): the score is only meaningful if thetau holds trained user
# factors. The Saver in this script restores only variables whose name
# starts with 'DVBPR' - confirm thetau is among them.
idx = tf.reduce_sum(tf.matmul(result, tf.transpose(tf.gather(thetau, user))), 1)

# sess.run returns one value per fetch, in the order of the fetch list:
# first the image tensor (gan_image), then the preference score (idx).
print(sess.run([gan_image, idx], feed_dict={user: [_user]}))
# the results below appear to return an image tensor and a preference score
Output:
[array([[[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]],
[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]],
[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]],
...,
[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]],
[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]],
[[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.],
...,
[ 255., 255., 255.],
[ 255., 255., 255.],
[ 255., 255., 255.]]]], dtype=float32), array([ 99.19335938], dtype=float32)]