我试图转换大量图像(在本例中 shape = (42000, 784)),当我在代码中重复执行该转换时(即多次调用函数 batch_scale_xy),会遇到 MemoryError。我在 16 GB RAM 的机器上运行 32 位 Python,所以我认为错误是由 Python 的 32 位架构造成的。下面是我目前的代码。我想避免 MemoryError,并且最好不要牺牲速度(例如,不想通过把中间结果保存到磁盘来解决)。我知道调用 shape 需要从内存加载数组,所以我已经尽量在代码中减少对它的调用,但没有效果。此外,如果有办法能更快地完成这项工作,我将非常感谢您的建议。
import numpy as np
import skimage.transform as tf
import time
def initialize(data, img_shape, dtype=np.float64):
    """
    Helper function that prepares the arrays used by the image transforms.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array holding one flattened image per row. Each row must
        contain img_shape[0] * img_shape[1] values, otherwise the reshape
        raises ValueError.
    img_shape : tuple
        (rows, cols) of a single image.
    dtype : data-type, optional
        Element type of the zero-filled output buffer. Defaults to
        float64, which is what the buffer always used before this
        parameter existed. Passing np.float32 halves the size of the
        buffer, which matters when memory / address space is tight.

    Returns
    -------
    shape : tuple
        The original ``data.shape``.
    reshaped : numpy.ndarray
        ``data`` reshaped to (n_images, rows, cols).
    new : numpy.ndarray
        Zero-filled array of the same 3-D shape, to receive the results.
    """
    shape = data.shape
    # Build the 3-D shape once so both arrays are guaranteed to agree.
    stack_shape = (shape[0], img_shape[0], img_shape[1])
    reshaped = data.reshape(stack_shape)
    new = np.zeros(stack_shape, dtype=dtype)
    return shape, reshaped, new
def batch_scale_xy(data, scale_x=1, scale_y=1, img_shape=(28, 28),
                   random=False, interval=(0.0, 1.0)):
    """
    Apply an x/y scaling transform to every image in a batch.

    Each row of ``data`` is treated as a flattened image of shape
    ``img_shape`` and is passed through ``tf.warp`` with a
    ``tf.AffineTransform`` built from the scale factors.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one flattened image per row.
    scale_x, scale_y : number, optional
        Scale factors used for every image when ``random`` is false.
        Ignored when ``random`` is true.
    img_shape : tuple, optional
        (rows, cols) of a single image.
    random : bool, optional
        If true, a separate (x, y) pair of scale factors is drawn for
        each image from a uniform distribution over ``interval``.
    interval : tuple, optional
        (low, high) bounds of the uniform distribution used when
        ``random`` is true.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_images, rows * cols) holding the transformed
        images, flattened again.

    NOTE(review): tf.warp interprets the transform it is given as the
    inverse (output -> input) coordinate map, so please confirm the scale
    factors act in the direction the callers expect.
    """
    shape, reshaped, new = initialize(data, img_shape)

    # Plain truthiness instead of `== False` / `== True`: previously a value
    # such as None matched neither branch and an all-zero array was returned.
    if random:
        # Randomly scale each image by its own pair of factors.
        for loc, image in enumerate(reshaped):
            factor_x, factor_y = np.random.uniform(low=interval[0],
                                                   high=interval[1],
                                                   size=2)
            new[loc] = tf.warp(
                image, tf.AffineTransform(scale=(factor_x, factor_y)))
    else:
        # All images are scaled evenly, so the transform is built only once.
        transform = tf.AffineTransform(scale=(scale_x, scale_y))
        for loc, image in enumerate(reshaped):
            new[loc] = tf.warp(image, transform)

    return new.reshape((shape[0], img_shape[0] * img_shape[1]))
# Timing driver: runs the batch scaling twice on random data, once with fixed
# scale factors and once with per-image random factors.
X = np.random.uniform(low=0.0, high=1.0, size=(42000, 784))

start = time.time()
# The function is defined as batch_scale_xy; calling scale_xy raised NameError.
new = batch_scale_xy(X, scale_x=0.5, scale_y=0.5)
print("Time: %s seconds" % (time.time() - start))

# Drop the first result before the next run. Otherwise the old array stays
# alive until the second call has returned and rebinds `new`, so two full
# output buffers have to coexist while the second one is being allocated.
del new

start = time.time()
new = batch_scale_xy(X, random=True, interval=(0.7, 2.0))
print("Time: %s seconds" % (time.time() - start))
程序输出和错误信息如下:
Time: 4.06599998474 seconds
Traceback (most recent call last):
File "preprocessing.py", line 100, in <module>
new = batch_scale_xy(X,random=True, interval = (0.7,2.0))
File "preprocessing.py", line 75, in batch_scale_xy
shape, reshaped, new = initialize(data,img_shape)
File "preprocessing.py", line 34, in initialize
new = np.zeros((shape[0],img_shape[0],img_shape[1]))
MemoryError