Question

我为一个图像预处理步骤编写了最小可运行示例，该步骤需要从 Caffe v1 移植到 TensorFlow。我可以用 PIL + skimage 复现这些步骤，但在 TensorFlow 中却无法得到相同的结果。从输出可以看到，Caffe 变换后的图像与 TensorFlow 结果之间的 L2 范数很大，而 PIL + skimage 的结果则没有这个问题。如何用 TensorFlow 重现图像在 Caffe（或 PIL 方法）中经历的处理步骤？

  import tensorflow as tf
  import numpy as np
  from PIL import Image
  import caffe
  from skimage.transform import resize
  import requests

  # Download the sample image once and cache it on disk so that every
  # preprocessing pipeline below decodes exactly the same JPEG bytes.
  image_url = 'https://tinyjpg.com/images/social/website.jpg'
  TEST_IMAGE = 'test_image.jpg'
  # Name of the input blob the Caffe transformer is configured for.
  DATA_LAYER = 'data_p'
  # Per-channel mean subtracted from the channel-first image by each pipeline.
  MEAN = np.array([131.26315308, 140.62084961, 142.71440125], dtype=np.float32)
  response = requests.get(image_url)
  # Fail loudly on an HTTP error instead of silently saving an error page
  # under a .jpg name and feeding it to the decoders.
  response.raise_for_status()
  img_data = response.content
  with open(TEST_IMAGE, 'wb') as handler:
      handler.write(img_data)



  def create_transformer():
    '''
    Build the caffe.io.Transformer used as the reference pipeline.

    The transformer is registered for the DATA_LAYER blob with shape
    (1, 3, 224, 224) and configured with a (2, 0, 1) transpose, a (2, 1, 0)
    channel swap, mean subtraction using MEAN and a raw scale of 255.
    '''
    input_shapes = {DATA_LAYER: (1, 3, 224, 224)}
    tfm = caffe.io.Transformer(input_shapes)
    tfm.set_transpose(DATA_LAYER, (2, 0, 1))
    tfm.set_channel_swap(DATA_LAYER, (2, 1, 0))
    tfm.set_mean(DATA_LAYER, MEAN)
    tfm.set_raw_scale(DATA_LAYER, 255)
    return tfm

  def transform_image_original(test_image):
    '''
    Preprocess an image file with the reference caffe.io.Transformer.

    test_image: path of the image file to load with caffe.io.load_image.
    Returns the array produced by Transformer.preprocess for DATA_LAYER.
    '''
    t = create_transformer()
    # Load the file named by the argument (previously the global TEST_IMAGE
    # was read and the parameter was ignored).
    # The uint8 round trip quantises the loaded image to 8-bit levels before
    # it is rescaled to [0, 1] for the transformer.
    image_data = Image.fromarray(np.uint8(caffe.io.load_image(test_image) * 255))
    input_image = np.array(image_data) / 255.0
    transformed_image = t.preprocess(DATA_LAYER, input_image)
    return transformed_image

  def _resize(im, new_dims,interp_order=1):
      '''
      Resize an image array to new_dims with skimage's resize.

      The image is first normalised to [0, 1] using its own min/max, resized,
      and then mapped back to the original value range.

      im: image array; the leading axes are the ones being resized.
      new_dims: target size for the leading axes, e.g. (height, width).
      interp_order: interpolation order passed to skimage (1 = bilinear).
      Returns the resized array. A constant image is returned as a float32
      array filled with that constant.
      '''
      im_min, im_max = im.min(), im.max()
      if im_max > im_min:
          # Normalise so the interpolation runs on values in [0, 1].
          im_std = (im - im_min) / (im_max - im_min)
          resized_std = resize(im_std, new_dims, order=interp_order, mode='constant')
          return resized_std * (im_max - im_min) + im_min
      # Constant image: normalising would divide by zero, and previously this
      # path fell through to an unassigned `resized_im`. Resizing a constant
      # image is just a constant image of the new size.
      out_shape = tuple(new_dims) + tuple(im.shape[len(new_dims):])
      return np.full(out_shape, im_min, dtype=np.float32)

  def preprocess_image(TEST_IMAGE, height=224, width=224):
    '''
    Replicates the caffe transformation using PIL and skimage.transform.

    TEST_IMAGE: path of the image file to load.
    height, width: target size passed to _resize (previously ignored in
        favour of a hard-coded 224x224).
    Returns a channel-first array with reversed channel order and MEAN
    subtracted. As a side effect, writes the resized image to 'pil_file.jpg'.
    '''
    with open(TEST_IMAGE, 'rb') as f:
      image = Image.open(f)
      # convert() reads the pixel data while the file is still open.
      image = image.convert('RGB')
    image = np.array(image, np.float32) / 255.0
    image = _resize(image, (height, width))
    image = np.array(image, np.float32) * 255.0
    # Debug dump of the resized image.
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2, so this line
    # needs an older SciPy; imageio.imwrite is the documented replacement.
    import scipy.misc
    scipy.misc.imsave('pil_file.jpg', image)
    image = np.transpose(image, (2, 0, 1))
    image = image[::-1, ...] # convert RGB to BGR 
    image = image - MEAN.reshape((3, 1, 1))
    return image


  def tf_preprocess_image(TEST_IMAGE, height=224, width=224):
      '''
      Preprocess an image in tensorflow, mirroring preprocess_image.

      TEST_IMAGE: path of the JPEG file to read.
      height, width: target size; the resize is skipped if either is falsy.
      Returns a channel-first tensor with reversed channel order and MEAN
      subtracted.
      '''
      image_string = tf.read_file(TEST_IMAGE)
      image = tf.image.decode_jpeg(image_string, channels=3, dct_method='INTEGER_ACCURATE', 
                                   fancy_upscaling=False, acceptable_fraction=1, try_recover_truncated=True)
      image = tf.to_float(image) / 255

      # Capture the value range ONCE, before normalising. The previous code
      # recomputed min/max on the already-normalised, resized tensor (about 0
      # and 1) when undoing the normalisation, so the original range was
      # never restored.
      im_min = tf.reduce_min(image)
      im_max = tf.reduce_max(image)
      im_range = tf.subtract(im_max, im_min)

      # Normalise to [0, 1], as _resize does before interpolating.
      # NOTE(review): a constant image makes im_range zero; unlike _resize,
      # this graph has no guard for that case.
      image = tf.div(tf.subtract(image, im_min), im_range)
      if height and width:
        # Resize the image to the specified height and width.
        # NOTE(review): TF1 bilinear resizing may not sample at the same
        # pixel positions as skimage's resize, so small residual differences
        # are possible. Confirm against the TF version in use.
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_bilinear(image, [height, width],
                     align_corners=False)
        image = tf.squeeze(image, [0])
      # Map back to the original value range, then to the 0..255 scale.
      image = tf.add(tf.multiply(image, im_range), im_min)
      image = image * 255
      # RGB to BGR using strided slice
      image = image[..., ::-1]

      # Channel last to channel first
      image = tf.transpose(image, [2, 0, 1])

      # Mean subtraction
      image = tf.subtract(image, MEAN.reshape(3,1,1))
      return image

  # Run the same image through all three pipelines and compare the results.
  print('Preprocessing test image using Caffe...')
  image = transform_image_original(TEST_IMAGE)

  print('Preprocessing test image using PIL + skimage.transform ...')
  image2 = preprocess_image(TEST_IMAGE)

  print('Preprocessing test image using tensorflow')
  # Entering the session makes it the default one used by Tensor.eval().
  with tf.Session() as sess:
      tf_image3 = tf_preprocess_image(TEST_IMAGE)
      image3 = tf_image3.eval()

  print('L2 norm between caffe transformation and PIL + skimage', np.linalg.norm(image - image2)) # L2 norm 
  # Compare against the Caffe result, as the label says (previously this
  # measured PIL + skimage vs tf while claiming to measure caffe vs tf).
  print('L2 norm between caffe transformation and tf', np.linalg.norm(image - image3))

Tensorflow vs Caffe vs PIL + skimage中的图像转换

0 个答案: