import pandas as pd
from scipy import misc
import numpy as np
import matplotlib.pyplot as plt
# Store the decoded image (a NumPy array) in a single DataFrame cell.
# plt.imread is used because scipy.misc.imread is no longer available in
# current SciPy releases.
W = {'img': [plt.imread('pic.jpg')]}
df = pd.DataFrame(W)
# This displays the image: the cell still holds the original array object.
# The column is named 'img' (see W above).
plt.imshow(df.img[0])
plt.show()
df.to_csv('mypic.csv')
new_df = pd.read_csv('mypic.csv')
# This does not display the image.
# NOTE: CSV is a text format, so the array is written as its string
# representation and read back as a plain string; imshow cannot convert
# that to image data. Persist the array with np.save or pickle instead.
plt.imshow(new_df.img[0])
plt.show()
当我尝试显示从csv文件加载的图像时,出现错误:图像数据无法转换为浮点数(Image data cannot be converted to float)。但是,直接使用数据框df
时,我能够正确显示图像。
当我将df存储到csv文件时,我怀疑数据类型出了问题。我该如何解决这个问题?
编辑:我应该补充一点,我的主要目标是
答案 0 :(得分:6)
从问题中不清楚为什么要使用pandas数据帧来存储图像。我认为这会使事情变得不必要地复杂化。您可以直接以二进制格式直接存储numpy数组,稍后再次加载它。
import numpy as np
import matplotlib.pyplot as plt
# Build a tiny demo image and write it to disk as a JPEG.
pixel_rows = [
    [[1., 0.], [0., 0.]],
    [[0., 1.], [0., 1.]],
    [[0., 0.], [1., 1.]],
]
imar = np.array(pixel_rows).T
plt.imsave('pic.jpg', imar)

# Read the JPEG back and display it.
im = plt.imread('pic.jpg')
plt.imshow(im)
plt.show()

# Persist the decoded array in NumPy's binary format
# (np.save appends the ".npy" extension to the given name).
np.save('mypic', im)

# Restore the array from the binary file and display it again.
new_im = np.load('mypic.npy')
plt.imshow(new_im)
plt.show()
作为对以下评论的回应(这些评论在某种程度上把问题引向了另一个方向),您当然可以在数据框中存储图像的路径/名称。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Create a small demo image and save it as a JPEG.
imar = np.array([[[1., 0.], [0., 0.]],
                 [[0., 1.], [0., 1.]],
                 [[0., 0.], [1., 1.]]]).transpose()
plt.imsave('pic.jpg', imar)

# Create the dataframe; "Feature2" will hold the path of the stored image.
df = pd.DataFrame([[0, ""]], columns=["Feature1", "Feature2"])

# Read the image and show it.
im = plt.imread('pic.jpg')
plt.imshow(im)
plt.show()

# Save the image array to a binary file.
np.save('mypic.npy', im)

# Store only the name of the binary file in the dataframe.
df.iloc[0, 1] = 'mypic.npy'

# Save the dataframe. index=False keeps the row index out of the file so
# that reading it back does not produce an extra "Unnamed: 0" column.
df.to_csv("mydf.csv", index=False)
del df

# Read the dataframe back from csv.
df = pd.read_csv("mydf.csv")

# Load the image from the binary file, given the path from the dataframe.
new_im = np.load(df["Feature2"][0])

# Show the loaded image.
plt.imshow(new_im)
plt.show()
最后,您可以按照最初的计划将实际图像存储在数据框单元格中,但不要写入csv,而是用pickle序列化数据框,这样读回它时就如同它从未被保存过一样。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Create a small demo image and save it as a JPEG.
imar = np.array([[[1., 0.], [0., 0.]],
                 [[0., 1.], [0., 1.]],
                 [[0., 0.], [1., 1.]]]).transpose()
plt.imsave('pic.jpg', imar)

# Create the dataframe; "Feature2" will hold the image itself.
df = pd.DataFrame([[0, ""]], columns=["Feature1", "Feature2"])

# Read the image and show it.
im = plt.imread('pic.jpg')
plt.imshow(im)
plt.show()

# Store the image itself in the dataframe.
# NOTE(review): the array is wrapped in a one-element list, presumably so
# that it is stored as a single cell value; it is unwrapped with [0] below.
df.iloc[0, 1] = [im]

# Save the dataframe. open() inside a with-block replaces the Python 2-only
# file() builtin and guarantees the handle is closed.
with open("mydf.pickle", "wb") as pickle_file:
    pickle.dump(df, pickle_file)
del df

# Read the dataframe back from the pickle.
# NOTE: only unpickle files you created yourself; pickle.load can execute
# arbitrary code from untrusted data.
with open("mydf.pickle", "rb") as pickle_file:
    df = pickle.load(pickle_file)

# Show the loaded image from the dataframe cell.
plt.imshow(df["Feature2"][0][0])
plt.show()
答案 1 :(得分:0)
如果您的Pandas列包含URL或本地路径,则可以生成Image列,其中将显示缩略图或任何其他图像大小。
1。如果您的列表中包含图片的网址。
您首先需要根据图像URL下载图像。adImageList
是一个列表,包含要作为一列添加到 pandas 数据框中的图像的URL。
import os
import urllib.request

dir_base = os.getcwd()  # Directory in which the downloaded images are stored
for i, URL in enumerate(adImageList):
    # Produces names such as 01_image.jpg, 02_image.jpg, ...
    image_name = '{:02d}_image.jpg'.format(i + 1)
    local_path = os.path.join(dir_base, image_name)
    urllib.request.urlretrieve(URL, local_path)
    # Record the path of the locally fetched image in the matching row.
    # NOTE(review): assumes row i of df corresponds to adImageList[i] -- confirm.
    df.loc[df.index[i], 'local_image_path'] = local_path
2。如果图片网址存放在 pandas 数据框的某一列中。首先创建一个函数,用于下载单个图像并返回其本地路径:
import itertools
import os
import urllib.request

# Running number used to give every downloaded image a distinct file name.
_image_counter = itertools.count(1)


def get_image_local(URL, dir_base=None):
    """Download the image at *URL* and return the path of the local copy.

    Files are named 01_image.jpg, 02_image.jpg, ... in call order.

    Args:
        URL: Address of the image to fetch.
        dir_base: Directory to store the file in. Defaults to the current
            working directory.

    Returns:
        The full path of the downloaded file.
    """
    if dir_base is None:
        dir_base = os.getcwd()
    image_name = '{:02d}_image.jpg'.format(next(_image_counter))
    local_path_image = os.path.join(dir_base, image_name)
    urllib.request.urlretrieve(URL, local_path_image)
    return local_path_image
然后使用lambda表达式将其映射到新列imageLocal
:
# Download every URL in the "URL" column and keep the resulting local path.
df['imageLocal'] = df['URL'].map(get_image_local)
df['imageLocal']
应该看起来像这样:
0 C:\Users\username\Documents\Base_folder\01_image.jpg 1 C:\Users\username\Documents\Base_folder\02_image.jpg 2 C:\Users\username\Documents\Base_folder\03_image.jpg
接下来的3个PIL函数可以直接复制粘贴:
import glob
import random
import base64
import pandas as pd
from PIL import Image
from io import BytesIO
from IPython.display import HTML
import io
# Do not truncate cell contents: the base64 <img> tags are far longer than
# the default column width. None means "unlimited"; the old -1 spelling is
# deprecated.
pd.set_option('display.max_colwidth', None)
def get_thumbnail(path, size=None):
    """Open the image file at *path* and return it as a PIL image.

    Args:
        path: Location of the image file on disk.
        size: Optional ``(max_width, max_height)`` tuple. When given, the
            image is shrunk in place with ``Image.thumbnail`` to fit inside
            it; when omitted, the image is returned at its original size.

    Returns:
        The opened (and optionally downsized) PIL image.
    """
    import os

    long_path_prefix = '\\\\?\\'
    if os.name == 'nt' and not path.startswith(long_path_prefix):
        # The "\\?\" prefix prevents problems with long Windows paths. It is
        # only meaningful on Windows and needs an absolute path, so it is
        # skipped on other platforms where it would break the lookup.
        path = long_path_prefix + os.path.abspath(path)
    image = Image.open(path)
    if size is not None:
        image.thumbnail(size)
    return image
def image_base64(im):
    """Return *im* encoded as a base64 JPEG string.

    Args:
        im: Either a PIL image or a path string; a path is opened with
            ``get_thumbnail`` first.

    Returns:
        The JPEG bytes of the image, base64-encoded and decoded to ``str``.
    """
    if isinstance(im, str):
        im = get_thumbnail(im)
    # JPEG cannot store alpha or palette data, so convert such images to
    # RGB first instead of letting save() fail.
    if im.mode not in ('RGB', 'L', 'CMYK'):
        im = im.convert('RGB')
    with BytesIO() as buffer:
        im.save(buffer, 'jpeg')
        return base64.b64encode(buffer.getvalue()).decode()
def image_formatter(im):
    """Return an HTML ``<img>`` tag that embeds *im* as a base64 JPEG data URI."""
    encoded = image_base64(im)
    return '<img src="data:image/jpeg;base64,{}">'.format(encoded)
我们可以通过以下方式将本地图像路径传递到get_thumbnail(path)
:
# Open every local image file and keep the resulting PIL object in a new column.
df['imagePILL'] = df['imageLocal'].map(get_thumbnail)
df['imagePILL']
应该看起来像这样:
0 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x400 at 0x265BA323240> 1 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=200x150 at 0x265BA3231D0> 2 <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x400 at 0x265BA3238D0>
您可以使用pandas数据框将新列放置在所需位置:
# Reorder the columns alphabetically by name.
df = df.reindex(columns=sorted(df.columns))
现在,如果您想查看带有缩略图的 pandas 数据框,只需在调用 IPython.display
的 HTML 函数时,
将 image_formatter 作为 imagePILL 列的格式化函数:
# Render the frame as an HTML table. escape=False keeps the <img> tags
# produced by image_formatter as markup instead of escaping them.
table_html = df.to_html(formatters={'imagePILL': image_formatter}, escape=False)
HTML(table_html)
您可以使用任何其他显示HTML的方式,重要的是要在Pandas数据框中获取PIL对象。