我创建了一个网络应用程序,允许用户上传csv并根据输入的数据(特别是针对Google我的商家导出的csv文件)以可视化形式返回pdf。
问题是可视化效果可能会在我的应用生成的pdf中拉长,或者某些标签被剪掉。我曾尝试调整图的大小,但这似乎对我的结果没有影响。
这是我用来创建这些可视化效果的代码:
def transform():
disc = open('clean.csv')
disc2 = open('clean_two.csv','w')
#cleaning up csv
for row in disc:
row = row.strip()
row = row[1:-1]
row = row.replace('""','"')
disc2.write(row+'\n')
disc2.close()
disc.close()
discovery = pd.read_csv('clean_two.csv')
discovery_clean = discovery.iloc[1:]
cols = list(discovery_clean.columns[4:])
discovery_clean[cols] = discovery_clean[cols].apply(pd.to_numeric,errors='coerce')
#return discovery_clean
#first figure
with PdfPages('plots.pdf') as pdf:
plt.figure(figsize=(16,8))
ax = discovery_clean[cols].sum().plot.bar(figsize=(20,10))
ax.axes.get_yaxis().set_visible(False)
ax.set_title('Overview\n',fontsize='15',color='black')
ax.xaxis.set_tick_params(labelsize=15)
for x in ax.patches:
ax.text(x.get_x()-.09,x.get_height()+20,\
f'{int(x.get_height()):,}',fontsize=15,color='black')
plt.rcParams['figure.figsize']=(20,10)
pdf.savefig()
plt.close()
#second image
mpl.rcParams['font.size'] = 12
disc_plot = discovery_clean.groupby('Business name')['Total views'].sum().nlargest(10)
labels = list(disc_plot.index)
nums = (disc_plot.values).astype(int)
def actual_nums(vals):
a = np.round(vals/100.*nums.sum())
return a
explode=(0.3,0,0,0,0,0,0,0,0,0)
plt.pie(nums,labels=labels,autopct=actual_nums,explode=explode,radius=1)
plt.title("Total Views per Location\n",fontsize=20)
pdf.savefig()
plt.close()
#setting metadata
pdf_plots = pdf.infodict()
pdf_plots['Title'] = 'Reports'
return pdf_plots