我正在尝试将python脚本转换为EXE。
脚本所做的是对excel文件的基本分析,并以pdf格式生成报告。
此外,脚本中还会先生成一个 PNG 图片文件,然后再把它重新加载并嵌入到 PDF 报告中。
我正在尝试将py文件转换为EXE,但是它不起作用:(
脚本如下(直接作为 .py 文件运行时一切正常):
import os
import sys
import tkinter as tk
from datetime import datetime
from tkinter import *

import matplotlib.pyplot as plt
import numpy as nu
import pandas as pd
from fpdf import FPDF
from pandasql import sqldf
def start_gui(root):
    """Populate *root* with the launcher widgets and run its event loop.

    A caption label is placed at grid row 0 and a 'Produce Report' button,
    wired to ``main``, at grid row 50. The call blocks inside
    ``root.mainloop()`` and returns ``None`` once the loop ends.
    """
    caption = Label(root, text='Hi! Here you can output the sessions report')
    caption.grid(row=0, column=0)

    report_button = Button(root, text='Produce Report', padx=30, pady=20, command=main, fg='blue')
    report_button.grid(row=50, column=0)

    root.mainloop()
def print_full_results(df):
    """Print *df* with pandas' row and column truncation switched off.

    ``display.max_rows`` and ``display.max_columns`` are set to ``None`` only
    while the frame is printed. ``option_context`` then puts back whatever
    values were active before the call (not the library defaults), and does
    so even if printing raises.
    """
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        print(df)
def load_data(path):
    """Parse the CSV found at *path* and hand it back as a DataFrame.

    *path* is forwarded unchanged to ``pd.read_csv``; the parsed result is
    wrapped in ``pd.DataFrame`` before being returned.
    """
    parsed = pd.read_csv(path)
    return pd.DataFrame(parsed)
def clean_raw_data(raw_data):
    """Normalise the raw export into the frame the rest of the script uses.

    Steps, in order:
      1. drop rows, then columns, in which every value is missing;
      2. renumber the index from 0 (the old index is discarded);
      3. rename the source headers to SQL-friendly identifiers;
      4. convert the time/duration columns via ``convert_relevant_types``;
      5. fill missing user e-mails via ``fill_null_emails``.

    Returns the cleaned DataFrame.
    """
    # NOTE(review): a few source headers below ('Uzer Eam1l',
    # 'Ncmf (prjgjncl Ncmf)', 'Usfr fncil') look garbled; they are kept
    # verbatim because they must match the CSV header row exactly - confirm
    # against the real file.
    column_names = {
        'Meeting ID': 'Meeting_ID', 'User Name': 'Admin_User_Name', 'Uzer Eam1l': 'Admin_Email',
        'Has Zoom Rooms?': 'Has_Zoom_Rooms', 'Creation Time': 'Meeting_Creation_Time',
        'Start Time': 'Meeting_Start_Time', 'End Time': 'Meeting_End_Time',
        'Duration (Minutes)': 'Meeting_Duration_min', 'Ncmf (prjgjncl Ncmf)': 'User_Name',
        'Usfr fncil': 'User_Email', 'Join Time': 'User_Join_Time', 'Leave Time': 'User_Leave_Time',
        'Duration (Minutes).1': 'User_Duration_min',
    }

    raw_data = raw_data.dropna(how='all')           # rows with no data at all
    raw_data = raw_data.dropna(axis=1, how='all')   # columns with no data at all
    # drop=True discards the old index directly instead of materialising it
    # as an 'index' column that then has to be removed again (which breaks
    # when the index is named or an 'index' column already exists).
    raw_data = raw_data.reset_index(drop=True)
    raw_data = raw_data.rename(columns=column_names)

    raw_data = convert_relevant_types(raw_data)
    raw_data = fill_null_emails(raw_data)
    return raw_data
def convert_relevant_types(db):
    """Convert the time columns to datetimes and the duration columns to int.

    The five time columns (Meeting_Creation_Time, Meeting_Start_Time,
    Meeting_End_Time, User_Join_Time, User_Leave_Time) are parsed with the
    format ``'%m/%d/%y %H:%M'``; Meeting_Duration_min and User_Duration_min
    are cast to ``int``.

    The columns of *db* are replaced in place and *db* is returned. Whole
    columns are converted at once, so the result does not depend on the
    frame having a 0..n-1 index and does not rely on chained assignment
    (``db[col][i] = ...``), which pandas does not guarantee to write through.
    Missing time values come out as ``NaT`` rather than raising.
    """
    # Preserved from the original: silences pandas' chained-assignment
    # warning for the whole process.
    pd.options.mode.chained_assignment = None  # default='warn'

    time_format = '%m/%d/%y %H:%M'
    time_columns = (
        'Meeting_Creation_Time',
        'Meeting_Start_Time',
        'Meeting_End_Time',
        'User_Join_Time',
        'User_Leave_Time',
    )
    for column in time_columns:
        db[column] = pd.to_datetime(db[column], format=time_format)

    for column in ('Meeting_Duration_min', 'User_Duration_min'):
        db[column] = db[column].astype(int)

    return db
def fill_null_emails(db):
    """Give every row without an e-mail a placeholder built from the name.

    Where ``User_Email`` is null it becomes ``User_Name + ' Missing Mail'``;
    other rows are left untouched. The column of *db* is replaced in place
    and *db* is returned.

    The fill is done with a boolean mask instead of ``db[col][i] = ...``:
    that chained form is not guaranteed by pandas to write back into the
    frame, and indexing by ``range(len(...))`` only works for a 0..n-1 index.
    """
    missing = db['User_Email'].isna()
    if missing.any():
        # Build the placeholder only for the rows that need it, as the
        # original did, so names on other rows are never touched.
        placeholder = db.loc[missing, 'User_Name'] + ' Missing Mail'
        db['User_Email'] = db['User_Email'].where(~missing, placeholder)
    return db
def pdff_space_down(pdf):
    """Add vertical space by emitting one empty full-width cell of height 10.

    Returns the same *pdf* object so calls can be written as
    ``pdf = pdff_space_down(pdf)``.
    """
    gap_height = 10
    pdf.cell(0, gap_height, '', align='L', ln=1)
    return pdf
def pdff_write(pdf, text, space=5, align='L'):
    """Write *text* as one full-width line and move to the next line.

    Args:
        pdf: object exposing an FPDF-style ``cell`` method.
        text: the string to print.
        space: cell height passed to ``cell``.
        align: alignment passed to ``cell``. Previously this argument was
            accepted but ignored in favour of a hard-coded ``'L'``; it is
            now honoured. The default is still ``'L'``.

    Returns:
        The same *pdf* object.
    """
    pdf.cell(0, space, text, ln=1, align=align)
    return pdf
def pdff_write_table(pdf, data, spacing=1.5):
    """Render *data* (an iterable of rows of strings) as a bordered grid.

    Every cell is ``pdf.w / 4.5`` wide and ``pdf.font_size * spacing`` high;
    both are read from *pdf* once, before the first row. After each row a
    line break of the same height is issued. Returns the same *pdf* object.
    """
    cell_width = pdf.w / 4.5
    line_height = pdf.font_size * spacing

    for record in data:
        for value in record:
            pdf.cell(cell_width, line_height, border=1, txt=value)
        pdf.ln(line_height)

    return pdf
def create_pdf(today, min_date, max_date, sessions_num, total_cost, costs_table, num_of_users, avg_users_come):
    """Build the one-page A4 report and write it to a timestamped PDF file.

    Args:
        today: value printed after 'Date:' in the page header.
        min_date, max_date: bounds printed as '(<min> To <max>)'.
        sessions_num: value printed as the total number of sessions.
        total_cost: value printed as the total cost in NIS.
        costs_table: rows of strings rendered by ``pdff_write_table``.
        num_of_users: value printed as the number of engaged users.
        avg_users_come: value printed as the average arrivals per user.

    The image 'sessions_by_day_plot.png' is embedded by relative name, so it
    must already exist when this is called. The output name is
    'Zoom Report_<timestamp>.pdf', with the first three ':' of the timestamp
    replaced by '.'. Returns ``None``.
    """
    pdf = FPDF(orientation='p', unit='mm', format='A4')
    pdf.add_page()

    # --- page header ---
    pdf.set_font('Arial', size=10)
    pdf.cell(0, 10, 'Date:{}'.format(today), ln=1, align='L')
    pdf.set_font('times', 'B', size=24)
    pdf.cell(0, 8, 'Home Assignment - Ziv Mor', ln=1, align='C')
    pdf.set_font('times', size=18)
    pdf.cell(0, 10, 'Zoom-Sessions Report (Automated by Python)', ln=1, align='C')
    pdf.cell(0, 10, '({} To {})'.format(min_date, max_date), ln=1, align='C')

    # --- sessions section ---
    pdf.set_font('times', 'U', size=15)
    pdf = pdff_write(pdf, 'Sessions Analysis', space=20)
    pdf.set_font('times', size=13)
    pdf = pdff_write(pdf, 'Total Number of Sessions: {} (Team meetings are not include)'.format(sessions_num), space=15)
    pdf.set_font('times', 'UB', size=13)
    # ln must be 0, 1 or 2; the former 1.5 is not a valid value.
    pdf.cell(0, 10, 'Number Of Sessions By Dates', ln=1, align='C')
    pdf.image('sessions_by_day_plot.png', x=55, y=None, w=100, h=70, type='', link='')
    pdf = pdff_space_down(pdf)
    pdf.set_font('times', size=13)
    pdf = pdff_write(pdf, 'Sessions Participants Segmentation:', space=10)
    pdf = pdff_write_table(pdf, costs_table)
    pdf.set_font('times', 'UB', size=13)
    pdf.cell(0, 20, 'Sessions Total Cost: {} NIS'.format(total_cost), ln=1, align='C')

    # --- users section ---
    pdf.set_font('times', 'U', size=15)
    pdf = pdff_write(pdf, 'Users Analysis', space=17)
    pdf.set_font('times', size=13)
    pdf = pdff_write(pdf, 'Total Number of Users Engaged: {}'.format(num_of_users), space=10)
    pdf = pdff_write(pdf, 'The Average Frequency of Arrival of Each User : {} Sessions'.format(avg_users_come),
                     space=10)

    # ':' is replaced in the timestamp before it is used in the file name.
    pdf.output('Zoom Report_{}.pdf'.format(str(datetime.today()).replace(':', '.', 3)))
def main():
    """Run the whole pipeline: load, analyse, write the PDF and Excel reports.

    Reads 'participant sessions data.csv' from ``sys.path[0]``, cleans it,
    runs the SQL aggregations through pandasql, saves the sessions-per-day
    bar chart, writes the PDF (``create_pdf``) and an Excel workbook with the
    two detail tables, removes the temporary chart image and finally
    terminates the process with ``sys.exit()``.

    Changes from the previous version:
      * ``sys.exit()`` replaces ``exit()``: the bare ``exit`` name is
        injected by the ``site`` module and is not available in every
        runtime (for example frozen executables).
      * the workbook is written through ``ExcelWriter`` as a context manager
        instead of ``writer.save()``, which newer pandas no longer provides.
      * the chart image is removed via the same relative name it was saved
        and embedded under, rather than via ``sys.path[0]``.
    """
    path = os.path.join(sys.path[0], 'participant sessions data.csv')
    raw_data = load_data(path)
    # NOTE: the SQL below refers to zoom_db, answer_2_table and
    # answer_3_table by name; sqldf is called without an explicit
    # environment, so these local variable names must stay as they are.
    zoom_db = clean_raw_data(raw_data)

    # ------------------------------ SQL queries ------------------------------
    # TODO: assumes team meetings (Topic "פגישת צוות") are not counted.
    question_1_query = 'Select date(Meeting_Start_Time)date, count(distinct Meeting_Start_Time)Num_Of_Sessions From zoom_db where Topic <>"פגישת צוות" Group by date(Meeting_Start_Time)'
    answer_1_table = sqldf(question_1_query)
    num_of_sessions = nu.sum(list(answer_1_table['Num_Of_Sessions']))

    # Number of distinct participants per meeting.
    question_2_query = 'Select Topic, Meeting_Start_Time, count(Distinct User_Email)num_of_Users From zoom_db Group by Meeting_Start_Time, Meeting_ID'
    answer_2_table = sqldf(question_2_query)

    # Number of sessions each user showed up to. Rows with a user duration
    # of 0 are excluded; many users have no e-mail and are identified by the
    # placeholder filled in during cleaning.
    question_3_query = 'select User_Email, count(*)num_of_arrivals from(Select User_Email, Meeting_Start_Time, Meeting_ID From zoom_db Where User_Duration_min <> 0 Group by User_Email, Meeting_ID , Meeting_Start_Time) group by User_Email Order by num_of_arrivals desc'
    answer_3_table = sqldf(question_3_query)

    # Average arrivals per user. The first row (highest count) is skipped.
    # TODO: this assumes that row is the host.
    participants_arrivals_list = list(answer_3_table['num_of_arrivals'])[1:]
    avg_users_come = round((nu.average(participants_arrivals_list)), 2)

    # --------------------- more figures for the report ----------------------
    # Date range covered by the data.
    min_date_qu = sqldf('select min(date(Meeting_Start_Time)) from zoom_db')
    min_date_qu = list(min_date_qu['min(date(Meeting_Start_Time))'])[0]
    max_date_qu = sqldf('select max(date(Meeting_Start_Time)) from zoom_db')
    max_date_qu = list(max_date_qu['max(date(Meeting_Start_Time))'])[0]

    # Session counts per participant bracket (team meetings excluded).
    num_meetings0_5 = sqldf('select count(*) from answer_2_table where num_of_users<=5 and Topic <>"פגישת צוות"')
    num_meetings0_5 = list(num_meetings0_5['count(*)'])[0]
    num_meetings5_10 = sqldf(
        'select count(*) from answer_2_table where num_of_users>5 and num_of_users<=10 and Topic <>"פגישת צוות"')
    num_meetings5_10 = list(num_meetings5_10['count(*)'])[0]
    num_meetings10_15 = sqldf(
        'select count(*) from answer_2_table where num_of_users>10 and num_of_users<=15 and Topic <>"פגישת צוות"')
    num_meetings10_15 = list(num_meetings10_15['count(*)'])[0]
    num_meetings_15_plus = sqldf('select count(*) from answer_2_table where num_of_users>15 and Topic <>"פגישת צוות"')
    num_meetings_15_plus = list(num_meetings_15_plus['count(*)'])[0]

    total_cost = 50 * num_meetings0_5 + 100 * num_meetings5_10 + 150 * num_meetings10_15 + 200 * num_meetings_15_plus
    costs_table = [['Session type', 'Number of sessions', 'Cost'],
                   ['0-5 participants', str(num_meetings0_5), str(50 * num_meetings0_5)],
                   ['5-10 participants', str(num_meetings5_10), str(100 * num_meetings5_10)],
                   ['10-15 participants', str(num_meetings10_15), str(150 * num_meetings10_15)],
                   ['15+ participants', str(num_meetings_15_plus), str(200 * num_meetings_15_plus)]]

    # The chart is saved under the relative name create_pdf embeds.
    answer_1_table.plot.bar(x='date', y='Num_Of_Sessions', rot=80)
    plt.savefig('sessions_by_day_plot.png')

    num_of_users = sqldf('select count(*) From answer_3_table')
    num_of_users = list(num_of_users['count(*)'])[0]
    today = datetime.today().strftime("%b-%d-%Y")

    # ------------------------------ output results ---------------------------
    create_pdf(today=today, max_date=max_date_qu, min_date=min_date_qu, sessions_num=num_of_sessions,
               total_cost=total_cost, costs_table=costs_table, num_of_users=num_of_users,
               avg_users_come=avg_users_come)

    excel_name = 'Zoom Report_{}.xlsx'.format(str(datetime.today()).replace(':', '.', 3))
    # The context manager saves and closes the workbook on exit.
    with pd.ExcelWriter(excel_name) as writer:
        answer_2_table.to_excel(writer, sheet_name='Sessions Number of Participants')
        answer_3_table.to_excel(writer, sheet_name='Participants show-up')

    # --------------------- delete files no longer needed ---------------------
    # Same relative name as used for savefig above.
    os.remove('sessions_by_day_plot.png')
    sys.exit()
if __name__ == '__main__':
    # Script entry point: create the Tk root window and start the launcher
    # GUI; the report itself is produced when its button invokes main().
    start_gui(Tk())