尝试弄清楚如何将python脚本的输出另存为CSV时出现问题。当我运行此脚本时,文件没有出现在我访问它所需的位置。有什么建议吗?
import pandas as pd
import os
folder_path = os.path.join("T:", "04. Testing","3. Wear Testing","TESTS","CKUW","180604 OP STRAPLESS","Survey Response Data")
mapping_path = os.path.join(folder_path + r'\Survey_MappingTable Strapless.xlsx')
# Read mapping table
mapping = pd.ExcelFile(mapping_path)
mapping.sheet_names
# ['SurveyInfo', 'Question Mapping', 'Answer Mapping']
# Transform sheets to 3 tables (surveyinfo, Q_mapping, A_mapping)
surveyinfo = mapping.parse("SurveyInfo")
Q_mapping = mapping.parse("Question Mapping", skiprows = 2)
A_mapping = mapping.parse("Answer Mapping", skiprows = 3)
# Get input file name and read the data. Table name is df.
input_file_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Input File Name','Value'].to_string(index=False)
path = os.path.join(r'T:\04. Testing\3. Wear Testing\TESTS\CKUW\180604 OP STRAPLESS\Survey Response Data',input_file_name)
df = pd.read_csv(path,header=None,engine='python')
# ,encoding='utf-8' Tried this as a way to fix but it didn't work
# Fill in previous colunmn names if blank, using the preceeding header
df.iloc[0] = df.iloc[0].fillna(method='ffill')
# Read the count of columns
n_col = len(df.iloc[0])
n_respondent = len(df)-2
c_name = []
for i in range(n_col):
# Multiple columns; each columns with differnt single answer. and the question text is to combine the category ex. support, comfort, are both in the satisfaction category etc.
# If it's satisfaction question, concatenate first row and second row
if "satisfaction" in df.iloc[0][i]:
c_name.append(df.iloc[0][i]+df.iloc[1][i])
elif "functionality" in df.iloc[0][i]:
c_name.append(df.iloc[0][i]+df.iloc[1][i])
elif ("shape" in df.iloc[0][i]) and ("please specify" in df.iloc[1][i]):
c_name.append(df.iloc[0][i]+df.iloc[1][i])
elif ("room in the cup" in df.iloc[0][i]) and ("please specify" in df.iloc[1][i]):
c_name.append(df.iloc[0][i]+df.iloc[1][i])
# - in the column header which is part of the question and part of the response
elif ("wire" in df.iloc[0][i]) and ("Response" not in df.iloc[1][i]):
if "-" in df.iloc[1][i]:
c_name.append(df.iloc[0][i]+df.iloc[1][i][df.iloc[1][i].find("-")+2:])
else:
c_name.append(df.iloc[0][i]+df.iloc[1][i])
for j in range(n_respondent):
if pd.notnull(df.iloc[j+2,i]) and "please specify" not in df.iloc[1,i]:
df.iloc[j+2,i] = df.iloc[1,i][:df.iloc[1][i].find("-")-1]
# Multiple columns; each columns with differnt single answer. and the question text is not to combine the category.
# Use to combine band and cup size
elif "size bra do you typically wear?" in df.iloc[0][i]:
c_name.append(df.iloc[0][i])
for j in range(n_respondent):
if pd.notnull(df.iloc[j+2,i]):
df.iloc[j+2,i] = df.iloc[1,i] + df.iloc[j+2,i]
# Single answer to the question; or multiple answers to the question but the answer is the same as the column header
else:
c_name.append(df.iloc[0][i])
# Make the column names as the first row
df.columns = c_name
# Drop the first and second rows
df2 = df.drop(df.index[[0,1]])
# Transform the wide dataset to a long dataset;
r = list(range(10))+list(range(17,20)) # skipping "What size bra do you typically wear? (only select one size)"
df_long = pd.melt(df2,id_vars = list(df.columns[r]), var_name = 'Question', value_name = 'Answer')
# Delete rows with null value to answer
df_long_notnull = df_long[pd.notnull(df_long['Answer'])]
# Make typically wear as a column dimension
sizewear = df_long_notnull.loc[df_long_notnull['Question'] == 'What size bra do you typically wear? (Only select one size)']
sizewear2 = sizewear[['Respondent ID','Collector ID','Email Address','Answer']]
sizewear2.columns = ['Respondent ID','Collector ID','Email Address','What size bra do you typically wear?']
df_long_notnull2 = df_long_notnull[df_long_notnull['Question'] != 'What size bra do you typically wear? (Only select one size)']
df_final = pd.merge(df_long_notnull2, sizewear2, how='left', on=['Respondent ID','Collector ID','Email Address'])
# Join Answer description mapping table
df_full = pd.merge(df_final, A_mapping, how='left', left_on = ['Question','Answer'], right_on = ['Question','Answer Description'])
df_full.loc[df_full['Answer_y'].isnull(),'Answer_y'] = df_full['Answer_x']
df_full.loc[df_full['Answer Description'].isnull(),'Answer Description'] = df_full['Answer_x']
df_full = df_full.drop(labels = ['Answer_x'], axis=1)
df_full = df_full.rename(columns = {'Answer_y':'Answer','Answer Description':'Answer Desc'})
# Join Question Mapping table
df_full = pd.merge(df_full,Q_mapping, how='left', left_on = ['Question'], right_on = ['Raw Column Name'])
df_full = df_full.drop(labels = ['Raw Column Name'], axis=1)
# Get Survey Info
product_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Product Name','Value'].to_string(index=False)
if "," in surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].item():
style_number = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].to_string(index=False).split(',')
style_number = [s.strip() for s in style_number]
else:
style_number = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].to_string(index=False)
if "," in surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].item():
style_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].to_string(index=False).split(',')
style_name = [s.strip() for s in style_name]
else:
style_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].to_string(index=False)
# get survey information
survey_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Name','Value'].to_string(index=False)
survey_id = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey ID','Value'].item()
survey_year = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Year','Value'].item()
survey_mo = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Month','Value'].item()
output_file_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Output File Name','Value'].to_string(index=False)
# adding columns for survey information
df_full['Product Name'] = product_name
df_full['Survey Name'] = survey_name
df_full['Survey ID'] = survey_id
df_full['Survey Year'] = survey_year
df_full['Survey Month'] = survey_mo
### create a table with style_number and style_name
if type(style_name) == list:
style_t = pd.DataFrame(list(zip(style_name, style_number)), columns = list(["Style_Name","Style_Number"]))
df_full = pd.merge(df_full, style_t, how='left', left_on = ['Which style did you receive?'], right_on = ['Style_Name'])
else:
df_full['Style Name'] = style_name
df_full['Style Number'] = style_number
# Identify the path for saving output file
path_out = os.path.join("C:","Users","Sali3",output_file_name)
# Save as comma separated csv file
df_full.to_csv(path_out, sep=',', index = False)
此脚本的最后一部分是我遇到问题的地方。 path_out应该作为CSV文件位于我的本地“ C”驱动器上。请帮忙。
答案 0 :(得分:1)
假设您使用的是Windows,documentation on os.path.join表示:
在Windows上,遇到绝对路径成分(例如r'\ foo')时,不会重置驱动器号。如果组件包含驱动器号,则会丢弃所有先前的组件,并重置驱动器号。请注意,由于每个驱动器都有一个当前目录,所以os.path.join(“ c:”,“ foo”)表示相对于驱动器C:(c:foo)上当前目录的路径,而不是c:\ foo
这应该可以解决您的问题:
path_out = os.path.join("C:\\","Users","Sali3",output_file_name)