Question

尝试弄清楚如何将python脚本的输出另存为CSV时出现问题。当我运行此脚本时，文件没有出现在我访问它所需的位置。有什么建议吗？

import pandas as pd
import os

folder_path = os.path.join("T:", "04. Testing","3. Wear Testing","TESTS","CKUW","180604 OP STRAPLESS","Survey Response Data")
mapping_path = os.path.join(folder_path + r'\Survey_MappingTable Strapless.xlsx')

# Read mapping table
mapping = pd.ExcelFile(mapping_path)
mapping.sheet_names
# ['SurveyInfo', 'Question Mapping', 'Answer Mapping']
# Transform sheets to 3 tables (surveyinfo, Q_mapping, A_mapping)
surveyinfo = mapping.parse("SurveyInfo")
Q_mapping = mapping.parse("Question Mapping", skiprows = 2)
A_mapping = mapping.parse("Answer Mapping", skiprows = 3)

# Get input file name and read the data. Table name is df.
input_file_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Input File Name','Value'].to_string(index=False)

path = os.path.join(r'T:\04. Testing\3. Wear Testing\TESTS\CKUW\180604 OP STRAPLESS\Survey Response Data',input_file_name)
df = pd.read_csv(path,header=None,engine='python')
# ,encoding='utf-8'  Tried this as a way to fix but it didn't work
# Fill in previous colunmn names if blank, using the preceeding header
df.iloc[0] = df.iloc[0].fillna(method='ffill')

# Read the count of columns
n_col = len(df.iloc[0])
n_respondent = len(df)-2
c_name = []
for i in range(n_col):
# Multiple columns; each columns with differnt single answer. and the question text is to combine the category ex. support, comfort, are both in the satisfaction category etc.
# If it's satisfaction question, concatenate first row and second row
    if "satisfaction" in df.iloc[0][i]: 
        c_name.append(df.iloc[0][i]+df.iloc[1][i])
    elif "functionality" in df.iloc[0][i]:
        c_name.append(df.iloc[0][i]+df.iloc[1][i])
    elif ("shape" in df.iloc[0][i]) and ("please specify" in df.iloc[1][i]):
        c_name.append(df.iloc[0][i]+df.iloc[1][i])
    elif ("room in the cup" in df.iloc[0][i]) and ("please specify" in df.iloc[1][i]):
        c_name.append(df.iloc[0][i]+df.iloc[1][i])       
# - in the column header which is part of the question and part of the response
    elif ("wire" in df.iloc[0][i]) and ("Response" not in df.iloc[1][i]):
        if "-" in df.iloc[1][i]:
            c_name.append(df.iloc[0][i]+df.iloc[1][i][df.iloc[1][i].find("-")+2:])
        else:
            c_name.append(df.iloc[0][i]+df.iloc[1][i])
        for j in range(n_respondent):
            if pd.notnull(df.iloc[j+2,i]) and "please specify" not in df.iloc[1,i]:
                df.iloc[j+2,i] = df.iloc[1,i][:df.iloc[1][i].find("-")-1]               
# Multiple columns; each columns with differnt single answer. and the question text is not to combine the category.
# Use to combine band and cup size
    elif "size bra do you typically wear?" in df.iloc[0][i]:
        c_name.append(df.iloc[0][i])
        for j in range(n_respondent):
            if pd.notnull(df.iloc[j+2,i]):
                df.iloc[j+2,i] = df.iloc[1,i] + df.iloc[j+2,i]
# Single answer to the question; or multiple answers to the question but the answer is the same as the column header
    else:
        c_name.append(df.iloc[0][i])

# Make the column names as the first row
df.columns = c_name

# Drop the first and second rows
df2 = df.drop(df.index[[0,1]])

# Transform the wide dataset to a long dataset; 
r = list(range(10))+list(range(17,20))  # skipping "What size bra do you typically wear? (only select one size)"
df_long = pd.melt(df2,id_vars = list(df.columns[r]), var_name = 'Question', value_name = 'Answer')

# Delete rows with null value to answer
df_long_notnull = df_long[pd.notnull(df_long['Answer'])]

# Make typically wear as a column dimension
sizewear = df_long_notnull.loc[df_long_notnull['Question'] == 'What size bra do you typically wear? (Only select one size)']
sizewear2 = sizewear[['Respondent ID','Collector ID','Email Address','Answer']]
sizewear2.columns = ['Respondent ID','Collector ID','Email Address','What size bra do you typically wear?']
df_long_notnull2 = df_long_notnull[df_long_notnull['Question'] != 'What size bra do you typically wear? (Only select one size)']
df_final = pd.merge(df_long_notnull2, sizewear2, how='left', on=['Respondent ID','Collector ID','Email Address'])

# Join Answer description mapping table
df_full = pd.merge(df_final, A_mapping, how='left', left_on = ['Question','Answer'], right_on = ['Question','Answer Description'])
df_full.loc[df_full['Answer_y'].isnull(),'Answer_y'] = df_full['Answer_x']
df_full.loc[df_full['Answer Description'].isnull(),'Answer Description'] = df_full['Answer_x']
df_full = df_full.drop(labels = ['Answer_x'], axis=1)
df_full = df_full.rename(columns = {'Answer_y':'Answer','Answer Description':'Answer Desc'})

# Join Question Mapping table
df_full = pd.merge(df_full,Q_mapping, how='left', left_on = ['Question'], right_on = ['Raw Column Name'])
df_full = df_full.drop(labels = ['Raw Column Name'], axis=1)

# Get Survey Info
product_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Product Name','Value'].to_string(index=False)

if "," in surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].item():
    style_number = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].to_string(index=False).split(',')
    style_number = [s.strip() for s in style_number]
else:
    style_number = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Number','Value'].to_string(index=False)

if "," in surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].item():
    style_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].to_string(index=False).split(',')
    style_name = [s.strip() for s in style_name]
else: 
    style_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Style Name','Value'].to_string(index=False)

# get survey information
survey_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Name','Value'].to_string(index=False)
survey_id = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey ID','Value'].item()
survey_year = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Year','Value'].item()
survey_mo = surveyinfo.loc[surveyinfo['Parameter Name']=='Survey Month','Value'].item()
output_file_name = surveyinfo.loc[surveyinfo['Parameter Name']=='Output File Name','Value'].to_string(index=False)

# adding columns for survey information
df_full['Product Name'] = product_name
df_full['Survey Name'] = survey_name
df_full['Survey ID'] = survey_id
df_full['Survey Year'] = survey_year
df_full['Survey Month'] = survey_mo

### create a table with style_number and style_name
if type(style_name) == list: 
    style_t = pd.DataFrame(list(zip(style_name, style_number)), columns = list(["Style_Name","Style_Number"]))
    df_full = pd.merge(df_full, style_t, how='left', left_on = ['Which style did you receive?'], right_on = ['Style_Name'])
else: 
    df_full['Style Name'] = style_name
    df_full['Style Number'] = style_number


# Identify the path for saving output file
path_out = os.path.join("C:","Users","Sali3",output_file_name)

# Save as comma separated csv file 
df_full.to_csv(path_out, sep=',', index = False)

此脚本的最后一部分是我遇到问题的地方。 path_out应该作为CSV文件位于我的本地“ C”驱动器上。请帮忙。

Answer 1

假设您使用的是Windows，documentation on os.path.join表示：

在Windows上，遇到绝对路径成分（例如r'\ foo'）时，不会重置驱动器号。如果组件包含驱动器号，则会丢弃所有先前的组件，并重置驱动器号。请注意，由于每个驱动器都有一个当前目录，所以os.path.join（“ c：”，“ foo”）表示相对于驱动器C：（c：foo）上当前目录的路径，而不是c：\ foo

这应该可以解决您的问题：

path_out = os.path.join("C:\\","Users","Sali3",output_file_name)

如何将python脚本的输出另存为CSV文件？

1 个答案: