Question

我正在尝试保存一个数据帧，我用它计算了重复行的均值、中位数和总计。脚本运行起来似乎没有问题，却没有实际输出我要求的文件。谁能就可能的原因给我一些建议？

这是我正在使用的代码：

"""Separate and combine frequencies of like relations, 
then produce extra columns with mean and median of these to
get a better overall picture of each relation"""

import numpy as np
import pandas as pd
from numpy.random.mtrand import pareto

def sort_table(fname):
    """Aggregate duplicate parent/child relations and append them to a CSV.

    Reads the CSV at ``fname``, drops its first column, upper-cases every
    remaining value, then groups the rows by ``parent`` and ``child`` to get
    the total of ``Hits`` and the mean and median of ``Score``.  The
    intermediate frames are printed along the way, and the aggregated table
    is appended to ``./Sketch_grammar/aggregated_relations_SkG_1.csv``.

    Args:
        fname: Path of the input CSV.  Once its first column is dropped it
            must provide the columns ``parent``, ``child``, ``Hits`` and
            ``Score``.
    """
    # read in file
    parent_child_rel = pd.read_csv(fname)
    print(parent_child_rel)

    # drop first column
    parent_child_rel = parent_child_rel.iloc[:, 1:]
    print(parent_child_rel)

    # put all upper case (this turns every column, numeric ones included,
    # into strings)
    parent_child_rel = parent_child_rel.apply(lambda x: x.astype(str).str.upper())

    print(parent_child_rel.dtypes)

    # change datatype back to float for the numeric columns
    parent_child_rel['Hits'] = parent_child_rel['Hits'].astype('float')
    parent_child_rel['Score'] = parent_child_rel['Score'].astype('float')

    # group and provide totals and means for hits and score; the
    # aggregations are given by name because passing np.sum / np.mean /
    # np.median to aggregate() raises a FutureWarning in pandas 2.x
    aggregated = parent_child_rel.groupby(['parent', 'child'], as_index=False).aggregate(
        {'Hits': 'sum', 'Score': ['mean', 'median']}
    )

    print(aggregated.dtypes)

    print(aggregated)

    # newline='' is what the to_csv documentation asks for when a file object
    # is passed; without it every row is followed by a blank line on Windows
    with open('./Sketch_grammar/aggregated_relations_SkG_1.csv', 'a', newline='') as outfile:
        aggregated.to_csv(outfile)


def main():
    """Run sort_table on the parent/child relations CSV."""
    relations_csv = './Sketch_grammar/parent_child_SkG_relations.csv'
    sort_table(relations_csv)


# Only run the aggregation when this file is executed as a script.
if __name__ == '__main__':
    main()

Answer 1

您无需先打开文件就可以将数据帧另存为CSV，只需把路径直接传给to_csv函数即可。

此外，文件名已经通过fname参数传入，因此您无需再手动写一遍文件名。（注意：fname同时也是输入文件的路径，把结果写回fname会覆盖原始数据；如果需要保留原文件，请改用另一个输出路径。）

您的代码应为：

"""Separate and combine frequencies of like relations, 
then produce extra columns with mean and median of these to
get a better overall picture of each relation"""

import numpy as np
import pandas as pd
from numpy.random.mtrand import pareto

def sort_table(fname, out_fname=None):
    """Aggregate duplicate parent/child relations and save them as CSV.

    Reads the CSV at ``fname``, drops its first column, upper-cases every
    remaining value, then groups the rows by ``parent`` and ``child`` to get
    the total of ``Hits`` and the mean and median of ``Score``.  The
    intermediate frames are printed along the way.

    Args:
        fname: Path of the input CSV.  Once its first column is dropped it
            must provide the columns ``parent``, ``child``, ``Hits`` and
            ``Score``.
        out_fname: Path the aggregated table is written to.  When left as
            ``None`` the result is written back to ``fname``, which
            overwrites the input file.
    """
    # read in file
    parent_child_rel = pd.read_csv(fname)
    print(parent_child_rel)

    # drop first column
    parent_child_rel = parent_child_rel.iloc[:, 1:]
    print(parent_child_rel)

    # put all upper case (this turns every column, numeric ones included,
    # into strings)
    parent_child_rel = parent_child_rel.apply(lambda x: x.astype(str).str.upper())

    print(parent_child_rel.dtypes)

    # change datatype back to float for the numeric columns
    parent_child_rel['Hits'] = parent_child_rel['Hits'].astype('float')
    parent_child_rel['Score'] = parent_child_rel['Score'].astype('float')

    # group and provide totals and means for hits and score; the
    # aggregations are given by name because passing np.sum / np.mean /
    # np.median to aggregate() raises a FutureWarning in pandas 2.x
    aggregated = parent_child_rel.groupby(['parent', 'child'], as_index=False).aggregate(
        {'Hits': 'sum', 'Score': ['mean', 'median']}
    )

    print(aggregated.dtypes)

    print(aggregated)

    # to_csv accepts a path directly, so no explicit open() is needed
    destination = fname if out_fname is None else out_fname
    aggregated.to_csv(destination)


def main():
    """Run sort_table on the parent/child relations CSV."""
    relations_csv = './Sketch_grammar/parent_child_SkG_relations.csv'
    sort_table(relations_csv)


# Only run the aggregation when this file is executed as a script.
if __name__ == '__main__':
    main()

如果您不想添加带有索引的额外列（您可能不想），则应指定它：

# index=False leaves the row index out of the written CSV
aggregated.to_csv(fname, index=False)

如@brittenb所建议，您要将数据追加到文件中，因此应使用mode = "a"

# mode="a" appends to the file instead of overwriting it
aggregated.to_csv(fname, mode="a")

无法在Python中将数据框保存到csv

1 个答案: