Python,如何获取每个文件的唯一ID的数量以及如何在CSV文件中存储文件名和每个文件的唯一ID?

时间:2018-09-24 15:40:49

标签: python json csv parsing

我正在一个项目中,对聊天中的唯一评论者进行计数,并将每个聊天的文件名和该评论的评论者数量存储在csv中。但是,我现在的代码是打开所有文档,并计算多个文件中的所有注释者。因此,它不是在每个文件中获取单独的唯一评论者,而是在多个文件中计算所有评论者。所有文件中都有10个唯一注释者,但是,我需要能够看到每个文件的唯一注释者的数量并将该数据存储在csv文件中(有关csv文件图片,请参见所需的输出)。我觉得自己很亲近,但被困住了。谁能解决这个问题或提出其他建议的方法?

import os, sys, json
from collections import Counter
import csv
filename=""
filepath = ""
jsondata = ""
dictjson = ""
commenterid = []
FName = []
UList = []
TextFiles = []
UCommenter = 0


def get_FilePathList():
    for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
        for file in files:
            ##Find File with specific ending
            if file.endswith("chatinfo.txt"):
                path = "/Users/ammcc/Desktop/"
                ##Get the File Path of the file
                filepath = os.path.join(path,file)
                ##Get the Filename of the file ending in chatinfo.txt
                head, filename = os.path.split(filepath)
                ##Append all Filepaths of files ending in chatinfo.txt to TextFiles array/list
                TextFiles.append(filepath)
                ##Append all Filenames of files ending in chatinfo.txt to FName array/list
                FName.append(filename)

def open_FilePath():
     for x in TextFiles:
            ##Open each filepath in TextFiles one by one
            open_file = open(x)
            ##Read that file line by line
            for line in open_file:
                    ##Parse the Json of the file into jsondata
                    jsondata = json.loads(line)
                    ##Line not needed but, Parse the Json of the file into dictjson as Dictionary
                    dictjson = json.dumps(jsondata)

                    ## if the field commenter is found in jsondata
                    if "commenter" in jsondata:
                                    ##Then, append the field  ["commenter"]["_id"] **(nested value in the json)** into list commenterid
                                    commenterid.append(jsondata["commenter"]["_id"])
                                    ##Get and count the unique ids for the commenter
                                    Ucommenter = (len(set(commenterid)))
                                    ##Appended that unique count in UList
                                    UList.append(Ucommenter)


                                    ## create or append to the Commenter.csv file
                                    with open('Commenter.csv', 'a') as csvfile:
                                            filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
                                            ##Write the individual filename and the unique commenters for that file
                                            filewriter.writerow([filename, Ucommenter])
            commenterid.clear()

                                            ##Issue: Not counting the commenters for each file and storing the filename and its specific number of commneters in csv.
                                            ##the cvs is being created but the rows in the csv is not generating correctly.

##Call the functions                                                     
get_FilePathList()
open_FilePath()

csv文件中的当前输出

enter image description here

csv文件所需的输出

enter image description here

建议后的输出

enter image description here

NemanjaRadojković解决方案之后的输出和代码:

**正确的输出格式,但仍不计算每个文件的唯一评论者。

enter image description here

import json, os
import pandas as pd
import numpy as np
from collections import Counter
TextFiles = []
FName = []
csv_rows = []
commenterid = []
unique_id = []
NC = []
for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):
 for file in files:
     if file.endswith("chatinfo.txt"):
         path = "/Users/ammcc/Desktop/"
         filepath = os.path.join(path,file)
         head, filename = os.path.split(filepath)
         TextFiles.append(filepath)
         FName.append(filename)

         n_commenters = 0
         with open(filepath) as open_file:
             for line in open_file:
                 jsondata = json.loads(line)
                 if "commenter" in jsondata:
                     commenterid.append(jsondata["commenter"]["_id"])

                     list_set = set(commenterid)
                     unique_list = (list(list_set))

                 for x in list_set:
                     n_commenters += 1

                     commenterid.clear()
             csv_rows.append([filename, n_commenters])
df = pd.DataFrame(csv_rows, columns=['FileName', 'Unique_Commenters'])
df.to_csv('CommeterID.csv', index=False)

1 个答案:

答案 0 :(得分:0)

尝试一下:

import json
import os
import pandas as pd

TextFiles = []
FName = []
csv_rows = []

for root, dirs, files in os.walk("/Users/ammcc/Desktop/"):

    for file in files:
        ##Find File with specific ending
        if file.endswith("chatinfo.txt"):

            path = "/Users/ammcc/Desktop/"
            ##Get the File Path of the file
            filepath = os.path.join(path,file)
            ##Get the Filename of the file ending in chatinfo.txt
            head, filename = os.path.split(filepath)
            ##Append all Filepaths of files ending in chatinfo.txt to TextFiles array/list
            TextFiles.append(filepath)
            ##Append all Filenames of files ending in chatinfo.txt to FName array/list
            FName.append(filename)

            n_commenters = 0

            with open(filepath) as open_file:
                ##Read that file line by line


                for line in open_file:
                        ##Parse the Json of the file into jsondata
                        jsondata = json.loads(line)

                        ## if the field commenter is found in jsondata
                        if "commenter" in jsondata:

                            n_commenters += 1

            csv_rows.append([filename, n_commenters])


df = pd.DataFrame(csv_rows, columns=['filename', 'n_commenters'])

df.to_csv('some_filename.csv', index=False)