Python如何访问字典中的数据帧?

时间:2017-10-12 11:23:18

标签: python pandas dictionary

假设我要从不同的 csv 文件创建多个 DataFrame。我写了一个函数,把这些文件读入 DataFrame 并以字典的形式返回,但现在我不知道如何单独访问其中的每一个。

import pandas as pd
import glob
import os

# Base directory; readDatafromFile appends "/data" to it.
path = "/folder"


def readDatafromFile(path):
    """Load the store's data files into a dict of DataFrames.

    Every ``*.txt`` file in ``<path>/data`` whose name contains a known
    marker is read as a header-less, comma-separated table.

    Args:
        path: Directory containing the ``data`` sub-folder. May be absolute
            or relative to the current working directory.

    Returns:
        dict mapping an integer key to a DataFrame: 1 for ``_Sale``,
        2 for ``_Emplo``, 3 for ``_Prod`` and 4 for ``_Resou`` files.
        A category without a matching file has no entry; when several
        files match one category, the one listed last by ``glob`` wins.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not an existing directory.
    """
    # Checked in order and the first hit wins, like an if/elif chain.
    markers = (("_Sale", 1), ("_Emplo", 2), ("_Prod", 3), ("_Resou", 4))

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("data directory not found: %r" % data_dir)

    store_data = {}
    # Glob with the full pattern instead of os.chdir(): changing the working
    # directory and then prefixing `path` again breaks relative paths and
    # leaks a process-wide side effect. glob.escape keeps special characters
    # in the directory name literal.
    for source in glob.glob(os.path.join(glob.escape(data_dir), "*.txt")):
        filename = os.path.basename(source)
        for marker, key in markers:
            if marker in filename:
                store_data[key] = pd.read_csv(source, sep=',', header=None)
                break
    return store_data

# Read the data files; `shop` is the dict of DataFrames returned by readDatafromFile.
shop = readDatafromFile(path)

如何从返回的 shop 字典中单独取出某个 DataFrame,例如销售数据或员工数据?

3 个答案:

答案 0 :(得分:0)

我认为您需要先把每个 DataFrame 追加到对应的列表中,然后用 pd.concat 把它们合并:

def readDatafromFile(path):
    """Load the store's data files and concatenate them per category.

    Every ``*.txt`` file in ``<path>/data`` whose name contains a known
    marker is read as a header-less, comma-separated table; all tables of
    one category are stacked into a single DataFrame with a fresh index.

    Args:
        path: Directory containing the ``data`` sub-folder. May be absolute
            or relative to the current working directory.

    Returns:
        dict with the keys ``'sales'``, ``'emp'``, ``'prod'`` and ``'res'``
        (files containing ``_Sale``, ``_Emplo``, ``_Prod`` and ``_Resou``
        respectively). A category without any file maps to an empty
        DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not an existing directory.
    """
    # Checked in order and the first hit wins, like an if/elif chain.
    markers = (
        ("_Sale", "sales"),
        ("_Emplo", "emp"),
        ("_Prod", "prod"),
        ("_Resou", "res"),
    )

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("data directory not found: %r" % data_dir)

    frames = {key: [] for _, key in markers}
    # Glob with the full pattern instead of os.chdir(): changing the working
    # directory and then prefixing `path` again breaks relative paths and
    # leaks a process-wide side effect.
    for source in glob.glob(os.path.join(glob.escape(data_dir), "*.txt")):
        filename = os.path.basename(source)
        for marker, key in markers:
            if marker in filename:
                frames[key].append(pd.read_csv(source, sep=',', header=None))
                break

    # pd.concat raises ValueError on an empty list, so a category with no
    # files falls back to an empty DataFrame.
    store_data = {
        key: pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()
        for key, parts in frames.items()
    }
    return (store_data)

# Build the dict of per-category DataFrames.
shop = readDatafromFile(path)

# Each frame is looked up by its key, here the sales data.
print (shop['sales'])

编辑:

使用 defaultdict 的解决方案,感谢 Reti43 提供的思路。

def readDatafromFile(path):
    """Load the store's data files and concatenate them per category.

    Every ``*.txt`` file in ``<path>/data`` whose name contains a known
    marker is read as a header-less, comma-separated table; all tables of
    one category are stacked into a single DataFrame with a fresh index.

    Args:
        path: Directory containing the ``data`` sub-folder. May be absolute
            or relative to the current working directory.

    Returns:
        ``defaultdict(list)`` holding a DataFrame under each of the keys
        ``'sales'``, ``'emp'``, ``'prod'`` and ``'res'`` (files containing
        ``_Sale``, ``_Emplo``, ``_Prod`` and ``_Resou`` respectively).
        A category without any file maps to an empty DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not an existing directory.
    """
    from collections import defaultdict

    # Checked in order and the first hit wins, like an if/elif chain.
    markers = (
        ("_Sale", "sales"),
        ("_Emplo", "emp"),
        ("_Prod", "prod"),
        ("_Resou", "res"),
    )

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("data directory not found: %r" % data_dir)

    store_data = defaultdict(list)
    # Glob with the full pattern instead of os.chdir(): changing the working
    # directory and then prefixing `path` again breaks relative paths and
    # leaks a process-wide side effect.
    for source in glob.glob(os.path.join(glob.escape(data_dir), "*.txt")):
        filename = os.path.basename(source)
        for marker, key in markers:
            if marker in filename:
                store_data[key].append(pd.read_csv(source, sep=',', header=None))
                break

    # Replace each list of frames by one combined frame. pd.concat raises
    # ValueError on an empty list, so empty categories get an empty frame.
    for _, key in markers:
        parts = store_data[key]
        store_data[key] = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame()

    return (store_data)

答案 1 :(得分:0)

按原样

使用原始代码就可以访问这些 DataFrame,只需使用实际存入字典的键:

In [6]: shop.keys()
Out[6]: dict_keys([2, 3, 4, 1])

因此,按照您的代码,可以用shop[1]取得"销售"DataFrame。

简化

不过,您真正想要的可能是有意义的键。在这种情况下,可以直接用文件名作为键。事实上,如果愿意,这段代码可以写得完全通用:

import pandas as pd
import glob
import os

# Base directory; readDatafromFile appends "/data" to it.
path = "/folder"


def readDatafromFile(path):
    """Load every ``*.txt`` file in ``<path>/data`` into a dict of DataFrames.

    Each file is read as a header-less, comma-separated table.

    Args:
        path: Directory containing the ``data`` sub-folder. May be absolute
            or relative to the current working directory.

    Returns:
        dict mapping each file name (without directory, including the
        ``.txt`` extension) to the DataFrame read from that file.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not an existing directory.
    """
    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("data directory not found: %r" % data_dir)

    # Glob with the full pattern instead of os.chdir() so the caller's
    # working directory is left untouched; keys stay bare file names.
    sources = glob.glob(os.path.join(glob.escape(data_dir), "*.txt"))
    store_data = {
        os.path.basename(source): pd.read_csv(source, sep=',', header=None)
        for source in sources
    }
    return (store_data)

# `shop` maps each *.txt file name to the DataFrame read from it.
shop = readDatafromFile(path)

请注意,这会读入所有 *.txt 文件,并以完整的文件名作为键。所以,如果愿意,也可以先准备一个由键和文件名片段组成的字典,例如 `{'employees': '_Emp', ...}`,然后遍历该字典,创建一个键相同、值为对应文件内容的新字典。

答案 2 :(得分:0)

恕我直言,这种做法看起来有点过于复杂。也许确实有充分的理由分别创建这些对象,但既然各分支的处理基本相同,就可以用更少的代码实现同样的功能:

import pandas as pd
import glob
import os

# Base directory; readDatafromFile appends "/data" to it.
path = "/folder"


def readDatafromFile(path):
    """Load the recognised ``*.txt`` files in ``<path>/data`` as DataFrames.

    A file is loaded when its name contains one of the markers ``_Sale``,
    ``_Emplo``, ``_Prod`` or ``_Resou``; it is read as a header-less,
    comma-separated table. The number of loaded files is printed.

    Args:
        path: Directory containing the ``data`` sub-folder. May be absolute
            or relative to the current working directory.

    Returns:
        dict mapping each loaded file's name without the ``.txt``
        extension to its DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not an existing directory.
    """
    valid_file_names = ("_Sale", "_Emplo", "_Prod", "_Resou")

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError("data directory not found: %r" % data_dir)

    store_data = {}
    # Glob with the full pattern instead of os.chdir(): changing the working
    # directory and then joining `path` again breaks relative paths and
    # leaks a process-wide side effect.
    for source_path in glob.glob(os.path.join(glob.escape(data_dir), "*.txt")):
        filename = os.path.basename(source_path)
        if any(val in filename for val in valid_file_names):
            # Key by the file name without its ".txt" extension.
            key = os.path.splitext(filename)[0]
            store_data[key] = pd.read_csv(source_path, sep=',', header=None)

    # File names within one directory are unique, so this is the file count.
    print(len(store_data))
    return store_data

# `shop` maps each loaded file's name (without ".txt") to its DataFrame.
shop = readDatafromFile(path)

然后,您可以使用shop[filename](不带 .txt 的文件名)访问数据框。