假设我正在从不同的 CSV 文件创建数据帧(DataFrame)。我写了一个函数,把这些文件读入 DataFrame 并以字典的形式返回,但现在我不知道如何单独访问其中的每一个数据帧。
import pandas as pd
import glob
import os
# Store root directory handed to readDatafromFile; the parentheses are
# redundant, so this is a plain str rather than a tuple.
path = ("/folder")
def readDatafromFile(path):
    """Load the sales, employee, product and resource files of one store.

    Scans ``<path>/data`` for ``*.txt`` files and reads every recognised
    file as a header-less, comma-separated table.  The working directory
    of the process is left untouched, so relative ``path`` values work.

    Args:
        path: Store directory; the files are expected in its ``data``
            sub-directory.

    Returns:
        dict mapping an integer key to a DataFrame: 1 for "_Sale",
        2 for "_Emplo", 3 for "_Prod" and 4 for "_Resou" files.  A key is
        present only if a matching file exists; when several files match
        the same fragment, the alphabetically last one wins.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not a directory.
    """
    # Checked in this order; the first fragment found in the name decides.
    keys_by_fragment = (("_Sale", 1), ("_Emplo", 2), ("_Prod", 3), ("_Resou", 4))

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir!r}")

    # Globbing the full pattern replaces os.chdir(); escape() keeps glob
    # metacharacters in the directory name literal.  glob returns files in
    # arbitrary order, so sort to make "last match wins" deterministic.
    pattern = os.path.join(glob.escape(data_dir), "*.txt")

    store_data = {}
    for source in sorted(glob.glob(pattern)):
        filename = os.path.basename(source)
        for fragment, key in keys_by_fragment:
            if fragment in filename:
                store_data[key] = pd.read_csv(source, sep=',', header=None)
                break
    return store_data
# Load the store's files; the result is a dict of DataFrames keyed by 1-4.
shop = readDatafromFile(path)
如何访问某个商店的销售数据,例如通过 shop.sales 或 shop['sales'] 这样的方式?
答案 0 :(得分:0)
我认为您需要将每个 DataFrame 追加(append)到列表中,然后用 concat 把它们合并:
def readDatafromFile(path):
    """Load and concatenate a store's files, grouped by category.

    Scans ``<path>/data`` for ``*.txt`` files, reads every recognised
    file as a header-less, comma-separated table and concatenates all
    files of the same category (in alphabetical file order).

    Args:
        path: Store directory; the files are expected in its ``data``
            sub-directory.

    Returns:
        dict with the keys 'sales', 'emp', 'prod' and 'res', each holding
        one DataFrame.  A category without any file maps to an empty
        DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not a directory.
    """
    # Checked in this order; the first fragment found in the name decides.
    categories = (('sales', "_Sale"), ('emp', "_Emplo"),
                  ('prod', "_Prod"), ('res', "_Resou"))

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir!r}")

    # Globbing the full pattern replaces os.chdir(), which broke relative
    # paths; sorting makes the row order of the result deterministic.
    pattern = os.path.join(glob.escape(data_dir), "*.txt")

    frames = {key: [] for key, _ in categories}
    for source in sorted(glob.glob(pattern)):
        filename = os.path.basename(source)
        for key, fragment in categories:
            if fragment in filename:
                frames[key].append(pd.read_csv(source, sep=',', header=None))
                break

    # pd.concat raises ValueError on an empty list, so a category without
    # files is represented by an empty DataFrame instead.
    store_data = {}
    for key, parts in frames.items():
        if parts:
            store_data[key] = pd.concat(parts, ignore_index=True)
        else:
            store_data[key] = pd.DataFrame()
    return store_data
# Load the store's files; each category is then reachable by its string key.
shop = readDatafromFile(path)
print (shop['sales'])
编辑:
下面是使用 defaultdict 的解决方案,感谢 Reti43 提供的思路。
def readDatafromFile(path):
    """Load and concatenate a store's files, grouped with a defaultdict.

    Scans ``<path>/data`` for ``*.txt`` files, reads every recognised
    file as a header-less, comma-separated table and concatenates all
    files of the same category (in alphabetical file order).

    Args:
        path: Store directory; the files are expected in its ``data``
            sub-directory.

    Returns:
        ``defaultdict(list)`` whose keys 'sales', 'emp', 'prod' and 'res'
        each hold one DataFrame.  A category without any file maps to an
        empty DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not a directory.
    """
    from collections import defaultdict

    # Checked in this order; the first fragment found in the name decides.
    categories = (('sales', "_Sale"), ('emp', "_Emplo"),
                  ('prod', "_Prod"), ('res', "_Resou"))

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir!r}")

    # Globbing the full pattern replaces os.chdir(), which broke relative
    # paths; sorting makes the row order of the result deterministic.
    pattern = os.path.join(glob.escape(data_dir), "*.txt")

    store_data = defaultdict(list)
    for source in sorted(glob.glob(pattern)):
        filename = os.path.basename(source)
        for key, fragment in categories:
            if fragment in filename:
                store_data[key].append(pd.read_csv(source, sep=',', header=None))
                break

    # Replace each list of frames by a single frame.  pd.concat raises
    # ValueError on an empty list, so empty categories get an empty frame.
    for key, _ in categories:
        parts = store_data[key]
        if parts:
            store_data[key] = pd.concat(parts, ignore_index=True)
        else:
            store_data[key] = pd.DataFrame()
    return store_data
答案 1 :(得分:0)
使用原始代码就可以访问这些数据帧,只需使用实际放入字典中的键:
In [6]: shop.keys()
Out[6]: dict_keys([2, 3, 4, 1])
因此,按照您的代码,使用 shop[1] 即可得到“销售”数据帧。
不过,您真正想要的可能是有意义的键。在这种情况下,可以直接使用文件名作为键。事实上,如果您愿意,还可以让这段代码完全通用:
import pandas as pd
import glob
import os
# Store root directory handed to readDatafromFile; the parentheses are
# redundant, so this is a plain str rather than a tuple.
path = ("/folder")
def readDatafromFile(path):
    """Load every ``*.txt`` file of a store, keyed by its file name.

    Reads each ``*.txt`` file in ``<path>/data`` as a header-less,
    comma-separated table.  The working directory of the process is left
    untouched.

    Args:
        path: Store directory; the files are expected in its ``data``
            sub-directory.

    Returns:
        dict mapping the exact file name (including ``.txt``) to the
        DataFrame read from that file.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not a directory.
    """
    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir!r}")

    # Read through the full path instead of os.chdir() + bare file names,
    # so the function has no hidden effect on the rest of the process.
    pattern = os.path.join(glob.escape(data_dir), "*.txt")
    return {
        os.path.basename(source): pd.read_csv(source, sep=',', header=None)
        for source in sorted(glob.glob(pattern))
    }
# Load the store's files; the result is a dict keyed by file name.
shop = readDatafromFile(path)
请注意,这会读入所有 *.txt 文件,并以其完整的文件名作为键。因此,如果您愿意,可以先准备一个由键和文件名片段组成的字典,例如 `{'employees': '_Emp', ...}`,然后遍历该字典,创建一个键相同、但值为对应文件内容的新字典。
答案 2 :(得分:0)
恕我直言,这种做法看起来有点过于复杂。也许创建这些 source 变量有充分的理由,但由于它们的处理方式基本相同,您可以用更少的代码实现相同的功能:
import pandas as pd
import glob
import os
# Store root directory handed to readDatafromFile; the parentheses are
# redundant, so this is a plain str rather than a tuple.
path = ("/folder")
def readDatafromFile(path):
    """Load a store's recognised files, keyed by file name without ".txt".

    Scans ``<path>/data`` for ``*.txt`` files and reads those whose name
    contains "_Sale", "_Emplo", "_Prod" or "_Resou" as header-less,
    comma-separated tables.  Prints the number of files loaded.

    Args:
        path: Store directory; the files are expected in its ``data``
            sub-directory.

    Returns:
        dict mapping each loaded file's name (extension stripped) to its
        DataFrame.

    Raises:
        FileNotFoundError: If ``<path>/data`` is not a directory.
    """
    valid_file_names = ("_Sale", "_Emplo", "_Prod", "_Resou")

    data_dir = os.path.join(path, "data")
    if not os.path.isdir(data_dir):
        raise FileNotFoundError(f"data directory not found: {data_dir!r}")

    # Globbing the full pattern replaces os.chdir(): changing directory and
    # then joining ``path`` again broke relative ``path`` values.
    pattern = os.path.join(glob.escape(data_dir), "*.txt")

    store_data = {}
    for source_path in sorted(glob.glob(pattern)):
        filename = os.path.basename(source_path)
        if any(val in filename for val in valid_file_names):
            # The key is the file name with its ".txt" extension removed.
            key = os.path.splitext(filename)[0]
            store_data[key] = pd.read_csv(source_path, sep=',', header=None)

    # File names in one directory are unique, so this equals the number of
    # files read.
    print(len(store_data))
    return store_data
# Load the store's files; keys are the file names without ".txt".
shop = readDatafromFile(path)
然后,您可以使用 shop[filename](filename 为不带 .txt 的文件名)访问相应的数据帧。