我想在Python中使用 ev_begin_date = datetime.datetime.strptime('2017-02-28T23:00:00.000Z','%Y-%M-%dT%H:%M:%S.%Z' )
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py", line 577, in _strptime_datetime
tt, fraction, gmtoff_fraction = _strptime(data_string, format)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py", line 342, in _strptime
format_regex = _TimeRE_cache.compile(format)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/_strptime.py", line 272, in compile
return re_compile(self.pattern(format), IGNORECASE)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/re.py", line 234, in compile
return _compile(pattern, flags)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/re.py", line 286, in _compile
p = sre_compile.compile(pattern, flags)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/sre_compile.py", line 764, in compile
p = sre_parse.parse(p, flags)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/sre_parse.py", line 930, in parse
p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0)
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/sre_parse.py", line 426, in _parse_sub
not nested and not items))
File "/usr/local/Cellar/python/3.7.2_2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/sre_parse.py", line 813, in _parse
raise source.error(err.msg, len(name) + 1) from None
re.error: redefinition of group name 'M' as group 5; was group 2 at position 105
运行一个函数。这是我现有的代码:
concurrent
我想将各个数据帧合并为一个数据帧
import concurrent.futures
import pandas as pd
import time
def putIndf(file):
    """Turn one input file into a single result object.

    The lines selected by ``getline(file)`` are handed straight to
    ``savetoDataFrame`` and whatever that helper returns is passed back
    unchanged (a DataFrame, according to the original author's note).
    """
    return savetoDataFrame(getline(file))
def main():
    """Map dp.putIndf over fileList in a process pool and return a frame.

    NOTE(review): as written, this returns only the frame built from the
    LAST entry of fileList, not the combination of all of them -- see the
    comment inside the loop. If fileList is empty, ``df`` is never bound
    and the final ``return`` raises UnboundLocalError.
    """
    # Starts empty and is never reassigned anywhere below.
    newData = pd.DataFrame()
    # Pool of up to 30 worker processes; executor.map yields results in the
    # same order as fileList, which is why zip() can pair them up.
    with concurrent.futures.ProcessPoolExecutor(max_workers=30) as executor:
        for i,file in zip(fileList, executor.map(dp.putIndf, fileList)):
            # ``file`` is the value returned by dp.putIndf, not a path.
            # append() returns a new object instead of modifying newData, so
            # every pass overwrites df with "empty frame + current result".
            df = newData.append(file, ignore_index=True)
    return df
# Run main() only when this file is executed as a script, not on import.
# NOTE(review): with process pools this guard presumably also keeps worker
# processes that re-import the module from calling main() again -- confirm
# for the start method in use.
if __name__ == '__main__':
    main()
,但结果只得到了该函数返回的最后一个数据帧。
答案 0 :(得分:1)
基本上,您每次迭代都会重新给 df 赋值,却从未对它进行累积扩展。您可能想做的(也是建议的做法)是先初始化一个空的 df ,然后在迭代中逐步追加:
# Start from an empty frame that will accumulate every result.
df = pd.DataFrame()
...
# Inside the loop: rebind df to "everything so far + the current frame".
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat([df, file]) is the equivalent that works on every version.
df = pd.concat([df, file], ignore_index=True)
尽管如此,首选的方法是先构建一个由待合并数据帧组成的集合,然后在循环外部一次性将它们拼接起来,避免在循环内部不断增大数据帧之类的复杂对象。
def main():
    """Combine the dp.putIndf results for every entry of fileList.

    The work is fanned out over a pool of up to 30 worker processes. All
    results are gathered first and concatenated exactly once, with
    ``ignore_index=True`` so the combined frame gets a fresh index.
    """
    with concurrent.futures.ProcessPoolExecutor(max_workers=30) as executor:
        # List comprehension: executor.map yields results in input order, so
        # zip() pairs each entry of fileList with its result; the entry
        # itself is not needed for a plain list.
        frames = [
            frame
            for _, frame in zip(fileList, executor.map(dp.putIndf, fileList))
        ]
        # Dictionary alternative, keyed by the fileList entry:
        # frames_by_file = {name: frame for name, frame in
        #                   zip(fileList, executor.map(dp.putIndf, fileList))}

    # Single concatenation outside the pool block.
    return pd.concat(frames, ignore_index=True)
或者,考虑到进程池的处理方式,也可以在循环中把各个数据帧追加到列表里,但仍然只在循环外部拼接一次:
def main():
    """Collect the dp.putIndf results in a list, then concatenate once.

    Same outcome as a comprehension-based version, but each result is
    appended to a plain list inside an explicit loop. The only DataFrame
    operation is the single ``pd.concat`` after the loop.
    """
    frames = []  # dictionary alternative: frames_by_file = {}
    with concurrent.futures.ProcessPoolExecutor(max_workers=30) as executor:
        results = executor.map(dp.putIndf, fileList)
        for _name, frame in zip(fileList, results):
            # Growing a list is cheap; the frames themselves are untouched.
            frames.append(frame)
            # dictionary alternative: frames_by_file[_name] = frame

    return pd.concat(frames, ignore_index=True)