我是 Python 新手,一直在网上搜索这个问题的解决方案,但没有找到。我有一个 pandas 数据框(DataFrame)字典,其中键是年份(“Year”),值是该年份的 pandas 数据框。以下是示例数据:
def target_non_target(input_frames_folder, model_file, output):
    """Classify every ``.jpg`` frame and save the "Target" ones under ``output``.

    Each path matched by ``input_frames_folder`` is treated as a folder of
    frames.  A sub-folder with the same base name is created under ``output``
    and every frame whose ``target`` score is greater than its ``non_target``
    score is written there, unmodified, under its original file name.

    Args:
        input_frames_folder: Glob pattern that expands to the frame folders.
        model_file: Path handed to ``load_model``; the loaded model's
            ``predict`` output row is unpacked as ``(non_target, target)``.
        output: Root folder for the results; created when missing.

    Returns:
        None.
    """
    os.makedirs(output, exist_ok=True)

    # Loaded on first use and then reused for every frame: loading the network
    # once per image is pure overhead, and loading it when there is nothing to
    # classify is unnecessary.
    model = None

    for folder in glob(input_frames_folder):
        out_path = os.path.join(output, os.path.basename(folder))
        print(out_path)
        os.makedirs(out_path, exist_ok=True)

        for img_path in glob(os.path.join(folder, "*.jpg")):
            orig = cv2.imread(img_path)
            if orig is None:
                # cv2.imread signals an unreadable file by returning None.
                print("[WARN] could not read image: {}".format(img_path))
                continue

            if model is None:
                print("[INFO] loading network...")
                model = load_model(model_file)

            # Pre-process into a batch of one 28x28 image scaled to [0, 1].
            image = cv2.resize(orig, (28, 28))
            image = image.astype("float") / 255.0
            image = img_to_array(image)
            image = np.expand_dims(image, axis=0)

            (non_target, target) = model.predict(image)[0]
            if target > non_target:
                img_name = os.path.splitext(os.path.basename(img_path))[0]
                # The full-size original is saved, not the resized copy.
                cv2.imwrite(os.path.join(out_path, img_name + ".jpg"), orig)
# Glob pattern passed as the frame-folder argument.
frames_folder = "C:\\Python36\\videos\\videos_new\\*"
# Path of the model file passed to the classifier.
model = "C:\\Python35\\target_non_target\\target_non_target.model"
# Root folder passed as the output location.
output_folder = "C:\\Python35\\target_non_target\\Target_images_new\\"

target_check = target_non_target(
    input_frames_folder=frames_folder,
    model_file=model,
    output=output_folder,
)
我选择从所有数据框的列表开始,因为这是我真正的问题中数据的导入方式。有了数据框列表后,我就创建了这些数据框的字典。
import pandas as pd
import numpy as np
from collections import defaultdict
## Raw sample rows: one [ID, Year, score] triple per record
data1_2018 = [[1, 2018, 80], [2, 2018, 70]]
data2_2018 = [[1, 2018, 77], [3, 2018, 62]]
data3_2018 = [[1, 2018, 82], [2, 2018, 88], [4, 2018, 66]]
data1_2017 = [[1, 2017, 80], [5, 2017, 70]]
data2_2017 = [[1, 2017, 77], [3, 2017, 62]]
data3_2017 = [[1, 2017, 50], [2, 2017, 52], [4, 2017, 51]]


def _score_frame(rows, score_column):
    """Return a DataFrame with columns ID, Year and ``score_column``."""
    return pd.DataFrame(rows, columns=['ID', 'Year', score_column])


## One frame per (score, year) combination
df1_2018 = _score_frame(data1_2018, 'Score_1')
df2_2018 = _score_frame(data2_2018, 'Score_2')
df3_2018 = _score_frame(data3_2018, 'Score_3')
df1_2017 = _score_frame(data1_2017, 'Score_1')
df2_2017 = _score_frame(data2_2017, 'Score_2')
df3_2017 = _score_frame(data3_2017, 'Score_3')

### Flat list of every frame, 2018 first and then 2017
all_df_list = [
    df1_2018, df2_2018, df3_2018,
    df1_2017, df2_2017, df3_2017,
]
现在,我的问题是:能否遍历每个年份分组中的各个数据框,并以“ID”为键对它们做外连接(outer merge)合并?期望的输出是一个列表或字典,其中每年只对应一个数据框。以下是每年期望得到的结果:
# Bucket the frames by year: each key is a 'Year' value and each value is
# the list of that year's sub-frames, in the order they occur in all_df_list.
yearly_dfs = defaultdict(list)
per_year_pieces = (
    piece
    for frame in all_df_list
    for piece in frame.groupby('Year')
)
for year, year_rows in per_year_pieces:
    yearly_dfs[year].append(year_rows)
任何帮助将不胜感激!
谢谢!
答案 0 :(得分:1)
先用 pandas.concat 把所有数据框拼接起来,再用 DataFrame.groupby 按 'ID' 和 'Year' 分组,并使用聚合函数 first;
然后再按 'Year' 做一次 groupby,用字典推导式生成 {Year: df} 形式的字典:
# Stack every frame, then collapse rows sharing (ID, Year) into a single row
# by taking the first non-null value of each score column.
stacked = pd.concat(all_df_list, sort=False)
by_id_and_year = stacked.groupby(['ID', 'Year'])
df_all = by_id_and_year.first().reset_index()

# One merged frame per year, keyed by the 'Year' value.
df_years = {}
for year, year_frame in df_all.groupby('Year'):
    df_years[year] = year_frame
访问方式:
df_years[2017]
ID Year Score_1 Score_2 Score_3
0 1 2017 80.0 77.0 50.0
2 2 2017 NaN NaN 52.0
4 3 2017 NaN 62.0 NaN
6 4 2017 NaN NaN 51.0
8 5 2017 70.0 NaN NaN
df_years[2018]
ID Year Score_1 Score_2 Score_3
1 1 2018 80.0 77.0 82.0
3 2 2018 70.0 NaN 88.0
5 3 2018 NaN 62.0 NaN
7 4 2018 NaN NaN 66.0