这里的 'df' 是原始数据帧,'df_1' 是期望得到的数据帧。原始数据集很大,包含近 100000 行。我想编写一个函数,根据 A 列生成 "Back_day" 列:对每一行,取前一个日期、相同时间所对应的 A 值;最早的日期没有前一天,因此该值留空。
# Sample input: five dates observed at four times of day, with one reading
# "A" for every (date, time) pair -- 20 rows in total.
dict_nary = {
    # The run of five dates repeats once per time slot.
    "Date": ["01-09-2019", "02-09-2019", "03-09-2019", "04-09-2019", "05-09-2019"] * 4,
    # Each time slot is held for a whole run of five dates.
    "Time": [
        slot
        for slot in ("14:00:00", "14:15:00", "14:30:00", "14:45:00")
        for _ in range(5)
    ],
    "A": [2, 3, 5, 8, 9, 2, 3, 5, 7, 8, 9, 0, 8, 6, 0, 8, 2, 1, 9, 6],
}
# Order rows by date, then by time within a date (both ascending, the
# default). Both columns hold strings, so the comparison is textual; the
# original row labels are kept, not renumbered.
df = pd.DataFrame(dict_nary).sort_values(by=["Date", "Time"])
# Expected result: the same 20 rows as the input sample plus a "Back_day"
# column holding the "A" reading from the previous date at the same time.
dict_nary = {
    # The run of five dates repeats once per time slot.
    "Date": ["01-09-2019", "02-09-2019", "03-09-2019", "04-09-2019", "05-09-2019"] * 4,
    # Each time slot is held for a whole run of five dates.
    "Time": [
        slot
        for slot in ("14:00:00", "14:15:00", "14:30:00", "14:45:00")
        for _ in range(5)
    ],
    "A": [2, 3, 5, 8, 9, 2, 3, 5, 7, 8, 9, 0, 8, 6, 0, 8, 2, 1, 9, 6],
}
# Within each run of five readings (one time slot, dates in order), the first
# date has no predecessor and gets "", and every later date receives the
# reading that sits one position earlier in the run.
dict_nary["Back_day"] = [
    previous
    for start in range(0, len(dict_nary["A"]), 5)
    for previous in ["", *dict_nary["A"][start:start + 4]]
]
# Same ordering as the input frame: by date, then by time, both ascending
# (the default); the original row labels are kept.
df_1 = pd.DataFrame(dict_nary).sort_values(by=["Date", "Time"])
# Show the input frame and then the expected frame, each ending with a newline.
print(df, df_1, sep="\n")