import pandas as pd
# Sample purchase records: one dict per purchase with the purchase date
# (ISO string), a year+week key and the purchase price.
# The rows are listed as (buy_date, yearweek, buy_price) tuples, one per line,
# and expanded into dicts so the key order stays buy_date, yearweek, buy_price.
df_dict = [
    {'buy_date': date, 'yearweek': week, 'buy_price': price}
    for date, week, price in [
        ('2016-01-19', 20163, 228.48),
        ('2016-01-20', 20163, 219.4),
        ('2016-01-21', 20163, 220.13),
        ('2016-01-22', 20163, 221.07),
        ('2016-01-25', 20164, 218.4),
        ('2016-02-01', 20165, 218.55),
        ('2016-02-02', 20165, 213.25),
        ('2016-02-03', 20165, 210.2),
        ('2016-02-04', 20165, 215.12),
        ('2016-02-05', 20165, 211.1),
        ('2016-02-08', 20166, 212.05),
        ('2016-02-16', 20167, 199.48),
        ('2016-02-17', 20167, 207.7),
        ('2016-02-18', 20167, 210.4),
        ('2016-02-19', 20167, 208.65),
        ('2016-02-22', 20168, 205.9),
        ('2016-03-02', 20169, 216.73),
        ('2016-03-03', 20169, 224.87),
        ('2016-03-04', 20169, 225.15),
        ('2016-03-08', 201610, 227.42),
        ('2016-03-09', 201610, 226.0),
        ('2016-04-11', 201615, 225.9),
        ('2016-04-12', 201615, 231.57),
        ('2016-04-13', 201615, 233.55),
        ('2016-04-18', 201616, 231.3),
        ('2016-04-20', 201616, 233.5),
        ('2016-05-26', 201621, 184.57),
        ('2016-05-27', 201621, 189.88),
        ('2016-05-30', 201622, 189.9),
        ('2016-05-31', 201622, 191.13),
        ('2016-06-01', 201622, 199.37),
        ('2016-06-02', 201622, 204.13),
        ('2016-06-03', 201622, 204.3),
        ('2016-06-06', 201623, 201.42),
        ('2016-06-07', 201623, 205.45),
        ('2016-07-04', 201627, 210.9),
        ('2016-07-05', 201627, 214.55),
        ('2016-07-07', 201627, 210.05),
        ('2016-07-11', 201628, 214.57),
        ('2016-07-12', 201628, 217.77),
        ('2016-07-13', 201628, 217.3),
        ('2016-07-15', 201628, 220.5),
        ('2016-08-01', 201631, 228.55),
    ]
]

# Build the DataFrame used by the examples that follow.
df = pd.DataFrame(df_dict)
在这个数据框中,如何进行过滤,使每个 `yearweek` 值只保留一个日期(即一行)?
答案 0 :(得分:0)
如果我理解正确(IIUC),您需要使用 `drop_duplicates`:
# Keep only the first row encountered for each distinct `yearweek` value;
# later rows with the same `yearweek` are dropped.
df = df.drop_duplicates(subset=['yearweek'], keep='first')
print(df)
buy_date buy_price yearweek
0 2016-01-19 228.48 20163
4 2016-01-25 218.40 20164
5 2016-02-01 218.55 20165
10 2016-02-08 212.05 20166
11 2016-02-16 199.48 20167
15 2016-02-22 205.90 20168
16 2016-03-02 216.73 20169
19 2016-03-08 227.42 201610
21 2016-04-11 225.90 201615
24 2016-04-18 231.30 201616
26 2016-05-26 184.57 201621
28 2016-05-30 189.90 201622
33 2016-06-06 201.42 201623
35 2016-07-04 210.90 201627
38 2016-07-11 214.57 201628
42 2016-08-01 228.55 201631
另一个解决方案是按年份(year)加周数(即 `yearweek`)分组,并用 `first` 聚合取每组的第一行。分组键可以由 `buy_date` 通过 `strftime('%Y%U')` 生成:
# Parse the date strings into datetimes so the `.dt` accessor can be used.
df['buy_date'] = pd.to_datetime(df['buy_date'])

# Build a year+week key from each date ('%Y%U' = 4-digit year followed by the
# zero-padded week number, weeks starting on Sunday), name it 'YW', group by
# it and keep the first entry of every group. `reset_index` turns the 'YW'
# group key back into a regular column.
df = (
    df.groupby(df['buy_date'].dt.strftime('%Y%U').rename('YW'))
    .first()
    .reset_index()
)
print(df)
YW buy_date buy_price yearweek
0 201603 2016-01-19 228.48 20163
1 201604 2016-01-25 218.40 20164
2 201605 2016-02-01 218.55 20165
3 201606 2016-02-08 212.05 20166
4 201607 2016-02-16 199.48 20167
5 201608 2016-02-22 205.90 20168
6 201609 2016-03-02 216.73 20169
7 201610 2016-03-08 227.42 201610
8 201615 2016-04-11 225.90 201615
9 201616 2016-04-18 231.30 201616
10 201621 2016-05-26 184.57 201621
11 201622 2016-05-30 189.90 201622
12 201623 2016-06-06 201.42 201623
13 201627 2016-07-04 210.90 201627
14 201628 2016-07-11 214.57 201628
15 201631 2016-08-01 228.55 201631
(上面的 `groupby` 使用 `first` 聚合,即保留每组的第一行。)
TypeError: Bad input string
at Decipher.update (crypto.js:144:26)
at decrypt (APPDIRECTORY\encryption.js:17:22)
at app.get (APPDIRECTORY\encryption.js:35:17)
at Layer.handle [as handle_request] (APPDIRECTORY\node_modules\express\lib\router\layer.js:95:5)
at next (APPDIRECTORY\node_modules\express\lib\router\route.js:137:13)
at Route.dispatch (APPDIRECTORY\node_modules\express\lib\router\route.js:112:3)
at Layer.handle [as handle_request] (APPDIRECTORY\node_modules\express\lib\router\layer.js:95:5)
at APPDIRECTORY\node_modules\express\lib\router\index.js:281:22
at param (APPDIRECTORY\node_modules\express\lib\router\index.js:354:14)
at param (APPDIRECTORY\node_modules\express\lib\router\index.js:365:14)