我有一个看起来像这样的数据框
user booking_date origin destination price sale_channel
1 user5 2018-11-01 MAD PMI 58.20 online
2 user7 2018-11-01 DUB MAD 147.50 online
3 user4 2018-11-02 TFS MAD 24.05 online
4 user7 2018-11-01 LPA MAD 37.30 call center
5 user1 2018-11-01 AMS MAD 149.74 travel agency
6 user1 2018-11-01 MAD PMI 19.95 online
现在我想先筛选出某一天的预订,然后按用户汇总价格,找出当天消费总额最高的几位用户。筛选部分如下:
# Keep only the bookings whose booking_date is 2 November 2018.
df <- df[df$booking_date == as.Date("2018-11-02"), ]
我尝试了几种使用 group_by 或 aggregate 的方法,但得到的只是多出来的一列(例如示例数据中的 total 列),而不是按用户分组汇总后的结果。我期望得到的是一个用户列表,例如:
'user1' 'user10' 'user7' etc.
示例数据
structure(list(user = c("user4", "user5", "user3", "user10",
"user1", "user2", "user7", "user6", "user5", "user6", "user6",
"user7", "user1", "user7", "user4", "user4", "user1", "user7",
"user7", "user8", "user4", "user10", "user4", "user8", "user3",
"user9", "user5", "user2", "user5", "user3", "user3", "user9",
"user6", "user10", "user9", "user5", "user3", "user5", "user7",
"user9", "user2", "user2", "user7", "user10", "user7", "user3",
"user1", "user2", "user8", "user6", "user6", "user10", "user4",
"user7", "user4", "user1", "user4", "user2", "user1", "user7",
"user5", "user4", "user4", "user7", "user10"), booking_date = structure(c(17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837), class = "Date"), origin = c("TFS", "MAD", "TFN", "MAD",
"LYS", "LPA", "MAD", "MAD", "AMS", "MAD", "MAD", "MAD", "MAD",
"LPA", "MAD", "MAD", "MAD", "TXL", "MAD", "MAD", "TXL", "TXL",
"TFS", "MAD", "NAP", "TFS", "PMI", "TLS", "TFS", "NTE", "AMS",
"FUE", "TFN", "CPH", "TFN", "MAD", "SVQ", "SCQ", "SVQ", "MAD",
"PMI", "MAD", "PMI", "MAD", "MAD", "MAD", "MAD", "MAD", "SVQ",
"NCE", "CDG", "MAD", "MAD", "MAD", "MAD", "MAD", "TFN", "LGW",
"LGW", "MAD", "TFN", "MAD", "RNS", "AGP", "CDG"), destination = c("MAD",
"DUB", "MAD", "TFS", "MAD", "MAD", "NAP", "TLS", "MAD", "SCQ",
"LPA", "TFN", "TXL", "MAD", "TFN", "SVQ", "ACE", "MAD", "TLS",
"SCQ", "MAD", "MAD", "MAD", "LPA", "MAD", "MAD", "MAD", "MAD",
"MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "CAG", "MAD",
"MAD", "MAD", "LPA", "MAD", "CDG", "MAD", "LPA", "TFS", "TFN",
"PMI", "NAP", "MAD", "MAD", "MAD", "LPA", "LGW", "LPA", "CDG",
"SPC", "MAD", "MAD", "MAD", "SCQ", "MAD", "SVQ", "MAD", "MAD",
"MAD"), price = c(24.0499992371, 41.5400009155, 251.199996948,
15.6000003815, 44.0099983215, 73.8499984741, 115.470001221, 69.4400024414,
81.3899993896, 15.2399997711, 41.1199989319, 274.559997559, 150,
29.3199996948, 332.440002441, 94.9100036621, 97.9800033569, 55.1199989319,
81.7399978638, 4.86000013351, 39.0299987793, 53.6300010681, 39.3199996948,
114.559997559, 65.4000015259, 96.2900009155, 41.75, 28.9099998474,
25.1900005341, 14, 50.3100013733, 47.2999992371, 53.8199996948,
91.3199996948, 77.6800003052, 17.8099994659, 96.5400009155, 27.6900005341,
34.1399993896, 34.3300018311, 15.5600004196, 158.449996948, 45.2999992371,
36.8100013733, 50.6800003052, 62.9000015259, 13.7399997711, 49.4399986267,
66.1100006104, 95.4400024414, 41.8400001526, 69.8300018311, 60.6599998474,
34.3300018311, 97.1399993896, 84.3099975586, 25.8099994659, 185.899993896,
34.8899993896, 304.380004883, 15.1300001144, 29.1399993896, 133.529998779,
208.910003662, 152.960006714), sale_channel = c("online", "online",
"travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "call center", "online",
"online", "online", "travel agency", "travel agency", "travel agency",
"online", "online", "online", "online", "online", "travel agency",
"travel agency", "online", "travel agency", "call center", "online",
"travel agency", "online", "online", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "online", "online",
"online", "online", "online", "online", "online", "online", "travel agency",
"travel agency", "travel agency", "online", "travel agency",
"online"), total = c(876.0300006858, 250.5000009536, 540.3500022886,
420.1500110628, 424.9299983977, 512.1099882118, 1233.9500045785,
316.9000034332, 250.5000009536, 316.9000034332, 316.9000034332,
1233.9500045785, 424.9299983977, 1233.9500045785, 876.0300006858,
876.0300006858, 424.9299983977, 1233.9500045785, 1233.9500045785,
185.52999830291, 876.0300006858, 420.1500110628, 876.0300006858,
185.52999830291, 540.3500022886, 255.6000022889, 250.5000009536,
512.1099882118, 250.5000009536, 540.3500022886, 540.3500022886,
255.6000022889, 316.9000034332, 420.1500110628, 255.6000022889,
250.5000009536, 540.3500022886, 250.5000009536, 1233.9500045785,
255.6000022889, 512.1099882118, 512.1099882118, 1233.9500045785,
420.1500110628, 1233.9500045785, 540.3500022886, 424.9299983977,
512.1099882118, 185.52999830291, 316.9000034332, 316.9000034332,
420.1500110628, 876.0300006858, 1233.9500045785, 876.0300006858,
424.9299983977, 876.0300006858, 512.1099882118, 424.9299983977,
1233.9500045785, 250.5000009536, 876.0300006858, 876.0300006858,
1233.9500045785, 420.1500110628)), row.names = c(3L,
26L, 37L, 42L, 48L, 82L, 89L, 100L, 112L, 124L, 133L, 144L, 148L,
150L, 166L, 167L, 173L, 182L, 217L, 243L, 259L, 285L, 300L, 304L,
306L, 336L, 341L, 366L, 388L, 397L, 413L, 417L, 423L, 452L, 457L,
473L, 474L, 478L, 482L, 483L, 486L, 496L, 499L, 504L, 510L, 513L,
529L, 531L, 558L, 605L, 615L, 628L, 629L, 664L, 669L, 672L, 684L,
722L, 730L, 752L, 766L, 767L, 779L, 804L, 819L), class = c("data.table",
"data.frame"))
关于如何处理此问题的任何建议?
编辑: 最终解决方案
# Keep only the bookings made on 2 November 2018.
df <- df[df$booking_date == as.Date("2018-11-02"), ]
# Sum the price of all bookings per user (one row per user).
total <- aggregate(price ~ user, data = df, FUN = sum)
# Rank every user from highest to lowest total spend. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
# Note: despite its name, top_10 holds the full ranking, not just ten rows.
top_10 <- total[order(total$price, decreasing = TRUE), ]
# Names of the five biggest spenders. Unlike `[1:5, 1]`, head() does not pad
# the result with NA when fewer than five users booked on that day, and the
# column is selected by name rather than by position.
head(top_10[["user"]], 5L)
这样得到的结果是:
"user7" "user4" "user3" "user2" "user1"
答案 0 :(得分:0)
尝试一下
# Total price per user (one row per user).
total <- aggregate(price ~ user, data = df, FUN = sum)
# Show the per-user totals ordered from the highest to the lowest. TRUE is
# spelled out because `T` is a plain variable that can be overwritten.
total[order(total$price, decreasing = TRUE), ]
或
# Same descending ordering of the per-user totals, using dplyr.
# desc() is namespaced as well so the call does not depend on dplyr being
# attached with library(dplyr).
dplyr::arrange(total, dplyr::desc(price))
或
# Base-R alternative: named vector of per-user price totals, largest first.
# TRUE is spelled out because `T` is a plain variable that can be overwritten.
sort(tapply(df$price, df$user, sum), decreasing = TRUE)