这是我的示例数据 df：
# Sample data: a 50-row grouped tibble (class "grouped_df") whose grouping
# variables are user_id and press_id; the stored group sizes are 17, 21 and 12.
# Note that it already carries a column named `n` (124 / 110 / 145 per
# press_id); that column is part of the input data, not a row count derived
# from these 50 rows.
# NOTE(review): the grouping metadata is spelled out via the `vars`, `indices`,
# `group_sizes` and `labels` attributes -- presumably dput() output from an
# older dplyr; confirm that the installed dplyr version accepts this layout.
df <- structure(list(user_id = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), obs_id = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), timestamp = c(135560962809215,
135560977720600, 135560994815985, 135561010710946, 135561027891754,
135561044085292, 135561060963292, 135561079116292, 135561096005254,
135561112681985, 135561129308946, 135561145911561, 135561162521485,
135561179346754, 135561196266869, 135561207020177, 135561208190561,
135561684298600, 135561696513330, 135561712869100, 135561729868407,
135561746919830, 135561762999292, 135561779818446, 135561796528676,
135561812920676, 135561830544369, 135561846396561, 135561863018138,
135561886197176, 135561896428599, 135561913210561, 135561930595830,
135561946400638, 135561962972830, 135561979715292, 135561991182253,
135561992557715, 135562792606330, 135562813626137, 135562830549483,
135562847068137, 135562863564560, 135562887464368, 135562896809753,
135562913609945, 135562930201291, 135562946752868, 135562963382137,
135562979985022), x = c(866.4551, 866.4551, 865.9743, 865.4467,
861.27234, 858.7928, 860.4923, 860.8814, 863.5331, 868.2798,
873.31616, 878.6383, 885.2693, 897.54736, 911.0174, 924.72656,
924.72656, 852.2168, 852.2168, 851.1328, 849.8168, 843.0342,
840.08405, 839.5462, 839.83057, 842.2078, 844.60345, 846.9745,
850.0212, 853.7801, 858.75287, 865.1152, 872.62573, 880.48303,
888.81494, 898.601, 908.6426, 908.6426, 870.1465, 870.1393, 869.60895,
869.08875, 863.813, 861.976, 862.23035, 861.96906, 864.5737,
868.3425, 875.642, 880.27234), y = c(1142.71, 1139.997, 1133.6244,
1124.1506, 1075.3293, 1041.501, 1014.3225, 979.9761, 952.1288,
922.7904, 892.8203, 863.80347, 830.62524, 789.3959, 756.1295,
714.53613, 714.53613, 1182.2754, 1181.1726, 1175.6511, 1166.3911,
1127.9597, 1093.8245, 1069.8573, 1045.0938, 1022.8574, 1002.0753,
982.60486, 967.4147, 953.06824, 935.83545, 916.78284, 889.82056,
867.2317, 843.1273, 820.83777, 789.87305, 789.87305, 1219.6729,
1216.9186, 1210.3121, 1200.0981, 1146.955, 1113.3568, 1086.0355,
1056.4296, 1028.7742, 997.1078, 964.5531, 936.01086), size = c(0.027450982,
0.03137255, 0.03137255, 0.03137255, 0.03137255, 0.03529412, 0.03529412,
0.039215688, 0.039215688, 0.043137256, 0.039215688, 0.043137256,
0.043137256, 0.043137256, 0.050980397, 0.050980397, 0.050980397,
0.03137255, 0.027450982, 0.03137255, 0.03529412, 0.03529412,
0.03529412, 0.039215688, 0.039215688, 0.039215688, 0.043137256,
0.039215688, 0.039215688, 0.039215688, 0.043137256, 0.043137256,
0.039215688, 0.043137256, 0.039215688, 0.04705883, 0.050980397,
0.050980397, 0.023529414, 0.023529414, 0.023529414, 0.027450982,
0.03137255, 0.03529412, 0.03529412, 0.039215688, 0.03529412,
0.039215688, 0.039215688, 0.043137256), pressure = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1), digit = c(2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279, 2131165279,
2131165279, 2131165279, 2131165279, 2131165279, 2131165279),
state = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), press_id = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), n = c(124L, 124L, 124L, 124L, 124L, 124L,
124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L, 124L,
124L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L, 110L,
110L, 110L, 145L, 145L, 145L, 145L, 145L, 145L, 145L, 145L,
145L, 145L, 145L, 145L)), .Names = c("user_id", "obs_id",
"timestamp", "x", "y", "size", "pressure", "digit", "state",
"press_id", "n"), row.names = c(NA, -50L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), vars = c("user_id", "press_id"
), drop = TRUE, indices = list(0:16, 17:37, 38:49), group_sizes = c(17L,
21L, 12L), biggest_group_size = 21L, labels = structure(list(
user_id = c(1L, 1L, 1L), press_id = 1:3), row.names = c(NA,
-3L), class = "data.frame", vars = c("user_id", "press_id"), drop = TRUE, .Names = c("user_id",
"press_id")))
按照 Akrun 在此处给出的方法，我执行了以下操作：
# Group by user and press, then record each group's row count in column `c`.
df %>%
  group_by(user_id, press_id) %>%
  mutate(c = n())
但是对于第一组（user_id = 1 且 press_id = 1），我得到的是 124，这不是正确的数字，
应该是 17。
请告诉我哪里出了问题。我用的是 Akrun 的方法，在此之前数据并未分组。
答案 0（得分：0）
这段代码本身没有错误：按 user_id 和 press_id 分组后，新列 c 在第一组中的值为 17。请注意，数据中原有的列 n（第一组为 124）并不是由这段代码计算出来的。
# Same pipeline as in the question: per-(user_id, press_id) row count in `c`.
df %>%
  group_by(user_id, press_id) %>%
  mutate(c = n())
# # A tibble: 50 x 12
# # Groups: user_id, press_id [3]
# user_id obs_id timestamp x y size pressure digit state press_id n c
# <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int> <int>
# 1 1 1 1.36e14 866. 1143. 0.0275 1 2131165279 1 1 124 17
# 2 1 1 1.36e14 866. 1140. 0.0314 1 2131165279 0 1 124 17
# 3 1 1 1.36e14 866. 1134. 0.0314 1 2131165279 0 1 124 17
# 4 1 1 1.36e14 865. 1124. 0.0314 1 2131165279 0 1 124 17
# 5 1 1 1.36e14 861. 1075. 0.0314 1 2131165279 0 1 124 17
# 6 1 1 1.36e14 859. 1042. 0.0353 1 2131165279 0 1 124 17
# 7 1 1 1.36e14 860. 1014. 0.0353 1 2131165279 0 1 124 17
# 8 1 1 1.36e14 861. 980. 0.0392 1 2131165279 0 1 124 17
# 9 1 1 1.36e14 864. 952. 0.0392 1 2131165279 0 1 124 17
# 10 1 1 1.36e14 868. 923. 0.0431 1 2131165279 0 1 124 17