我想使用max和min来查看较大组中的哪些子组明显小于该特定较大组中的其余子组。
这样描述太啰嗦了,所以举个例子:
# Example data: one row per observation, giving its group (grp), its subgroup
# (subgrp) and the observed value (num). Every data line carries exactly three
# fields so that it lines up with the three-column header; a stray trailing
# comma would add an empty fourth field and corrupt the parsed table.
x <- read.csv(text = "grp,subgrp,num
1,1,2
1,1,3
1,2,4
1,2,6
1,2,7
2,3,7
2,4,6
2,4,7
2,5,7")
在组1中,子组1明显小于子组2,因为max(2,3) < min(4,6,7)。我可以轻松地得到一个组的最大值/最小值,以及一个子组的最大值/最小值;但如果能像dplyr::mutate那样,为每一行得到"所在组中排除该行所属子组之后"的最大值/最小值,就可以很容易地把子组的最大值与"组内排除该子组后的最小值"进行比较,就像上面的例子一样。我不知道如何得到的关键变量是mingrpexclsubgrp,完整的结果如下面的数据所示。maxsubgrp和isless的计算方法见数据之后的代码。基于base R的解决方案可以通过合适的合并(merge)来实现,但我希望有一个整洁(tidy)和/或矢量化的版本。
x <- read.csv(text = 'grp,subgrp,num,maxsubgrp,mingrpexclsubgrp,isless
1,1,2,3,4,T
1,1,3,3,4,T
1,2,4,7,2,F
1,2,6,7,2,F
1,2,7,7,2,F
2,3,7,7,6,F
2,4,6,7,7,F
2,4,7,7,7,F
2,5,7,7,6,F')
# Attach each subgroup's largest `num` to every row of that subgroup.
x %>%
  group_by(subgrp) %>%
  mutate(
    maxsubgrp = max(num)
  )
# Flag rows whose subgroup maximum lies below the minimum of the rest of
# their group (both columns must already be present in `x`).
x %>%
  mutate(
    isless = maxsubgrp < mingrpexclsubgrp
  )
# Base R walk-through: for every (group, subgroup) pair, print the pair and
# then the smallest `num` among the group's rows outside that subgroup.
for (g in unique(x$grp)) {
  in_grp <- x$grp == g
  for (s in unique(x$subgrp[in_grp])) {
    print(paste(g, s))
    # rows of the same group that belong to a different subgroup
    print(min(x$num[in_grp & x$subgrp != s]))
  }
}
答案 0 :(得分:2)
下面是使用data.table的解法:
library(data.table)
# data.table solution. setDT() turns `x` into a data.table; the first `[`
# computes, per grp, one row per subgrp holding the subgroup maximum and the
# minimum of the group's other subgroups. That summary is then joined back onto
# the rows of `x` by (grp, subgrp), and `isless` is added with `:=`.
# The trailing `[]` makes the result print after the `:=` assignment.
setDT(x)[, {
# keep the whole group's subgrp/num vectors, because the inner `by = subgrp`
# call below only sees one subgroup at a time
sg <- .SD[['subgrp']]
nm <- .SD[['num']]
# per subgroup: max(num) of the subgroup, and min of the group's num values
# whose subgroup differs from the current one; columns 2:3 are then renamed
setnames(.SD[, .(max(num), min(nm[sg != subgrp])), subgrp],
2:3, c('maxsubgrp', 'mingrpexclsubgrp'))
}, by = grp
][x, on = .(grp, subgrp)
][ , isless := maxsubgrp < mingrpexclsubgrp][]
# grp subgrp maxsubgrp mingrpexclsubgrp num isless
#1: 1 1 3 4 2 TRUE
#2: 1 1 3 4 3 TRUE
#3: 1 2 7 2 4 FALSE
#4: 1 2 7 2 6 FALSE
#5: 1 2 7 2 7 FALSE
#6: 2 3 7 6 7 FALSE
#7: 2 4 7 7 6 FALSE
#8: 2 4 7 7 7 FALSE
#9: 2 5 7 6 7 FALSE
或使用tidyverse
library(tidyverse)
# tidyverse solution: split the table into one piece per grp, process each
# piece with map_df(), and row-bind the pieces back together.
x %>%
# NOTE(review): only `.$grp` appears to act as the split factor; the extra
# positional `.$subgrp` does not seem to influence the split -- confirm.
split(.$grp, .$subgrp, drop = TRUE) %>%
map_df(~
# `.x` is the data for one grp
.x %>%
group_by(subgrp) %>%
mutate(maxsubgrp = max(num),
# here `.` is the piped-in table for the whole grp, while the bare `subgrp`
# is the current subgroup's slice, so this takes the minimum over the rows
# of the grp that are NOT in the current subgroup
mingrpexclsubgrp = min(.$num[!.$subgrp %in% subgrp]),
isless = maxsubgrp < mingrpexclsubgrp))
# A tibble: 9 x 6
# Groups: subgrp [5]
# grp subgrp num maxsubgrp mingrpexclsubgrp isless
# <int> <int> <int> <dbl> <int> <lgl>
#1 1 1 2 3 4 TRUE
#2 1 1 3 3 4 TRUE
#3 1 2 4 7 2 FALSE
#4 1 2 6 7 2 FALSE
#5 1 2 7 7 2 FALSE
#6 2 3 7 7 6 FALSE
#7 2 4 6 7 7 FALSE
#8 2 4 7 7 7 FALSE
#9 2 5 7 7 6 FALSE
或在创建list
列后使用unnest
# List-column solution: collapse every (grp, subgrp) into one row whose `num`
# is a list element, compute the minimum over the other rows of the same grp,
# then unnest back to one row per observation.
x %>%
group_by(grp, subgrp) %>%
# add the subgroup maximum as an extra grouping column so it survives summarise()
# NOTE(review): newer dplyr releases spell this argument `.add` -- confirm
# against the installed version.
group_by(maxsubgrp = max(num), add = TRUE) %>%
summarise(num = list(num)) %>%
group_by(grp) %>%
mutate(mingrpexclsubgrp = map_int(row_number(), ~
# drop the current row's list element, flatten the rest, take the minimum
# NOTE(review): for a grp with a single subgrp nothing is left after
# `num[-.x]` -- check how min()/map_int() behave in that case.
num[-.x] %>%
unlist %>%
min)) %>%
# NOTE(review): newer tidyr releases expect the columns to unnest to be named
# explicitly (`cols = `) -- confirm against the installed version.
unnest %>%
mutate(isless = maxsubgrp < mingrpexclsubgrp)
# A tibble: 9 x 6
# Groups: grp [2]
# grp subgrp maxsubgrp mingrpexclsubgrp num isless
# <int> <int> <dbl> <int> <int> <lgl>
#1 1 1 3 4 2 TRUE
#2 1 1 3 4 3 TRUE
#3 1 2 7 2 4 FALSE
#4 1 2 7 2 6 FALSE
#5 1 2 7 2 7 FALSE
#6 2 3 7 6 7 FALSE
#7 2 4 7 7 6 FALSE
#8 2 4 7 7 7 FALSE
#9 2 5 7 6 7 FALSE
答案 1 :(得分:1)
使用setdiff
# setdiff-based solution, applied to the example data `x`.
#
# Step 1: per (grp, subgrp), store the subgroup maximum on every row.
# Step 2: per grp, for every row, take the minimum `num` over the rows of the
#         group that belong to a different subgroup.
#
# setdiff() is applied to row positions rather than to the `num` values:
# removing by value would also discard values that merely coincide with a
# value of the current subgroup, even when they belong to another subgroup.
x %>%
  group_by(grp, subgrp) %>%
  mutate(maxsubgrp = max(num)) %>%
  group_by(grp) %>%
  mutate(
    mingrpexclsubgrp = map_dbl(subgrp, function(own_subgrp) {
      # positions (within the group) of rows outside the current subgroup
      other_rows <- setdiff(seq_along(num), which(subgrp == own_subgrp))
      # a group made of a single subgroup has no "other" rows; fall back to
      # the group's smallest subgroup maximum, as the original answer did
      if (length(other_rows) > 0) min(num[other_rows]) else min(maxsubgrp)
    }),
    isless = maxsubgrp < mingrpexclsubgrp
  )
## A tibble: 9 x 6
## Groups: grp [2]
# grp subgrp num maxsubgrp mingrpexclsubgrp isless
# <int> <int> <int> <dbl> <dbl> <lgl>
#1 1 1 2 3. 4. TRUE
#2 1 1 3 3. 4. TRUE
#3 1 2 4 7. 2. FALSE
#4 1 2 6 7. 2. FALSE
#5 1 2 7 7. 2. FALSE
#6 2 3 7 7. 6. FALSE
#7 2 4 6 7. 7. FALSE
#8 2 4 7 7. 7. FALSE
#9 2 5 7 7. 6. FALSE
答案 2 :(得分:1)
//h3[contains(., 'Browse Publications') and contains(., 'filter')]
要得到与您的表格相同的结果,可以这样做:
// NOTE(review): this C#/Selenium-style fragment looks unrelated to the R question above -- confirm it belongs here.
// XPath for <h3> elements whose text contains both 'Browse Publications' and 'filter'.
String category = "//h3[contains(., 'Browse Publications') and contains(., 'filter')]";
// Take the first matching element whose Displayed property is true (FirstOrDefault yields the default value when none match).
WebElement settingSection = FindElements(By.XPath(category)).FirstOrDefault(x => x.Displayed);
答案 3 :(得分:0)
我想明白了。正确的做法是按每一行进行汇总(summarize),而不是使用mutate。
# Per-row approach: make every row its own group (through a row-number key),
# summarise to one value per row, then add the per-subgroup columns.
x %>%
  # the key variable and generalizable answer
  # row_number() rather than 1:n(): 1:n() would yield c(1, 0) on an empty table
  group_by(rownum = row_number(), grp, subgrp, num) %>%
  summarize(
    # `.` is the whole piped-in table, while the bare `grp`/`subgrp` are the
    # current row's values: keep rows of the same grp in a different subgrp
    mingrpexclsubgrp = min(c(.$num)[.$grp == grp & .$subgrp != subgrp])
  ) %>%
  # the rest of the variables
  group_by(subgrp) %>%
  mutate(
    maxsubgrp = max(num),
    isless = maxsubgrp < mingrpexclsubgrp
  )
我相信它可以进一步简化:
# Simplified variant: a single grouped mutate() per subgroup.
x %>%
  group_by(subgrp) %>%
  mutate(
    # `.` is the whole piped-in table, so indexing it side-steps the current
    # grouping; the rows are selected by an explicit comparison instead.
    # grp[1] / subgrp[1] reduce the group's (constant) values to length one,
    # which avoids a warning about comparing against a longer vector.
    mingrpexclsubgrp = min(
      .[["num"]][.[["grp"]] == grp[1] & .[["subgrp"]] != subgrp[1]]
    ),
    maxsubgrp = max(num),
    isless = maxsubgrp < mingrpexclsubgrp
  )