我正在尝试使用 grep() 函数,为两个群体(pop1 和 pop2)分别生成一个平均得分变量。我的代码都能正常运行,但 avgScore.pop1 和 avgScore.pop2 两个变量的值完全相同,我认为这与群体标识符位于变量名末尾有关。
以下是我正在使用的代码示例:
# Reproducible example: build a long table of scores and denominators, reshape
# it to one row per name/facility, then average the six measures separately
# for each population.

# Long-format identifiers: 4 names x 2 populations x 6 measures = 48 rows.
measure <- rep(paste0("m", 1:6), times = 8)
population <- rep(rep(c("pop1", "pop2"), each = 6), times = 4)
name <- rep(paste0("name", 1:4), each = 12)
facility <- rep(c("fac1", "fac2", "fac3"), times = c(24, 12, 12))

# The seed is reset to the same value before each draw, so both vectors are
# generated from the same random stream.
set.seed(12)
denominator <- runif(48, 10, 100)
set.seed(12)
score <- runif(48, 0, 1)

dat <- data.frame(name, facility, population, measure, denominator, score)

# Spread the measures into columns: denominator.m1, score.m1, ...
wide1 <- reshape(
  data = dat,
  idvar = c("name", "facility", "population"),
  timevar = "measure",
  direction = "wide"
)

# Spread the populations into columns: denominator.m1.pop1, score.m1.pop1, ...
wide2 <- reshape(
  data = wide1,
  idvar = c("name", "facility"),
  timevar = "population",
  direction = "wide"
)

# Row-wise average of the columns named "<prefix>.<anything>.<pop>".
#
# df:     data frame in the wide layout produced above.
# prefix: leading part of the column name ("score" or "denominator").
# pop:    trailing population label ("pop1" or "pop2").
# Returns one value per row of df.
#
# prefix and pop are inserted into a regular expression as-is. Missing values
# are skipped in the sum but still counted in the divisor.
avg_for_pop <- function(df, prefix, pop) {
  # grep() takes a single pattern; its third positional argument is
  # ignore.case, so the population suffix has to be part of the pattern itself.
  pattern <- paste0("^", prefix, "\\..*\\.", pop, "$")
  cols <- grep(pattern, names(df), value = TRUE)
  stopifnot(length(cols) > 0)
  rowSums(df[, cols, drop = FALSE], na.rm = TRUE) / length(cols)
}

wide2$avgScore.pop1 <- avg_for_pop(wide2, "score", "pop1")
wide2$avgScore.pop2 <- avg_for_pop(wide2, "score", "pop2")
wide2$avgDenom.pop1 <- avg_for_pop(wide2, "denominator", "pop1")
wide2$avgDenom.pop2 <- avg_for_pop(wide2, "denominator", "pop2")
如果有人能就如何按群体汇总所有指标的得分和分母提供思路,我将不胜感激!谢谢!
答案 0 :(得分:3)
你需要的是 paste0。grep 用于在字符向量中搜索正则表达式模式,而你想做的是把多个字符串拼接在一起。只需在代码中把 grep 替换为 paste0:
# Build the six score column names for each population explicitly and average
# them row-wise. names(wide2) must not be pasted in here: that would produce
# names such as "score.name.pop1", which do not exist in wide2.
wide2$avgScore.pop1 <- rowSums(wide2[, paste0("score.m", 1:6, ".pop1")],
                               na.rm = TRUE) / 6
wide2$avgScore.pop2 <- rowSums(wide2[, paste0("score.m", 1:6, ".pop2")],
                               na.rm = TRUE) / 6
如果你想找出所有以“score.”开头并以“.pop1”结尾的变量,这里可以使用 grep:
# Names of the columns shaped like "score.<measure>.pop1". The ^ and $ anchors
# keep the match from succeeding inside a longer name, and value = TRUE returns
# the column names themselves instead of their positions.
grep("^score\\.[^.]+\\.pop1$", colnames(wide2), value = TRUE)
它选出的列与下面这个表达式给出的列名大致相同:
paste0("score.m", 1:6, ".pop1")
答案 1 :(得分:2)
您是否在寻找 aggregate() 函数?
# Base R: sum score within every population/measure combination of dat.
> aggregate(score ~ population + measure, dat, sum)
population measure score
1 pop1 m1 1.357344
2 pop2 m1 2.062984
3 pop1 m2 2.310233
4 pop2 m2 1.845279
5 pop1 m3 2.096953
6 pop2 m3 1.968227
7 pop1 m4 1.288433
8 pop2 m4 1.705252
9 pop1 m5 1.654866
10 pop2 m5 1.504966
11 pop1 m6 1.774900
12 pop2 m6 2.510683
或者使用dplyr:
# Same totals with dplyr: group dat by population and measure, then sum score
# inside each group. The summary column is left unnamed, so it is labelled
# `sum(score)` in the printed result.
library(dplyr)
dat %>%
group_by(population, measure) %>%
summarize(sum(score))
# A tibble: 12 x 3
# Groups: population [?]
population measure `sum(score)`
<fctr> <fctr> <dbl>
1 pop1 m1 1.357344
2 pop1 m2 2.310233
3 pop1 m3 2.096953
4 pop1 m4 1.288433
5 pop1 m5 1.654866
6 pop1 m6 1.774900
7 pop2 m1 2.062984
8 pop2 m2 1.845279
9 pop2 m3 1.968227
10 pop2 m4 1.705252
11 pop2 m5 1.504966
12 pop2 m6 2.510683
答案 2 :(得分:0)
string like '12,34,46,767'
这应该能满足你的需求。它使用 grepl 来匹配所有以“.pop1”和“.pop2”结尾的列名,并分别返回一个逻辑向量,用来指示所需变量的列位置:
wide2$avgScore.pop1 <- rowSums(wide2[, grepl('.pop1', names(wide2))],na.rm=T)/ 6
wide2$avgScore.pop2 <- rowSums(wide2[, grepl('.pop2', names(wide2))], na.rm=T)/ 6
注意:模式 '.pop1' 同样会匹配 denominator 列;如果只想对 score 列求平均,应使用更严格的模式,例如 "^score\\..*\\.pop1$"。
不确定这是否正是你想要的,但如果只是为了得到这些数值,还有更简单的办法,即直接使用原始数据。