我的样品。
# Example data (dput output): a 52-row data.frame with three columns:
#   add   - factor with levels "x" and "y"
#   x1    - integer values (this version contains zeros, no NA)
#   group - factor with levels "female" and "male"
data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x",
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 0L, 0L, 0L, 53L,
10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L, 34L,
88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L, 41L,
23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L, 12L,
58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("female", "male"), class = "factor")), .Names = c("add",
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))
此数据中有一个分组变量 group(性别:male 和 female)。我需要对位于 female 之前的所有 male 计算均值和 25% 分位数。female 之后的 male 不参与这一计算,female 本身也不做任何处理。
该分析按 add 列的分组(x 和 y)分别进行。
对于 female 之后的 male,如果其 x1 大于 25% 分位数(由 female 之前的 male 计算得出),则该值必须替换为 female 之前的 male 的均值;female 类别不做任何改动。
AntoniosK的解决方案非常好
library(tidyverse)
library(data.table)
# Per `add` level: number the runs of consecutive identical `group` values,
# compute the mean and 25% quantile of `x1` over the males in run 1, then
# overwrite `x1` for males in run 3 that exceed that quantile with the mean.
# NOTE(review): assumes each `add` series is ordered male -> female -> male,
# so run 1 / run 3 are the males before / after the females -- confirm.
data %>%
  group_by(add) %>%
  mutate(run_id = rleid(group)) %>%
  group_by(add, group, run_id) %>%
  mutate(
    MEAN = mean(x1[group == "male" & run_id == 1]),
    Q25 = quantile(x1[group == "male" & run_id == 1], 0.25)
  ) %>%
  # Regroup by `add` only, so the run-1 statistics are visible to run-3 rows.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & run_id == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  ungroup() %>%
  select(-run_id) %>%
  data.frame()
但现在我想把 x1 中的 0 值替换为 NA。
# Mark every zero in `x1` as missing.
data$x1 <- replace(data$x1, data$x1 == 0, NA)
之后,当我运行该脚本时,出现如下错误:
Error in mutate_impl(.data, dots) : Evaluation error: missing values and NaN's not allowed if 'na.rm' is FALSE.
怎样才能让脚本跳过 NA,只对整数值进行计算?
# Second example data (dput output): a 52-row data.frame in which
#   add   - is the integer 11202 on every row
#   x1    - is an integer column that is mostly NA
#   group - is a factor with levels "female" and "male"
data=structure(list(add = c(11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L,
11202L, 11202L, 11202L, 11202L, 11202L, 11202L, 11202L), x1 = c(NA,
2L, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, 1L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 3L, NA, NA, NA, NA, 1L, 1L, NA, NA,
NA, NA, NA), group = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("female",
"male"), class = "factor")), .Names = c("add", "x1", "group"), class = "data.frame", row.names = c(NA,
-52L))
library(tidyverse)
library(data.table)
# Per `add` level: number the runs of consecutive identical `group` values,
# compute the mean and 25% quantile of `x1` over the males in run 1, then
# overwrite `x1` for males in run 3 that exceed that quantile with the mean.
# NOTE(review): assumes each `add` series is ordered male -> female -> male,
# so run 1 / run 3 are the males before / after the females -- confirm.
data %>%
  group_by(add) %>%
  mutate(group2 = rleid(group)) %>%
  group_by(add, group, group2) %>%
  mutate(
    # na.rm = TRUE is required here: `x1` contains NA, and quantile() stops
    # with "missing values and NaN's not allowed if 'na.rm' is FALSE", while
    # mean() would return NA and leave no usable replacement value.
    MEAN = mean(x1[group == "male" & group2 == 1], na.rm = TRUE),
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25, na.rm = TRUE)
  ) %>%
  # Regroup by `add` only, so the run-1 statistics are visible to run-3 rows.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    ),
    # Zeros are turned into NA only after the statistics were computed.
    x1 = ifelse(x1 == 0, NA, x1)
  ) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()
代码结果
add x1 group MEAN Q25
x 14.00000 male 23.72727 5.0
x 15.00000 male 23.72727 5.0
x 36.00000 male 23.72727 5.0
x 0.00000 male 23.72727 5.0
x 0.00000 male 23.72727 5.0
x 0.00000 male 23.72727 5.0
x 53.00000 male 23.72727 5.0
x 10.00000 male 23.72727 5.0
x 39.00000 male 23.72727 5.0
x 27.00000 male 23.72727 5.0
x 67.00000 male 23.72727 5.0
x 25.00000 female NaN NA
x 19.00000 female NaN NA
x 49.00000 female NaN NA
x 53.00000 female NaN NA
x 64.00000 female NaN NA
x 61.00000 female NaN NA
x 12.00000 female NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
x 23.72727 male NaN NA
之后
add x1 group
x 94.90 male
女性之后的前4位男性的总和= 94.90
答案 0 :(得分:1)
我添加了一段代码来解决您的问题,并简要说明了错误。
更新代码
# Example data (dput output): a 52-row data.frame with three columns:
#   add   - factor with levels "x" and "y"
#   x1    - integer values (this version contains zeros, no NA)
#   group - factor with levels "female" and "male"
data=structure(list(add = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("x",
"y"), class = "factor"), x1 = c(14L, 15L, 36L, 0L, 0L, 0L, 53L,
10L, 39L, 27L, 67L, 25L, 19L, 49L, 53L, 64L, 61L, 12L, 75L, 34L,
88L, 43L, 85L, 93L, 44L, 31L, 37L, 90L, 66L, 39L, 59L, 96L, 41L,
23L, 20L, 26L, 69L, 28L, 35L, 96L, 87L, 82L, 70L, 68L, 26L, 12L,
58L, 18L, 76L, 93L, 3L, 31L), group = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("female", "male"), class = "factor")), .Names = c("add",
"x1", "group"), class = "data.frame", row.names = c(NA, -52L))
library(tidyverse)
library(data.table)
# Same replacement pipeline, with the zero -> NA conversion done as the very
# last step so that the zeros still count towards the mean and the quantile.
# NOTE(review): assumes each `add` series is ordered male -> female -> male,
# so run 1 / run 3 are the males before / after the females -- confirm.
data %>%
  group_by(add) %>%
  mutate(run_id = rleid(group)) %>%
  group_by(add, group, run_id) %>%
  mutate(
    MEAN = mean(x1[group == "male" & run_id == 1]),
    Q25 = quantile(x1[group == "male" & run_id == 1], 0.25)
  ) %>%
  # Regroup by `add` only, so the run-1 statistics are visible to run-3 rows.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & run_id == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  # Newly added step: turn the remaining zeros into NA.
  mutate(x1 = ifelse(x1 == 0, NA, x1)) %>%
  ungroup() %>%
  select(-run_id) %>%
  data.frame()
错误说明
您需要先运行代码的前半部分(计算均值和分位数),最后再更新 x1 列。之所以会出现此错误,是因为 NA 值破坏了所需的 mean 和 quantile 计算。
另一种方法是一开始就更新 x1,然后在计算时使用 na.rm = TRUE。
对于您提到的新案例(x1 一开始就包含 NA 值),请尝试以下操作:
# NA-tolerant variant for data whose `x1` already contains NA: the mean and
# the 25% quantile of the run-1 males are computed with na.rm = TRUE.
data %>%
  group_by(add) %>%
  mutate(group2 = rleid(group)) %>%
  group_by(add, group, group2) %>%
  mutate(
    MEAN = mean(x1[group == "male" & group2 == 1], na.rm = TRUE), # na.rm added
    Q25 = quantile(x1[group == "male" & group2 == 1], 0.25, na.rm = TRUE) # na.rm added
  ) %>%
  # Regroup by `add` only, so the run-1 statistics are visible to run-3 rows.
  group_by(add) %>%
  mutate(
    x1 = ifelse(
      group == "male" & group2 == 3 & x1 > unique(Q25[!is.na(Q25)]),
      unique(MEAN[!is.na(MEAN)]),
      x1
    )
  ) %>%
  ungroup() %>%
  select(-group2) %>%
  data.frame()
对于您提到的新案例(编辑2),首先将之前代码的输出保存为
:
data2
然后运行:
data2 = data %>% ...