Question

假设我们有，

library(data.table)
# Example data: four observations, each carrying a weight `wt` between 0 and 1.
dt <- data.table(
  id = 1:4,
  x1 = 10:13,
  x2 = 21:24,
  wt = c(1, 0, 0.5, 0.7)
)

返回，

   id x1 x2  wt
1:  1 10 21 1.0
2:  2 11 22 0.0
3:  3 12 23 0.5
4:  4 13 24 0.7

我想在以下条件下复制观察结果：

如果wt为0或1，我们分别将flag指定为1和0
如果0 < wt < 1，我们将flag指定为0。此外，我们使用wt = 1-wt复制此观察值，并将flag指定为1。

我期望得到的结果是

   id x1 x2  wt flag
1:  1 10 21 1.0    0
2:  2 11 22 0.0    1
3:  3 12 23 0.5    0
4:  3 12 23 0.5    1
5:  4 13 24 0.7    0
6:  4 13 24 0.3    1

我尝试过我的代码

# flag: 1 for zero-weight rows, 0 for everything else (wt == 1 and 0 < wt < 1).
dt[, flag := ifelse(wt == 0, 1, 0)]
# freq: number of copies to emit per row; fractional weights are emitted twice.
dt[, freq := ifelse(wt > 0 & wt < 1, 2, 1)]
# Repeat each row `freq` times. seq_len(.N) is used instead of 1:.N so that an
# empty table yields no row indices rather than c(1, 0).
dtr <- dt[rep(seq_len(.N), freq)]
# Number the copies within each id (1 = original row, 2 = duplicate).
dtr[, Indx := seq_len(.N), by = id]
# The duplicate carries the complementary weight and flag 1.
dtr[freq == 2 & Indx == 2, wt := 1 - wt]
dtr[Indx == 2, flag := 1]
# Drop the helper columns.
dtr[, `:=`(freq = NULL, Indx = NULL)]

但是，我觉得效率不高。

你有什么建议吗？

Answer 1

以下是使用数据框的方法：

# Base-R (data.frame) version of the example data.
dt <- data.frame(id = 1:4, x1 = 10:13, x2 = 21:24, wt = c(1, 0, 0.5, 0.7))

# Create the flag column: ceiling() maps every wt in (0, 1] to 1 and keeps 0
# at 0, so flag is 1 only where wt == 0.
dt$flag <- 1 - ceiling(dt$wt)

# Create a new data frame with the rows that fulfil condition 2 (0 < wt < 1).
# The element-wise `&` is required to build a row mask; the scalar `&&` errors
# on inputs longer than one element (R >= 4.3) and previously looked only at
# the first element, which selected no rows at all.
dt2 <- dt[dt$wt > 0 & dt$wt < 1, ]
dt2$wt <- 1 - dt2$wt
# rep() keeps the assignment valid even when no row matched (0-row dt2).
dt2$flag <- rep(1, nrow(dt2))

# rbind it to the original data frame and reorder by id; order() keeps ties in
# their original order, so each original row stays ahead of its complement.
dt <- rbind(dt, dt2)
dt <- dt[order(dt$id), ]

结果：

   id x1 x2  wt flag
1   1 10 21 1.0    0
2   2 11 22 0.0    1
3   3 12 23 0.5    0
31  3 12 23 0.5    1
4   4 13 24 0.7    0
41  4 13 24 0.3    1

Answer 2

我们可以调整一些步骤使代码更紧凑：去掉ifelse，把逻辑值转换为0/1后直接赋值；复制行时不再创建辅助列；然后取得每个id中第二行的索引（'i1'），并据此给'flag'和'wt'赋值。

# Flag zero-weight rows by reference; unary + turns the logical into 0/1.
dt[, flag := +(wt == 0)]
# Emit rows with 0 < wt < 1 twice and every other row once. seq_len(.N) is
# used instead of 1:.N so that an empty table yields no row indices.
dt1 <- dt[rep(seq_len(.N), (wt > 0 & wt < 1) + 1L)]
# Row numbers (in dt1) of the second row of each id, i.e. the duplicates.
i1 <- dt1[, .I[seq_len(.N) == 2L], by = id]$V1
# Give each duplicate the complementary weight and flag 1, then print.
dt1[i1, c("flag", "wt") := .(1, 1 - wt)][]
#    id x1 x2  wt flag
#1:  1 10 21 1.0    0
#2:  2 11 22 0.0    1
#3:  3 12 23 0.5    0
#4:  3 12 23 0.5    1
#5:  4 13 24 0.7    0
#6:  4 13 24 0.3    1

Answer 3

tidyverse方式：

library(dplyr)

# Duplicate every row whose weight lies strictly between 0 and 1, then turn the
# first copy of each duplicated row into the complementary observation
# (wt = 1 - wt, flag = 1). Rows with wt equal to 0 or 1 are left untouched.
dt2 <- dt %>%
  # flag is 1 only for zero-weight rows; wt == 1 and fractional weights get 0
  mutate(flag = if_else(wt == 0, 1, 0)) %>%
  # flag2 is the number of copies to emit for each row
  mutate(flag2 = if_else(wt > 0 & wt < 1, 2L, 1L)) %>%
  slice(rep(seq_len(n()), flag2)) %>%
  group_by(id) %>%
  # flag2 == 2L restricts the rewrite to rows that were actually duplicated;
  # without it the single rows with wt 0 or 1 would be altered as well.
  # NOTE(review): assumes id is unique per original row, as in the example.
  mutate(is_complement = flag2 == 2L & row_number() == 1) %>%
  mutate(
    wt = if_else(is_complement, 1 - wt, wt),
    flag = if_else(is_complement, 1, flag)
  ) %>%
  ungroup() %>%
  select(id, x1, x2, wt, flag)

这给出了

#Source: local data frame [6 x 5]
#
#     id    x1    x2    wt  flag
#  <int> <int> <int> <dbl> <dbl>
#1     1    10    21   1.0     0
#2     2    11    22   0.0     1
#3     3    12    23   0.5     1
#4     3    12    23   0.5     0
#5     4    13    24   0.3     1
#6     4    13    24   0.7     0

P.S. 修改组内的第一行还是最后一行，我认为并不重要，所以我选择了row_number() == 1

如何根据重量复制观察

3 个答案: