Question

示例data.frame：

# Example data with the columns ID, Day, Count and Count_group.
df <- read.table(
  header = TRUE,
  text = 'ID  Day Count   Count_group
            18  1933    6   15
            33  1933    6   15
            37  1933    6   15
            18  1933    6   15
            16  1933    6   15
            11  1933    6   15
            111 1932    5   9
            34  1932    5   9
            60  1932    5   9
            88  1932    5   9
            18  1932    5   9
            33  1931    3   4
            13  1931    3   4
            56  1931    3   4
            23  1930    1   1
            6   1800    6   12
            37  1800    6   12
            98  1800    6   12
            52  1800    6   12
            18  1800    6   12
            76  1800    6   12
            55  1799    4   6
            6   1799    4   6
            52  1799    4   6
            133 1799    4   6
            112 1798    2   2
            677 1798    2   2
            778 888     4   8
            111 888     4   8
            88  888     4   8
            10  888     4   8
            37  887     2   4
            26  887     2   4
            8   886     1   2
            56  885     1   1
            22  120     2   6
            34  120     2   6
            88  119     1   6
            99  118     2   5
            12  118     2   5
            90  117     1   3
            22  115     2   2
            99  115     2   2'
)

Count列显示每个Day的ID值总数，而Count_group列显示每个Day、Day - 1、Day - 2、Day - 3和Day - 4的ID值总和。

例如，1933的Count_group = 15，因为Count 6（1933）+ Count 5（1932）+ Count 3（1931）+ Count 1（1930）+ Count 0（1929）。

我需要做的是为每个Count_group创建重复的观察结果并将其添加进去，以便为每个Count_group展示其Day、Day - 1、Day - 2、Day - 3和Day - 4。

例如，Count_group = 15由Day 1933、1932、1931、1930（以及df中不存在的1929）的Count值组成。所以这五天都需要包含在Count_group = 15中。下一个是Count_group = 9，由1932、1931、1930、1929和1928组成；依此类推……

期望的输出：

Count_group

（请注意，为了使它们更清晰，每个5天的不同组用空行分隔）

我有多个不同的data.frame，它们分别按n天分组，因此我希望能针对每个data.frame分别调整代码（只需稍作修改）。

谢谢

Answer 1

我附上了一种相当机械的方法，但我相信这是一个很好的起点。我注意到在原始表中的条目

ID Day Count Count_group 18 1933 6 14

重复;为了清楚起见，我没有动过它。

方法的结构：

1. 阅读原始数据
2. 为每个Day生成数据框列表
3. 生成最终数据框，将第2步中的列表合并。

1。阅读原始数据

我们从

# Raw data used by this answer (columns ID, Day, Count, Count_group).
df <- read.table(
  header = TRUE,
  text = 'ID  Day Count   Count_group
                18  1933    6   14
                33  1933    6   14
                37  1933    6   14
                18  1933    6   14
                16  1933    6   14
                11  1933    6   14
                111 1932    5   9
                34  1932    5   9
                60  1932    5   9
                88  1932    5   9
                18  1932    5   9
                33  1931    3   4
                13  1931    3   4
                56  1931    3   4
                23  1930    1   1
                6   1800    6   12
                37  1800    6   12
                98  1800    6   12
                52  1800    6   12
                18  1800    6   12
                76  1800    6   12
                55  1799    4   6
                6   1799    4   6
                52  1799    4   6
                133 1799    4   6
                112 1798    2   2
                677 1798    2   2
                778 888     4   7
                111 888     4   7
                88  888     4   7
                10  888     4   7
                37  887     2   4
                26  887     2   4
                8   886     1   2
                56  885     1   1'
)

# Distinct "Day" values in ascending order (NA, if any, kept last).
ord_day <- unique(sort(df$Day, na.last = TRUE))
ord_day
 [1]  885  886  887  888 1798 1799 1800 1930 1931 1932 1933

2. 为每个`Day`生成数据框列表

对于ord_day中的每个元素，我们引入一个data.frame，作为名为df_new_aug的列表的元素。这些数据框通过for循环为ord_day中除ord_day[1]和ord_day[2]之外的所有值定义，这两个值将单独处理。

循环背后的理念：对于每个ord_day[i]（i > 2），我们检查ord_day[i-1]和ord_day[i-2]中的哪些天（或两者！）通过变量"Count"对ord_day[i]处的"Count_Group"值有贡献。

因此，我们在循环中引入if else语句。代码如下：

# Recursive generation of the list of data.frames (for days > 886)
#-----------------------------------------------------------------
df_new <- list()
df_new_aug <- list()

# we exclude cases i=1, 2: they are manually treated below
for ( i in 3: length(ord_day) ) {

  # is "Count_Group" for ord_day[i] equal to the sum of "Count" at ord_day[i-1] and ord_day[i-2]?
  if ( unique(df[df$Day == ord_day[i], "Count_group"]) ==
       unique(df[df$Day == ord_day[i], "Count"]) +
       unique(df[df$Day == ord_day[i-1], "Count"]) +
       unique(df[df$Day == ord_day[i-2], "Count"])
  ) {
    # we create columns ID | Day | Count
    df_new[[i]] <- data.frame(df[df$Day == ord_day[i] | df$Day == ord_day[i-1] | df$Day == ord_day[i-2], c("ID", "Day", "Count")])
    # we append the Count_Group of the Day in ord_day[i]
    df_new_aug[[i]] <- data.frame( df_new[[i]], Count_group = rep(unique(df[df$Day == ord_day[i], "Count_group"]), nrow(df_new[[i]]) ) )

  } else if (unique(df[df$Day == ord_day[i], "Count_group"]) ==
             unique(df[df$Day == ord_day[i], "Count"]) +
             unique(df[df$Day == ord_day[i-1], "Count"])
  ) #only "Count" at i and i-1 contribute to "Count_group" at i
  {
    df_new[[i]] <- data.frame(df[df$Day == ord_day[i] | df$Day == ord_day[i-1], c("ID", "Day", "Count")])
    # we append the Count_Group of the Day in ord_day[2]
    df_new_aug[[i]] <- data.frame(df_new[[i]], Count_group = rep(unique(df[df$Day == ord_day[i], "Count_group"]), nrow(df_new[[i]]) ) )

  } else #only "Count" at i contributes to "Count_group" at i
    df_new[[i]] <- data.frame(df[df$Day == ord_day[i], c("ID", "Day", "Count")])
  # we append the Count_Group of the Day in ord_day[i]
  df_new_aug[[i]] <- data.frame(df_new[[i]], Count_group = rep(unique(df[df$Day == ord_day[i], "Count_group"]), nrow(df_new[[i]]) ) )

  #closing the for loop
}

# for ord_day[2] = "886" (both "Count" at i =2 and i = 1 contribute to "Count_group" at i=2)
#-------------------------------------------------------------------------------------
df_new[[2]] <- data.frame(df[df$Day == ord_day[2] | df$Day == ord_day[1], c("ID", "Day", "Count")])
# we append the Count_Group of the Day in ord_day[2]
df_new_aug[[2]] <- data.frame(df_new[[2]], Count_group = rep(unique(df[df$Day == ord_day[2], "Count_group"]), nrow(df_new[[2]]) ) )

# for ord_day[1] = "885" (only "count" at i = 1 contributes to "Count_group" at i =1)
#------------------------------------------------------------------------------------
df_new[[1]] <- data.frame(df[df$Day == ord_day[1], c("ID", "Day", "Count")])
# we append the Count_Group of the Day in ord_day[i]
df_new_aug[[1]] <- data.frame(df_new[[1]], Count_group = rep(unique(df[df$Day == ord_day[1], "Count_group"]), nrow(df_new[[1]]) ) )

# produced list
df_new_aug

3. 生成最终数据框，将第2步中的列表合并。

我们通过一个丑陋的循环来合并df_new_aug，但其他解决方案（例如Reduce()和merge()）也可以：

# merging the list (mechanically): final result
df_result <- df_new_aug[[1]]
for (i in 1:10){
  df_result <- rbind(df_result, df_new_aug[[i+1]])
}

最终得到df_result，分析到此结束。

Answer 2

我之前回答的一般化版本......

# First add grouping variables.
# Both groupings are derived from the difference between consecutive rows'
# Day values; the "big group" test only detects downward jumps, so the rows
# are expected to be ordered by Day, descending (as in the example data).
# diff() is used instead of sapply(2:nrow(df), ...) so that a single-row df
# works (2:1 would count backwards) and the result type is stable.
days <- 5 # number of days covered by each Count_group window

# Counter that increases every time Day changes: one id per individual day
df$smalldaygroup <- c(0, cumsum(diff(df$Day) != 0))

# Counter that increases every time Day drops by `days` or more:
# one id per block of linked days
df$bigdaygroup <- c(0, cumsum(diff(df$Day) < 1 - days))

# Duplicate days in each big group.
# For every block of linked days, build a "pattern" table pairing each base
# day with the `days` days of its window (base, base - 1, ...), keep only the
# pairs where both days exist in the data, and pick the matching per-day data
# frames. Returns, per big group, list(df = <list of per-day data frames>,
# pat = <pattern table>); the pattern is kept because its offsets are needed
# by the following step.
df2 <- lapply(split(df, df$bigdaygroup), function(x) {
  dayvec <- max(x$Day):min(x$Day)      # possible days in range, newest first
  daylog <- dayvec[dayvec %in% x$Day]  # actual days in range

  # `days` rows per possible base day
  pattern <- data.frame(base = rep(dayvec, each = days))

  # Day to repeat for each row: base, base - 1, ..., base - days + 1.
  # Each base occupies exactly `days` consecutive rows, so the position
  # inside the run can be generated directly instead of counting matches.
  pattern$rep <- pattern$base - rep(seq_len(days) - 1, times = length(dayvec))

  # Position of the repeated day relative to its base day among the days
  # actually present (used later)
  pattern$offset <- match(pattern$rep, daylog) - match(pattern$base, daylog)

  # Remove invalid elements: both the base day and the repeated day must exist
  pattern <- pattern[(pattern$base %in% x$Day) & (pattern$rep %in% x$Day), ]

  list(
    df = split(x, x$smalldaygroup)[match(pattern$rep, daylog)],
    pat = pattern
  )
})

# Change the Count_group to the previous value in added entries.
# Each element L of df2 is list(df = <list of per-day data frames>,
# pat = <pattern table with an `offset` column>). Entry i takes the first
# Count_group value of the entry located offset[i] positions earlier in L$df.
df2 <- lapply(df2, function(L) {
  offset <- L$pat$offset # pointer to the day to copy Count_group from
  lapply(seq_along(L$df), function(i) {
    x <- L$df[[i]]
    x$Count_group <- L$df[[i - offset[i]]]$Count_group[1]
    x
  })
})

# Flatten the nested list one level and bind all pieces back together
df2 <- do.call(rbind, unlist(df2, recursive = FALSE))

# Remove the helper grouping variables by name rather than by position,
# so the right columns are dropped even if df has additional columns
df2[c("smalldaygroup", "bigdaygroup")] <- NULL

head(df2, 30) # ignore rownames!

       ID  Day Count Count_group
01.1   18 1933     6          15
01.2   33 1933     6          15
01.3   37 1933     6          15
01.4   18 1933     6          15
01.5   16 1933     6          15
01.6   11 1933     6          15
02.7  111 1932     5          15
02.8   34 1932     5          15
02.9   60 1932     5          15
02.10  88 1932     5          15
02.11  18 1932     5          15
03.12  33 1931     3          15
03.13  13 1931     3          15
03.14  56 1931     3          15
04     23 1930     1          15
05.7  111 1932     5           9
05.8   34 1932     5           9
05.9   60 1932     5           9
05.10  88 1932     5           9
05.11  18 1932     5           9
06.12  33 1931     3           9
06.13  13 1931     3           9
06.14  56 1931     3           9
07     23 1930     1           9
08.12  33 1931     3           4
08.13  13 1931     3           4
08.14  56 1931     3           4
09     23 1930     1           4
010    23 1930     1           1
11.16   6 1800     6          12

通过基于组条件创建重复项来扩展data.frame（3）

2 个答案:

1。阅读原始数据

2.为每个`Day`

3。生成最终数据框，将列表折叠为2。

通过基于组条件创建重复项来扩展data.frame（3）

2 个答案:

1。阅读原始数据

2.为每个Day

3。生成最终数据框，将列表折叠为2。

2.为每个`Day`