R 组合两个 For 循环(嵌套 For 循环)

时间:2021-07-04 11:03:08

标签: r for-loop nested-for-loop

我一直在尝试将两个 For 循环合并为一个循环。

循环 1:

    Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)
length(Unique.Order.Comment)

# Split Rebuilt.Data by order comment: for every distinct comment, create a
# variable named after that comment (e.g. `BR`) holding the matching rows.
# NOTE(review): the distinct values come from df2 but the rows are taken from
# Rebuilt.Data -- confirm both hold the same Rebuilt.Order.Comment values.
#
# seq_along() is used instead of 1:length(): with an empty vector, 1:0 would
# iterate over c(1, 0) and assign() would fail on a zero-length name.
for (i in seq_along(Unique.Order.Comment)) {
  # Rows of Rebuilt.Data whose comment equals the i-th distinct comment
  a <- subset.data.frame(
    Rebuilt.Data,
    Rebuilt.Order.Comment == Unique.Order.Comment[i]
  )
  assign(Unique.Order.Comment[i], a)
}

循环 2:

    # Rolling 12-row total of BR$Number.Cases.Authorised.
# Rows 1-11 do not have 12 rows of history yet, so their total stays 0.
c <- rep(0, nrow(BR))
# seq_len() instead of 1:nrow(): an empty BR must not run the body at all
# (1:0 would iterate over c(1, 0) and print two spurious zeros).
for (ii in seq_len(nrow(BR))) {
  if (ii < 12) {
    print(0)
  } else {
    # First row of the 12-row window that ends at row ii
    a <- ii - 11
    # print() returns its argument, so the sum is both shown and stored
    c[ii] <- print(sum(BR$Number.Cases.Authorised[a:ii]))
  }
}

# One-column data frame named n.Seen.Cum; naming the column directly gives
# the same result as data.frame(c) followed by rename(n.Seen.Cum = c).
c <- data.frame(n.Seen.Cum = c)

#View(c)

# Append the rolling total as a new column of BR
BR <- cbind(BR, c)

循环 2 中的 BR 必须替换为 Unique.Order.Comment[i]

我相信/希望它看起来应该如下所示。 但我收到错误消息 Error in rep(0, nrow(Unique.Order.Comment[i])) : invalid 'times' argument

(我认为它应该是什么样子)

    # Attempted merge of loop 1 and loop 2. This is the version that raises
    # "invalid 'times' argument"; see the NOTE comments below for the cause.
    Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)
length(Unique.Order.Comment)

# Outer loop: one iteration per distinct order comment
for (i in 1:length(Unique.Order.Comment)) {
  #a <- i-11
  #c[i] <-  print(sum(n.Cases.per.month$nCases[a:i]))
  # Rows of Rebuilt.Data for the i-th comment, stored in a variable named
  # after the comment
  a <- subset.data.frame(Rebuilt.Data, Rebuilt.Order.Comment == Unique.Order.Comment[i])  
  assign(Unique.Order.Comment[i],a)

  
  
  # Inner loop: rolling 12-row sum
  # NOTE: Unique.Order.Comment[i] is a character string such as "CN", not the
  # data frame created by assign() above. nrow() of a character vector is
  # NULL, so rep(0, NULL) fails with "invalid 'times' argument".
  c <- rep(0, nrow(Unique.Order.Comment[i]))
  for (ii in 1:nrow(Unique.Order.Comment[i])) {
    if (ii < 12){
      print(0)
    }else { 
      a <- ii-11
      # NOTE(review): `$` is applied to a character string here as well; the
      # subset data frame (the first `a`, which this line's `a` overwrites)
      # is presumably what was intended.
      c[ii] <-  print(sum(Unique.Order.Comment[i]$Number.Cases.Authorised[a:ii]))
    }
    
  } 
  
  
  # Turn the totals into a one-column data frame named n.Seen.Cum
  c <- data.frame(c)
  c <- c %>% 
    rename(
      n.Seen.Cum = c
    )
  
  #View(c)
  
  # NOTE(review): this writes into an element of the character vector rather
  # than into the per-comment data frame -- presumably not what was intended.
  Unique.Order.Comment[i] <- cbind(Unique.Order.Comment[i],c)
}

编辑示例数据: dput(Unique.Order.Comment)

c("CN", "DM", "DR", "FF", "PG", "HN", "SK", "GI", "GYN", "BR", 
"UR", "LYMPH", "HPB", "BST", "ENDOC", "PAEDGI", "CT", "PERI", 
"NEURO", "MOHS", "ICC", "RE", "PAED", "MN", "EMR", "PR", "LBX", 
"HAEM", "CTT", "UGI", "NEUR", "URGI", "GYNAE")

dput(head(Rebuilt.Data))

structure(list(Rebuilt.Order.Comment = c("BR", "BR", "BR", "BR", 
"BR", "BR"), Period.Received = c("2019-01", "2019-02", "2019-03", 
"2019-04", "2019-05", "2019-06"), Number.Cases.Received = c(838L, 
730L, 778L, 832L, 574L, 626L), Number.Cases.Authorised = c(680L, 
587L, 896L, 715L, 761L, 554L), Number.Cases.Authorised.Less7Days = c(550L, 
343L, 520L, 389L, 393L, 374L), Number.Cases.Authorised.Less10.Days = c(628L, 
475L, 723L, 595L, 555L, 474L), Percentage.Authorsied.Less7Days = c(0.808823529411765, 
0.584327086882453, 0.580357142857143, 0.544055944055944, 0.516425755584757, 
0.675090252707581), Percentage.Authorsied.Less10Days = c(0.923529411764706, 
0.809199318568995, 0.806919642857143, 0.832167832167832, 0.729303547963206, 
0.855595667870036), Avg.TaT.for.Authorised.Cases = structure(c(5.26470588235294, 
8.74616695059625, 8.34709821428571, 8.09370629370629, 12.826544021025, 
6.22021660649819), class = "difftime", units = "days"), MDM.Received = c(2L, 
13L, 2L, NA, NA, 5L), MDM.Received.Avg.TAT = structure(c(5, 29.2307692307692, 
0.5, NA, NA, 5.4), class = "difftime", units = "days"), So.Received = c(NA, 
1L, NA, 1L, NA, 2L), So.Received.Avg.TAT = structure(c(NA, 14, 
NA, 9, NA, 54), class = "difftime", units = "days")), row.names = c(NA, 
6L), class = "data.frame")

如果我将 print(Unique.Order.Comment[i]) 放在第二个单独的循环之前,我会得到: “CN”

理论上,第一个循环根据 Order.Comment 的唯一值列表对数据进行子集化(这一步可以正常工作)。然后对每个子集计算累积和,并用 cbind 将结果绑定到该子集数据上。

2 个答案:

答案 0 :(得分:1)

首先,如果您提供一个小示例以及您的预期输出,则更容易提供帮助。您可以共享您的原始数据,删除问题中不需要的列,或者创建一个与原始数据相似的假数据集。

其次,我认为你把这个问题复杂化了。在全局环境中创建多个数据集从来都不是一个好主意。它们非常难以管理,并且会对全局环境造成不必要的污染。您可以改用列表。

在这种情况下,我认为我们不需要将数据集拆分到不同的列表中,因为有多个包可以执行滚动计算。例如,下面我使用了 zoo 包中的 rollsumr 函数。

library(dplyr)
library(zoo)

# Rolling 12-row sum of authorised cases, computed separately for each
# Rebuilt.Order.Comment group; positions without a full 12-row window are
# filled with 0. The grouping is dropped again at the end.
df <- df %>%
  group_by(Rebuilt.Order.Comment) %>%
  mutate(
    n.Seen.Cum = rollsumr(Number.Cases.Authorised, k = 12, fill = 0)
  ) %>%
  ungroup()

df

#   Rebuilt.Order.Comment Period.Received Number.Cases.Authorised n.Seen.Cum
#   <chr>                 <chr>                             <int>      <int>
# 1 BR                    2019-01                             680          0
# 2 BR                    2019-02                             587          0
# 3 BR                    2019-03                             896          0
# 4 BR                    2019-04                             715          0
# 5 BR                    2019-05                             761          0
# 6 BR                    2019-06                             554          0
# 7 BR                    2019-07                             843          0
# 8 BR                    2019-08                             815          0
# 9 BR                    2019-09                             704          0
#10 BR                    2019-10                             939          0
#11 BR                    2019-11                             834          0
#12 BR                    2019-12                             880       9208
#13 BR                    2020-01                             801       9329
#14 BR                    2020-02                             610       9352
#15 BR                    2020-03                             853       9309

答案 1 :(得分:0)

我想我明白你的目标了,但我可能错过了一些东西。让我知道,我可以编辑。

据我所知,您只需要一个循环;与其用 assign() 处理一堆数据框,不如迭代地构建一个汇总表。

编辑

这里的另一个答案非常优雅!我正在根据您的新评论更新我的答案,只是为了好玩。不知道为什么我们得到的 n.Seen.Cum 值不同...

n.Seen.Cum

df2 <- structure(list(
  Rebuilt.Order.Comment = c("BR", "BR", "BR", "BR", "BR", "BR", "BR", "BR",
                            "BR", "BR", "BR", "BR", "BR", "BR", "BR"),
  Period.Received = c("2019-01", "2019-02", "2019-03", "2019-04", "2019-05",
                      "2019-06", "2019-07", "2019-08", "2019-09", "2019-10",
                      "2019-11", "2019-12", "2020-01", "2020-02", "2020-03"),
  Number.Cases.Authorised = c(680L, 587L, 896L, 715L, 761L, 554L, 843L, 815L,
                              704L, 939L, 834L, 880L, 801L, 610L, 853L),
  n.Seen.Cum = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9208, 9329, 9352, 9309)),
  row.names = c(NA, 15L), class = "data.frame")

# This will hold results
output <- list()

# Loop over this vector
Unique.Order.Comment <- unique(df2$Rebuilt.Order.Comment)

for(comment in Unique.Order.Comment){

  # Temporary dataframe that is subset of 'df2'
  temp <- df2[df2$Rebuilt.Order.Comment == comment,]

  # We can do arithmetic with dates that have days
  temp$Period.Received2 <- as.Date(paste(temp$Period.Received, "-01", sep=""))

  # Calculate cumsum after 333 days have passed
  temp$n.Seen.cum2 <- ifelse(
    test = temp$Period.Received2 - min(temp$Period.Received2) > 333,
    yes = cumsum(temp$Number.Cases.Authorised),
    no = NA)

  # better
  output[[comment]] <- temp

  # quick and dirty
  # assign(x = comment, value = temp)

}

output[[1]]
#>    Rebuilt.Order.Comment Period.Received Number.Cases.Authorised n.Seen.Cum
#> 1                     BR         2019-01                     680          0
#> 2                     BR         2019-02                     587          0
#> 3                     BR         2019-03                     896          0
#> 4                     BR         2019-04                     715          0
#> 5                     BR         2019-05                     761          0
#> 6                     BR         2019-06                     554          0
#> 7                     BR         2019-07                     843          0
#> 8                     BR         2019-08                     815          0
#> 9                     BR         2019-09                     704          0
#> 10                    BR         2019-10                     939          0
#> 11                    BR         2019-11                     834          0
#> 12                    BR         2019-12                     880       9208
#> 13                    BR         2020-01                     801       9329
#> 14                    BR         2020-02                     610       9352
#> 15                    BR         2020-03                     853       9309
#>    Period.Received2 n.Seen.cum2
#> 1        2019-01-01          NA
#> 2        2019-02-01          NA
#> 3        2019-03-01          NA
#> 4        2019-04-01          NA
#> 5        2019-05-01          NA
#> 6        2019-06-01          NA
#> 7        2019-07-01          NA
#> 8        2019-08-01          NA
#> 9        2019-09-01          NA
#> 10       2019-10-01          NA
#> 11       2019-11-01          NA
#> 12       2019-12-01        9208
#> 13       2020-01-01       10009
#> 14       2020-02-01       10619
#> 15       2020-03-01       11472

如果您有多个年份并希望重置累计总和,请更新 ifelse 中的 test 参数以包含一些最大天数。

相关问题