R:满足多个标准的数据报告范围

时间:2018-04-12 08:47:16

标签: r criteria intervals

查看本文末尾的更新

我正在使用 R 分析小鼠实验中的转轮跑步(wheel running)数据。我有以下数据框:

daily   diff    diff_abs    RSI revRSI  date
31115   0   0   NA  41.2414625757225    01/25
26455   -4660   4660    NA  48.2658030703815    01/26
43746   17291   17291   NA  45.9494803496455    01/27
56477   12731   12731   NA  48.9645147955033    01/28
20487   -35990  35990   NA  50.8358888804281    01/29
48091   27604   27604   NA  27.3308517626451    01/30
40780   -7311   7311    NA  55.0797465588814    01/31
66862   26082   26082   NA  47.1060765697854    02/01
56070   -10792  10792   58.7585374242775    54.7090598105694    02/02
36727   -19343  19343   51.7341969296185    52.8298034135076    02/03
39402   2675    2675    54.0505196503545    36.9980600069096    02/04
46852   7450    7450    51.0354852044967    50.6400779877448    02/05
54062   7210    7210    49.1641111195719    48.523901908252 02/06
82605   28543   28543   72.6691482373549    48.3957328746788    02/07
31816   -50789  50789   44.9202534411186    53.2082055322621    02/08
50723   18907   18907   52.8939234302146    34.3408339939252    02/09
52931   2208    2208    45.2909401894306    49.3214157131005    02/10
48032   -4899   4899    47.1701965864924    48.7951434058106    02/11
75867   27835   27835   63.0019399930904    45.400714012296 02/12
37012   -38855  38855   49.3599220122552    52.3590727677325    02/13
52604   15592   15592   51.476098091748 33.3383758162519    02/14
60330   7726    7726    51.6042671253212    52.719803210509 02/15
71204   10874   10874   46.7917944677379    58.8525137798564    02/16
71719   515 515 65.6591660060748    71.5165371132449    02/17
52457   -19262  19262   50.6785842868995    76.8903074849309    02/18
56043   3586    3586    51.2048565941894    64.6605567935721    02/19
59807   3764    3764    54.599285987704 71.2648737251093    02/20
70630   10823   10823   47.6409272322675    73.3683338802169    02/21
63446   -7184   7184    66.6616241837481    89.0878793194177    02/22
48314   -15132  15132   47.280196789491 61.6081268190681    02/23
47610   -704    704 41.1474862201436    43.4304453935326    02/24
42970   -4640   4640    28.4834628867551    34.2488899851998    02/25
29427   -13543  13543   23.109692515069 27.2125091182907    02/26
33773   4346    4346    35.3394432064279    41.9740001500713    02/27
28021   -5752   5752    28.7351262748907    39.5260512811454    02/28
29896   1875    1875    26.6316661197831    40.432047746235 03/01
26062   -3834   3834    10.9121206805824    38.1126164545335    03/02
47014   20952   20952   38.3918731809319    32.1529563466628    03/03
56928   9914    9914    56.5695546064674    51.6408833585561    03/04
73152   16224   16224   65.7511100148002    55.2920789479607    03/05
81706   8554    8554    72.7874908817093    60.3469873993958    03/06
46541   -35165  35165   58.0259998499287    56.7947848600666    03/07
57490   10949   10949   60.4739487188546    39.5527595903204    03/08
50016   -7474   7474    59.567952253765 55.3361524561763    03/09
58886   8870    8870    61.8873835454665    48.9233030011869    03/10
73397   14511   14511   67.8470436533372    53.8702524564152    03/11
42330   -31067  31067   48.3591166414439    50.062386569873 03/12
42819   489 489 44.7079210520393    31.5655846006373    03/13
47877   5058    5058    39.6530126006042    50.021484430949 03/14
64070   16193   16193   43.2052151399334    54.2381247338469    03/15
66901   2831    2831    60.4472404096796    65.200747697026 03/16
46032   -20869  20869   44.6638475438237    70.6002913025385    03/17
52302   6270    6270    51.0766969988131    58.9211826366076    03/18
51276   -1026   1026    46.1297475435848    61.7373482579221    03/19
73265   21989   21989   49.937613430127 61.4629369889115    03/20
70792   -2473   2473    68.4344153993627    87.5024084356554    03/21
42774   -28018  28018   49.978515569051 78.8086504788302    03/22
39119   -3655   3655    45.7618752661531    80.5670973928131    03/23
36909   -2210   2210    34.799252302974 NA  03/28
27299   -9610   9610    29.3997086974615    NA  03/29
31802   4503    4503    41.0788173633924    NA  03/30
34434   2632    2632    38.2626517420779    NA  03/31
33950   -484    484 38.5370630110885    NA  04/01
30444   -3506   3506    12.4975915643446    NA  04/02
36077   5633    5633    21.1913495211698    NA  04/03
2607    -33470  33470   19.4329026071869    NA  04/04

这些 NA 是正常的,它们是由基于 daily 计算 RSI 和 revRSI 的方式造成的。

我有兴趣找到满足以下条件的时间间隔:

  1. RSI < 50连续9次观察或更多
  2. RSI < 25进行至少1次观察
  3. 第一个满足 RSI < 50 的观察值是区间中的第一个数据点。区间在观察到 revRSI > 50 时结束(该观察值不包含在区间内)。如果在连续 9 次以上 RSI < 50 之后、出现 revRSI > 50 之前出现了 RSI > 50,区间不应终止;它只应在 revRSI > 50 时终止。

    如果可能的话,我希望输出显示在添加到上述数据框中的额外一列里(例如 1/0、TRUE/FALSE 或 FIRST/LAST 之类的值,用以指示间隔)。

    我尝试搜索,但无法找到与此相关的任何内容。 我希望有人可能有一些好的建议,或者可以指出我正确的方向:) 我将对30只动物进行半年的观察,所以我希望我不必手动这样做;)

    谢谢!

    编辑从这里开始:我试图将建议合并到我现有的代码中。我正在运行一个 for 循环,并希望它成为循环的一部分。 代码(已加入新代码)如下所示:

        for (n in names(daily)){
          animal$daily <- daily[,n]
          animal$diff <- ave(animal$daily, FUN=function(x) c(0, diff(x)))
          animal$diff_abs <- abs(ave(animal$daily, FUN=function(x) c(0, diff(x))))
          animal$diff[animal$diff < 0] <- 0
          animal$RSI <- abs(100 * rollsumr(animal$diff, k = 9, fill = NA, align="right") / rollsumr(animal$diff_abs, k = 9, fill = NA, align="right"))
          animal$diff <- ave(animal$daily, FUN=function(x) c(0, diff(x)))
          animal$diff[animal$diff > 0] <- 0
          animal$revRSI <- abs(100 * rollsumr(animal$diff, k = 9, fill = NA, align="left") / rollsumr(animal$diff_abs, k = 9, fill = NA, align="left"))
          animal$diff <- ave(animal$daily, FUN=function(x) c(0, diff(x)))
          animal$date = datax
          animal$indi <- cut(animal$RSI,c(-Inf,50,Inf),c("L50","M50"))
          animal$id <- 1:length(animal$RSI)
          ind <- which(animal$indi == "M50" | is.na(animal$RSI))
          ind <- c(ind,length(animal$RSI) + 1)
          final_df <- NULL
          for(i in ind){
            stop = which(i == ind)
            start = ifelse(stop == 1,1,stop-1)
            res <- animal[(ind[start] + 1):(ind[stop] -1) ,]
            # We create 'little dataframe between each succesiv upper value
            if(length(res[,1]) >= 9 ) {
              # We flag OK all element
              res$FL <- "OK"
              # We flag the first element of our little df
              res[1,"FL"] <- "FIRST"
              # We flag the first element of our little df
              res[length(res[,1]),"FL"] <- "LAST"
              # Finaly we add our new df to our results
              final_df <- rbind(final_df,res[,c("id","FL")])
            }
          }
          animal <- merge(animal,final_df,by.x = "id",by.y="id",all.x =T)
          assign(paste("animal", n, sep = ''), animal)
        }

    我得到的错误是:

        Error in fix.by(by.y, y) : 'by' must specify a uniquely valid column

    我可以在 animal data.frame 中看到生成了 FL 列,但同时也出现了名为 FL.x 和 FL.y 的列。 我不确定那是怎么回事......

    另外,我最初忘了提到,每只动物可能没有间隔,也可能有多个间隔(本该一开始就说明的,抱歉)。在查看建议的解决方案后,我不确定它能否处理这种情况......?

    谢谢!

1 个答案:

答案 0 :(得分:1)

思路是在每个 RSI > 50 的位置把数据框切开。通过这个技巧,您可以检查每个小数据框的行数,并只保留行数不少于 9 的那些小数据框。

识别

# Identify the rows that break a run of low RSI values.
#
# Adds two helper columns to `df`:
#   indi - factor labelling each RSI as "L50" (RSI < 50) or "M50" (RSI >= 50)
#   id   - row number, used later as the key when merging the flags back
# and builds `ind`, the row numbers at which a candidate interval must end.

# right = FALSE makes the bins [-Inf, 50) and [50, Inf), so an RSI of exactly
# 50 is not counted as "RSI < 50"; include.lowest = TRUE closes the top bin.
df$indi <- cut(
  df$RSI,
  breaks = c(-Inf, 50, Inf),
  labels = c("L50", "M50"),
  right = FALSE,
  include.lowest = TRUE
)
# seq_len() stays correct for a zero-row data frame, unlike 1:length(x).
df$id <- seq_len(nrow(df))
# Break rows: RSI at or above 50, or RSI not available (NA).
ind <- which(df$indi == "M50" | is.na(df$RSI))
# Sentinel one past the last row so the final run is closed as well.
ind <- c(ind, nrow(df) + 1)
# Accumulator for the flagged rows; stays NULL when no interval qualifies.
final_df <- NULL

开始执行

 # Flag every run of consecutive low-RSI rows that qualifies as an interval.
 #
 # Reads:  df  (needs columns `RSI` and `id`)
 #         ind (break rows, ending with the sentinel nrow(df) + 1)
 # Writes: final_df, a data frame with columns `id` and `FL`
 #         ("FIRST" / "OK" / "LAST"), or NULL when no run qualifies.
 #
 # A run qualifies when it has at least 9 rows and at least one RSI < 25.
 # NOTE(review): a run ends at the next RSI break row; the "stop only when
 # revRSI > 50" rule described in the question is not applied here.

 # Prepending 0 makes the rows before the first break a run of their own;
 # without it those leading rows would never be examined.
 bounds <- c(0, ind)
 flagged <- vector("list", length(ind))
 for (k in seq_along(ind)) {
   first_row <- bounds[k] + 1
   last_row <- bounds[k + 1] - 1
   # Adjacent breaks leave nothing in between; skipping also avoids the
   # descending sequence that first_row:last_row would produce.
   if (last_row < first_row) {
     next
   }
   # Rows strictly between two successive breaks.
   res <- df[first_row:last_row, , drop = FALSE]
   if (nrow(res) >= 9 && any(res$RSI < 25)) {
     # Flag every row of the run as OK ...
     res$FL <- "OK"
     # ... then mark the first row of the run ...
     res[1, "FL"] <- "FIRST"
     # ... and the last row of the run.
     res[nrow(res), "FL"] <- "LAST"
     flagged[[k]] <- res[, c("id", "FL")]
   }
 }
 # Bind once at the end instead of growing a data frame inside the loop.
 flagged <- Filter(Negate(is.null), flagged)
 final_df <- if (length(flagged) > 0) do.call(rbind, flagged) else NULL

收集我们的结果

# Attach the interval flags to `df` as column `FL` (NA outside any interval).

# Drop an FL column left over from an earlier run or loop iteration;
# otherwise merge() would return FL.x and FL.y instead of a single FL.
df$FL <- NULL
if (is.null(final_df)) {
  # No interval qualified. merge() cannot join against NULL and fails with
  # "'by' must specify a uniquely valid column", so add an all-NA column
  # and put `id` first, which is the column order merge() would produce.
  df$FL <- NA_character_
  df <- df[, c("id", setdiff(names(df), "id")), drop = FALSE]
} else {
  # Left join on `id`: every row of df is kept.
  df <- merge(df, final_df, by = "id", all.x = TRUE)
}

结果

id daily   diff diff_abs      RSI   revRSI    date indi    FL
26 26 56043   3586     3586 51.20486 64.66056 févr-19  M50  <NA>
27 27 59807   3764     3764 54.59929 71.26487 févr-20  M50  <NA>
28 28 70630  10823    10823 47.64093 73.36833 févr-21  L50  <NA>
29 29 63446  -7184     7184 66.66162 89.08788 févr-22  M50  <NA>
30 30 48314 -15132    15132 47.28020 61.60813 févr-23  L50 FIRST
31 31 47610   -704      704 41.14749 43.43045 févr-24  L50    OK
32 32 42970  -4640     4640 28.48346 34.24889 févr-25  L50    OK
33 33 29427 -13543    13543 23.10969 27.21251 févr-26  L50    OK
34 34 33773   4346     4346 35.33944 41.97400 févr-27  L50    OK
35 35 28021  -5752     5752 28.73513 39.52605 févr-28  L50    OK
36 36 29896   1875     1875 26.63167 40.43205 03-janv  L50    OK
37 37 26062  -3834     3834 10.91212 38.11262 03-févr  L50    OK
38 38 47014  20952    20952 38.39187 32.15296 03-mars  L50  LAST
39 39 56928   9914     9914 56.56955 51.64088  03-avr  M50  <NA>
40 40 73152  16224    16224 65.75111 55.29208  03-mai  M50  <NA>

希望这会有所帮助

Gottavianoni