# Example data frame with 92 rows, one per `doy` value from 274 to 365.
# Columns: `doy`, `no.plant` (0/1 values) and `cum.value` (values from 0 up to
# about 0.998).
dat <- structure(list(doy = c(274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336,
337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357,
358, 359, 360, 361, 362, 363, 364, 365),
no.plant = c(0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
cum.value = c(0, 1.34973713866726e-05, 0.000107973870218436, 0.000364365089792096, 0.000863464598244823, 0.00168576031682954,
0.00291120609231443, 0.00291120609231443, 0.0046189294134239, 0.00688687680728461, 0.00688687680728461,
0.00979139917551386, 0.0134067801825104, 0.0178047117788614, 0.0230537220148601, 0.0292185614529241,
0.0292185614529241, 0.0363595556987137, 0.0363595556987137, 0.0445319328097977, 0.0537851355741434,
0.0641621298405947, 0.0756987211882645, 0.0884228931969177, 0.102354181379628, 0.102354181379628, 0.117503097415405,
0.133870618627253, 0.151447757647197, 0.151447757647197, 0.170215226855778, 0.170215226855778,
0.190143211447851, 0.211191263836225, 0.233308330547831, 0.256432920794094, 0.280493423522773, 0.305408577012532,
0.331088091999851, 0.357433425992349, 0.384338702900249, 0.411691768499651, 0.439375368630229, 0.467268433537531,
0.495247448513112, 0.523187888081939, 0.550965688550059, 0.578458731861707, 0.605548312515632, 0.632120558828558,
0.658067780159839, 0.683289712849355, 0.707694639565394, 0.731200359474982, 0.753734990069534, 0.753734990069534,
0.753734990069534, 0.753734990069534, 0.775237585508182, 0.795658560857758, 0.814959916467899, 0.833115261761304,
0.850109642771837, 0.865939182653005, 0.865939182653005, 0.880610548937487, 0.894140265397845, 0.906553889802375,
0.917885081566473, 0.928174585188328, 0.93746915638157, 0.945820457966355, 0.95328395187962, 0.959917812174526,
0.965781881688334, 0.970936692282333, 0.975442565331355, 0.97935880560985, 0.97935880560985, 0.982742998037354,
0.985650413056059, 0.988133522855331, 0.990241627354782, 0.992020585910824, 0.993512648199701, 0.994756375705273,
0.995786643728671, 0.996634712840931, 0.997328358197721, 0.997892045086969, 0.998347139430071, 0.998347139430071)),
class = "data.frame", row.names = c(NA, -92L))
# Step size: the thresholds compared against `cum.value` are integer multiples
# of this value.
delta <- 0.04991736
我需要选择 cum.value 达到 1*delta、2*delta、3*delta、4*delta …… n*delta 时的 doy;如果 n*delta 未达到最后一个 doy(即 doy 365),则还要包括最后一个 doy。
此刻,我正在通过反复试验来选择 n,这是通过首先创建 1:n 的序列来实现的。例如 1:19(随后再将 1:n 更改为 1:20 进行比较):
# Thresholds 1 * delta, 2 * delta, ..., 19 * delta.
qt.vec.19 <- seq_len(19) * delta
# Does the largest threshold reach the maximum of cum.value?
max(qt.vec.19) >= max(dat$cum.value)
# [1] FALSE
这意味着我可以做1 * delta,2 * delta .... 19 * delta,然后选择最后一个doy。
qt.vec
我想知道是否有更好的方法来做到这一点,例如选择我的
# Thresholds 1 * delta, 2 * delta, ..., 20 * delta.
qt.vec.20 <- seq_len(20) * delta
# Does the largest threshold reach the maximum of cum.value?
max(qt.vec.20) >= max(dat$cum.value)
# [1] TRUE
值或避免使用较长的
# For every threshold in qt.vec.19, find the first row of `dat` whose
# cum.value exceeds it. `which(...)[1L]` yields NA when a threshold is never
# exceeded (which.max() would silently return row 1 in that case), so those
# thresholds are dropped instead of selecting a wrong row.
first.hit <- vapply(
  qt.vec.19,
  function(threshold) which(dat$cum.value > threshold)[1L],
  integer(1)
)
first.hit <- unique(first.hit[!is.na(first.hit)])

# Rows at which a new threshold is first exceeded.
sample.dat <- dat[first.hit, ]
rownames(sample.dat) <- NULL

# Row(s) of the last day of the series (doy 365 in the example data).
last.row <- which(dat$doy == max(dat$doy))
last.doy <- dat[last.row, ]
rownames(last.doy) <- NULL

# Threshold rows plus the last day; unique() keeps the last day from being
# duplicated when it is already one of the threshold rows.
all.doy <- dat[unique(c(first.hit, last.row)), ]
rownames(all.doy) <- NULL
doy no.plant cum.value
294 0 0.05378514
298 0 0.10235418
302 0 0.15144776
307 0 0.21119126
309 0 0.25643292
311 0 0.30540858
313 0 0.35743343
315 0 0.41169177
317 0 0.46726843
319 0 0.52318789
320 0 0.55096569
322 0 0.60554831
324 0 0.65806778
326 0 0.70769464
328 0 0.75373499
334 0 0.81495992
336 0 0.85010964
341 0 0.90655389
346 0 0.95328395
365 1 0.99834714
部分?
答案 0 :(得分:1)
这在一定程度上取决于个人偏好和具体情境。您可能读到过很多“R 中不提倡使用循环”的说法——但循环能给出正确的结果、易于阅读,而且属于 Base R——不需要额外的程序包,也不必学习新的语法:
# Select the first row at which cum.value reaches each successive multiple of
# delta, plus the last row of the data. Assumes delta > 0 and finite cum.value.
options(scipen = 10, digits = 15)  # display all digits
dat <- read.csv("crop89.csv")      # load your data from a file
delta <- 0.04991736                # selected threshold
n <- 1                             # multiplier of the next threshold to reach
keep <- logical(nrow(dat))         # marks the rows that go into the result
for (i in seq_len(nrow(dat))) {    # seq_len() is safe when dat has no rows
  value <- dat[i, "cum.value"]
  if (value >= n * delta) {        # as soon as the threshold is passed
    keep[i] <- TRUE
    # Move past every threshold this row has already reached, so that a jump
    # over several multiples of delta does not make the following rows match
    # thresholds that were passed earlier.
    while (value >= n * delta) {
      n <- n + 1
    }
  }
}
if (nrow(dat) > 0) {
  keep[nrow(dat)] <- TRUE          # add the last row anyway, without duplicating it
}
all.doy <- dat[keep, ]
all.doy
> all.doy
doy no.plant cum.value
1 294 0 0.0537851355741434
25 298 0 0.1023541813796280
29 302 0 0.1514477576471970
34 307 0 0.2111912638362250
36 309 0 0.2564329207940940
38 311 0 0.3054085770125320
40 313 0 0.3574334259923490
42 315 0 0.4116917684996510
44 317 0 0.4672684335375310
46 319 0 0.5231878880819389
47 320 0 0.5509656885500590
49 322 0 0.6055483125156320
51 324 0 0.6580677801598390
53 326 0 0.7076946395653940
55 328 0 0.7537349900695340
61 334 0 0.8149599164678990
63 336 0 0.8501096427718370
68 341 0 0.9065538898023749
73 346 0 0.9532839518796200
92 365 1 0.9983471394300710
答案 1 :(得分:0)
这里的要点是 cut 函数:
library(data.table)

# Keep, for every threshold interval, the first row that falls into it.
DT <- as.data.table(dat)
# cut() uses right-closed intervals by default, so after subtracting 1 the
# group number k means qt.vec.19[k] < cum.value <= next break; group 0 holds
# the rows that have not exceeded the first threshold yet.
DT[, group := as.numeric(cut(cum.value, c(-Inf, qt.vec.19, Inf), ordered_result = TRUE)) - 1]
# Rank rows inside each group by cum.value; ties keep their original order.
DT[, position := frank(cum.value, ties.method = "first"), by = group]
DT <- DT[position == 1 & group > 0]
# Drop the helper columns.
DT[, c("position", "group") := NULL]
# Append the row(s) of the last doy when the maximum cum.value is not already
# among the selected rows.
if (max(DT$cum.value) != max(dat$cum.value)) {
  DT <- rbind(DT, dat[dat$doy == max(dat$doy), ])
}