在野外电镀过程中对鳗进行取样。大多数是测量的(批次= S), 有些不是(批次= L)。 我想重新分配L批次中丢失的鳗鱼,使用最近的10毫米值和 在单个“S”测量中观察到的尺寸结构。
eel <- structure(list(op = c(529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L, 529L,
529L, 529L, 529L, 529L, 529L, 529L, 545L, 545L, 545L, 545L, 545L,
545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L,
545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L,
545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L, 545L,
545L, 545L), size = c(101L, 103L, 110L, 112L, 115L, 119L, 120L,
121L, 121L, 121L, 123L, 127L, 128L, 129L, 135L, 140L, 146L, 147L,
147L, 148L, 150L, 152L, 152L, 155L, 159L, 160L, 164L, 164L, 164L,
175L, 180L, 184L, 190L, 192L, 193L, 213L, 216L, 227L, 233L, 235L,
240L, 253L, 256L, 278L, 287L, 289L, 303L, 307L, 312L, 323L, 80L,
82L, 92L, 93L, 100L, 112L, 114L, 120L, 121L, 122L, 128L, 131L,
147L, 149L, 151L, 156L, 159L, 161L, 164L, 165L, 167L, 168L, 172L,
195L, 222L, 228L, 242L, 257L, 265L, 265L, 275L, 290L, 294L, 294L,
307L, 310L, 315L, 330L, 374L, 80L, 143L, 176L, 165L, 141L, 139L,
93L, 138L, 129L, 143L, 139L, 126L, 84L, 126L, 119L, 129L, 111L,
112L, 426L, 188L, 186L, 293L, 235L, 188L, 173L, 177L, 176L, 165L,
165L, 166L, 141L, 231L, 168L, 167L, 186L, 168L, 161L, 187L, 129L,
155L, 84L), batch = c("S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "L", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S", "S",
"S", "S", "S", "S", "L"), number = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 133L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 67L)), .Names = c("op",
"size", "batch", "number"), row.names = 4:133, class = "data.frame")
我尝试使用直方图进行tidyverse,我使用我的大小结构提取数据 以下函数(将确保所有观察值都在中断范围内)。我希望每10毫米放置一个新尺寸。
fn<-function(x) hist(x,
breaks=seq(min(plyr::round_any(x, 10,f=floor)),plyr::round_any(max(x),10,f=ceiling),by=10),
plot=FALSE)
然后我应用以下代码
hist <- eel%>%
filter(batch=='S') %>%
select (size,op) %>%
group_by(op) %>%
by_slice(~fn(.x$size))
我在.out列中有breaks
和counts
的直方图,并使用我想要的那些
在我的数据框中创建新行。任何帮助将不胜感激。
答案 0 :(得分:0)
我找到了一种方法,可能不是最好的,我使用browser
参数来弄清楚细节。其中一个难点是重新分配的数字需要是整数,当根据每个类大小的百分比舍入数字时,会增加或丢失一些计数。所以我不得不将一些鳗鱼随机重新分配到大小结构中。为了加工容差,参数sample(1:nrow(df),rr)
不起作用,我不得不围绕rr
。请注意,我尝试使用函数map
和map2
并且没有管理它,所以任何其他更简单的方法都会非常受欢迎。
group_sample <-
eel%>%
filter(batch=='L')%>%
select (op,number)
individual_sample <-
eel%>%
filter(batch=='S') %>%
select (size,op)%>%
group_by(op) %>%
by_slice(~fn(.x$size)) %>%
rename(hist=.out)
reassigned_sample<- inner_join(individual_sample,group_sample,by=c("op")) %>%
by_row(..f=function(this_row){
#browser()
# frequencies
vec <-this_row["hist"][[1]][[1]]$counts/sum(this_row["hist"][[1]][[1]]$counts)*pull(this_row["number"])
# numbers are rounded, but there is a problem with sum
roundvec <- round(vec)
sumvec <- sum(vec)
sumroundvec <- sum(roundvec)
# difference between rounded numbers and numbers
rr <- sumroundvec-sumvec
# creation du jeu de données ressemblant au tableau de départ (moins id première colonne)
df <- data.frame("op"=pull(this_row["op"]),
"size"=this_row["hist"][[1]][[1]]$mids-5,
"batch"="SL",
"number"=roundvec
)
# remove lines with 0 number
df<-df[df$number>0,]
if (rr >0) {
# randomly removing eels from some samples
# round(rr) necessary otherwise might not be exact integer
sss <- sample(1:nrow(df),round(rr))
df[sss,"number"]<-df[sss,"number"]-1
# randomly adding eels for some samples
} else if (rr <0){
sss<-sample(1:nrow(df),round(-rr))
df[sss,"number"]<-df[sss,"number"]+1
} else {
# do nothing
}
stopifnot(round(sum(df$number))==round(sumvec))
return(df)
}) %>%
rename(table=.out)
bind_rows(reassigned_sample$table)
op size batch number
1 529 80 SL 3
2 529 90 SL 4
3 529 100 SL 4