我有两个数据框:一个包含7个bin的数据框(称为FJX_bins),指定每个bin的上下限和编号;以及一个由波长-Sigma值对组成的数据框(test_spectra)。我想根据FJX_bins
数据中的bin上下限,在test_spectra中创建一个名为bin_number的新变量。两者的dput
输出包含在下面。
使用mutate
和case_when
逐个硬编码每个bin(暴力解法)相对容易,但这里的关键是我希望该解决方案可以扩展到任意数量的bin。我的感觉是,可能有某种apply
或dplyr
方法可以在这里使用,但我目前能想到的只有使用for循环,如下所示:
# Asker's attempt, kept exactly as posted: it does NOT work. A `for` loop
# evaluates to NULL, so case_when() receives NULL instead of two-sided
# formulas and raises the error quoted after this snippet.
df <- test_spectra %>%
mutate(bin_number = case_when(
for(ii in 1:nrow(FJX_bins)){
Wavelength >= FJX_bins$Lambda_Start[ii] & Wavelength < FJX_bins$Lambda_End[ii] ~
FJX_bins$Bin_Number[ii]}
))
此策略失败,并引发错误:
Case 1 (
for (ii in 1:nrow(FJX_bins)) {...
) must be a two-sided formula, not a NULL
是否可以使用dplyr
解决此问题?还是我需要退后一步看看apply
和cut
之类的东西?由于其他原因,我宁愿坚持使用dplyr
框架,但也可以不在此范围之内。
谢谢
# Bin definitions as dput() output of a tibble: 7 rows with columns
# Bin_Number (1:7), Lambda_Start and Lambda_End (the limits of each bin).
# The leftover `spec` attribute also lists an Effective_Lambda column that is
# not among the data columns.
FJX_bins <- structure(list(Bin_Number = 1:7, Lambda_Start = c(289, 298.25,
307.45, 312.45, 320.3, 345, 412.45), Lambda_End = c(298.25, 307.45,
312.45, 320.3, 345, 412.45, 850)), row.names = c(NA, -7L), class = c("tbl_df",
"tbl", "data.frame"), spec = structure(list(cols = structure(list(
Bin_Number = structure(list(), class = c("collector_integer",
"collector")), Lambda_Start = structure(list(), class = c("collector_double",
"collector")), Lambda_End = structure(list(), class = c("collector_double",
"collector")), Effective_Lambda = structure(list(), class = c("collector_integer",
"collector"))), .Names = c("Bin_Number", "Lambda_Start",
"Lambda_End", "Effective_Lambda")), default = structure(list(), class = c("collector_guess",
"collector"))), .Names = c("cols", "default"), class = "col_spec"), .Names = c("Bin_Number",
"Lambda_Start", "Lambda_End"))
# Example spectra as dput() output of a tibble: 48 rows with an integer
# Wavelength column (289-300, repeated four times) and a double Sigma column.
test_spectra <- structure(list(Wavelength = c(289L, 290L, 291L, 292L, 293L, 294L,
295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L, 292L, 293L,
294L, 295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L, 292L,
293L, 294L, 295L, 296L, 297L, 298L, 299L, 300L, 289L, 290L, 291L,
292L, 293L, 294L, 295L, 296L, 297L, 298L, 299L, 300L), Sigma = c(3.97790085259898e-20,
3.88773011066234e-20, 3.77170497723194e-20, 3.63990173255768e-20,
3.53611020195826e-20, 3.39379425027765e-20, 3.24540998352932e-20,
3.08629426249589e-20, 2.93243925380076e-20, 2.80431593390348e-20,
2.64345023340469e-20, 2.49597804268261e-20, 4.79587956800083e-20,
4.67040607723134e-20, 4.5134283789068e-20, 4.32731814710643e-20,
4.13196812361237e-20, 3.93856298421813e-20, 3.77050786831795e-20,
3.62340670271797e-20, 3.49404344374885e-20, 3.36066462681245e-20,
3.20871974271263e-20, 3.03438697547602e-20, 5.27803299371575e-20,
5.12475486084599e-20, 4.99112054163632e-20, 4.86399784101602e-20,
4.73236079731255e-20, 4.56798834656559e-20, 4.36887241590191e-20,
4.13697643104457e-20, 3.89697643104457e-20, 3.66909671059429e-20,
3.46634646072095e-20, 3.28648835305714e-20, 5.71590756444018e-20,
5.57618648066173e-20, 5.44949261656802e-20, 5.33110977304272e-20,
5.21177991137917e-20, 5.07478142704849e-20, 4.9100984463428e-20,
4.70660943398542e-20, 4.47661068638463e-20, 4.24314737804269e-20,
4.02176301884806e-20, 3.82570654305878e-20)), row.names = c(NA,
-48L), class = c("tbl_df", "tbl", "data.frame"), .Names = c("Wavelength",
"Sigma"))
答案 0 :(得分:4)
fuzzyjoin
为dplyr
实现了范围/区间联接:
library(fuzzyjoin)
# Left-join every spectrum row to the bin whose [Lambda_Start, Lambda_End]
# interval contains its Wavelength.
# test_spectra must be the first (x) table: the names in `by` refer to x
# columns (Wavelength) and the values to y columns (the bin limits).
# NOTE(review): the printed result has 52 rows for 48 input rows, which
# suggests some wavelengths match more than one bin - verify before relying
# on this as a unique bin assignment.
interval_left_join(
  test_spectra,
  FJX_bins,
  by = c("Wavelength" = "Lambda_Start", "Wavelength" = "Lambda_End")
)
# A tibble: 52 x 5 Wavelength Sigma Bin_Number Lambda_Start Lambda_End <int> <dbl> <int> <dbl> <dbl> 1 289 3.98e-20 1 289 298. 2 290 3.89e-20 1 289 298. 3 291 3.77e-20 1 289 298. 4 292 3.64e-20 1 289 298. 5 293 3.54e-20 1 289 298. 6 294 3.39e-20 1 289 298. 7 295 3.25e-20 1 289 298. 8 296 3.09e-20 1 289 298. 9 297 2.93e-20 1 289 298. 10 298 2.80e-20 1 289 298. # … with 42 more rows
答案 1 :(得分:3)
如果您找不到dplyr
解决方案:
library(data.table)
# Convert both tables to data.table by reference (no copy is made).
setDT(FJX_bins)
setDT(test_spectra)

# Non-equi update join: for each bin row, tag the spectra rows whose
# Wavelength lies in [Lambda_Start, Lambda_End) with that bin's number.
# `:=` adds bin_number to test_spectra in place; `i.` refers to FJX_bins.
test_spectra[
  FJX_bins,
  on = .(Wavelength >= Lambda_Start, Wavelength < Lambda_End),
  bin_number := i.Bin_Number
]
这将与bins表进行非等值联接(non-equi join),并相应地设置bin编号。
答案 2 :(得分:2)
使用dplyr:
将bin编号创建为因子(factor)
library(dplyr)
# Assign each wavelength to a bin with cut(); `bin` is a factor whose levels
# are the Bin_Number values.
# Breaks are every bin's lower limit plus the largest upper limit, taken from
# FJX_bins rather than hard-coded, so this scales with the number of bins
# (assumes the bins are sorted and contiguous, as in the example data).
# right = FALSE gives left-closed intervals [start, end).
Test_Spectra <- mutate(
  test_spectra,
  bin = cut(
    Wavelength,
    breaks = c(FJX_bins$Lambda_Start, max(FJX_bins$Lambda_End)),
    labels = FJX_bins$Bin_Number,
    right = FALSE
  )
)
或创建bin号作为字符变量
# Same binning as a character column: cut() returns a factor labelled with
# Bin_Number, which as.character() converts to strings.
# Breaks are every bin's lower limit plus the largest upper limit, taken from
# FJX_bins rather than hard-coded, so this scales with the number of bins
# (assumes the bins are sorted and contiguous, as in the example data).
# right = FALSE gives left-closed intervals [start, end).
Test_Spectra <- mutate(
  test_spectra,
  bin = as.character(
    cut(
      Wavelength,
      breaks = c(FJX_bins$Lambda_Start, max(FJX_bins$Lambda_End)),
      labels = FJX_bins$Bin_Number,
      right = FALSE
    )
  )
)