我正在尝试执行以下操作:
如果df1的m/z值满足 (df1['m/z'] >= 126.126226) & (df1['m/z'] <= 126.129226),则从df1的ScanNo和Intensity列中获取值,并填入df2中对应的ScanNo和TMT126列;
如果df1的m/z值满足 (df1['m/z'] >= 127.123261) & (df1['m/z'] <= 127.126261),则从df1的ScanNo和Intensity列中获取值,并填入df2中相应的ScanNo和TMT127列中。
等
df1
df1
df2
ScanNo
有人知道如何使用R 来做到这一点吗?谢谢!
答案 0 :(得分:1)
这是一个可以帮助您处理质谱数据的函数,它使用了dplyr包中的函数。
library(dplyr)
#' Extract the scans whose m/z falls inside a given window
#'
#' Converts every column to numeric, keeps the rows whose m/z value lies in
#' the closed interval [mz_min, mz_max], and returns only the scan number and
#' the intensity, with the intensity column renamed after the window
#' (e.g. "TMT126" for a window centred near 126).
#'
#' @param data A data frame with columns `ScanNo`, `Intensity` and an m/z
#'   column (see `mz_col`).
#' @param mz_min Lower bound of the m/z window (inclusive).
#' @param mz_max Upper bound of the m/z window (inclusive).
#' @param mz_col Name of the m/z column, as a string. Defaults to "m_z";
#'   pass e.g. "m/z" or "m.z" if the data uses a different name.
#' @return A data frame with two columns: `ScanNo` and `TMT<nnn>`, where
#'   `<nnn>` is the window midpoint rounded to the nearest integer.
select_scans <- function(data, mz_min, mz_max, mz_col = "m_z") {
  # mean() only averages its first argument, so the bounds must be combined
  # into one vector before taking the midpoint.
  tmt_col <- paste0("TMT", round(mean(c(mz_min, mz_max)), 0))

  data %>%
    mutate(across(everything(), as.numeric)) %>% # coerce all columns to numeric
    filter(between(.data[[mz_col]], mz_min, mz_max)) %>% # rows inside the window
    select(ScanNo, Intensity) %>%
    # A computed name cannot be used on the left of `=`; dplyr's `:=` with a
    # glue-style string injects the name held in `tmt_col`.
    rename("{tmt_col}" := Intensity)
}
所以,您运行
# Call select_scans() on df1 with the m/z window 126.126226 to 126.129226
# and store the result in df126.
df126 <- select_scans(df1, 126.126226, 126.129226)
答案 1 :(得分:1)
可以使用within,并在其内部使用ifelse。如果值不在范围内,您可能希望得到NA。为了便于演示,我创建了一个简化的m.z列。
# Build df2 from df1: drop the m.z and Intensity columns, then add one column
# per demo window that holds m.z where it falls inside the window and NA
# elsewhere. The integer windows (1-2, 3-4, 5-6) are simplified stand-ins
# used for demonstration.
df2 <- df1[setdiff(names(df1), c("m.z", "Intensity"))]
df2$TMT128 <- with(df1, ifelse(m.z >= 5 & m.z <= 6, m.z, NA))
df2$TMT127 <- with(df1, ifelse(m.z >= 3 & m.z <= 4, m.z, NA))
df2$TMT126 <- with(df1, ifelse(m.z >= 1 & m.z <= 2, m.z, NA))
df2
# ScanNo TMT128 TMT127 TMT126
# 1 3 NA NA 2
# 2 3 NA 3 NA
# 3 3 NA 3 NA
# 4 3 NA NA 2
# 5 3 NA 4 NA
# 6 5 NA 4 NA
# 7 5 6 NA NA
# 8 5 6 NA NA
# 9 5 5 NA NA
# 10 7 5 NA NA
# 11 9 NA 4 NA
# 12 13 NA NA 2
# 13 13 NA NA 2
# 14 13 6 NA NA
# 15 13 NA 4 NA
# 16 16 5 NA NA
# 17 16 NA NA 2
# 18 16 NA NA 1
# 19 16 NA 4 NA
# 20 19 NA 4 NA
数据:
# Demo data: 20 rows with an integer scan number, a simplified integer m.z
# value, and a numeric intensity.
df1 <- data.frame(
  ScanNo = c(
    3L, 3L, 3L, 3L, 3L, 5L, 5L, 5L, 5L, 7L,
    9L, 13L, 13L, 13L, 13L, 16L, 16L, 16L, 16L, 19L
  ),
  m.z = c(
    2L, 3L, 3L, 2L, 4L, 4L, 6L, 6L, 5L, 5L,
    4L, 2L, 2L, 6L, 4L, 5L, 2L, 1L, 4L, 4L
  ),
  Intensity = c(
    499.050819190312, 502.115755613237, 498.921830630967, 500.373553890647,
    498.659124958938, 500.670703826751, 499.295448634045, 499.948336887528,
    499.49054987242, 500.160221846888, 500.036135738485, 500.946913174943,
    500.580928969496, 498.996895445679, 496.507093594431, 500.788140622824,
    500.167440904356, 499.120163471469, 497.046420199033, 499.682652479155
  )
)