我有以下代码:
library(tidyverse)
# Function ----------------------------------------------------------------
#' Convert a three-letter amino-acid sequence to one-letter codes
#'
#' @param three_aa_seq Character; three-letter residue codes joined by "-"
#'   (e.g. "His-Ser-Leu"). Only the FIRST element is converted, because the
#'   split result is indexed with `[[1]]`; any further elements are ignored.
#' @return A single string holding the one-letter codes in input order.
#'   Codes that are absent from the lookup table are dropped by the
#'   inner join.
convert <- function(three_aa_seq = NULL) {
  # Lookup table: full name, three-letter code, one-letter code.
  pep_dat <- tibble(
    full = c(
      "Alanine", "Arginine", "Asparagine",
      "Aspartate", "Cysteine", "Glutamine", "Glutamate", "Glycine",
      "Histidine", "Isoleucine", "Leucine", "Lysine", "Methionine",
      "Phenylalanine", "Proline", "Serine", "Threonine", "Tryptophan",
      "Tyrosine", "Valine"
    ),
    three = c(
      "Ala", "Arg", "Asn", "Asp",
      "Cys", "Gln", "Glu", "Gly", "His", "Ile", "Leu", "Lys", "Met",
      "Phe", "Pro", "Ser", "Thr", "Trp", "Tyr", "Val"
    ),
    one = c(
      "A",
      "R", "N", "D", "C", "Q", "E", "G", "H", "I", "L", "K", "M", "F",
      "P", "S", "T", "W", "Y", "V"
    )
  )

  # str_split() returns a list with one entry per input string; `[[1]]`
  # keeps just the residues of the first string.
  residues <- str_split(three_aa_seq, pattern = "-")[[1]]

  # Build the join key directly with tibble() instead of the deprecated
  # as.tibble() + rename(); inner_join() keeps the row order of the left
  # table, so the residue order is preserved.
  tibble(three = residues) %>%
    inner_join(pep_dat, by = "three") %>%
    pull(one) %>%
    paste(collapse = "")
}
基本上,它将一串由三字母缩写组成的氨基酸序列转换为单字母序列,例如将 "His-Ser-Leu" 转换为 HSL。
但是当我尝试使用此代码时:
# Build a two-peptide example table, add the converted one-letter sequence
# as `pep`, and keep only the peptide name and the converted column.
tribble(
  ~pep_name, ~three_seq,
  "PA_19", "His-Ser-Leu-Gly-Lys-Trp-Leu-Gly-His-Pro-Asp-Lys-Phe",
  "PA_20", "Thr-Ala-Pro-Arg-Ser-Leu-Arg-Arg-Ser-Ser-Cys-Phe-Gly-Gly-Arg-Met-Asp-Arg-Ile-Gly-Ala-Gln-Ser-Gly-Leu-Gly-Cys-Asn-Ser-Phe-Arg-Tyr"
) %>%
  mutate(pep = convert(three_aa_seq = three_seq)) %>%
  # The column is named `pep_name`; selecting `pepname` would fail because
  # no such column exists.
  select(pep_name, pep)
它返回:
# A tibble: 2 x 2
pep_name pep
<chr> <chr>
1 PA_19 HSLGKWLGHPDKF
2 PA_20 HSLGKWLGHPDKF
问题在于:PA_20 的 pep 列结果没有更新为 TAPRSLRRSSCFGGRMDRIGAQSGLGCNSFRY。正确的做法是什么?
答案 0 :(得分:3)
str_split 的输出是一个 list,而 OP 只用 [[1]] 提取了该 list 的第一个元素;正确的做法是对整个 list 逐个元素进行处理。假设初始数据集为 tbl:
# Split every sequence on "-", translate each set of residues through
# pep_dat, and collapse the one-letter codes into one string per row.
# Assumes `tbl` and `pep_dat` already exist in the calling environment.
tbl %>%
  mutate(
    pep = map_chr(
      str_split(three_seq, pattern = "-"),
      ~ tibble(three = .x) %>%
        inner_join(pep_dat, by = "three") %>%
        pull(one) %>%
        paste(collapse = "")
    )
  ) %>%
  select(-three_seq)
# A tibble: 2 x 2
# pep_name pep
# <chr> <chr>
#1 PA_19 HSLGKWLGHPDKF
#2 PA_20 TAPRSLRRSSCFGGRMDRIGAQSGLGCNSFRY
将以上内容封装为函数:
#' Convert three-letter amino-acid sequences to one-letter strings
#'
#' @param three_aa_seq Character vector; each element holds three-letter
#'   residue codes joined by "-".
#' @param keydat Data frame with a `three` column (join key) and a `one`
#'   column (one-letter code).
#' @return Character vector with one converted string per input element.
#'   Codes missing from `keydat` are dropped by the inner join.
convertfn <- function(three_aa_seq, keydat) {
  # Translate the residues of a single sequence and glue them together.
  to_one_letter <- function(codes) {
    matched <- inner_join(tibble(three = codes), keydat, by = "three")
    paste(pull(matched, one), collapse = "")
  }

  # str_split() yields one character vector of residues per input string.
  residues_per_seq <- str_split(three_aa_seq, pattern = "-")
  map_chr(residues_per_seq, to_one_letter)
}
# Add the converted one-letter sequence as `pep`, then drop the
# three-letter source column.
tbl %>%
  mutate(
    pep = convertfn(three_aa_seq = three_seq, keydat = pep_dat)
  ) %>%
  select(-three_seq)
答案 1 :(得分:1)
请注意,当行数较多时,为每次替换都创建一个 tibble 会非常慢。更好的做法是直接使用 stringr 提供的向量化字符串替换:
library(tidyverse)
library(stringr)
# Named lookup vector: each name is a three-letter amino-acid code and each
# value is the matching one-letter code. Defined directly with left
# assignment rather than building a tibble and right-assigning the result.
aa_map <- c(
  Ala = "A", Arg = "R", Asn = "N", Asp = "D", Cys = "C",
  Gln = "Q", Glu = "E", Gly = "G", His = "H", Ile = "I",
  Leu = "L", Lys = "K", Met = "M", Phe = "F", Pro = "P",
  Ser = "S", Thr = "T", Trp = "W", Tyr = "Y", Val = "V"
)
# Translate every three-letter code through aa_map (a named vector of
# pattern = replacement pairs), then strip the "-" separators.
tribble(
  ~pep_name, ~three_seq,
  "PA_19", "His-Ser-Leu-Gly-Lys-Trp-Leu-Gly-His-Pro-Asp-Lys-Phe",
  "PA_20", "Thr-Ala-Pro-Arg-Ser-Leu-Arg-Arg-Ser-Ser-Cys-Phe-Gly-Gly-Arg-Met-Asp-Arg-Ile-Gly-Ala-Gln-Ser-Gly-Leu-Gly-Cys-Asn-Ser-Phe-Arg-Tyr"
) %>%
  mutate(
    one_seq = str_replace_all(
      str_replace_all(three_seq, aa_map),
      "-",
      ""
    )
  )