dplyr 报错:long vectors not supported yet(尚不支持长向量)

时间:2020-10-15 18:58:10

标签: r vector dplyr

您好,我需要帮助以解决问题。 所以我用了这段代码:

# Build the join key `shorti`: the part of `scaffold` before the first "_"
# (e.g. "JZSA01000001.1_0" -> "JZSA01000001.1"). strsplit() splits on every
# "_"; rbind() stacks the pieces and `[,1]` keeps only the first piece.
df_interval$shorti = do.call(rbind, strsplit(as.character(df_interval$scaffold), "_"))[,1]
# Pair each df1 row with every interval whose `shorti` equals its `scaf_name`,
# then keep only the pairs whose `coor` lies inside [start, end].
# NOTE(review): the join builds all row x interval pairs for a scaffold before
# the filter runs; presumably this intermediate result is what triggers the
# "long vectors not supported yet" error on large inputs -- confirm.
df_t = df1 %>%
  left_join(df_interval, by = c("scaf_name" = "shorti")) %>%
  filter(coor >= start & coor <= end) %>%
  # Same condition as the filter above, so it holds for every remaining row
  # and `scaf_name` is replaced by the full interval name from `scaffold`.
  mutate(scaf_name = ifelse(coor >= start &
                              coor <= end, as.character(scaffold), scaf_name)) %>%
  # Keep only the three original df1 columns.
  select(c("scaf_name", "coor", "dinates.value"))

它确实可以处理某些数据,但是现在我使用了另一个df并收到了此错误消息:

Error in vec_slice(x_out, x_slicer) :    long vectors not supported yet: ../../src/include/Rinlinedfuns.h:519

有人知道如何解决这个问题吗?我猜这是因为我的数据太大了?

下面是我想要做的事情:

我有一个 df1

scaf_name       coordinates value    
JZSA01000001.1  1   2
JZSA01000001.1  2   2
JZSA01000001.1  3   2
JZSA01000001.1  4   2
JZSA01000001.1  5   2
JZSA01000001.1  6   2
JZSA01000001.1  7   2
JZSA01000001.1  8   2
JZSA01000001.1  9   2
JZSA01000001.1  10  2
JZSA01000001.1  11  5
JZSA01000001.1  12  5
JZSA01000001.1  13  5
JZSA01000001.1  14  5
JZSA01000001.1  15  5
JZSA01000001.1  16  5
JZSA01000001.1  17  5
JZSA01000001.1  18  6
JZSA01000002.1  1   2
JZSA01000002.1  2   2
JZSA01000002.1  3   2
JZSA01000002.1  4   2
JZSA01000002.1  5   2
JZSA01000002.1  6   2
JZSA01000003.1  1   5
JZSA01000003.1  2   5
JZSA01000003.1  3   6
JZSA01000003.1  4   6
JZSA01000003.1  5   6
JZSA01000003.1  6   6
JZSA01000003.1  7   6
JZSA01000003.1  8   6
JZSA01000003.1  9   6

和另一个 df_interval

scaffold          start     end
JZSA01000001.1_0  1         14
JZSA01000001.1_1  15        18
JZSA01000002.1    1         12
JZSA01000003.1_0  1         3
JZSA01000003.1_1  4         6
JZSA01000003.1_2  7         9

我的想法是根据 df_interval$start 和 df_interval$end 来更改 df1$scaf_name

例如

对于每个出现在 df_interval$scaffold 中(忽略 `_` 后缀)的 df1$scaf_name,凡是 df1$coordinates 位于 1-14 之间的行,都将被重命名为 JZSA01000001.1_0

在这里我应该得到输出

scaf_name       coordinates value    
JZSA01000001.1_0    1   2
JZSA01000001.1_0    2   2
JZSA01000001.1_0    3   2
JZSA01000001.1_0    4   2
JZSA01000001.1_0    5   2
JZSA01000001.1_0    6   2
JZSA01000001.1_0    7   2
JZSA01000001.1_0    8   2
JZSA01000001.1_0    9   2
JZSA01000001.1_0    10  2
JZSA01000001.1_0    11  5
JZSA01000001.1_0    12  5
JZSA01000001.1_0    13  5
JZSA01000001.1_0    14  5
JZSA01000001.1_1    15  5
JZSA01000001.1_1    16  5
JZSA01000001.1_1    17  5
JZSA01000001.1_1    18  6
JZSA01000002.1      1   2
JZSA01000002.1      2   2
JZSA01000002.1      3   2
JZSA01000002.1      4   2
JZSA01000002.1      5   2
JZSA01000002.1      6   2
JZSA01000003.1_0    1   5
JZSA01000003.1_0    2   5
JZSA01000003.1_0    3   6
JZSA01000003.1_1    4   6
JZSA01000003.1_1    5   6
JZSA01000003.1_1    6   6
JZSA01000003.1_2    7   6
JZSA01000003.1_2    8   6
JZSA01000003.1_2    9   6

上面给出的代码在这份示例数据上可以正常运行,但是当文件非常大时,我就会得到上述错误。如果有人能提供一个更快的方法,那就太好了。 谢谢

数据

df1

# dput() of df1: 33 rows; `scaf_name` is a factor with 3 levels, `coor` and
# `dinates.value` are integer columns (note these names differ from the
# `coordinates` / `value` header shown in the printed table).
structure(list(scaf_name = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("JZSA01000001.1", 
"JZSA01000002.1", "JZSA01000003.1"), class = "factor"), coor = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 
7L, 8L, 9L), dinates.value = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 2L, 2L, 2L, 2L, 2L, 2L, 
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L)), class = "data.frame", row.names = c(NA, 
-33L))

df_interval

# dput() of df_interval: 6 rows; `scaffold` is a factor with 6 levels,
# `start` and `end` are integer columns giving each interval's bounds.
structure(list(scaffold = structure(1:6, .Label = c("JZSA01000001.1_0", 
"JZSA01000001.1_1", "JZSA01000002.1", "JZSA01000003.1_0", "JZSA01000003.1_1", 
"JZSA01000003.1_2"), class = "factor"), start = c(1L, 15L, 1L, 
1L, 4L, 7L), end = c(14L, 18L, 12L, 3L, 6L, 9L)), class = "data.frame", row.names = c(NA, 
-6L))

0 个答案:

没有答案