Question

我在R中编写了一个函数，它解析数据框中的参数，并输出旧数据框+一个包含每行数据的新列。

我收到以下警告：Warning message: In `[[.data.frame`(xx, sxx[j]) : named arguments other than 'exact' are discouraged

说实话，我不确定这意味着什么。我对结果进行了抽查，看起来没有问题。函数本身很长，如果需要，我会把它贴出来，以便大家更好地回答问题。编辑：

这是一个示例数据框：

# Example input: one row per variant, one column per sample.
my_df <- data.frame(
  ALT = c("A,C", "A,G"),
  Sample1 = c(
    "1/1:35,3,0,35,3,35:1:1:0:0,1,0",
    "./.:0,0,0,0,0,0:0:0:0:0,0,0"
  ),
  Sample2 = c(
    "2/2:188,188,188,33,33,0:11:11:0:0,0,11",
    "1/1:255,99,0,255,99,255:33:33:0:0,33,0"
  ),
  Sample3 = c(
    "1/1:219,69,0,219,69,219:23:23:0:0,23,0",
    "0/1:36,0,78,48,87,120:7:3:0:4,3,0"
  )
)

这就是该函数：

# Count, for every row of a VCF-like data frame, how many ALT alleles have a
# coverage (summed across samples) strictly greater than `threshold`.
#
# Arguments:
#   in_vcf     Data frame with one row per variant. It must contain an
#              `is_biallelic` column (the input must have gone through
#              biallelic_assessment first) and, for rows that are not
#              biallelic, an `ALT` column of comma-separated alleles.
#   start_col  Index of the first sample column.
#   end_col    Index of the last sample column.
#   threshold  An ALT allele is counted when its summed coverage is strictly
#              greater than this value (default 1).
#   cov_field  Position, among the ':'-separated fields of a sample entry, of
#              the comma-separated per-allele coverages (default 6, the value
#              that used to be hard-coded).
#
# Value:
#   `in_vcf` with an extra numeric column `ALT_alleles`. Rows flagged as
#   biallelic always get 1; other rows get the number of ALT alleles over
#   the threshold.
multi_allelic_filter_v2 <- function(in_vcf, start_col, end_col, threshold = 1,
                                    cov_field = 6) {
  table0 <- in_vcf
  if (!"is_biallelic" %in% names(table0)) {
    stop("'in_vcf' has no 'is_biallelic' column; ",
         "run biallelic_assessment first", call. = FALSE)
  }

  # Coverage of a single allele summed across samples. Each element of
  # `list_of_unparsed_cov` is one sample's comma-separated coverage string;
  # `allele_pos` selects the value to take from each of them.
  single_allele_tot_cov_count <- function(list_of_unparsed_cov, allele_pos) {
    per_sample <- vapply(
      strsplit(as.character(list_of_unparsed_cov), split = ",", fixed = TRUE),
      function(cov) as.numeric(cov[allele_pos]),
      numeric(1)
    )
    sum(per_sample)
  }

  # Number of ALT alleles over the threshold for a one-row data frame.
  single_row_assessment <- function(single_row) {
    # isTRUE() makes an NA flag fall through to the coverage-based count
    # instead of aborting inside `if`.
    if (isTRUE(single_row[["is_biallelic"]] == TRUE)) {
      return(1)
    }
    alt <- single_row[["ALT"]]
    if (is.null(alt)) {
      stop("'in_vcf' has no 'ALT' column", call. = FALSE)
    }
    # Unparsed per-allele coverages, one string per sample column.
    altcovs_unparsed <- vapply(
      start_col:end_col,
      function(j) {
        strsplit(as.character(single_row[1, j]), split = ":",
                 fixed = TRUE)[[1]][cov_field]
      },
      character(1)
    )
    n_alt <- lengths(strsplit(as.character(alt), ",", fixed = TRUE))
    # seq_len() yields an empty sequence when ALT is empty, whereas 1:0 would
    # iterate over c(1, 0). ALT allele k sits at position k + 1 of the
    # coverage string; position 1 is skipped (presumably the REF allele --
    # confirm against the VCF producer).
    alt_coverages <- vapply(
      seq_len(n_alt),
      function(k) single_allele_tot_cov_count(altcovs_unparsed,
                                              allele_pos = k + 1),
      numeric(1)
    )
    sum(alt_coverages > threshold)
  }

  # One value per row; works for zero-row input as well, where the result is
  # an empty numeric column.
  table0$ALT_alleles <- vapply(
    seq_len(nrow(table0)),
    function(r) single_row_assessment(table0[r, , drop = FALSE]),
    numeric(1)
  )
  table0
}

基本上，在如下字符串中：'1/1:219,69,0,219,69,219:23:23:0:0,23,0'，各字段由 ":" 分隔，我感兴趣的是最后一个字段的最后两个数字（23 和 0）；对于每一行，我想把所有样本在这两个位置上的数字分别相加（得到两个独立的和），并输出有多少个"和"超过了阈值。希望这样说得清楚……

Answer 1

好的，

我在同一台计算机（同一项目，然后是新项目）上使用相同的数据集重新运行脚本，然后在另一台计算机上再次运行它，无论如何都无法再次收到警告。我不确定发生了什么，结果似乎是正确的。没关系。无论如何，谢谢你的意见和建议

需要帮助在R中表示警告

1 个答案: