使用索引替换df中的值 - 为什么不工作?

时间:2017-10-25 09:36:44

标签: r outliers

我正在使用此处提供的功能:Replacing values in df using index和此处:How to repeat the Grubbs test and flag the outliers

# Function to detect outliers with Grubbs test in a vector
grubbs.flag <- function(vector) {
  outliers <- NULL
  test <- vector
  grubbs.result <- grubbs.test(test)
  pv <- grubbs.result$p.value
  # throw an error if there are too few values for the Grubb's test
  if (length(test) < 3 ) stop("Grubb's test requires > 2 input values")
  na.vect <- test
  while(pv < 0.05) {
    outliers <- c(outliers,as.numeric(strsplit(grubbs.result$alternative," ")[[1]][3]))
    test <- vector[!vector %in% outliers]
    # stop if all but two values are flagged as outliers
    if (length(test) < 3 ) {
      warning("All but two values flagged as outliers")
      break
    }
    grubbs.result <- grubbs.test(test)
    pv <- grubbs.result$p.value
    idx.outlier <- which(vector %in% outliers)
    na.vect <- replace(vector, idx.outlier, NA)

  }
  return(na.vect)
}

它完全适用于那里提供的示例数据。但是当我试图在我的数据帧上运行它时,似乎循环没有结束或者什么。有谁知道为什么会这样?

我的数据:

test <- structure(list(Abs_18 = c(0.04359, 0.05682, 0.05002, 0.04997, 
0.03433, 0.060055, 0.0447, 0.0499, 0.04509, 0.04875, 0.04052, 
0.062785, 0.07602, 0.05072, 0.04253, 0.05595, 0.02888, 0.077018, 
0.05416, 0.04966, 0.0476, 0.04252, 0.03891, 0.065207, 0.02675, 
0.05892, 0.03523, 0.04546, 0.02696, 0.024995, 0.02469, 0.0442, 
0.04504, 0.04421, 0.04683, 0.08017, -0.065334, 0.04914, 0.04086, 
0.05341, 0.02706, 0.065362, 0.01571, 0.01021, 0.04802, 0.04807, 
0.02735, 0.062755), FL_18 = c(3618, 3526, 3543, 5323, 5050, 767, 
3641, 3418, 3353, 4179, 4864, 760, 3693, 3408, 3309, 5057, 4686, 
748, 3693, 3349, 3240, 3934, 4876, 741, 2394, 3477, 3417, 4254, 
4899, 755, 2375, 3486, 3370, 4516, 4838, 772, 817, 3449, 3361, 
3945, 4856, 802, 2293, 2529, 3410, 4460, 5175, 813), Abs_25 = c(0.04261, 
0.05332, 0.04966, 0.0482, 0.03355, 0.059344, 0.04572, 0.04967, 
0.04275, 0.04989, 0.02745, 0.059196, 0.04649, 0.05517, 0.04181, 
0.06214, 0.02749, 0.074719, 0.05264, 0.044, 0.04486, 0.03999, 
0.0331, 0.058829, 0.03119, 0.05943, 0.03781, 0.04003, 0.02383, 
0.069582, 0.02868, 0.04943, 0.04566, 0.0422, 0.03265, 0.067265, 
-0.067674, 0.05038, 0.03828, 0.03854, 0.02671, 0.071176, 0.01602, 
0.01055, 0.03961, 0.04729, 0.03009, 0.06377), FL_25 = c(2714, 
2656, 2625, 3856, 3642, 606, 2759, 2580, 2498, 3276, 3495, 596, 
2808, 2590, 2482, 3759, 3365, 586, 2838, 2548, 2433, 2864, 3557, 
591, 1878, 2664, 2588, 3081, 3603, 602, 1820, 2672, 2576, 3154, 
3589, 617, 572, 2661, 2575, 2918, 3601, 635, 1739, 1924, 2650, 
3260, 3866, 655)), .Names = c("Abs_18", "FL_18", "Abs_25", "FL_25"
), row.names = c(NA, -48L), class = "data.frame")

我正在使用:

apply(test,2,grubbs.flag)

0 个答案:

没有答案