用NA替换特定的字符串

时间:2019-09-02 15:54:37

标签: r

我想把特定的值替换为NA:只要某个值中包含“missing”这个词,就将它替换为NA。

下面的代码没有达到我想要的效果。另外,我还想对数据中的所有列都执行这一操作。

# NOTE(review): this is the attempt that does not behave as intended.
# - str_detect() yields a logical vector, but na_if(x, y) turns into NA the
#   elements of x that are *equal to* y, so tmax is compared against
#   TRUE/FALSE values instead of being masked by them.
# - tolower("missing") is simply "missing"; the sample data holds
#   "Missing_1", "Missing_2", ... and str_detect() is case-sensitive by
#   default, so the pattern would not match even with the right masking.
# - Only tmax is handled here; the other columns are left untouched.
data %>% 
  mutate(tmax_na = na_if(tmax, str_detect(tmax, tolower("missing"))))

数据:

# dput() representation of the sample data: a 20-row data.frame with the
# columns yyyy (integer), month (factor), tmax (factor) and tmin (factor).
# tmax and tmin store their readings as factor levels (text), and include the
# placeholder levels "Missing_1" ... "Missing_4" that should become NA.
structure(list(yyyy = c(1908L, 1908L, 1908L, 1908L, 1908L, 1908L, 
1908L, 1908L, 1908L, 1908L, 1908L, 1908L, 1909L, 1909L, 1909L, 
1909L, 1909L, 1909L, 1909L, 1909L), month = structure(c(5L, 4L, 
8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L, 
7L, 6L, 2L), .Label = c("April", "August", "December", "February", 
"January", "July", "June", "March", "May", "November", "October", 
"September"), class = "factor"), tmax = structure(c(10L, 15L, 
14L, 17L, 18L, 8L, 19L, 7L, 5L, 2L, 16L, 13L, 10L, 11L, 12L, 
1L, 3L, 4L, 6L, 9L), .Label = c("12.2", "14.6", "14.7", "15.0", 
"16.3", "17.3", "17.5", "17.7", "18.8", "5.0", "5.5", "5.6", 
"5.8", "6.2", "7.3", "9.6", "Missing_1", "Missing_2", "Missing_3"
), class = "factor"), tmin = structure(c(2L, 5L, 4L, 9L, 14L, 
17L, 8L, 18L, 16L, 15L, 11L, 19L, 3L, 1L, 1L, 10L, 12L, 13L, 
7L, 6L), .Label = c("-0.3", "-1.4", "0.1", "0.3", "1.9", "10.7", 
"10.8", "11.0", "2.1", "3.3", "3.4", "4.8", "7.5", "7.7", "8.0", 
"8.4", "8.7", "9.7", "Missing_4"), class = "factor")), class = "data.frame", row.names = c(NA, 
-20L))

3 个答案:

答案 0 :(得分:1)

使用base R(基础R):

# Replace every value that contains "missing" (in any letter case) with NA,
# one column at a time.
# `d[] <- lapply(...)` keeps `d` a data.frame and leaves each column's class
# intact. sapply() would instead collapse the result into a matrix (with the
# sample data the factor columns would come back as their integer codes).
d[] <- lapply(d, function(x) {
  # grepl() matches against the character form of x, so factor and numeric
  # columns are handled as well as character ones.
  replace(x, grepl("missing", x, ignore.case = TRUE), NA)
})

答案 1 :(得分:0)

我们可以使用mutate_at

library(dplyr)
library(stringr)  # str_detect() comes from stringr, not dplyr

# Add tmax_na / tmin_na: copies of tmax and tmin in which every value
# containing "Missing" is replaced by NA. The originals are left as they are.
# Columns are selected by name rather than by position (3:4) so the call keeps
# working if the column order changes.
data  %>% 
   # Plain NA is used (not NA_real_): the columns are factors, not doubles.
   mutate_at(vars(tmax, tmin),
             list(na = ~ replace(., str_detect(., "Missing"), NA)))
#   yyyy     month      tmax      tmin tmax_na tmin_na
#1  1908   January       5.0      -1.4     5.0    -1.4
#2  1908  February       7.3       1.9     7.3     1.9
#3  1908     March       6.2       0.3     6.2     0.3
#4  1908     April Missing_1       2.1    <NA>     2.1
#5  1908       May Missing_2       7.7    <NA>     7.7
#6  1908      June      17.7       8.7    17.7     8.7
#7  1908      July Missing_3      11.0    <NA>    11.0
#8  1908    August      17.5       9.7    17.5     9.7
#9  1908 September      16.3       8.4    16.3     8.4
#10 1908   October      14.6       8.0    14.6     8.0
#11 1908  November       9.6       3.4     9.6     3.4
#12 1908  December       5.8 Missing_4     5.8    <NA>
#13 1909   January       5.0       0.1     5.0     0.1
#14 1909  February       5.5      -0.3     5.5    -0.3
#15 1909     March       5.6      -0.3     5.6    -0.3
#16 1909     April      12.2       3.3    12.2     3.3
#17 1909       May      14.7       4.8    14.7     4.8
#18 1909      June      15.0       7.5    15.0     7.5
#19 1909      July      17.3      10.8    17.3    10.8
#20 1909    August      18.8      10.7    18.8    10.7

答案 2 :(得分:0)

# Set every cell of df whose text contains "Missing" to NA.
# as.matrix() flattens df into a single matrix so grepl() can scan all cells
# at once; grepl() returns a plain vector, so array(..., dim = dim(df))
# restores the row/column shape. A logical matrix with the same dimensions as
# df can index it directly, so no which(..., arr.ind = TRUE) step is needed.
df[array(grepl("Missing", as.matrix(df)), dim = dim(df))] <- NA