我想把数据中某些特定的值替换为NA:当某个值包含单词“missing”时,就将其替换为NA。
下面的代码没有达到我想要的效果。另外,我希望对数据中的所有列都执行此操作。
# Asker's attempt: add a tmax_na column in which "missing" entries become NA.
# NOTE(review): this cannot give the intended result. tolower("missing") is
# just "missing", which does not match the capitalised "Missing_*" values in
# the sample data, and na_if() is handed the logical vector returned by
# str_detect() as its second argument instead of a value to compare tmax with.
data %>%
mutate(tmax_na = na_if(tmax, str_detect(tmax, tolower("missing"))))
数据:
# Sample data (looks like dput() output): a 20-row data.frame with an integer
# column yyyy and three factor columns month, tmax and tmin. The tmax and tmin
# levels mix numeric-looking strings with the placeholders "Missing_1" ..
# "Missing_4", which are the values that should become NA.
structure(list(yyyy = c(1908L, 1908L, 1908L, 1908L, 1908L, 1908L,
1908L, 1908L, 1908L, 1908L, 1908L, 1908L, 1909L, 1909L, 1909L,
1909L, 1909L, 1909L, 1909L, 1909L), month = structure(c(5L, 4L,
8L, 1L, 9L, 7L, 6L, 2L, 12L, 11L, 10L, 3L, 5L, 4L, 8L, 1L, 9L,
7L, 6L, 2L), .Label = c("April", "August", "December", "February",
"January", "July", "June", "March", "May", "November", "October",
"September"), class = "factor"), tmax = structure(c(10L, 15L,
14L, 17L, 18L, 8L, 19L, 7L, 5L, 2L, 16L, 13L, 10L, 11L, 12L,
1L, 3L, 4L, 6L, 9L), .Label = c("12.2", "14.6", "14.7", "15.0",
"16.3", "17.3", "17.5", "17.7", "18.8", "5.0", "5.5", "5.6",
"5.8", "6.2", "7.3", "9.6", "Missing_1", "Missing_2", "Missing_3"
), class = "factor"), tmin = structure(c(2L, 5L, 4L, 9L, 14L,
17L, 8L, 18L, 16L, 15L, 11L, 19L, 3L, 1L, 1L, 10L, 12L, 13L,
7L, 6L), .Label = c("-0.3", "-1.4", "0.1", "0.3", "1.9", "10.7",
"10.8", "11.0", "2.1", "3.3", "3.4", "4.8", "7.5", "7.7", "8.0",
"8.4", "8.7", "9.7", "Missing_4"), class = "factor")), class = "data.frame", row.names = c(NA,
-20L))
答案 0 :(得分:1)
使用基础R(base R):
# Replace every value containing "missing" (case-insensitive) with NA in all
# columns of `d`, and return the result as a data frame.
#
# lapply() combined with `[] <-` is used instead of sapply(): sapply()
# simplifies the per-column results into a single matrix, which drops the
# data-frame structure and the individual column types (when factor columns
# are mixed with other types, the values can collapse to their integer codes).
# The work is done on a copy so that `d` itself is left untouched.
cleaned <- d
cleaned[] <- lapply(cleaned, function(x) {
  # grepl() coerces non-character input (e.g. factors) to character first.
  replace(x, grepl("missing", x, ignore.case = TRUE), NA)
})
cleaned
答案 1 :(得分:0)
我们可以使用mutate_at
library(dplyr)
library(stringr) # str_detect() comes from stringr, not dplyr

# For columns 3 and 4 (tmax and tmin in the sample data), add a copy named
# <column>_na in which every value containing "Missing" is replaced by NA.
# The original columns are kept as they are. Plain NA is used so the
# replacement takes on the type of whichever column it is inserted into.
data %>%
  mutate_at(3:4, list(na = ~ replace(., str_detect(., "Missing"), NA)))
# yyyy month tmax tmin tmax_na tmin_na
#1 1908 January 5.0 -1.4 5.0 -1.4
#2 1908 February 7.3 1.9 7.3 1.9
#3 1908 March 6.2 0.3 6.2 0.3
#4 1908 April Missing_1 2.1 <NA> 2.1
#5 1908 May Missing_2 7.7 <NA> 7.7
#6 1908 June 17.7 8.7 17.7 8.7
#7 1908 July Missing_3 11.0 <NA> 11.0
#8 1908 August 17.5 9.7 17.5 9.7
#9 1908 September 16.3 8.4 16.3 8.4
#10 1908 October 14.6 8.0 14.6 8.0
#11 1908 November 9.6 3.4 9.6 3.4
#12 1908 December 5.8 Missing_4 5.8 <NA>
#13 1909 January 5.0 0.1 5.0 0.1
#14 1909 February 5.5 -0.3 5.5 -0.3
#15 1909 March 5.6 -0.3 5.6 -0.3
#16 1909 April 12.2 3.3 12.2 3.3
#17 1909 May 14.7 4.8 14.7 4.8
#18 1909 June 15.0 7.5 15.0 7.5
#19 1909 July 17.3 10.8 17.3 10.8
#20 1909 August 18.8 10.7 18.8 10.7
答案 2 :(得分:0)
# Set every cell of `df` whose text contains "Missing" to NA, in place.
# as.matrix() flattens the data frame so grepl() can scan all cells in one
# call (values are matched as character strings); array() then restores the
# original dimensions, producing a logical mask with the same shape as `df`.
# A logical matrix of matching dimensions can index a data frame directly, so
# no which(..., arr.ind = TRUE) round-trip is needed.
is_missing <- array(grepl("Missing", as.matrix(df)), dim(df))
df[is_missing] <- NA