我有一些数据,我正在尝试从中删除NA值。
我应用以下内容:
# Asker's attempt. On the sample data this returns no rows: drop_na()
# discards a row when ANY of the selected columns is NA, and gvkey2..gvkey5
# are NA in all 20 rows, so no row survives.
data %>%
head(20) %>%
drop_na(contains("gvkey"))
这会删除所有行。我想保留第12和16行,因为这两行在gvkey[X]
列之一中具有值。如果gvkey2
中有值,我也想保留这些行。gvkey[1]...[5]
也是如此。
数据:
# Reproducible sample for the question: a 20-row data.frame
# (row.names = c(NA, 20L)). `cod` is a factor; the remaining columns are
# integer or character vectors. Of the five gvkey1..gvkey5 columns only
# gvkey1 holds values (138961L at rows 12 and 16); gvkey2..gvkey5 are NA in
# every row.
data <- structure(list(cod = structure(c(12L, 2L, 3L, 3L, 3L, 2L, 2L,
3L, 3L, 3L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 5L, 5L, 2L), .Label = c("01 Unassigned",
"02 US corporation", "03 Foreign corp, incl. state-owned", "04 US individual",
"05 Foreign individual", "06 US government", "07 Foreign government",
"08 US local government", "09 US state government", "US university",
"Foreign university", "US institute", "Foreign institute", "US hospital/med inst",
"Foreign hospital/med inst"), class = "factor"), cod_fix = c(32L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), pdpass = c(12832332L, 11624193L, 12549858L, 11170486L,
10513915L, 13143949L, 10817896L, 12517845L, 13028479L, 12986393L,
11719402L, 12298245L, 12249804L, 12898008L, 13100249L, 12298245L,
10256638L, -19842L, -19842L, 11485267L), standard_name = c("& AEROSPACE FOUND",
"& COMMUNICATIONS", "& DESIGN LTD", "& FR", "& SA FAB DEBAUCHES",
"& SON INC", "& YET INC", "&&T TECH CO LTD", "01 COMMUNIQUE LAB INC",
"01 DB METRAVIB", "02 MICRO INC", "02 MICRO INT LTD", "02 TECH INC",
"02IC INC", "02MICRO", "02MICRO INT LTD", "03 CO", "080 STUDIO INC",
"0KAMOTO ; SHOSUKE", "1"), uspto_assignee = c(835951L, 721167L,
806186L, 182855L, 182585L, NA, 30140L, 802588L, NA, NA, 729315L,
782400L, 775728L, 825676L, NA, 782400L, 641605L, NA, NA, 708378L
), pdpco1 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 138961L,
NA, NA, NA, 138961L, NA, NA, NA, NA), source = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, "m2006", NA, NA, NA, "m2006",
NA, NA, NA, NA), begyr1 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 2000L, NA, NA, NA, 2000L, NA, NA, NA, NA), gvkey1 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 138961L, NA, NA, NA,
138961L, NA, NA, NA, NA), endyr1 = c(NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 2006L, NA, NA, NA, 2006L, NA, NA, NA, NA),
pdpco2 = c(NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_), begyr2 = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), gvkey2 = c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), endyr2 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), pdpco3 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), begyr3 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), gvkey3 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), endyr3 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), pdpco4 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), begyr4 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), gvkey4 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), endyr4 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), pdpco5 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), begyr5 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), gvkey5 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_), endyr5 = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), row.names = c(NA,
20L), class = "data.frame")
答案 0 :(得分:2)
尝试:
library(dplyr)
# Keep the rows in which at least one gvkey* column holds a value.
# if_any() (dplyr >= 1.0.4) replaces the superseded filter_at()/any_vars()
# pair: the predicate is applied to every selected column and a row is kept
# as soon as one of those columns passes.
data %>%
  filter(if_any(starts_with("gvkey"), ~ !is.na(.x)))
输出:
cod cod_fix pdpass standard_name uspto_assignee pdpco1 source begyr1 gvkey1
1 03 Foreign corp, incl. state-owned 0 12298245 02 MICRO INT LTD 782400 138961 m2006 2000 138961
2 03 Foreign corp, incl. state-owned 0 12298245 02MICRO INT LTD 782400 138961 m2006 2000 138961
endyr1 pdpco2 begyr2 gvkey2 endyr2 pdpco3 begyr3 gvkey3 endyr3 pdpco4 begyr4 gvkey4 endyr4 pdpco5 begyr5 gvkey5
1 2006 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
2 2006 NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
endyr5
1 NA
2 NA
答案 1 :(得分:1)
对于base R
,我们可以将Reduce
与lapply
一起使用
# Base R: build one "has a value" mask per gvkey* column with lapply(), OR
# the masks together with Reduce(), and keep the rows where the result is TRUE.
data[
  Reduce(`|`, lapply(data[startsWith(names(data), "gvkey")], Negate(is.na))),
]
# cod cod_fix pdpass standard_name uspto_assignee pdpco1 source
#12 03 Foreign corp, incl. state-owned 0 12298245 02 MICRO INT LTD 782400 138961 m2006
#16 03 Foreign corp, incl. state-owned 0 12298245 02MICRO INT LTD 782400 138961 m2006
# begyr1 gvkey1 endyr1 pdpco2 begyr2 gvkey2 endyr2 pdpco3 begyr3 gvkey3 endyr3 pdpco4 begyr4 gvkey4
#12 2000 138961 2006 NA NA NA NA NA NA NA NA NA NA NA
#16 2000 138961 2006 NA NA NA NA NA NA NA NA NA NA NA
# endyr4 pdpco5 begyr5 gvkey5 endyr5
#12 NA NA NA NA NA
#16 NA NA NA NA NA