在R的数据框中将NA转换为数值的问题

时间:2018-05-18 19:57:28

标签: r character numeric na data-conversion

我有一个同时包含数值列和字符列的数据框。其中一些列的NA不是R中数值型的NA,而是字符型的NA(NA_character_)。如何将这些字符型NA转换为数值0?我希望这些列仍然保留并显示为0,因为我不想把它们从数据框中删除。

我试过了

df[is.na(df)] <-0 

但这样得到的只是字符型的"0",而不是数值0。

df <- as.numeric(as.character(df))

上面这行代码给出了如下警告信息:

NAs introduced by coercion 

是否有其他解决方案?谢谢。

这是一个可重复的小例子:

structure(list(DNB = c(2.05, 2.05, 2.06, 2.32, 2.32, 2.32), `NORSK HYDRO` = 
c(2.59, 
2.59, 2.65, 2.81, 2.63, 2.63), ORKLA = c(2.29, 2.29, 2.18, 2.31, 
2.25, 2.25), STOREBRAND = c(2.28, 2.28, 2.56, 2.88, 2.94, 2.94
), ATEA = c(2.25, 2.25, 2, 2, 2, 2), `SCHIBSTED A` = c(3.23, 
3.23, 3.08, 2.92, 2.92, 2.92), BONHEUR = c(2, 2, 2, 2, 2, 2), 
EKORNES = c(2.25, 2.25, 2.25, 2.25, 2.25, 2.25), `KONGSBERG GRUPPEN` = 
c(2.8, 
2.8, 2.5, 2.5, 2.5, 2.5), `TOMRA SYSTEMS` = c(2.43, 2.43, 
2.29, 2.29, 2.29, 2.29), VEIDEKKE = c(2.33, 2.33, 2.5, 2.5, 
2.33, 2.33), `ARENDALS FOSSEKOMPANI` = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), `OLAV THON EIEP.` = c(3, 3, 2.8, 2.8, 2.8, 2.8), `PETROLEUM GEO SERVICES` = c(3.13, 
3.13, 2.86, 2.63, 2.63, 2.63), `SPAREBANK 1 SR BANK` = c(3, 
3, 3, 3, 3, 3), `STOLT-NIELSEN` = c(NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_), `ODFJELL 'A'` = c(2.45, 2.45, 
2.4, 2.6, 2.4, 2.4), `SPAREBANK 1 NORD-NORGE` = c(3, 3, 3, 
3, 3, 3), `SPAREBANK 1 SMN` = c(3, 3, 3, 3, 3, 3), `WILHS.WILHELMSEN HDG.'A'` = c(2.67, 
2.67, 2.78, 2.67, 2.67, 2.67), `NORDEA BANK (~NK)` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `ATLAS COPCO 'A' (~NK)` = c(3.08, 
3.08, 3.1, 2.95, 2.95, 2.95), `VOLVO 'B' (~NK)` = c(3.13, 
3.13, 3.17, 2.79, 2.59, 2.59), `SANDVIK (~NK)` = c(3, 3, 
2.75, 3.04, 3.09, 3.09), `SWEDBANK 'A' (~NK)` = c(2.29, 2.29, 
2.21, 2.05, 2.1, 2.1), `ERICSSON 'B' (~NK)` = c(2.33, 2.33, 
2.38, 2.52, 2.44, 2.44), `SVENSKA HANDBKN.'A' (~NK)` = c(2.32, 
2.32, 2.33, 2.55, 2.55, 2.55), `HENNES & MAURITZ 'B' (~NK)` = c(3.35, 
3.35, 3.42, 3.17, 3.06, 3.06), `SEB 'A' (~NK)` = c(2.9, 2.9, 
2.9, 3, 3.09, 3.09), `INVESTOR 'B' (~NK)` = c(2.47, 2.47, 
2.38, 2.69, 2.62, 2.62), `SWEDISH MATCH (~NK)` = c(2.08, 
2.08, 1.83, 1.69, 1.69, 1.69), `ELECTROLUX 'B' (~NK)` = c(3.38, 
3.38, 3.23, 3.13, 3.13, 3.13), `SKANSKA 'B' (~NK)` = c(2.5, 
2.5, 2.43, 2.85, 2.86, 2.86), `SCA 'B' (~NK)` = c(2.96, 2.96, 
2.87, 2.64, 2.55, 2.55), `SECURITAS 'B' (~NK)` = c(3.64, 
3.64, 3.78, 4, 4, 4), `HOLMEN 'B' (~NK)` = c(3.16, 3.16, 
3.26, 3.05, 3.24, 3.24), `SSAB 'A' (~NK)` = c(2.33, 2.33, 
2.29, 2.41, 2.41, 2.41), `ERICSSON 'A' (~NK)` = c(NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_, 
NA_character_), `INVESTOR 'A' (~NK)` = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), `VOLVO 'A' (~NK)` = c(NA_character_, NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_), `NOVO NORDISK 'B' (~NK)` = c(2.52, 
2.52, 2.55, 2.64, 2.55, 2.55), `DANSKE BANK (~NK)` = c(2.12, 
2.12, 2.38, 2.53, 2.58, 2.58), `COLOPLAST 'B' (~NK)` = c(3.8, 
3.8, 4.13, 4.13, 4.13, 4.13), `CARLSBERG 'B' (~NK)` = c(3.11, 
3.11, 3.06, 3.24, 3.24, 3.24), `A P MOLLER - MAERSK 'B' (~NK)` = c(2.89, 
2.89, 2.75, 2.63, 2.75, 2.75), `TDC (~NK)` = c(2.93, 2.93, 
2.96, 2.96, 3.04, 3.04), `TOPDANMARK (~NK)` = c(2.78, 2.78, 
2.56, 2.8, 2.8, 2.8), `WILLIAM DEMANT HLDG. (~NK)` = c(4, 
4, 3.78, 4, 3.78, 3.78), `JYSKE BANK (~NK)` = c(1.5, 1.5, 
1.5, 1.5, 1.5, 1.5), `KOBENHAVNS LUFTHAVNE (~NK)` = c(2.56, 
2.56, 2.47, 2.75, 2.56, 2.56), `NKT (~NK)` = c(2.25, 2.25, 
2.25, 2.25, 2.25, 2.25), `ROCKWOOL 'B' (~NK)` = c(3.25, 3.25, 
3, 3, 3, 3), `SYDBANK (~NK)` = c(3.6, 3.6, 3.2, 4, 4, 4), 
`FLSMIDTH & CO.'B' (~NK)` = c(2.6, 2.6, 2.4, 2.4, 2.4, 2.4
), `GN STORE NORD (~NK)` = c(3, 3, 2.78, 2.89, 3.11, 3.11
), `ALK-ABELLO (~NK)` = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), `BANG & OLUFSEN 'B' (~NK)` = c(4, 4, 
3.67, 3.22, 3.22, 3.22), `SANTA FE GROUP (~NK)` = c(3.5, 
3.5, 3.4, 3.22, 3.44, 3.44), `CARLSBERG 'A' (~NK)` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `ROCKWOOL 'A' (~NK)` = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), `NOKIA (~NK)` = c(1.89, 
1.89, 2.04, 1.86, 1.81, 1.81), `SAMPO 'A' (~NK)` = c(2.08, 
2.08, 2, 2.36, 2.36, 2.36), `KONE 'B' (~NK)` = c(3.71, 3.71, 
3.77, 3.67, 3.64, 3.64), `UPM-KYMMENE (~NK)` = c(2.43, 2.43, 
2.45, 2.09, 2.04, 2.04), `WARTSILA (~NK)` = c(2.13, 2.13, 
2.07, 2.07, 2.07, 2.07), `METSO (~NK)` = c(2.41, 2.41, 2.41, 
2.47, 2.47, 2.47), `STORA ENSO 'R' (~NK)` = c(2.76, 2.76, 
2.95, 2.74, 2.57, 2.57), `HUHTAMAKI (~NK)` = c(2.33, 2.33, 
2.13, 2.25, 2.25, 2.25), `FINNAIR (~NK)` = c(3, 3, 3, 2.92, 
2.92, 2.92), `KEMIRA (~NK)` = c(2.4, 2.4, 2.4, 2.67, 2.8, 
2.8), `UPONOR (~NK)` = c(2, 2, 2, 1.8, 1.8, 1.8), `KESKO 'B' (~NK)` = c(2.45, 
2.45, 3.09, 2.58, 2.67, 2.67), `ORION 'B' (~NK)` = c(2.57, 
2.57, 2.57, 2.63, 2.63, 2.63), `OUTOKUMPU 'A' (~NK)` = c(3.31, 
3.31, 3.31, 3, 2.63, 2.63), `RAISIO (~NK)` = c(2.91, 2.91, 
3.09, 3.08, 3, 3), `TIETO OYJ (~NK)` = c(2, 2, 2.11, 2.4, 
2.4, 2.4), `METSA BOARD 'B' (~NK)` = c(3.26, 3.26, 3.32, 
3.14, 2.84, 2.84), `ORION 'A' (~NK)` = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), `STOCKMANN 'A' (~NK)` = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
), `STORA ENSO 'A' (~NK)` = c(NA_character_, NA_character_, 
NA_character_, NA_character_, NA_character_, NA_character_
)), .Names = c("DNB", "NORSK HYDRO", "ORKLA", "STOREBRAND", 
"ATEA", "SCHIBSTED A", "BONHEUR", "EKORNES", "KONGSBERG GRUPPEN", 
"TOMRA SYSTEMS", "VEIDEKKE", "ARENDALS FOSSEKOMPANI", "OLAV THON EIEP.", 
"PETROLEUM GEO SERVICES", "SPAREBANK 1 SR BANK", "STOLT-NIELSEN", 
"ODFJELL 'A'", "SPAREBANK 1 NORD-NORGE", "SPAREBANK 1 SMN", 
"WILHS.WILHELMSEN HDG.'A'", 
"NORDEA BANK (~NK)", "ATLAS COPCO 'A' (~NK)", "VOLVO 'B' (~NK)", 
"SANDVIK (~NK)", "SWEDBANK 'A' (~NK)", "ERICSSON 'B' (~NK)", 
"SVENSKA HANDBKN.'A' (~NK)", "HENNES & MAURITZ 'B' (~NK)", "SEB 'A' (~NK)", 
"INVESTOR 'B' (~NK)", "SWEDISH MATCH (~NK)", "ELECTROLUX 'B' (~NK)", 
"SKANSKA 'B' (~NK)", "SCA 'B' (~NK)", "SECURITAS 'B' (~NK)", 
"HOLMEN 'B' (~NK)", "SSAB 'A' (~NK)", "ERICSSON 'A' (~NK)", "INVESTOR 'A' 
(~NK)", 
"VOLVO 'A' (~NK)", "NOVO NORDISK 'B' (~NK)", "DANSKE BANK (~NK)", 
"COLOPLAST 'B' (~NK)", "CARLSBERG 'B' (~NK)", "A P MOLLER - MAERSK 'B' 
(~NK)", 
"TDC (~NK)", "TOPDANMARK (~NK)", "WILLIAM DEMANT HLDG. (~NK)", 
"JYSKE BANK (~NK)", "KOBENHAVNS LUFTHAVNE (~NK)", "NKT (~NK)", 
"ROCKWOOL 'B' (~NK)", "SYDBANK (~NK)", "FLSMIDTH & CO.'B' (~NK)", 
"GN STORE NORD (~NK)", "ALK-ABELLO (~NK)", "BANG & OLUFSEN 'B' (~NK)", 
"SANTA FE GROUP (~NK)", "CARLSBERG 'A' (~NK)", "ROCKWOOL 'A' (~NK)", 
"NOKIA (~NK)", "SAMPO 'A' (~NK)", "KONE 'B' (~NK)", "UPM-KYMMENE (~NK)", 
"WARTSILA (~NK)", "METSO (~NK)", "STORA ENSO 'R' (~NK)", "HUHTAMAKI (~NK)", 
"FINNAIR (~NK)", "KEMIRA (~NK)", "UPONOR (~NK)", "KESKO 'B' (~NK)", 
"ORION 'B' (~NK)", "OUTOKUMPU 'A' (~NK)", "RAISIO (~NK)", "TIETO OYJ (~NK)", 
"METSA BOARD 'B' (~NK)", "ORION 'A' (~NK)", "STOCKMANN 'A' (~NK)", 
"STORA ENSO 'A' (~NK)"), row.names = c(NA, 6L), class = "data.frame")

1 个答案:

答案 0 :(得分:1)

我们可以遍历数据集的各列,用replace将NA替换为0,再将其转换为numeric(因为其中有一些character列):

df[] <- lapply(df, function(x) as.numeric(replace(x, is.na(x), 0)))

OP先将NA替换为0的方法也可行,但character列仍然是character类型,除非我们再把它们转换为numeric:

df[is.na(df)] <-0 
df[] <- lapply(df, as.numeric)

此处没有factor列,因此不需要先调用as.character。请注意,as.character/as.numeric作用于向量(即单个列),而不能直接作用于整个数据框;这也是问题中as.numeric(as.character(df))产生"NAs introduced by coercion"警告的原因。