统计 R 中所选列的非空单元格数量

时间:2015-04-09 11:40:35

标签: r

我想在 R 中添加一列,用来统计所选列中非空值的个数。我目前的尝试如下所示。

> mydata29<-read.csv("C:\\zawadi\\exam.CSV",header=T,sep=",")
> mydata29
  ID  NAMES SCIENCE MATHS ENG SWAHILI SSTUDIES
1  1   JOHN      80    56  88      NA       90
2  2   JANE     100    56  64      67       87
3  3   MARY      NA    74  NA      NA       NA
4  4 JOSEPH      70    78  45      44       54
5  5  JOYCE      60    90  89      89       78
6  6   JUNE      NA    NA  67      52       NA
> length(mydata29[,c(3:7)])
[1] 5
> mydata29$subjectsdone<-length(mydata29[,c(3:7)])
> mydata29$subjectsdone
[1] 5 5 5 5 5 5
> mydata29
  ID  NAMES SCIENCE MATHS ENG SWAHILI SSTUDIES subjectsdone
1  1   JOHN      80    56  88      NA       90            5
2  2   JANE     100    56  64      67       87            5
3  3   MARY      NA    74  NA      NA       NA            5
4  4 JOSEPH      70    78  45      44       54            5
5  5  JOYCE      60    90  89      89       78            5
6  6   JUNE      NA    NA  67      52       NA            5
> mydata29$subjectsdone<-length(mydata29[,c(3:7)],na.rm=TRUE)
Error in length(mydata29[, c(3:7)], na.rm = TRUE) : 
  2 arguments passed to 'length' which requires 1

我想要的输出是

  ID  NAMES SCIENCE MATHS ENG SWAHILI SSTUDIES subjectsdone
1  1   JOHN      80    56  88      NA       90            4
2  2   JANE     100    56  64      67       87            5
3  3   MARY      NA    74  NA      NA       NA            1
4  4 JOSEPH      70    78  45      44       54            5
5  5  JOYCE      60    90  89      89       78            5
6  6   JUNE      NA    NA  67      52       NA            2

2 个答案:

答案 0 :(得分:3)

您可以对所选列的逻辑矩阵(`!is.na(...)`)使用 `rowSums`,并将结果赋给新列(“subjectsdone”)。

# Count, for each row, how many of the subject columns hold a non-NA value.
# The subject columns are selected by name instead of with `-(1:2)`, so a
# `subjectsdone` column left over from an earlier attempt is not counted as
# an extra subject, and re-running this line gives the same result.
subject_cols <- c("SCIENCE", "MATHS", "ENG", "SWAHILI", "SSTUDIES")
# !is.na() yields a logical matrix; rowSums() adds up the TRUE values per row.
mydata29$subjectsdone <- rowSums(!is.na(mydata29[subject_cols]))
mydata29
#   ID  NAMES SCIENCE MATHS ENG SWAHILI SSTUDIES subjectsdone
#1  1   JOHN      80    56  88      NA       90            4
#2  2   JANE     100    56  64      67       87            5
#3  3   MARY      NA    74  NA      NA       NA            1
#4  4 JOSEPH      70    78  45      44       54            5
#5  5  JOYCE      60    90  89      89       78            5
#6  6   JUNE      NA    NA  67      52       NA            2

数据

# Example data: one row per student with an ID, a name and five integer
# subject scores. NA marks a missing score.
mydata29 <- data.frame(
  ID = 1:6,
  NAMES = c("JOHN", "JANE", "MARY", "JOSEPH", "JOYCE", "JUNE"),
  SCIENCE = c(80L, 100L, NA, 70L, 60L, NA),
  MATHS = c(56L, 56L, 74L, 78L, 90L, NA),
  ENG = c(88L, 64L, NA, 45L, 89L, 67L),
  SWAHILI = c(NA, 67L, NA, 44L, 89L, 52L),
  SSTUDIES = c(90L, 87L, NA, 54L, 78L, NA),
  row.names = c("1", "2", "3", "4", "5", "6"),
  # Keep NAMES as character regardless of the R version's default.
  stringsAsFactors = FALSE
)

答案 1 :(得分:1)

或者使用 `apply`:

# Alternative using apply(): go over the rows (MARGIN = 1) of the columns at
# positions 3 to 7 and count the non-NA entries in each row.
mydata29$subjectsdone <- apply(
  mydata29[, 3:7],
  MARGIN = 1,
  FUN = function(scores) sum(!is.na(scores))
)
mydata29
#   ID  NAMES SCIENCE MATHS ENG SWAHILI SSTUDIES subjectsdone
#1  1   JOHN      80    56  88      NA       90            4
#2  2   JANE     100    56  64      67       87            5
#3  3   MARY      NA    74  NA      NA       NA            1
#4  4 JOSEPH      70    78  45      44       54            5
#5  5  JOYCE      60    90  89      89       78            5
#6  6   JUNE      NA    NA  67      52       NA            2

在 `apply` 中,首先指定要考虑的列(第 3 到第 7 列),然后指定按行应用函数(`MARGIN = 1`),最后把该函数定义为统计每行中非 NA 条目的个数。