根据数据框中的4个单元格对内容进行分类

时间:2018-07-17 20:18:18

标签: r dataframe classification classifyjs

我想根据数据框中的 4 列(m1、m2、f1 和 f2)对每一行的内容进行分类。除了 south、east、west 和 central 之外,数据中还可能出现 "empty" 或 "unclassified"。我已经用一个 for 循环解决了这个问题,该循环遍历每一行并检查所有可能的组合。

我的代码非常糟糕,而且运行起来要花很长时间,因此我非常感谢任何关于相关软件包或更好解决方案的提示。

这是我希望达到的目标:

m1      m2      f1      f2             CLASSIFIED
south   south   south   south          SOUTH
south   empty   west    empty          SOUTH_WEST
central west    east    south          MIXED
empty   empty   empty   central        CENTRAL
south   west    east    empty          MIXED
south   south   south   unclassified   UNCLASSIFIED

上述数据的DF:

# Sample data: one row per record, one column per cell (m1, m2, f1, f2).
# Cell values are region names, "empty", or "unclassified".
m1 <- c("south", "south", "central", "empty", "south", "south")
m2 <- c("south", "empty", "west", "empty", "west", "south")
f1 <- c("south", "west", "east", "empty", "east", "south")
f2 <- c("south", "empty", "south", "central", "empty", "unclassified")

# stringsAsFactors is set explicitly so the columns are plain character
# vectors on every R version (the default was TRUE before R 4.0.0).
df <- data.frame(m1, m2, f1, f2, stringsAsFactors = FALSE)

这是我的代码的一部分,这部分代码特别长,尤其是当其他类(如南方和中央等)混合在一起时。

output.mixed <- data.frame(Region=1)
output.mixed

i = 1 
for (i in 1:32857){ # IMPORTANT TO SPECIFY NUMBER OF ROWS TO LOOP OVER or fix it with n variable
  if(data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == "central" | # All central or
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == ""        | # All except last
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == "central" | # All except 3rd
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == "central" | # All except 2nd
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == "central" | # Alle except 1st
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == ""        | # 3&4 empty
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == "central" | # 1&2 empty
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == ""        | # 2&4 emoty
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == "central" | # 1st single
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == ""        | # 2nd single
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == ""        | # 3rd single
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == ""        |
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == "central" |
     data.c[i:i,1:1] == ""        &
     data.c[i:i,2:2] == "central" &
     data.c[i:i,3:3] == "central" &
     data.c[i:i,4:4] == ""        |
     data.c[i:i,1:1] == "central" &
     data.c[i:i,2:2] == ""        &
     data.c[i:i,3:3] == ""        &
     data.c[i:i,4:4] == "central"){ # 1&3 empty
    (output.mixed[i,] <- c("CENTRAL"))
  } else if (data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "south" | # All south or
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == ""| # All except last
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "south" | # All except 3rd
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "south" | # All except 2nd
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "south" | # Alle except 1st
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3&4 empty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "south" | # 1&2 empty
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "" | # 2&4 emoty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "south" | # 1st single
             data.c[i:i,1:1] == "south" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 2nd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "south" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3rd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "south" &
             data.c[i:i,4:4] == "" |
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "south"){
    (output.mixed[i,] <- c("SOUTH"))
  } else if (data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "west" | # All west or
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == ""| # All except last
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "west" | # All except 3rd
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "west" | # All except 2nd
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "west" | # Alle except 1st
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3&4 empty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "west" | # 1&2 empty
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "" | # 2&4 emoty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "west" | # 1st single
             data.c[i:i,1:1] == "west" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 2nd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "west" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3rd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "west" &
             data.c[i:i,4:4] == "" |
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "west"){
    (output.mixed[i,] <- c("WEST"))
  } else if (data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "east" | # All east or
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == ""| # All except last
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "east" | # All except 3rd
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "east" | # All except 2nd
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "east" | # Alle except 1st
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3&4 empty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "east" | # 1&2 empty
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "" | # 2&4 emoty
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "east" | # 1st single
             data.c[i:i,1:1] == "east" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 2nd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "east" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "" | # 3rd single
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "" |
             data.c[i:i,1:1] == "" &
             data.c[i:i,2:2] == "" &
             data.c[i:i,3:3] == "" &
             data.c[i:i,4:4] == "east"){
    (output.mixed[i,] <- c("EAST"))
  } else if (data.c[i:i,1:1] == "central"  & # Mixed Central & East 1
             data.c[i:i,2:2] == "east"     &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "central"  & # Row 2
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "central"  & # Row 3
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == ""         & # Row 4
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == "east"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 5
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == "east"     & # Row 6
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "east"     & # Row 7
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 8
             data.c[i:i,2:2] == "east"     &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 9
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central" &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == "east"     & # Row 10
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == ""         & # Row 11
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == ""         & # Row 12
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east"     &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == "central"  & # Row 13
             data.c[i:i,2:2] == "east"     &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "central"  & # Row 14
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == "east"     & # Row 15
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == ""         & # Row 16
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == "east"     &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == "central"  & # Row 17
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == "east"     & # Row 18
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == "central"  & # Row 19
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"     |
             data.c[i:i,1:1] == "central"  & # Row 20
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == "east"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 21
             data.c[i:i,2:2] == "east"     &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == "east"     & # Row 22
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central"  &
             data.c[i:i,4:4] == "central"  |
             data.c[i:i,1:1] == "east"     & # Mixed East & Central 2 MIRRORED ON EAST CENTRAL
             data.c[i:i,2:2] == "central"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "east"  & # Row 2
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "east"  & # Row 3
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == ""         & # Row 4
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == "central"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 5
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == "central"     & # Row 6
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "central"     & # Row 7
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 8
             data.c[i:i,2:2] == "central"     &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 9
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east" &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == "central"     & # Row 10
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == ""         & # Row 11
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == ""         & # Row 12
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "central"     &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == "east"  & # Row 13
             data.c[i:i,2:2] == "central"     &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == "east"  & # Row 14
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == "central"     & # Row 15
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == ""         & # Row 16
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == "central"     &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == "east"  & # Row 17
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == "central"     & # Row 18
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == "east"  & # Row 19
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == ""         &
             data.c[i:i,4:4] == "central"     |
             data.c[i:i,1:1] == "east"  & # Row 20
             data.c[i:i,2:2] == "east"  &
             data.c[i:i,3:3] == "central"     &
             data.c[i:i,4:4] == ""         |
             data.c[i:i,1:1] == ""         & # Row 21
             data.c[i:i,2:2] == "central"     &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == "east"  |
             data.c[i:i,1:1] == "central"     & # Row 22
             data.c[i:i,2:2] == ""         &
             data.c[i:i,3:3] == "east"  &
             data.c[i:i,4:4] == "east" ) {
    (output.mixed[i,] <- c("CENTRAL/EAST"))
  } else if ........ 

感谢您的任何建议 /马丁

2 个答案:

答案 0 :(得分:0)

也许有更简单更好的方法,但是下面的方法可以满足您的需求。

# Classify each row of `df` by the distinct values found in its cells
# ("empty" cells are ignored):
#   - any "unclassified" cell -> "UNCLASSIFIED"
#   - only "empty" cells      -> "EMPTY"
#   - one distinct value      -> that value, upper-cased
#   - two distinct values     -> both, upper-cased, joined by "_"
#   - three or more           -> "MIXED"
# The result is stored in the new column df$CLASSIFIED.
df1 <- df    # Work with a copy

df1[] <- lapply(df1, as.character)     # factor columns -> character
is.na(df1) <- df1 == "unclassified"    # flag "unclassified" cells as NA

# One vector of distinct values per row. Do not use apply(df1, 1, unique):
# when every row has the same number of distinct values, apply() simplifies
# its result to a vector or matrix and the steps below then iterate over
# single cells instead of rows.
cells <- as.matrix(df1)
df1 <- lapply(seq_len(nrow(cells)), function(i) unique(cells[i, ]))

# Drop "empty" but keep NA so "unclassified" rows are still detected.
df1 <- lapply(df1, function(x) x[is.na(x) | x != "empty"])

# vapply() guarantees exactly one label per row.
df$CLASSIFIED <- vapply(df1, function(x) {
  if (anyNA(x)) {
    "UNCLASSIFIED"
  } else if (length(x) == 0L) {
    "EMPTY"    # nothing but "empty" cells: not a mix of regions
  } else if (length(x) == 1L) {
    toupper(x)
  } else if (length(x) == 2L) {
    paste(toupper(x), collapse = "_")
  } else {
    "MIXED"
  }
}, character(1))
rm(df1, cells)    # Tidy up

df
#       m1    m2    f1           f2   CLASSIFIED
#1   south south south        south        SOUTH
#2   south empty  west        empty   SOUTH_WEST
#3 central  west  east        south        MIXED
#4   empty empty empty      central      CENTRAL
#5   south  west  east        empty        MIXED
#6   south south south unclassified UNCLASSIFIED

答案 1 :(得分:0)

# Classify each row of `df` on two axes. A label is built from the first
# vertical and the first horizontal value found in the row; rows with more
# than one distinct value on either axis become "MIXED", and rows containing
# "unclassified" become "UNCLASSIFIED".
horizontal <- c("east", "central", "west")
vertical   <- c("south", "central", "north")

# Take the first horizontal and the first vertical value of each row
# (NA when the row has none on that axis) and build a new column from them.
first_horizontal <- toupper(apply(df, 1, function(x) x[x %in% horizontal][1]))
first_vertical   <- toupper(apply(df, 1, function(x) x[x %in% vertical][1]))
CLASSIFIED <- gsub(
  "_*NA_*", "",
  paste(first_vertical, first_horizontal, sep = "_")
)

# "central" belongs to both axes, so a central-only row would otherwise be
# labelled "CENTRAL_CENTRAL"; collapse such doubled labels to a single name.
doubled <- CLASSIFIED == paste(first_vertical, first_vertical, sep = "_")
CLASSIFIED[doubled] <- first_vertical[doubled]

# But if there are several horizontal or several vertical directions,
# including central, we call it mixed
mixed <-
  apply(df, 1, function(x) length(unique(x[x %in% horizontal])) > 1) |
  apply(df, 1, function(x) length(unique(x[x %in% vertical])) > 1)
CLASSIFIED[mixed] <- "MIXED"

# And if it contains "unclassified", whatever we set it to previously,
# we'll set it to "UNCLASSIFIED"
unclassified <- apply(df, 1, function(x) "unclassified" %in% x)
CLASSIFIED[unclassified] <- "UNCLASSIFIED"

df$CLASSIFIED <- CLASSIFIED
df
#        m1    m2    f1           f2   CLASSIFIED
# 1   south south south        south        SOUTH
# 2   south empty  west        empty   SOUTH_WEST
# 3 central  west  east        south        MIXED
# 4   empty empty empty      central      CENTRAL
# 5   south  west  east        empty        MIXED
# 6   south south south unclassified UNCLASSIFIED