我想根据数据框中的 4 列(m1、m2、f1 和 f2)对每一行进行分类。除了 south、east、west 和 central 之外,数据中还可能出现 "empty" 或 "unclassified"。我目前用一个 for 循环解决了这个问题:它逐行遍历并检查所有可能的组合。
我的代码非常糟糕,而且运行起来非常耗时,因此如果有人能推荐相关的软件包或更好的解决方案,我将不胜感激。
这是我希望达到的目标:
m1 m2 f1 f2 CLASSIFIED
south south south south SOUTH
south empty west empty SOUTH_WEST
central west east south MIXED
empty empty empty central CENTRAL
south west east empty MIXED
south south south unclassified UNCLASSIFIED
用于生成上述数据的数据框(不含 CLASSIFIED 列):
# Sample input: one character vector per column, six observations each.
# Besides the compass regions, "empty" and "unclassified" may occur.
m1 <- c("south", "south", "central", "empty", "south", "south")
m2 <- c("south", "empty", "west", "empty", "west", "south")
f1 <- c("south", "west", "east", "empty", "east", "south")
f2 <- c("south", "empty", "south", "central", "empty", "unclassified")

# Assemble the columns into the data frame used by the examples below.
df <- data.frame(m1 = m1, m2 = m2, f1 = f1, f2 = f2)
下面是我代码的一部分。这段代码非常冗长,尤其是在处理不同类别(例如 south 和 central)混合出现的情况时。
# Result table: a one-column data frame seeded with a single placeholder row.
output.mixed <- data.frame(Region = 1)
output.mixed # echo the initial table
# Row index, set up front for the loop that follows.
i <- 1
for (i in 1:32857){ # IMPORTANT TO SPECIFY NUMBER OF ROWS TO LOOP OVER or fix it with n variable
if(data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" | # All central or
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" | # All except last
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" | # All except 3rd
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" | # All except 2nd
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" | # Alle except 1st
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3&4 empty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" | # 1&2 empty
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" | # 2&4 emoty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" | # 1st single
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 2nd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3rd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "central" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central"){ # 1&3 empty
(output.mixed[i,] <- c("CENTRAL"))
} else if (data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "south" | # All south or
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == ""| # All except last
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "south" | # All except 3rd
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "south" | # All except 2nd
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "south" | # Alle except 1st
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3&4 empty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "south" | # 1&2 empty
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "" | # 2&4 emoty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "south" | # 1st single
data.c[i:i,1:1] == "south" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 2nd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "south" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3rd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "south" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "south"){
(output.mixed[i,] <- c("SOUTH"))
} else if (data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "west" | # All west or
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == ""| # All except last
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "west" | # All except 3rd
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "west" | # All except 2nd
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "west" | # Alle except 1st
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3&4 empty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "west" | # 1&2 empty
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "" | # 2&4 emoty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "west" | # 1st single
data.c[i:i,1:1] == "west" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 2nd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "west" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3rd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "west" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "west"){
(output.mixed[i,] <- c("WEST"))
} else if (data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" | # All east or
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == ""| # All except last
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" | # All except 3rd
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" | # All except 2nd
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" | # Alle except 1st
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3&4 empty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" | # 1&2 empty
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" | # 2&4 emoty
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" | # 1st single
data.c[i:i,1:1] == "east" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 2nd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" | # 3rd single
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" &
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east"){
(output.mixed[i,] <- c("EAST"))
} else if (data.c[i:i,1:1] == "central" & # Mixed Central & East 1
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "central" & # Row 2
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "central" & # Row 3
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "" & # Row 4
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 5
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 6
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "east" & # Row 7
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 8
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 9
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 10
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "" & # Row 11
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "" & # Row 12
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 13
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "central" & # Row 14
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 15
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "" & # Row 16
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 17
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 18
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 19
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "central" & # Row 20
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 21
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "east" & # Row 22
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "east" & # Mixed East & Central 2 MIRRORED ON EAST CENTRAL
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "east" & # Row 2
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "east" & # Row 3
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "" & # Row 4
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 5
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 6
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "central" & # Row 7
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 8
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 9
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 10
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "" & # Row 11
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "" & # Row 12
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 13
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "east" & # Row 14
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 15
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "" & # Row 16
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 17
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "central" & # Row 18
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "east" & # Row 19
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "" &
data.c[i:i,4:4] == "central" |
data.c[i:i,1:1] == "east" & # Row 20
data.c[i:i,2:2] == "east" &
data.c[i:i,3:3] == "central" &
data.c[i:i,4:4] == "" |
data.c[i:i,1:1] == "" & # Row 21
data.c[i:i,2:2] == "central" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" |
data.c[i:i,1:1] == "central" & # Row 22
data.c[i:i,2:2] == "" &
data.c[i:i,3:3] == "east" &
data.c[i:i,4:4] == "east" ) {
(output.mixed[i,] <- c("CENTRAL/EAST"))
} else if ........
感谢您提供的任何建议 /马丁
答案 0 :(得分:0)
也许有更简单更好的方法,但是下面的方法可以满足您的需求。
# Classify every row of `df` by the set of distinct regions it contains:
#   any "unclassified" (or NA) value -> "UNCLASSIFIED"
#   only "empty" values              -> "EMPTY"
#   one distinct region              -> that region, upper-cased
#   two distinct regions             -> both joined with "_", in row order
#   three or more distinct regions   -> "MIXED"

# Only the region columns take part; skipping an existing CLASSIFIED column
# keeps the script re-runnable on an already classified data frame.
region_cols <- setdiff(names(df), "CLASSIFIED")
df1 <- as.matrix(df[region_cols]) # Work with a character copy
df1[df1 %in% "unclassified"] <- NA

# Label a single row, given as a character vector of its region values.
classify_row <- function(x) {
  if (anyNA(x)) {
    return("UNCLASSIFIED")
  }
  regions <- unique(x[x != "empty"])
  if (length(regions) == 0) {
    # Without this branch an all-"empty" row would be reported as "MIXED".
    "EMPTY"
  } else if (length(regions) == 1) {
    toupper(regions)
  } else if (length(regions) == 2) {
    paste(toupper(regions), collapse = "_")
  } else {
    "MIXED"
  }
}

# Iterate over row indices instead of apply(df1, 1, unique): apply() returns a
# matrix rather than a list whenever every row yields the same number of
# unique values, which would break the per-row classification.
df$CLASSIFIED <- vapply(
  seq_len(nrow(df1)),
  function(i) classify_row(unname(df1[i, ])),
  character(1)
)
rm(df1, region_cols, classify_row) # Tidy up
df
# m1 m2 f1 f2 CLASSIFIED
#1 south south south south SOUTH
#2 south empty west empty SOUTH_WEST
#3 central west east south MIXED
#4 empty empty empty central CENTRAL
#5 south west east empty MIXED
#6 south south south unclassified UNCLASSIFIED
答案 1 :(得分:0)
# Classify every row of `df` along two axes. "central" belongs to both axes,
# so it must not be reported twice when the two labels are joined.
horizontal <- c("east", "central", "west")
vertical <- c("south", "central", "north")

# Only the region columns take part; skipping an existing CLASSIFIED column
# keeps the script re-runnable on an already classified data frame.
region_cols <- setdiff(names(df), "CLASSIFIED")
regions <- as.matrix(df[region_cols])
rows <- lapply(seq_len(nrow(regions)), function(i) unname(regions[i, ]))

# First value of `row` that lies on `axis`, upper-cased; NA if there is none.
first_on_axis <- function(row, axis) {
  hits <- row[row %in% axis]
  if (length(hits) > 0) toupper(hits[[1]]) else NA_character_
}

# TRUE when `row` holds more than one distinct value from `axis`.
is_mixed_on_axis <- function(row, axis) {
  length(unique(row[row %in% axis])) > 1
}

# we take the first horizontal and vertical value and build a new column
# from them
first_horizontal <- vapply(
  rows, first_on_axis, character(1),
  axis = horizontal
)
first_vertical <- vapply(
  rows, first_on_axis, character(1),
  axis = vertical
)

CLASSIFIED <- paste(first_vertical, first_horizontal, sep = "_")
# Use a single label when one axis is missing or when both axes agree
# (the "central"/"central" case), instead of stripping "NA" with a regex.
only_vertical <- is.na(first_horizontal)
only_horizontal <- is.na(first_vertical)
same_label <- !only_vertical & !only_horizontal &
  first_vertical == first_horizontal
use_vertical <- only_vertical | same_label
CLASSIFIED[use_vertical] <- first_vertical[use_vertical]
CLASSIFIED[only_horizontal] <- first_horizontal[only_horizontal]
# Rows without any direction (e.g. all "empty") get an empty label.
CLASSIFIED[is.na(CLASSIFIED)] <- ""

# But if there are several horizontal or several vertical directions,
# including central, we call it mixed
mixed <- vapply(
  rows,
  function(row) {
    is_mixed_on_axis(row, horizontal) || is_mixed_on_axis(row, vertical)
  },
  logical(1)
)
CLASSIFIED[mixed] <- "MIXED"

# And if it contains "unclassified", whatever we set it to previously,
# we'll set it to "UNCLASSIFIED"
unclassified <- vapply(
  rows,
  function(row) "unclassified" %in% row,
  logical(1)
)
CLASSIFIED[unclassified] <- "UNCLASSIFIED"

df$CLASSIFIED <- CLASSIFIED
df
# m1 m2 f1 f2 CLASSIFIED
# 1 south south south south SOUTH
# 2 south empty west empty SOUTH_WEST
# 3 central west east south MIXED
# 4 empty empty empty central CENTRAL
# 5 south west east empty MIXED
# 6 south south south unclassified UNCLASSIFIED