我有一个数据框,需要按某一数值列排序并据此分配排名。对于该数值列取值为零的那些行,则需要改为按另一个字符列对它们排序。
排名主要依据var2,所以我按var2进行排序;如果某些行的var2值相同,就必须再用var3来决定排名。请看数据框的第2行和第3行:它们的var2值相同,因此要根据var3给出排名。如果var2为零,则必须按var1列(字符列)的字母顺序排序并给出排名。如果var2是NA,则不分配排名。请参考下面给出的数据框。
下面的数据框已按var2列降序排序,但var2中也包含零。对于var2为零的行,我需要按var1的字母顺序对它们排序;对于var2为NA的行,同样需要按var1的字母顺序排序。
example:
# var1 var2 var3 rank
# 1 c 556 45 1
# 2 a 345 35 3
# 3 f 345 64 2
# 4 b 134 87 4
# 5 z 0 34 5
# 6 d 0 32 6
# 7 c 0 12 7
# 8 a 0 23 8
# 9 e NA
# 10 b NA
below is my code
# Sample data for the question. var3 mixes numbers with empty strings, so c()
# coerces every element to a character string; var2 contains zeros and NAs.
df <- data.frame(
  var1 = c(
    "c", "a", "f", "b", "z", "d", "c", "a", "e", "b", "ad", "gf", "kg", "ts", "mp"
  ),
  var2 = c(134, NA, 345, 200, 556, NA, 345, 200, 150, 0, 25, 10, 0, 150, 0),
  var3 = c(
    "65", "", "45", "34", "68", "", "73", "12", "35", "23", "34", "56", "56",
    "78", "123"
  )
)
# Sort on var2 first and var1 second. decreasing = TRUE applies to both keys,
# and order() places the rows whose var2 is NA at the end.
orderdf <- df[with(df, order(var2, var1, decreasing = TRUE)), ]
# Number the sorted rows from 1; rows whose var2 is NA get an empty string,
# which makes the whole rank column character.
rankdf <- mutate(
  orderdf,
  rank = ifelse(is.na(var2), '', seq_len(nrow(orderdf)))
)
对于var2值为零的行,预期输出应按var1的字母顺序排序
expected output:
# var1 var2 var3 rank
# 1 c 556 45 1
# 2 a 345 35 3
# 3 f 345 64 2
# 4 b 134 87 4
# 5 a 0 34 5
# 6 c 0 32 6
# 7 d 0 12 7
# 8 z 0 23 8
# 9 b NA
# 10 e NA
答案 0 :(得分:1)
使用dplyr,您可以这样写:
# Largest var2 first; rows sharing a var2 value are ordered by ascending var1.
arrange(df, desc(var2), var1)
然后创建列rank
以下代码略显繁琐,但可以完成任务。基本思路是:将var2不为零的行与var2为零(或NA)的行分别排序,然后把两个已排序的数据框合并在一起,最后创建rank列。
数据
# Example data for this answer. The two missing var3 entries are written as NA
# so the column is numeric.
df <- data.frame(
  var1 = c(
    "c", "a", "f", "b", "z", "d", "c", "a", "e", "z", "ad", "gf", "kg", "ts", "mp"
  ),
  var2 = c(134, NA, 345, 200, 556, NA, 345, 200, 150, 0, 25, 10, 0, 150, 0),
  var3 = c(65, NA, 45, 34, 68, NA, 73, 12, 35, 23, 34, 56, 56, 78, 123)
)
df
# var1 var2 var3
# 1 c 134 65
# 2 a NA NA
# 3 f 345 45
# 4 b 200 34
# 5 z 556 68
# 6 d NA NA
# 7 c 345 73
# 8 a 200 12
# 9 e 150 35
# 10 z 0 23
# 11 ad 25 34
# 12 gf 10 56
# 13 kg 0 56
# 14 ts 150 78
# 15 mp 0 123
代码
# Sort the two groups of rows separately, stack them, then number the rows.
bind_rows(
  # rows whose var2 is neither 0 nor NA: largest var2 first, ties by larger var3
  df %>%
    filter(var2 != 0) %>%
    arrange(desc(var2), desc(var3)),
  # rows whose var2 is 0 or NA: alphabetical by var1
  df %>%
    filter(is.na(var2) | var2 == 0) %>%
    arrange(var1)
) %>%
  # re-sort by descending var2 so the zero rows come before the NA rows
  arrange(desc(var2)) %>%
  # number every row, then blank out the rank wherever var2 is NA
  mutate(rank = ifelse(is.na(var2), NA, seq_len(nrow(df))))
输出
# var1 var2 var3 rank
# 1 z 556 68 1
# 2 c 345 73 2
# 3 f 345 45 3
# 4 b 200 34 4
# 5 a 200 12 5
# 6 ts 150 78 6
# 7 e 150 35 7
# 8 c 134 65 8
# 9 ad 25 34 9
# 10 gf 10 56 10
# 11 kg 0 56 11
# 12 mp 0 123 12
# 13 z 0 23 13
# 14 a NA NA NA
# 15 d NA NA NA
答案 1 :(得分:0)
您可以在基础R(base R)中使用order来实现:
# Sort by var2 (descending) and break ties by ascending var1, then add a rank.
# The row order is computed once and applied to both halves of the data, so the
# remaining columns (var3 here) stay attached to their own rows.
cols <- c('var1', 'var2')
remaining_cols <- setdiff(names(df), cols)
df1 <- df[cols]
ord <- with(df1, order(-var2, var1))
cbind(
  transform(
    df1[ord, ],
    # rows without a var2 value receive no rank
    rank = ifelse(is.na(var2), NA_integer_, seq_len(nrow(df1)))
  ),
  # drop = FALSE keeps a data frame even when only one column remains
  df[ord, remaining_cols, drop = FALSE]
)
# var1 var2 rank var3
#1 c 556 1 45
#2 a 345 2 35
#3 f 345 3 64
#4 b 134 4 87
#8 a 0 5 23
#7 c 0 6 12
#6 d 0 7 32
#5 z 0 8 34
#10 b NA NA 11
#9 e NA NA 10
数据
# Example data: var1 is a factor with levels in alphabetical order; var2 and
# var3 are integer columns, and var2 ends with two missing values.
df <- data.frame(
  var1 = factor(
    c("c", "a", "f", "b", "z", "d", "c", "a", "e", "b"),
    levels = c("a", "b", "c", "d", "e", "f", "z")
  ),
  var2 = c(556L, 345L, 345L, 134L, 0L, 0L, 0L, 0L, NA, NA),
  var3 = c(45L, 35L, 64L, 87L, 34L, 32L, 12L, 23L, 10L, 11L)
)
答案 2 :(得分:0)
仅使用基础R的order()函数:先按var2降序、再按var1升序排序;把order()返回的整数索引向量传给方括号,即可对数据重新排序
# Reorder the rows: descending var2, ties broken by ascending var1.
df[with(df, order(-var2, var1)), ]
排名列也可以用同样的方式添加
# Write each row's rank into a "rank" column of df itself (df is not reordered):
# the row that sorts first gets 1, the next gets 2, and so on.
df[order(-df$var2, df$var1), "rank"] <- seq_len(nrow(df))
# Rows whose var2 is NA must not carry a rank.
df$rank[is.na(df$var2)] <- NA
答案 3 :(得分:0)
使用data.table
library(data.table)
# setDT() converts df to a data.table by reference. The sorted result is a new
# table; .I numbers its rows 1..N, and the trailing [] prints the result.
setDT(df)
df[order(-var2, var1)][, rank := .I][]
# Example data for the data.table answer: var1 is a factor with levels in
# alphabetical order; var2 is an integer column ending with two missing values.
df <- data.frame(
  var1 = factor(
    c("c", "a", "f", "b", "z", "d", "c", "a", "e", "b"),
    levels = c("a", "b", "c", "d", "e", "f", "z")
  ),
  var2 = c(1456L, 456L, 345L, 134L, 0L, 0L, 0L, 0L, NA, NA)
)