我有如下数据,想对它们进行排序:
# Example data: 17 rows and three factor columns (string, key, val).
# Each factor is stored as integer codes that index into its `levels`.
df <- structure(
  list(
    string = structure(
      c(4L, 4L, 4L, 9L, 9L, 6L, 6L, 5L, 2L, 1L, 7L, 7L, 7L, 8L, 8L, 3L, 3L),
      levels = c(
        "CGSKDNIKHVPGGGSVQIVYKPVDLSK",
        "ESPLQTPTEDGSEEPGSETSDAK",
        "KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK",
        "SKDGTGSDDKK",
        "SPSSAKSRLQTAPVPMPDLKNVK",
        "SRLQTAPVPMPDLK",
        "SRLQTAPVPMPDLKNVKSK",
        "SRLQTAPVPMPDLKNVKSKIGSTENLK",
        "VQIINKKLDLSNVQSK"
      ),
      class = "factor"
    ),
    key = structure(
      c(1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 3L, 3L),
      levels = c("Mys: G52: ru1", "Mys: G52: ru2", "Mys: G52: ru3"),
      class = "factor"
    ),
    val = structure(
      c(
        3L, 13L, 16L, 15L, 6L, 2L, 2L, 11L, 9L, 5L, 1L, 7L, 8L, 12L, 4L,
        10L, 14L
      ),
      levels = c(
        "1442983324", "1451319531", "1512864.443", "1612410048",
        "16349475.63", "1784901841", "30553282.01", "317403612.9",
        "3612004.547", "3686081.063", "39135868.44", "43701608",
        "64223793.8", "64959501.42", "775987137.8", "9767666215"
      ),
      class = "factor"
    )
  ),
  names = c("string", "key", "val"),
  class = "data.frame",
  row.names = c(NA, -17L)
)
我想依据两列对它们进行排序:首先按第一列(string)的长度(字母个数)排序,其次按第二列(key)排序,使同一字符串的各行保持在一起(ru1、ru2、ru3 等)。
所需的输出应如下所示
以下命令不起作用
# order() on a factor sorts by its level order (alphabetical here), not by the
# number of characters, so this does not produce the length-based ordering.
df2 <- df[order(df$string, df$key), ]
应该像这样排序
SKDGTGSDDKK Mys: G52: ru1 1512864.443
SKDGTGSDDKK Mys: G52: ru2 64223793.8
SKDGTGSDDKK Mys: G52: ru3 9767666215
VQIINKKLDLSNVQSK Mys: G52: ru1 775987137.8
VQIINKKLDLSNVQSK Mys: G52: ru2 1784901841
SRLQTAPVPMPDLKNVKSK Mys: G52: ru2 1442983324
SRLQTAPVPMPDLKNVKSK Mys: G52: ru3 30553282.01
SRLQTAPVPMPDLKNVKSK Mys: G52: ru1 317403612.9
SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru3 43701608
SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru2 1612410048
答案 0 :(得分:1)
以冒号分割第二列:
listdf<-strsplit(as.character(df[,2]),split=":")
从列表中提取矢量
# Take the third colon-separated field of each key (e.g. " ru1").
# vapply() pins the result to a character vector, whereas sapply() would
# silently return a list for empty or ragged input.
listdf3 <- vapply(listdf, function(x) x[3], character(1))
计算排序顺序:
df[order(nchar(as.character(df[,1])), listdf3), ]
string key val
1 SKDGTGSDDKK Mys: G52: ru1 1512864.443
2 SKDGTGSDDKK Mys: G52: ru2 64223793.8
3 SKDGTGSDDKK Mys: G52: ru3 9767666215
6 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
7 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
4 VQIINKKLDLSNVQSK Mys: G52: ru1 775987137.8
5 VQIINKKLDLSNVQSK Mys: G52: ru2 1784901841
13 SRLQTAPVPMPDLKNVKSK Mys: G52: ru1 317403612.9
11 SRLQTAPVPMPDLKNVKSK Mys: G52: ru2 1442983324
12 SRLQTAPVPMPDLKNVKSK Mys: G52: ru3 30553282.01
8 SPSSAKSRLQTAPVPMPDLKNVK Mys: G52: ru1 39135868.44
9 ESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru1 3612004.547
10 CGSKDNIKHVPGGGSVQIVYKPVDLSK Mys: G52: ru1 16349475.63
15 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru2 1612410048
14 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru3 43701608
16 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 3686081.063
17 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 64959501.42
答案 1 :(得分:1)
您需要先计算 `string` 的长度,然后根据该列进行排序。我首先创建了一个新数据框(`df_tmp`),然后将其合并到 `df2` 中。
**代码**
library(dplyr)
# Lookup table: one row per distinct string together with its character count.
# Keeping the join key unique stops inner_join() from repeating every row of
# df once per duplicate of its string (which a per-row lookup table would do).
df_tmp <- data.frame(string = unique(df$string))
# nchar() needs the text itself, so convert the factor to character first.
df_tmp$chr <- nchar(as.character(df_tmp$string))
# Attach the length to each row of df; the key is spelled out explicitly.
df2 <- inner_join(df, df_tmp, by = "string")
# Sort by string length first, then by key within equal lengths.
df2 <- df2[order(df2$chr, df2$key), ]
结果(节选)。注意:下面的输出中同一行重复出现多次——当 df_tmp 中同一个 string 对应多行时,inner_join 就会产生这样的重复行。
string key val chr
SKDGTGSDDKK Mys: G52: ru1 1512864.443 11
SKDGTGSDDKK Mys: G52: ru1 1512864.443 11
SKDGTGSDDKK Mys: G52: ru1 1512864.443 11
SKDGTGSDDKK Mys: G52: ru2 64223793.8 11
SKDGTGSDDKK Mys: G52: ru2 64223793.8 11
SKDGTGSDDKK Mys: G52: ru2 64223793.8 11
SKDGTGSDDKK Mys: G52: ru3 9767666215 11
SKDGTGSDDKK Mys: G52: ru3 9767666215 11
SKDGTGSDDKK Mys: G52: ru3 9767666215 11
SRLQTAPVPMPDLK Mys: G52: ru1 1451319531 14
SRLQTAPVPMPDLK Mys: G52: ru1 1451319531 14
SRLQTAPVPMPDLK Mys: G52: ru1 1451319531 14
SRLQTAPVPMPDLK Mys: G52: ru1 1451319531 14
答案 2 :(得分:1)
试试 Hadley 的 tidyverse 函数:
library(tidyverse)
df_sorted <- df %>%
  # get length of string; nchar() is vectorised, so it is applied to the
  # whole column at once (after converting the factor to character)
  mutate(length_string = nchar(as.character(string))) %>%
  # arrange first by number of characters, then string, then key
  arrange(length_string, string, key) %>%
  # remove the helper length column again
  select(-length_string)
答案 3 :(得分:1)
您需要使用 `nchar()` 函数,但首先必须将 `df$string` 从 `factor` 转换为 `character` 类型。
以下是使用 `tidyverse` 工具的解决方案:
library('tidyverse')
# Example data: 17 rows and three factor columns (string, key, val).
# Each factor is stored as integer codes that index into its `levels`.
df <- structure(
  list(
    string = structure(
      c(4L, 4L, 4L, 9L, 9L, 6L, 6L, 5L, 2L, 1L, 7L, 7L, 7L, 8L, 8L, 3L, 3L),
      levels = c(
        "CGSKDNIKHVPGGGSVQIVYKPVDLSK",
        "ESPLQTPTEDGSEEPGSETSDAK",
        "KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK",
        "SKDGTGSDDKK",
        "SPSSAKSRLQTAPVPMPDLKNVK",
        "SRLQTAPVPMPDLK",
        "SRLQTAPVPMPDLKNVKSK",
        "SRLQTAPVPMPDLKNVKSKIGSTENLK",
        "VQIINKKLDLSNVQSK"
      ),
      class = "factor"
    ),
    key = structure(
      c(1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 3L, 3L),
      levels = c("Mys: G52: ru1", "Mys: G52: ru2", "Mys: G52: ru3"),
      class = "factor"
    ),
    val = structure(
      c(
        3L, 13L, 16L, 15L, 6L, 2L, 2L, 11L, 9L, 5L, 1L, 7L, 8L, 12L, 4L,
        10L, 14L
      ),
      levels = c(
        "1442983324", "1451319531", "1512864.443", "1612410048",
        "16349475.63", "1784901841", "30553282.01", "317403612.9",
        "3612004.547", "3686081.063", "39135868.44", "43701608",
        "64223793.8", "64959501.42", "775987137.8", "9767666215"
      ),
      class = "factor"
    )
  ),
  names = c("string", "key", "val"),
  class = "data.frame",
  row.names = c(NA, -17L)
)
# Turn the factor into plain text so nchar() counts its letters, then sort
# by that length and, within equal lengths, by key.
df2 <- arrange(
  mutate(df, string = as.character(string)),
  nchar(string),
  key
)
df2
#> string key val
#> 1 SKDGTGSDDKK Mys: G52: ru1 1512864.443
#> 2 SKDGTGSDDKK Mys: G52: ru2 64223793.8
#> 3 SKDGTGSDDKK Mys: G52: ru3 9767666215
#> 4 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
#> 5 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
#> 6 VQIINKKLDLSNVQSK Mys: G52: ru1 775987137.8
#> 7 VQIINKKLDLSNVQSK Mys: G52: ru2 1784901841
#> 8 SRLQTAPVPMPDLKNVKSK Mys: G52: ru1 317403612.9
#> 9 SRLQTAPVPMPDLKNVKSK Mys: G52: ru2 1442983324
#> 10 SRLQTAPVPMPDLKNVKSK Mys: G52: ru3 30553282.01
#> 11 SPSSAKSRLQTAPVPMPDLKNVK Mys: G52: ru1 39135868.44
#> 12 ESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru1 3612004.547
#> 13 CGSKDNIKHVPGGGSVQIVYKPVDLSK Mys: G52: ru1 16349475.63
#> 14 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru2 1612410048
#> 15 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru3 43701608
#> 16 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 3686081.063
#> 17 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 64959501.42
以下是使用基本R工具的解决方案,正如您在示例中使用的那样:
# Example data: 17 rows and three factor columns (string, key, val).
# Each factor is stored as integer codes that index into its `levels`.
df <- structure(
  list(
    string = structure(
      c(4L, 4L, 4L, 9L, 9L, 6L, 6L, 5L, 2L, 1L, 7L, 7L, 7L, 8L, 8L, 3L, 3L),
      levels = c(
        "CGSKDNIKHVPGGGSVQIVYKPVDLSK",
        "ESPLQTPTEDGSEEPGSETSDAK",
        "KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK",
        "SKDGTGSDDKK",
        "SPSSAKSRLQTAPVPMPDLKNVK",
        "SRLQTAPVPMPDLK",
        "SRLQTAPVPMPDLKNVKSK",
        "SRLQTAPVPMPDLKNVKSKIGSTENLK",
        "VQIINKKLDLSNVQSK"
      ),
      class = "factor"
    ),
    key = structure(
      c(1L, 2L, 3L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 2L, 3L, 3L),
      levels = c("Mys: G52: ru1", "Mys: G52: ru2", "Mys: G52: ru3"),
      class = "factor"
    ),
    val = structure(
      c(
        3L, 13L, 16L, 15L, 6L, 2L, 2L, 11L, 9L, 5L, 1L, 7L, 8L, 12L, 4L,
        10L, 14L
      ),
      levels = c(
        "1442983324", "1451319531", "1512864.443", "1612410048",
        "16349475.63", "1784901841", "30553282.01", "317403612.9",
        "3612004.547", "3686081.063", "39135868.44", "43701608",
        "64223793.8", "64959501.42", "775987137.8", "9767666215"
      ),
      class = "factor"
    )
  ),
  names = c("string", "key", "val"),
  class = "data.frame",
  row.names = c(NA, -17L)
)
# Reorder the rows: shortest string first, ties broken by key. The factor is
# converted to character so that nchar() counts the letters of each string.
df2 <- df[with(df, order(nchar(as.character(string)), key)), ]
df2
#> string key val
#> 1 SKDGTGSDDKK Mys: G52: ru1 1512864.443
#> 2 SKDGTGSDDKK Mys: G52: ru2 64223793.8
#> 3 SKDGTGSDDKK Mys: G52: ru3 9767666215
#> 6 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
#> 7 SRLQTAPVPMPDLK Mys: G52: ru1 1451319531
#> 4 VQIINKKLDLSNVQSK Mys: G52: ru1 775987137.8
#> 5 VQIINKKLDLSNVQSK Mys: G52: ru2 1784901841
#> 13 SRLQTAPVPMPDLKNVKSK Mys: G52: ru1 317403612.9
#> 11 SRLQTAPVPMPDLKNVKSK Mys: G52: ru2 1442983324
#> 12 SRLQTAPVPMPDLKNVKSK Mys: G52: ru3 30553282.01
#> 8 SPSSAKSRLQTAPVPMPDLKNVK Mys: G52: ru1 39135868.44
#> 9 ESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru1 3612004.547
#> 10 CGSKDNIKHVPGGGSVQIVYKPVDLSK Mys: G52: ru1 16349475.63
#> 15 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru2 1612410048
#> 14 SRLQTAPVPMPDLKNVKSKIGSTENLK Mys: G52: ru3 43701608
#> 16 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 3686081.063
#> 17 KDQGGYTMHQDQEGDTDAGLKESPLQTPTEDGSEEPGSETSDAK Mys: G52: ru3 64959501.42