基于时间戳对数据框的行进行排序

时间:2015-11-14 07:55:03

标签: r sorting

我有一个数据框,我想根据时间戳排序。

                     V1    V2                     V3    V4                     V5    V6
1 {"2014-08-01T01:00:00": "64", "2014-08-01T13:00:00": "53", "2014-08-01T01:20:00": "73",
2 {"2014-08-02T18:00:00": "37", "2014-08-02T22:00:00": "56", "2014-08-02T17:00:00": "24",
3 {"2014-08-03T17:50:00": "78", "2014-08-03T04:20:00": "83", "2014-08-03T00:20:00": "73",
4 {"2014-08-04T15:00:00": "37", "2014-08-04T21:00:00": "39", "2014-08-04T15:20:00": "43",
5 {"2014-08-05T19:20:00": "78", "2014-08-05T13:20:00": "46", "2014-08-05T00:00:00": "62",
6 {"2014-08-06T11:00:00": "45", "2014-08-06T09:00:00": "56", "2014-08-06T21:50:00": "68",
                      V7    V8                     V9   V10                    V11   V12
1 "2014-08-01T13:20:00": "57", "2014-08-01T13:50:00": "47", "2014-08-01T20:50:00": "44",
2 "2014-08-02T01:00:00": "56", "2014-08-02T17:20:00": "42", "2014-08-02T01:20:00": "68",
3 "2014-08-03T23:00:00": "81", "2014-08-03T00:00:00": "63", "2014-08-03T00:50:00": "73",
4 "2014-08-04T02:00:00": "81", "2014-08-04T18:00:00": "29", "2014-08-04T02:20:00": "88",
5 "2014-08-05T00:20:00": "72", "2014-08-05T00:50:00": "77", "2014-08-05T19:00:00": "75",
6 "2014-08-06T14:20:00": "53", "2014-08-06T14:00:00": "40", "2014-08-06T23:20:00": "77",

期望的输出

下面只给出其中一行的期望输出作为示例。

{"2014-08-01T01:00:00": "64",   "2014-08-01T01:20:00":  "73",   "2014-08-01T13:00:00":  "53",   "2014-08-01T13:20:00":  "57",   "2014-08-01T13:50:00":  "47",   "2014-08-01T20:50:00":  "44",

1 个答案:

答案 0 :(得分:1)

我们先把日期时间列(`df2[c(TRUE, FALSE)]`,即利用逻辑向量的循环补齐来选取奇数列)用 `lapply` 逐列转换为 POSIXct 类,再用 `apply`(`MARGIN = 1`)对每一行调用 `order`,得到排序索引 'm1'。接着用 `split` 把时间列和值列按行拆分成两个列表 'l1'、'l2',然后用 `Map` 按 'm1' 给出的顺序重排各元素,并用 `paste` 把字符串拼接在一起。最后把结果转换为只有一列的 data.frame。

 # Parse the timestamp columns (every odd column) into POSIXct. The leading '{'
 # is stripped first; the trailing ':' that remains in the text is matched by
 # the format string. tz = 'UTC' keeps parsing independent of the session time
 # zone, so wall-clock times that do not exist locally (DST gaps) cannot
 # silently become NA.
 df2[c(TRUE, FALSE)] <- lapply(df1[c(TRUE, FALSE)], function(x) {
   as.POSIXct(sub('[{]', '', x), format = '%Y-%m-%dT%H:%M:%S:', tz = 'UTC')
 })
 # Per-row ordering of the timestamps: column i of 'm1' is the order for row i.
 # data.matrix() turns the POSIXct columns into numeric values, so order()
 # compares the actual times instead of the formatted strings that apply()
 # would otherwise build from a data frame.
 m1 <- apply(data.matrix(df2[c(TRUE, FALSE)]), 1, order)
 # Row-wise lists of the original timestamp strings ('l1') and of the value
 # strings ('l2'), to be re-ordered with 'm1'.
 l1 <- split(as.matrix(df1[c(TRUE, FALSE)]), row(df1[c(TRUE, FALSE)]))
 l2 <- split(as.matrix(df2[c(FALSE, TRUE)]), row(df2[c(FALSE, TRUE)]))


# Rebuild one output string per row: re-order the timestamps and values with
# the row's ordering, wrap both in double quotes again, and prefix the row
# with '{'. The result is a one-column data frame of character strings.
data.frame(
  col1 = unlist(Map(
    function(stamps, vals, ord) {
      # Drop any leading '{' and quote the timestamp up to its final ':'.
      quoted_stamps <- gsub('^\\{*(\\d+.*)(\\:)', '"\\1"\\2', stamps[ord])
      # Quote the digits of each value; the trailing ',' stays outside.
      quoted_vals <- gsub('(\\d+)', '"\\1"', vals[ord])
      row_text <- paste(quoted_stamps, quoted_vals, sep = ' ', collapse = ' ')
      paste0('{', row_text)
    },
    l1, l2, split(m1, col(m1))
  )),
  stringsAsFactors = FALSE
)
                                                                                                                                                                            col1
#1 {"2014-08-01T01:00:00": "64", "2014-08-01T01:20:00": "73", "2014-08-01T13:00:00": "53", "2014-08-01T13:20:00": "57", "2014-08-01T13:50:00": "47", "2014-08-01T20:50:00": "44",
#2 {"2014-08-02T01:00:00": "56", "2014-08-02T01:20:00": "68", "2014-08-02T17:00:00": "24", "2014-08-02T17:20:00": "42", "2014-08-02T18:00:00": "37", "2014-08-02T22:00:00": "56",
#3 {"2014-08-03T00:00:00": "63", "2014-08-03T00:20:00": "73", "2014-08-03T00:50:00": "73", "2014-08-03T04:20:00": "83", "2014-08-03T17:50:00": "78", "2014-08-03T23:00:00": "81",
#4 {"2014-08-04T02:00:00": "81", "2014-08-04T02:20:00": "88", "2014-08-04T15:00:00": "37", "2014-08-04T15:20:00": "43", "2014-08-04T18:00:00": "29", "2014-08-04T21:00:00": "39",
#5 {"2014-08-05T00:00:00": "62", "2014-08-05T00:20:00": "72", "2014-08-05T00:50:00": "77", "2014-08-05T13:20:00": "46", "2014-08-05T19:00:00": "75", "2014-08-05T19:20:00": "78",
#6 {"2014-08-06T09:00:00": "56", "2014-08-06T11:00:00": "45", "2014-08-06T14:00:00": "40", "2014-08-06T14:20:00": "53", "2014-08-06T21:50:00": "68", "2014-08-06T23:20:00": "77",

数据

# Raw text for columns V1-V6, one element per line (header first). Each data
# line starts with its row index, and timestamps/values are still quoted.
# strsplit() replaces readLines(textConnection(...)) so that no connection is
# left open; the resulting character vector is the same.
lines <- strsplit('V1    V2                     V3    V4                     V5    V6
1 {"2014-08-01T01:00:00": "64", "2014-08-01T13:00:00": "53", "2014-08-01T01:20:00": "73",
2 {"2014-08-02T18:00:00": "37", "2014-08-02T22:00:00": "56", "2014-08-02T17:00:00": "24",
3 {"2014-08-03T17:50:00": "78", "2014-08-03T04:20:00": "83", "2014-08-03T00:20:00": "73",
4 {"2014-08-04T15:00:00": "37", "2014-08-04T21:00:00": "39", "2014-08-04T15:20:00": "43",
5 {"2014-08-05T19:20:00": "78", "2014-08-05T13:20:00": "46", "2014-08-05T00:00:00": "62",
6 {"2014-08-06T11:00:00": "45", "2014-08-06T09:00:00": "56", "2014-08-06T21:50:00": "68",', '\n', fixed = TRUE)[[1]]

# Raw text for columns V7-V12, one element per line (header first), in the
# same layout as the V1-V6 text but without the leading '{'.
# strsplit() replaces readLines(textConnection(...)) so that no connection is
# left open; the resulting character vector is the same.
lines2 <- strsplit('V7    V8                     V9   V10                    V11   V12
1 "2014-08-01T13:20:00": "57", "2014-08-01T13:50:00": "47", "2014-08-01T20:50:00": "44",
2 "2014-08-02T01:00:00": "56", "2014-08-02T17:20:00": "42", "2014-08-02T01:20:00": "68",
3 "2014-08-03T23:00:00": "81", "2014-08-03T00:00:00": "63", "2014-08-03T00:50:00": "73",
4 "2014-08-04T02:00:00": "81", "2014-08-04T18:00:00": "29", "2014-08-04T02:20:00": "88",
5 "2014-08-05T00:20:00": "72", "2014-08-05T00:50:00": "77", "2014-08-05T19:00:00": "75",
6 "2014-08-06T14:20:00": "53", "2014-08-06T14:00:00": "40", "2014-08-06T23:20:00": "77",', '\n', fixed = TRUE)[[1]]

 # Remove the leading row index and every double quote from the raw lines,
 # then parse the whitespace-separated fields; the first line is the header.
 d1 <- read.table(
   header = TRUE, stringsAsFactors = FALSE,
   text = gsub('^\\d+\\s+|"', '', lines)
 )
 d2 <- read.table(
   header = TRUE, stringsAsFactors = FALSE,
   text = gsub('^\\d+\\s+|"', '', lines2)
 )
 # 'df1' is the two halves side by side; 'df2' starts out as a copy of it.
 df2 <- df1 <- cbind(d1, d2)