每次到达模式时,将多列转换为单行

时间:2016-12-13 17:12:04

标签: r

我正在从文件中读取如下格式的数据(原始输入样例见下文以“0 DRY HEAT”开头、逐行列出 space/pulse 的文本):

set_1

我想获得两个输出:

space 值输出(必须去掉 space 关键字)。下面先列出原始输入数据,期望的 space 输出是其后以“0 DRY HEAT 2175324”开头的两行:

0 DRY HEAT 
space 2175324
pulse 30725
space 51151
pulse 3363
space 1698
pulse 422
space 1256
pulse 420
space 412
...
1 TEMP 16 
space 2426116
pulse 30729
space 51144
pulse 3393
space 1670
pulse 418
space 1260
pulse 417
space 415
...

pulse 值输出(必须去掉 pulse 关键字)。注意:紧随其后的两行是上文 space 值的期望输出,pulse 值的期望输出见再往下以“0 DRY HEAT 30725”开头的内容:

0 DRY HEAT 2175324 51151 1698 1256 412 ...
1 TEMP 16 2426116 51144 1670 1260 415 ...

谢谢

pulse 值的期望输出:

0 DRY HEAT 30725 3363 422 420 ...
1 TEMP 16 30729 3393 418 417 ...

这是数据样本的 dput():

输入数据样本(dput 结果):

structure(list(V1 = structure(c(1L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 
3L, 4L, 1L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 1L, 4L, 3L, 4L, 
3L, 4L, 3L, 4L, 3L, 4L, 2L, 4L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 4L
), .Label = c("0", "1", "pulse", "space"), class = "factor"), 
    V2 = structure(c(29L, 28L, 12L, 26L, 14L, 5L, 21L, 3L, 19L, 
    19L, 29L, 1L, 11L, 25L, 14L, 6L, 18L, 4L, 17L, 21L, 30L, 
    8L, 10L, 27L, 13L, 7L, 23L, 2L, 22L, 16L, 31L, 9L, 12L, 24L, 
    15L, 5L, 21L, 3L, 20L, 18L), .Label = c("1069157", "1256", 
    "1260", "1263", "1670", "1674", "1698", "2175324", "2426116", 
    "30725", "30728", "30729", "3363", "3389", "3393", "412", 
    "414", "415", "416", "417", "418", "420", "422", "51144", 
    "51147", "51148", "51151", "766304", "COLD", "DRY", "TEMP"
    ), class = "factor"), V3 = structure(c(3L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("", "16", "COLD", "DRY", 
    "HEAT"), class = "factor")), .Names = c("V1", "V2", "V3"), class = "data.frame", row.names = c(NA, 
-40L))

期望的 space 输出(dput 结果):

structure(list(V1 = c(0L, 0L, 0L, 1L), V2 = structure(c(1L, 1L, 
2L, 3L), .Label = c("COLD", "DRY", "TEMP"), class = "factor"), 
    V3 = structure(c(2L, 3L, 4L, 1L), .Label = c("16", "COLD", 
    "DRY", "HEAT"), class = "factor"), V4 = c(766304L, 1069157L, 
    2175324L, 2426116L), V5 = c(51148L, 51147L, 51151L, 51144L
    ), V6 = c(1670L, 1674L, 1698L, 1670L), V7 = c(1260L, 1263L, 
    1256L, 1260L), V8 = c(416L, 418L, 412L, 415L)), .Names = c("V1", 
"V2", "V3", "V4", "V5", "V6", "V7", "V8"), class = "data.frame", row.names = c(NA, 
-4L))

1 个答案:

答案 0 :(得分:0)

使用data.table包:

library(data.table)

# Convert `dat` to a data.table by reference and tag every row with the id of
# the record it belongs to. A new record starts at each header row, i.e. a row
# whose V1 begins with a digit (e.g. "0 DRY HEAT").
setDT(dat)[, Block := cumsum(grepl("^\\d", V1))]

# Collapse the readings of each record into one space-separated string per
# keyword ("space" / "pulse"); header rows are excluded.
Values <- dat[
  !grepl("^\\d", V1),
  list(vals = paste(V2, collapse = " ")),
  by = c("Block", "V1")
]

# Chained operations: [subset][merge][drop]
# Keep the header rows, join the collapsed "space" string of the same record,
# then drop the helper columns (the joined keyword column arrives as `i.V1`).
Space <- dat[grepl("^\\d", V1)][Values[V1 == "space"], on = "Block"][
  , -c("Block", "i.V1")
]

# Split the collapsed string back into one column per reading.
# - The number of new columns is derived from the data instead of being fixed
#   at V4..V8, so records with more or fewer readings (e.g. the 4 pulse values
#   per record in the sample) no longer make `:=` fail.
# - tstrsplit() pads shorter records with NA (fill = NA is its default).
# - type.convert = TRUE turns the pieces into integers, matching the expected
#   output, rather than leaving them as character.
readings <- tstrsplit(Space$vals, " ", fixed = TRUE, type.convert = TRUE)
if (length(readings) > 0L) {
  # The header occupies V1..V3, so the readings start at V4.
  new_cols <- paste0("V", 3L + seq_along(readings))
  Space[, (new_cols) := readings]
}
Space[, vals := NULL]

> Space
#    V1   V2   V3      V4    V5   V6   V7  V8
# 1:  0 COLD COLD  766304 51148 1670 1260 416
# 2:  0 COLD  DRY 1069157 51147 1674 1263 418
# 3:  0  DRY HEAT 2175324 51151 1698 1256 412
# 4:  1 TEMP   16 2426116 51144 1670 1260 415

对 pulse 的处理方式相同:把筛选条件中的 'space' 换成 'pulse' 即可(注意样例中每条记录只有 4 个 pulse 值)。