编辑数据表中一列的变量

时间:2014-03-13 15:46:44

标签: r data.table

我有一个如下所示的数据表:

> head(dput(dt))
structure(list(A = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), T = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L), X = c(26.91, 23.92, 23.92, 23.92, 23.92, 
23.92, 23.92, 23.92, 23.92, 20.93, 20.93, 20.93, 20.93, 20.93, 
20.93, 20.93, 20.93, 20.93, 20.93, 20.93, 29.9, 20.93, 14.95, 
8.97, 8.97, 858.13, 861.12, 858.13, 858.13, 858.13, 858.13, 858.13, 
858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 
858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 
858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 858.13, 
858.13, 858.13), Y = c(167.44, 167.44, 164.45, 164.45, 164.45, 
164.45, 164.45, 164.45, 164.45, 143.52, 143.52, 143.52, 143.52, 
143.52, 143.52, 143.52, 143.52, 143.52, 143.52, 143.52, 176.41, 
182.39, 185.38, 188.37, 188.37, 257.14, 257.14, 257.14, 257.14, 
257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 
257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 
257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 257.14, 
257.14, 257.14, 257.14, 257.14, 260.13), V = c(2.99, 2.99, 0, 
0, 0, 0, 0, 0, 21.142, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34.091, 
10.781, 6.6858, 6.6858, 0, 2.99, 2.99, 2.99, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 2.99, 0), P = c(180, -90, 0, 0, 0, 0, 0, 0, -98.13, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 74.745, 146.31, 153.43, 153.43, 0, 
180, 0, 180, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90, 0)), .Names = c("A", 
"T", "X", "Y", "V", "P"), row.names = c(NA, -58L), class = c("data.table", 
"data.frame"), sorted = "A", .internal.selfref = <pointer: 0x0000000000320788>)

> head(dt[dt$A==1, ])
   A T     X      Y    V   P
1: 1 1 26.91 167.44 2.99 180
2: 1 2 23.92 167.44 2.99 -90
3: 1 3 23.92 164.45 0.00   0
4: 1 4 23.92 164.45 0.00   0
5: 1 5 23.92 164.45 0.00   0
6: 1 6 23.92 164.45 0.00   0

> tail(dt[dt$A==1, ])
   A  T     X      Y       V       P
1: 1 20 20.93 143.52 34.0910  74.745
2: 1 21 29.90 176.41 10.7810 146.310
3: 1 22 20.93 182.39  6.6858 153.430
4: 1 23 14.95 185.38  6.6858 153.430
5: 1 24  8.97 188.37  0.0000   0.000
6: 1 25  8.97 188.37  2.9900 180.000

它按A排序。我想要做的是编辑V和P列(而不是整行)。对于两者,我想:

  1. 删除最后一个值
  2. 将每个值整体下移一行(第一行的值移到第二行,依此类推)
  3. 将第一个值设为 0

最后,我希望得到如下所示的数据表:

    > head(dt[dt$A==1, ])
       A T     X      Y    V   P
    1: 1 1 26.91 167.44 0.00   0
    2: 1 2 23.92 167.44 2.99 180
    3: 1 3 23.92 164.45 2.99 -90
    4: 1 4 23.92 164.45 0.00   0
    5: 1 5 23.92 164.45 0.00   0
    6: 1 6 23.92 164.45 0.00   0
    
    > tail(dt[dt$A==1, ])
       A  T     X      Y       V       P
    1: 1 20 20.93 143.52  0.0000   0.000
    2: 1 21 29.90 176.41 34.0910  74.745
    3: 1 22 20.93 182.39 10.7810 146.310
    4: 1 23 14.95 185.38  6.6858 153.430
    5: 1 24  8.97 188.37  6.6858 153.430
    6: 1 25  8.97 188.37  0.0000   0.000
    

    我需要在 data.table 上执行此操作,因为我想按 A 对数据进行分组处理(也可以用 split 来完成),但我卡在了如何实现上面这三个步骤上。

    感谢。

1 个答案:

答案 0 :(得分:3)

这样做:

# Shift both V and P down by one row within each group of A:
# head(x, -1) drops the group's last value and the leading 0 becomes the
# group's new first value, so each column keeps its original length.
# `:=` updates dt by reference, so no reassignment is needed.
dt[, c("V", "P") := lapply(.SD, function(x) c(0, head(x, -1))),
   by = A, .SDcols = c("V", "P")]

另请注意,在 `[.data.table` 中不需要写 `dt$` 前缀,即可以直接使用 `dt[A == 1]`,而不必像问题中那样写成 `dt[dt$A == 1, ]`。