我正在处理一个大型数据框(30000多个观测值和20个变量),因此无法通过转置数据框来解决问题。在某些行中,Date 类列右侧的各列整体向右偏移了一列,而 Date 类列左侧的列没有发生偏移。我尝试根据发生偏移的列编写一个 if 语句来修正,但一直没有成功。
这是一些示例代码:
# Sample data frame literal: 30 rows (3 sites x 10 vials) and 7 columns.
# Rows 11-20 (Site "2") show the problem: Date is NA and the date text
# "2011-07-01" sits in Value_1, with "a", "b", "c" pushed one column to the
# right into Value_2..Value_4. Rows 1-10 and 21-30 are not shifted; note that
# Value_4 legitimately holds both NA (rows 1-10) and real values (rows 21-30).
# The Date value 15156 prints as 2011-07-01 in the expected output.
# NOTE: the expression is not assigned to a name here; the answer code below
# refers to this data frame as `x`.
structure(list(Site = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("1", "2", "3"), class = "factor"),
Vial = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L), Date = structure(c(15156, 15156, 15156,
15156, 15156, 15156, 15156, 15156, 15156, 15156, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 15156, 15156, 15156, 15156,
15156, 15156, 15156, 15156, 15156, 15156), class = "Date"),
Value_1 = c("a", "a", "a", "a", "a", "a", "a", "a", "a",
"a", "2011-07-01", "2011-07-01", "2011-07-01", "2011-07-01",
"2011-07-01", "2011-07-01", "2011-07-01", "2011-07-01", "2011-07-01",
"2011-07-01", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"a"), Value_2 = c("b", "b", "b", "b", "b", "b", "b", "b",
"b", "b", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a",
"b", "b", "b", "b", "b", "b", "b", "b", "b", "b"), Value_3 = c("c",
"c", "c", "c", "c", "c", "c", "c", "c", "c", "b", "b", "b",
"b", "b", "b", "b", "b", "b", "b", "c", "c", "c", "c", "c",
"c", "c", "c", "c", "c"), Value_4 = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, "c", "c", "c", "c", "c", "c", "c", "c",
"c", "c", "d", "d", "d", "d", "d", "d", "d", "d", "d", "d"
)), row.names = c(NA, -30L), class = "data.frame")
请注意,最后一列既包含 NA,也包含实际的值。
答案 0 :(得分:1)
我再次强烈建议修复上游流程。在此之前,下面这个权宜之计应该暂时可用。
# Repair rows of data frame `x` whose values were shifted one column to the
# right of `Date`: in such rows `Date` is NA and `Value_1` holds the date as
# text. The date is moved back into `Date`, every column after `Date` is
# shifted one position to the left, and the last column is set to NA.
# Rows whose `Date` is present, or whose `Value_1` is not a date, are left
# untouched, so running this repeatedly on the same data is harmless.

# Candidate rows: Date is missing.
nadate <- is.na(x$Date)

# Parse Value_1 for every row (not just the candidates) so that all masks
# have nrow(x) elements and no recycling can occur. Explicit formats make
# non-date text become NA instead of raising the "not in a standard
# unambiguous format" error from as.Date()'s format guessing.
value1 <- as.character(x$Value_1)
newdate <- as.Date(value1, format = "%Y-%m-%d")
unparsed <- is.na(newdate)
newdate[unparsed] <- as.Date(value1[unparsed], format = "%Y/%m/%d")
newnotna <- !is.na(newdate)

# Rows to repair: Date is missing AND Value_1 parsed as a date.
shifted <- nadate & newnotna

if (any(shifted)) {
  x$Date[shifted] <- newdate[shifted]

  # Columns after Date, excluding the last one; each receives the value of
  # its right-hand neighbour. seq_len() yields an empty index (rather than
  # counting downward) when Date is the second-to-last column.
  datecol <- which(colnames(x) == "Date")
  ind <- datecol + seq_len(ncol(x) - datecol - 1L)
  if (length(ind) > 0L) {
    x[shifted, ind] <- x[shifted, ind + 1L]
  }

  # The last column has been moved left; nothing is left to fill it.
  x[shifted, ncol(x)] <- NA
}
x
# Site Vial Date Value_1 Value_2 Value_3 Value_4
# 1 1 1 2011-07-01 a b c <NA>
# 2 1 2 2011-07-01 a b c <NA>
# 3 1 3 2011-07-01 a b c <NA>
# 4 1 4 2011-07-01 a b c <NA>
# 5 1 5 2011-07-01 a b c <NA>
# 6 1 6 2011-07-01 a b c <NA>
# 7 1 7 2011-07-01 a b c <NA>
# 8 1 8 2011-07-01 a b c <NA>
# 9 1 9 2011-07-01 a b c <NA>
# 10 1 10 2011-07-01 a b c <NA>
# 11 2 1 2011-07-01 a b c <NA>
# 12 2 2 2011-07-01 a b c <NA>
# 13 2 3 2011-07-01 a b c <NA>
# 14 2 4 2011-07-01 a b c <NA>
# 15 2 5 2011-07-01 a b c <NA>
# 16 2 6 2011-07-01 a b c <NA>
# 17 2 7 2011-07-01 a b c <NA>
# 18 2 8 2011-07-01 a b c <NA>
# 19 2 9 2011-07-01 a b c <NA>
# 20 2 10 2011-07-01 a b c <NA>
# 21 3 1 2011-07-01 a b c d
# 22 3 2 2011-07-01 a b c d
# 23 3 3 2011-07-01 a b c d
# 24 3 4 2011-07-01 a b c d
# 25 3 5 2011-07-01 a b c d
# 26 3 6 2011-07-01 a b c d
# 27 3 7 2011-07-01 a b c d
# 28 3 8 2011-07-01 a b c d
# 29 3 9 2011-07-01 a b c d
# 30 3 10 2011-07-01 a b c d
这应该足够稳健:即使对同一数据多次运行,也不会产生额外的改动。如果某行的 $Date 列不是 NA,则不会对该行尝试移动。如果 $Value_1 无法解析为日期,则不会有任何变化。