我有一个data.frame d
包含一些POSIX日期,我想用d$date$year <- 100
修改年份。这似乎最初工作正常,但是在此data.frame中选择了一些行之后,除了第一个修改日期之外的所有行都转换为<NA>
。我在这做错了什么?请参阅下面的代码。 (R-Fiddle)
date <- c("2014-01-01","2015-01-02","2016-01-03")
val <- c("a","b","c")
d <- data.frame(date,val)
d$date <- strptime(d$date,format="%Y-%m-%d")
d
# date val
#1 2014-01-01 a
#2 2015-01-02 b
#3 2016-01-03 c
# correct date as expected
d[c(TRUE,TRUE,TRUE),]
# date val
#1 2014-01-01 a
#2 2015-01-02 b
#3 2016-01-03 c
# correct dates as expected
d$date2000 <- d$date
d$date2000$year <- 100 # set year to 2000
d
# date val date2000
#1 2014-01-01 a 2000-01-01
#2 2015-01-02 b 2000-01-02
#3 2016-01-03 c 2000-01-03
# correct dates as expected
d[c(TRUE,TRUE,TRUE),]
# date val date2000
#1 2014-01-01 a 2000-01-01
#2 2015-01-02 b <NA>
#3 2016-01-03 c <NA>
# first entry correct, second and third entry <NA>
答案 0 :(得分:1)
问题似乎出现在d$date2000$year <- 100
中。它使用d$date2000$year <- rep(100,length(d$date2000))
按预期工作,不知道为什么修改后的data.frame会在选择之前给出预期的结果。
答案 1 :(得分:1)
这个问题什么时候发生?
似乎在调用函数[.data.frame
期间发生(请参阅d[c(TRUE,TRUE,TRUE),]
,但d[1:3,]
甚至d[3,]
)。以下是该功能的定义:
> `[.data.frame`
function (x, i, j, drop = if (missing(i)) TRUE else length(cols) ==
1)
{
mdrop <- missing(drop)
Narg <- nargs() - (!mdrop)
has.j <- !missing(j)
if (!all(names(sys.call()) %in% c("", "drop")) && !isS4(x))
warning("named arguments other than 'drop' are discouraged")
if (Narg < 3L) {
if (!mdrop)
warning("'drop' argument will be ignored")
if (missing(i))
return(x)
if (is.matrix(i))
return(as.matrix(x)[i])
nm <- names(x)
if (is.null(nm))
nm <- character()
if (!is.character(i) && anyNA(nm)) {
names(nm) <- names(x) <- seq_along(x)
y <- NextMethod("[")
cols <- names(y)
if (anyNA(cols))
stop("undefined columns selected")
cols <- names(y) <- nm[cols]
}
else {
y <- NextMethod("[")
cols <- names(y)
if (!is.null(cols) && anyNA(cols))
stop("undefined columns selected")
}
if (anyDuplicated(cols))
names(y) <- make.unique(cols)
attr(y, "row.names") <- .row_names_info(x, 0L)
attr(y, "class") <- oldClass(x)
return(y)
}
if (missing(i)) {
if (drop && !has.j && length(x) == 1L)
return(.subset2(x, 1L))
nm <- names(x)
if (is.null(nm))
nm <- character()
if (has.j && !is.character(j) && anyNA(nm)) {
names(nm) <- names(x) <- seq_along(x)
y <- .subset(x, j)
cols <- names(y)
if (anyNA(cols))
stop("undefined columns selected")
cols <- names(y) <- nm[cols]
}
else {
y <- if (has.j)
.subset(x, j)
else x
cols <- names(y)
if (anyNA(cols))
stop("undefined columns selected")
}
if (drop && length(y) == 1L)
return(.subset2(y, 1L))
if (anyDuplicated(cols))
names(y) <- make.unique(cols)
nrow <- .row_names_info(x, 2L)
if (drop && !mdrop && nrow == 1L)
return(structure(y, class = NULL, row.names = NULL))
else {
attr(y, "class") <- oldClass(x)
attr(y, "row.names") <- .row_names_info(x, 0L)
return(y)
}
}
xx <- x
cols <- names(xx)
x <- vector("list", length(x))
x <- .Internal(copyDFattr(xx, x))
oldClass(x) <- attr(x, "row.names") <- NULL
if (has.j) {
nm <- names(x)
if (is.null(nm))
nm <- character()
if (!is.character(j) && anyNA(nm))
names(nm) <- names(x) <- seq_along(x)
x <- x[j]
cols <- names(x)
if (drop && length(x) == 1L) {
if (is.character(i)) {
rows <- attr(xx, "row.names")
i <- pmatch(i, rows, duplicates.ok = TRUE)
}
xj <- .subset2(.subset(xx, j), 1L)
return(if (length(dim(xj)) != 2L) xj[i] else xj[i,
, drop = FALSE])
}
if (anyNA(cols))
stop("undefined columns selected")
if (!is.null(names(nm)))
cols <- names(x) <- nm[cols]
nxx <- structure(seq_along(xx), names = names(xx))
sxx <- match(nxx[j], seq_along(xx))
}
else sxx <- seq_along(x)
rows <- NULL
if (is.character(i)) {
rows <- attr(xx, "row.names")
i <- pmatch(i, rows, duplicates.ok = TRUE)
}
for (j in seq_along(x)) {
xj <- xx[[sxx[j]]]
x[[j]] <- if (length(dim(xj)) != 2L)
xj[i]
else xj[i, , drop = FALSE]
}
if (drop) {
n <- length(x)
if (n == 1L)
return(x[[1L]])
if (n > 1L) {
xj <- x[[1L]]
nrow <- if (length(dim(xj)) == 2L)
dim(xj)[1L]
else length(xj)
drop <- !mdrop && nrow == 1L
}
else drop <- FALSE
}
if (!drop) {
if (is.null(rows))
rows <- attr(xx, "row.names")
rows <- rows[i]
if ((ina <- anyNA(rows)) | (dup <- anyDuplicated(rows))) {
if (!dup && is.character(rows))
dup <- "NA" %in% rows
if (ina)
rows[is.na(rows)] <- "NA"
if (dup)
rows <- make.unique(as.character(rows))
}
if (has.j && anyDuplicated(nm <- names(x)))
names(x) <- make.unique(nm)
if (is.null(rows))
rows <- attr(xx, "row.names")[i]
attr(x, "row.names") <- rows
oldClass(x) <- oldClass(xx)
}
x
}
<bytecode: 0x7fe8cc3a5548>
<environment: namespace:base>
相关位发生在这里:
for (j in seq_along(x)) {
xj <- xx[[sxx[j]]]
x[[j]] <- if (length(dim(xj)) != 2L)
xj[i]
else xj[i, , drop = FALSE]
}
此时(例如d[3,]
示例中),我们有:
> str(xx)
'data.frame': 3 obs. of 3 variables:
$ date : POSIXlt, format: "2014-01-01" "2015-01-02" "2016-01-03"
$ val : Factor w/ 3 levels "a","b","c": 1 2 3
$ date2000: POSIXlt, format: "2000-01-01" "2000-01-02" "2000-01-03"
> str(x)
List of 3
$ date : NULL
$ val : NULL
$ date2000: NULL
> i
[1] 3
> str(sxx)
int [1:3] 1 2 3
对于j = 3,我们有:
> str(xj)
POSIXlt[1:3], format: "2000-01-01" "2000-01-02" "2000-01-03"
> dim(xj)
NULL
> xj[3]
[1] NA
所以这就是失败的地方。
我认为问题出现了(正如您所说),因为您将d$date2000$year
替换为1而不是3:
> xj$wday
[1] 3 5 0
> xj$year
[1] 100
> xj[3]
[1] NA
> xj$year <- c(100,100,100)
> xj[3]
[1] "2000-01-03 CET"
似乎在显示xj
(或d
)时,xj$year
的值会被回收,但是当仅显示xj[3]
时,它会尝试构建POSIXlt并失败因为它缺少year
元素。事实上,如果我们尝试使用两个元素,而不是一个或三个元素,我们可以看到矢量被回收:
> xj$year <- c(100,101)
> xj
[1] "2000-01-01 CET" "2001-01-02 CET" "2000-01-03 CET"
> xj[2]
[1] "2001-01-02 CET"
> xj[3]
[1] NA