Question

我有一个包含两个字符变量的数据框：一个是时间戳，另一个是美国的州。我一直尝试将每个时间戳转换为POSIX对象，并根据州的取值分配时区：佛罗里达州（FL）使用东部时间（EST），德克萨斯州（TX）使用中部时间（CST6CDT），但没有成功。但是，无论我怎么尝试，R要么把所有时间戳都返回为同一个时区，要么返回包含自原点（origin）以来秒数的字符串。我当然可以把这些字符串再转换为POSIX对象，但这样就又回到了原点，仍然无法声明多个时区。我也尝试过用循环代替索引，但同样不起作用。

我特别想知道发生了什么。我的猜测（可能不正确）是问题与列中声明的数据类型有关，因为这可以解释示例3中发生的情况。但是，无论我阅读或尝试过什么，我都无法找到如何使其发挥作用。

示例1 - 将 df$time 转换为POSIX对象，然后尝试根据 df$state 分配不同的时区

 # Sample data: timestamps as text plus the US state each one belongs to.
 df <- data.frame(time = c("2010-03-05 07:03:00", "2010-03-05 16:00:00", "2010-03-06 07:01:00"), state = c("FL", "FL", "TX"))
# Force both columns to character (older R versions create factors by default).
df$time <- as.character(df$time); df$state <- as.character(df$state)
# Parse without a tz argument; the printed result below is in the session's
# local time zone (PST on the asker's machine).
df$time <- as.POSIXct (strptime(df$time, "%Y-%m-%d %H:%M:%S"))
df$time  
#-----
#[1] "2010-03-05 07:03:00 PST" "2010-03-05 16:00:00 PST" "2010-03-06 07:01:00 PST"

df$time 已成功转换为POSIX对象。但是当我尝试按州分配时区时，整个向量的时区仍然是最初转换时使用的那个时区（在我这里是PST）。

 # df$time is already POSIXct at this point. Re-parse each state's rows with
 # that state's time zone and assign the result back into the same column.
 df$time[df$state == "FL"] <- as.POSIXct (strptime(df$time[df$state == "FL"], "%Y-%m-%d %H:%M:%S"), tz = "EST")
 df$time[df$state == "TX"] <- as.POSIXct (strptime(df$time[df$state == "TX"], "%Y-%m-%d %H:%M:%S"), tz = "CST6CDT")
 df$time
 # The instants have shifted compared with the earlier output, but the whole
 # vector is still printed in a single zone (PST here).
#[1] "2010-03-05 04:03:00 PST" "2010-03-05 13:00:00 PST" "2010-03-06 05:01:00 PST"

示例2 - 尝试将 df$time 字符串直接转换为各州对应的时区，而不先将字符串转换为POSIX对象

 # Example 2: start again from plain character timestamps and state codes.
 df <- data.frame(time = c("2010-03-05 07:03:00", "2010-03-05 16:00:00", "2010-03-06 07:01:00"), state = c("FL", "FL", "TX"))
 df$time <- as.character(df$time); df$state <- as.character(df$state)
 df$time
#[1] "2010-03-05 07:03:00" "2010-03-05 16:00:00" "2010-03-06 07:01:00"
 # df$time is still a character vector here, so the date-times assigned into
 # its subsets end up stored as text: the printed result below shows the
 # seconds since the epoch as strings.
 df$time[df$state == "FL"] <- as.POSIXct(strptime(df$time[df$state == "FL"], "%Y-%m-%d %H:%M:%S"), tz = "EST")
 df$time[df$state == "TX"] <- as.POSIXct(strptime(df$time[df$state == "TX"], "%Y-%m-%d %H:%M:%S"), tz = "CST6CDT")
 df$time
#[1] "1267790580" "1267822800" "1267880460"

示例3 - 虽然我可以取出示例2中代码生成的 df$time 字符串，并成功将它们转换为EST……

 # Convert the epoch-second strings of the FL rows to numbers and then to
 # date-times displayed in EST.
 as.POSIXct(as.numeric(df$time[df$state == "FL"]), origin = "1970-01-01", tz = "EST")

#[1] "2010-03-05 07:03:00 EST" "2010-03-05 16:00:00 EST"

……但是如果我尝试把这些对象写回数据框，R会把它们重新转换成字符串，于是我又回到了原点。

 # Same conversion as above, but assigned back into the character column
 # df$time; the printed result below shows epoch-second strings again.
 df$time[df$state == "FL"] <- as.POSIXct(as.numeric(df$time[df$state == "FL"]), origin = "1970-01-01", tz = "EST")
 df$time
#[1] "1267790580" "1267822800" "1267880460"

Answer 1

根据评论，R无法在单个向量中处理多个时区。因此，如果其他人试图解决与我相同的问题，我可以提供一个粗略但有效的解决方法。

首先，为每个时区创建单独的向量，并将其中的POSIX对象设置为各自的本地时间；然后把这些单独的向量合并成一个新向量，并将其POSIX对象统一设置为UTC / GMT（或者您选择的其他单一时区）。

# Sample data: local wall-clock timestamps and the state each was recorded in.
df <- data.frame(
  time = c("2010-03-05 07:03:00", "2010-03-05 16:00:00", "2010-03-05 08:27:00"),
  state = c("FL", "FL", "TX"),
  stringsAsFactors = FALSE
)

# Time zone used to interpret the timestamps of each state. Add further
# states here; rows whose state is not listed end up as NA.
state_tz <- c(FL = "EST", TX = "CST6CDT")

# A single date-time vector cannot hold several time zones, so each state's
# rows are parsed directly in that state's zone and only the resulting
# seconds since the epoch are kept.
epoch_secs <- rep(NA_real_, nrow(df))
for (st in names(state_tz)) {
  rows <- df$state == st
  epoch_secs[rows] <- as.numeric(
    as.POSIXct(df$time[rows], format = "%Y-%m-%d %H:%M:%S", tz = state_tz[[st]])
  )
}

# Re-attach the date-time class once, expressed in one common zone (UTC).
df$common.time.UTC <- as.POSIXct(epoch_secs, origin = "1970-01-01", tz = "UTC")

df

time                state     common.time.UTC
2010-03-05 07:03:00    FL 2010-03-05 12:03:00
2010-03-05 16:00:00    FL 2010-03-05 21:00:00
2010-03-05 08:27:00    TX 2010-03-05 14:27:00

R如何将时间戳转换为同一列中的多个时区

1 个答案: