我认为这不一定是严格的时间序列数据,但我无法看到如何构建问题...
我有一个数据表,其中行作为唯一的XY位置和ID,X和Y的列,然后是12个月的列,用于几个气候变量:
数据如下所示:
ID lat long Tave01 Tave02 Tave03 Tave04 Tave05 Tave06 Tave07 Tave08 Tave09 Tave10 Tave11 Tave12 Tmax01 Tmax02 Tmax03 Tmax04 Tmax05 Tmax06 Tmax07 Tmax08 Tmax09 Tmax10 Tmax11 Tmax12 Tmin01 Tmin02 Tmin03 Tmin04 Tmin05 Tmin06 Tmin07 Tmin08 Tmin09 Tmin10 Tmin11 Tmin12
F102 51.5 -116.3 -12.8 -9.2 -4.6 -0.9 4.1 5.8 9.5 8.1 3.2 -1.1 -7.4 -12.0 -8.0 -2.8 0.4 5.2 10.5 9.9 15.0 14.3 8.1 2.5 -4.8 -7.6 -17.5 -15.6 -9.5 -6.9 -2.2 1.7 3.9 1.9 -1.7 -4.7 -10.0 -16.4
F103 51.5 -116.2 -13.0 -9.5 -4.9 -1.2 4.0 5.7 9.4 8.0 3.1 -1.1 -7.6 -12.3 -7.9 -2.9 0.2 5.0 10.3 9.8 15.0 14.3 8.1 2.7 -4.6 -7.6 -18.1 -16.1 -10.0 -7.3 -2.4 1.7 3.7 1.7 -1.9 -5.0 -10.6 -17.0
F105 51.5 -116.1 -13.7 -10.8 -6.5 -3.5 1.8 4.2 7.6 6.1 1.5 -2.7 -9.1 -13.5 -8.7 -4.7 -1.8 2.2 7.6 7.1 12.2 11.5 5.7 1.1 -6.9 -9.1 -18.8 -17.0 -11.2 -9.2 -3.9 1.2 2.9 0.7 -2.7 -6.5 -11.3 -18.0
F106 51.5 -116.0 -13.7 -10.8 -6.4 -3.3 1.8 4.2 7.7 6.2 1.7 -2.5 -9.0 -13.4 -8.5 -4.7 -1.9 2.6 7.8 7.3 12.5 11.9 6.0 1.7 -6.8 -8.9 -18.8 -16.9 -11.0 -9.3 -4.3 1.1 2.9 0.6 -2.7 -6.6 -11.2 -17.9
F116 51.5 -115.3 -11.3 -7.8 -2.8 1.2 5.9 7.6 11.6 10.1 5.4 2.4 -3.9 -10.2 -5.7 -0.7 2.4 8.4 13.4 12.7 18.3 17.3 11.4 9.0 0.8 -5.1 -17.0 -15.0 -8.1 -5.9 -1.5 2.4 4.9 2.9 -0.7 -4.2 -8.7 -15.3
F117 51.5 -115.2 -11.2 -7.7 -2.6 1.4 6.2 7.8 11.8 10.3 5.6 2.6 -3.7 -10.1 -5.6 -0.5 2.8 8.6 13.6 13.0 18.6 17.5 11.7 9.2 1.2 -4.9 -16.9 -14.9 -8.0 -5.8 -1.3 2.6 5.0 3.1 -0.5 -4.1 -8.6 -15.2
F118 51.5 -115.1 -11.1 -7.4 -2.4 1.6 6.4 8.1 12.0 10.6 5.9 2.7 -3.4 -9.9 -5.4 -0.1 3.2 8.9 13.9 13.3 18.9 17.7 11.9 9.4 1.8 -4.7 -16.8 -14.7 -8.0 -5.6 -1.0 2.8 5.2 3.6 -0.1 -3.9 -8.6 -15.0
F119 51.5 -115.0 -10.9 -7.2 -2.2 1.9 6.7 8.4 12.3 10.9 6.2 3.0 -2.9 -9.6 -5.2 0.3 3.6 9.3 14.2 13.7 19.3 18.1 12.3 9.7 2.6 -4.3 -16.7 -14.6 -8.0 -5.4 -0.8 3.0 5.4 3.8 0.1 -3.8 -8.4 -14.8
F120 51.5 -114.9 -10.6 -6.6 -1.7 2.4 7.0 8.6 12.6 11.0 6.3 3.3 -2.5 -9.0 -4.7 1.1 4.4 9.8 14.7 14.2 19.8 18.5 12.9 10.1 3.1 -3.4 -16.5 -14.3 -7.9 -5.0 -0.8 3.1 5.3 3.4 -0.2 -3.5 -8.2 -14.5
F121 51.5 -114.8 -10.3 -6.2 -1.3 2.7 7.2 8.9 12.7 11.2 6.5 3.5 -2.3 -8.5 -4.2 1.7 5.1 10.2 15.1 14.7 20.3 18.9 13.3 10.4 3.3 -2.8 -16.3 -14.0 -7.7 -4.7 -0.7 3.1 5.2 3.4 -0.4 -3.4 -7.8 -14.2
...
我想要的是一个时间序列布局,其中每一行代表每个离散ID的月份条目,其中变量(Tmean,Tmin,Tmax)为列。因此,每个唯一ID最终会有12行。像这样......
ID Month Tave Tmax Tmin
F102 1 -12.8 -8.0 -17.5
F102 2 -9.2 -2.8 -15.6
F102 3 -4.6 0.4 -9.5
F102 4 4.1 5.2 -6.9
...
F102 12 -12.0 -7.6 -16.4
F103 1 -13.0 -7.9 -17.5
F103 2 -9.5 -2.9 -16.1
...
每个电台和每个月等等。
我可以通过循环或其他方式通过所有变量强制执行此操作并填充 expand.grid 表或其他内容,但必须有更优雅(即更快更容易)的解决方案。
我已经浏览了 reshape 包,但似乎无法追踪我需要的内容。
答案 0 :(得分:2)
这是一个data.table
解决方案。绝对不是最干净的解决方案。我只是喜欢这个问题的挑战......而且只是在我做对了之前就玩了:
# Set to data.table
setDT(df)
# Melt to long form
dfl <- melt(df, id.vars = c("ID", "lat","long"))
dfl
ID lat long variable value
1: F102 51.5 -116.3 Tave01 -12.8
2: F103 51.5 -116.2 Tave01 -13.0
3: F105 51.5 -116.1 Tave01 -13.7
4: F106 51.5 -116.0 Tave01 -13.7
5: F116 51.5 -115.3 Tave01 -11.3
---
356: F117 51.5 -115.2 Tmin12 -15.2
357: F118 51.5 -115.1 Tmin12 -15.0
358: F119 51.5 -115.0 Tmin12 -14.8
359: F120 51.5 -114.9 Tmin12 -14.5
360: F121 51.5 -114.8 Tmin12 -14.2
# Add new variables, month and variable type using regex
dfl[, Month := gsub(".+([0-9]{2})$", "\\1", variable)]
dfl[, var_type := gsub("(.{4}).+$", "\\1", variable)]
# Take to final form
dfw <- reshape(dfl, idvar = c("ID", "lat","long", "Month"), timevar = "var_type", direction = "wide")
# Get rid of extra columns that are not needed anymore
dfw[, grep("variable\\.", names(dfw), value = TRUE) := NULL]
dfw[, c("lat", "long") := NULL]
# Cleaning the names a bit
names(dfw) <- gsub("value\\.", "", names(dfw))
# Print final product
dfw
ID Month Tave Tmax Tmin
1: F102 01 -12.8 -8.0 -17.5
2: F103 01 -13.0 -7.9 -18.1
3: F105 01 -13.7 -8.7 -18.8
4: F106 01 -13.7 -8.5 -18.8
5: F116 01 -11.3 -5.7 -17.0
---
116: F117 12 -10.1 -4.9 -15.2
117: F118 12 -9.9 -4.7 -15.0
118: F119 12 -9.6 -4.3 -14.8
119: F120 12 -9.0 -3.4 -14.5
120: F121 12 -8.5 -2.8 -14.2
修改强>
根据评论中提供的链接,我提出了另一个更短的版本来完成这项工作:
dfw <- reshape(df, idvar="ID", direction="long",
varying=list(ave=4:15, max=16:27, min=28:39),
v.names = c("Tave", "Tmax", "Tmin"))
setnames(dfw, "time", "Month")
dfw[, c("lat", "long") := NULL]
答案 1 :(得分:1)
使用dplyr
和tidyr
的解决方案。我们可以使用gather
将数据帧转换为长格式。使用separate
将列分隔为Measure
和Month
。请注意,sep
参数可以指定将列分隔的位置。如果可能,convert
可以确定是否要将列转换为数字。最后,使用spread将其转换为所需的输出(dt2
)。
library(dplyr)
library(tidyr)
dt2 <- dt %>%
select(-lat, -long) %>%
gather(Type, Value, -ID) %>%
separate(Type, into = c("Measure", "Month"), sep = 4, convert = TRUE) %>%
spread(Measure, Value)
数据:
dt <- read.table(text = "ID lat long Tave01 Tave02 Tave03 Tave04 Tave05 Tave06 Tave07 Tave08 Tave09 Tave10 Tave11 Tave12 Tmax01 Tmax02 Tmax03 Tmax04 Tmax05 Tmax06 Tmax07 Tmax08 Tmax09 Tmax10 Tmax11 Tmax12 Tmin01 Tmin02 Tmin03 Tmin04 Tmin05 Tmin06 Tmin07 Tmin08 Tmin09 Tmin10 Tmin11 Tmin12
F102 51.5 -116.3 -12.8 -9.2 -4.6 -0.9 4.1 5.8 9.5 8.1 3.2 -1.1 -7.4 -12.0 -8.0 -2.8 0.4 5.2 10.5 9.9 15.0 14.3 8.1 2.5 -4.8 -7.6 -17.5 -15.6 -9.5 -6.9 -2.2 1.7 3.9 1.9 -1.7 -4.7 -10.0 -16.4
F103 51.5 -116.2 -13.0 -9.5 -4.9 -1.2 4.0 5.7 9.4 8.0 3.1 -1.1 -7.6 -12.3 -7.9 -2.9 0.2 5.0 10.3 9.8 15.0 14.3 8.1 2.7 -4.6 -7.6 -18.1 -16.1 -10.0 -7.3 -2.4 1.7 3.7 1.7 -1.9 -5.0 -10.6 -17.0
F105 51.5 -116.1 -13.7 -10.8 -6.5 -3.5 1.8 4.2 7.6 6.1 1.5 -2.7 -9.1 -13.5 -8.7 -4.7 -1.8 2.2 7.6 7.1 12.2 11.5 5.7 1.1 -6.9 -9.1 -18.8 -17.0 -11.2 -9.2 -3.9 1.2 2.9 0.7 -2.7 -6.5 -11.3 -18.0
F106 51.5 -116.0 -13.7 -10.8 -6.4 -3.3 1.8 4.2 7.7 6.2 1.7 -2.5 -9.0 -13.4 -8.5 -4.7 -1.9 2.6 7.8 7.3 12.5 11.9 6.0 1.7 -6.8 -8.9 -18.8 -16.9 -11.0 -9.3 -4.3 1.1 2.9 0.6 -2.7 -6.6 -11.2 -17.9
F116 51.5 -115.3 -11.3 -7.8 -2.8 1.2 5.9 7.6 11.6 10.1 5.4 2.4 -3.9 -10.2 -5.7 -0.7 2.4 8.4 13.4 12.7 18.3 17.3 11.4 9.0 0.8 -5.1 -17.0 -15.0 -8.1 -5.9 -1.5 2.4 4.9 2.9 -0.7 -4.2 -8.7 -15.3
F117 51.5 -115.2 -11.2 -7.7 -2.6 1.4 6.2 7.8 11.8 10.3 5.6 2.6 -3.7 -10.1 -5.6 -0.5 2.8 8.6 13.6 13.0 18.6 17.5 11.7 9.2 1.2 -4.9 -16.9 -14.9 -8.0 -5.8 -1.3 2.6 5.0 3.1 -0.5 -4.1 -8.6 -15.2
F118 51.5 -115.1 -11.1 -7.4 -2.4 1.6 6.4 8.1 12.0 10.6 5.9 2.7 -3.4 -9.9 -5.4 -0.1 3.2 8.9 13.9 13.3 18.9 17.7 11.9 9.4 1.8 -4.7 -16.8 -14.7 -8.0 -5.6 -1.0 2.8 5.2 3.6 -0.1 -3.9 -8.6 -15.0
F119 51.5 -115.0 -10.9 -7.2 -2.2 1.9 6.7 8.4 12.3 10.9 6.2 3.0 -2.9 -9.6 -5.2 0.3 3.6 9.3 14.2 13.7 19.3 18.1 12.3 9.7 2.6 -4.3 -16.7 -14.6 -8.0 -5.4 -0.8 3.0 5.4 3.8 0.1 -3.8 -8.4 -14.8
F120 51.5 -114.9 -10.6 -6.6 -1.7 2.4 7.0 8.6 12.6 11.0 6.3 3.3 -2.5 -9.0 -4.7 1.1 4.4 9.8 14.7 14.2 19.8 18.5 12.9 10.1 3.1 -3.4 -16.5 -14.3 -7.9 -5.0 -0.8 3.1 5.3 3.4 -0.2 -3.5 -8.2 -14.5
F121 51.5 -114.8 -10.3 -6.2 -1.3 2.7 7.2 8.9 12.7 11.2 6.5 3.5 -2.3 -8.5 -4.2 1.7 5.1 10.2 15.1 14.7 20.3 18.9 13.3 10.4 3.3 -2.8 -16.3 -14.0 -7.7 -4.7 -0.7 3.1 5.2 3.4 -0.4 -3.4 -7.8 -14.2",
header = TRUE, stringsAsFactors = FALSE)
答案 2 :(得分:1)
要为 reshape 解决方案添加一些有用的东西(以弥补询问重复的问题),我将在这里添加一个快速 for 循环脚本来制作重塑命令中的变化列表(对大型数据集很有用)。
您可以先使用ycw的 read.table 行来构建数据(我在此不再赘述)。
foo <- read.table...
# Make a list of the variable names (minus month or time numbers)
foo.vars <- c("Tave","Tmax","Tmin")
# Loop to make a list of variable column numbers
for(i in 1:length(foo.vars)){
if(i==1) reshape.list <- list()
reshape.list[[i]] <- grep(foo.vars[i],names(foo))
}
这将为每个变量创建列号列表,并在reshape命令的 vary 选项中进行标识:
reshape.list
[[1]]
[1] 4 5 6 7 8 9 10 11 12 13 14 15
[[2]]
[1] 16 17 18 19 20 21 22 23 24 25 26 27
[[3]]
[1] 28 29 30 31 32 33 34 35 36 37 38 39
然后是实际的重塑命令:
# Reshape command to make a ling table from the wide table
foo.long <- reshape(foo, idvar="ID", direction="long", varying=reshape.list, timevar="Month", v.names=foo.vars)
foo.long <- foo.long[order(foo.long$ID,foo.long$Month),]
代码根据需要返回数据:
ID lat long Month Tave Tmax Tmin
F102 51.5 -116.3 1 -12.8 -8.0 -17.5
F102 51.5 -116.3 2 -9.2 -2.8 -15.6
F102 51.5 -116.3 3 -4.6 0.4 -9.5
F102 51.5 -116.3 4 -0.9 5.2 -6.9
F102 51.5 -116.3 5 4.1 10.5 -2.2
...
答案 3 :(得分:0)
这是一个整合的解决方案,我认为你可以扩展。此解决方案是您无法使用separate
功能:
df1 <- data.frame(stringsAsFactors=FALSE,
ID = c("F102", "F103", "F105", "F106", "F116", "F117", "F118",
"F119", "F120", "F121"),
lat = c(51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5),
long = c(-116.3, -116.2, -116.1, -116, -115.3, -115.2, -115.1, -115,
-114.9, -114.8),
Tave01 = c(-12.8, -13, -13.7, -13.7, -11.3, -11.2, -11.1, -10.9, -10.6,
-10.3),
Tave02 = c(-9.2, -9.5, -10.8, -10.8, -7.8, -7.7, -7.4, -7.2, -6.6, -6.2),
Tave03 = c(-4.6, -4.9, -6.5, -6.4, -2.8, -2.6, -2.4, -2.2, -1.7, -1.3),
Tave04 = c(-0.9, -1.2, -3.5, -3.3, 1.2, 1.4, 1.6, 1.9, 2.4, 2.7),
Tave05 = c(4.1, 4, 1.8, 1.8, 5.9, 6.2, 6.4, 6.7, 7, 7.2),
Tave06 = c(5.8, 5.7, 4.2, 4.2, 7.6, 7.8, 8.1, 8.4, 8.6, 8.9),
Tave07 = c(9.5, 9.4, 7.6, 7.7, 11.6, 11.8, 12, 12.3, 12.6, 12.7),
Tave08 = c(8.1, 8, 6.1, 6.2, 10.1, 10.3, 10.6, 10.9, 11, 11.2),
Tave09 = c(3.2, 3.1, 1.5, 1.7, 5.4, 5.6, 5.9, 6.2, 6.3, 6.5),
Tave10 = c(-1.1, -1.1, -2.7, -2.5, 2.4, 2.6, 2.7, 3, 3.3, 3.5),
Tave11 = c(-7.4, -7.6, -9.1, -9, -3.9, -3.7, -3.4, -2.9, -2.5, -2.3),
Tave12 = c(-12, -12.3, -13.5, -13.4, -10.2, -10.1, -9.9, -9.6, -9, -8.5),
Tmax01 = c(-8, -7.9, -8.7, -8.5, -5.7, -5.6, -5.4, -5.2, -4.7, -4.2),
Tmax02 = c(-2.8, -2.9, -4.7, -4.7, -0.7, -0.5, -0.1, 0.3, 1.1, 1.7),
Tmax03 = c(0.4, 0.2, -1.8, -1.9, 2.4, 2.8, 3.2, 3.6, 4.4, 5.1),
Tmax04 = c(5.2, 5, 2.2, 2.6, 8.4, 8.6, 8.9, 9.3, 9.8, 10.2),
Tmax05 = c(10.5, 10.3, 7.6, 7.8, 13.4, 13.6, 13.9, 14.2, 14.7, 15.1),
Tmax06 = c(9.9, 9.8, 7.1, 7.3, 12.7, 13, 13.3, 13.7, 14.2, 14.7),
Tmax07 = c(15, 15, 12.2, 12.5, 18.3, 18.6, 18.9, 19.3, 19.8, 20.3),
Tmax08 = c(14.3, 14.3, 11.5, 11.9, 17.3, 17.5, 17.7, 18.1, 18.5, 18.9),
Tmax09 = c(8.1, 8.1, 5.7, 6, 11.4, 11.7, 11.9, 12.3, 12.9, 13.3),
Tmax10 = c(2.5, 2.7, 1.1, 1.7, 9, 9.2, 9.4, 9.7, 10.1, 10.4),
Tmax11 = c(-4.8, -4.6, -6.9, -6.8, 0.8, 1.2, 1.8, 2.6, 3.1, 3.3),
Tmax12 = c(-7.6, -7.6, -9.1, -8.9, -5.1, -4.9, -4.7, -4.3, -3.4, -2.8),
Tmin01 = c(-17.5, -18.1, -18.8, -18.8, -17, -16.9, -16.8, -16.7, -16.5,
-16.3),
Tmin02 = c(-15.6, -16.1, -17, -16.9, -15, -14.9, -14.7, -14.6, -14.3, -14),
Tmin03 = c(-9.5, -10, -11.2, -11, -8.1, -8, -8, -8, -7.9, -7.7),
Tmin04 = c(-6.9, -7.3, -9.2, -9.3, -5.9, -5.8, -5.6, -5.4, -5, -4.7),
Tmin05 = c(-2.2, -2.4, -3.9, -4.3, -1.5, -1.3, -1, -0.8, -0.8, -0.7),
Tmin06 = c(1.7, 1.7, 1.2, 1.1, 2.4, 2.6, 2.8, 3, 3.1, 3.1),
Tmin07 = c(3.9, 3.7, 2.9, 2.9, 4.9, 5, 5.2, 5.4, 5.3, 5.2),
Tmin08 = c(1.9, 1.7, 0.7, 0.6, 2.9, 3.1, 3.6, 3.8, 3.4, 3.4),
Tmin09 = c(-1.7, -1.9, -2.7, -2.7, -0.7, -0.5, -0.1, 0.1, -0.2, -0.4),
Tmin10 = c(-4.7, -5, -6.5, -6.6, -4.2, -4.1, -3.9, -3.8, -3.5, -3.4),
Tmin11 = c(-10, -10.6, -11.3, -11.2, -8.7, -8.6, -8.6, -8.4, -8.2, -7.8),
Tmin12 = c(-16.4, -17, -18, -17.9, -15.3, -15.2, -15, -14.8, -14.5, -14.2)
)
library(tidyr)
library(readr)
extract.alpha <- function(x, space = ""){
require(stringr)
require(purrr)
require(magrittr)
y <- strsplit(unlist(x), "[^a-zA-Z]+")
z <- y %>% map(~paste(., collapse = space)) %>% simplify()
return(z)}
df1 %>%
gather(Variable, Value, -c(ID, lat, long)) %>%
mutate(Month = parse_number(Variable),
Variable2 = extract.alpha(Variable)) %>%
select(-c(Variable, lat, long)) %>%
spread(Variable2, Value)
&#13;