如果我有三个时间序列表,如:
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5",
header = TRUE, stringsAsFactors = FALSE)
我怎样才能创建一个表df4,其中包含表中最早的日期。然后它组织每个表的秒列,最后组织每个表的三分之一列。请注意,如果日期不存在..如果用NA填充列。
df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
2000-01-01 NA 1 4 NA 1 3
2000-01-02 NA 0 4 NA 0 0
2000-01-03 NA 4 6 NA 6 1
2000-01-04 NA 6 7 NA 5 5
2000-01-05 NA 3 NA NA 0 NA
2000-01-06 NA 3 NA NA 0 NA
2000-01-07 5 7 NA 1 4 NA
2000-01-08 1 9 NA 4 0 NA
2000-01-09 4 0 NA 3 0 NA
2000-01-10 0 0 NA 0 0 NA",
header = TRUE, stringsAsFactors = FALSE)
答案 0 :(得分:0)
您只需创建日期向量并合并数据框即可:
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5",
header = TRUE, stringsAsFactors = FALSE)
df1$Date <- as.Date(df1$Date)
df2$Date <- as.Date(df2$Date)
df3$Date <- as.Date(df3$Date)
# Make a vector of all dates between the lowest and highest
tdate <- seq(from=as.Date(min(df1$Date, df2$Date, df3$Date)), to=as.Date(max(df1$Date, df2$Date, df3$Date)), by = 1)
# Merge the dataframes to this vector of dates
df4 <- data.frame(Date = tdate)
df4 <- merge(df4, df1, all.x=T, by="Date")
df4 <- merge(df4, df2, all.x=T, by="Date")
df4 <- merge(df4, df3, all.x=T, by="Date")
df4
#> Date V1.x V2.x V1.y V2.y V1 V2
#> 1 2000-01-01 NA NA 1 1 4 3
#> 2 2000-01-02 NA NA 0 0 4 0
#> 3 2000-01-03 NA NA 4 6 6 1
#> 4 2000-01-04 NA NA 6 5 7 5
#> 5 2000-01-05 NA NA 3 0 NA NA
#> 6 2000-01-06 NA NA 3 0 NA NA
#> 7 2000-01-07 5 1 7 4 NA NA
#> 8 2000-01-08 1 4 9 0 NA NA
#> 9 2000-01-09 4 3 0 0 NA NA
#> 10 2000-01-10 0 0 0 0 NA NA
答案 1 :(得分:0)
使用dplyr
可以轻松实现。
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0",
header = TRUE, stringsAsFactors = FALSE)
df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5",
header = TRUE, stringsAsFactors = FALSE)
df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
2000-01-01 NA 1 4 NA 1 3
2000-01-02 NA 0 4 NA 0 0
2000-01-03 NA 4 6 NA 6 1
2000-01-04 NA 6 7 NA 5 5
2000-01-05 NA 3 NA NA 0 NA
2000-01-06 NA 3 NA NA 0 NA
2000-01-07 5 7 NA 1 4 NA
2000-01-08 1 9 NA 4 0 NA
2000-01-09 4 0 NA 3 0 NA
2000-01-10 0 0 NA 0 0 NA",
header = TRUE, stringsAsFactors = FALSE)
library(dplyr)
# We can use `full_join` to merge the dataframes
# (`full_join` means that all records from all of the dataframe
# are represented in the new dataframe in fills NA values
# where there is no matching date). by = c("Date" = "Date")
# tells dplyr that we want to join only on the Date column
df5 <- df1 %>%
full_join(df2, by = c("Date" = "Date")) %>%
full_join(df3, by = c("Date" = "Date"))
# next we use the arrange function to sort the dataframe
# by increasing Date
df5 <- df5 %>%
arrange(Date)
df5
#> Date V1.x V2.x V1.y V2.y V1 V2
#> 1 2000-01-01 NA NA 1 1 4 3
#> 2 2000-01-02 NA NA 0 0 4 0
#> 3 2000-01-03 NA NA 4 6 6 1
#> 4 2000-01-04 NA NA 6 5 7 5
#> 5 2000-01-05 NA NA 3 0 NA NA
#> 6 2000-01-06 NA NA 3 0 NA NA
#> 7 2000-01-07 5 1 7 4 NA NA
#> 8 2000-01-08 1 4 9 0 NA NA
#> 9 2000-01-09 4 3 0 0 NA NA
#> 10 2000-01-10 0 0 0 0 NA NA
# Finally, we rename and reorder the columns to match your
# example answer, df4
df5 <- df5 %>%
select(
Date,
df1_V1 = V1.x,
df2_V1 = V1.y,
df3_V1 = V1,
df1_V2 = V2.x,
df2_V2 = V2.y,
df3_V2 = V2)
df5
#> Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
#> 1 2000-01-01 NA 1 4 NA 1 3
#> 2 2000-01-02 NA 0 4 NA 0 0
#> 3 2000-01-03 NA 4 6 NA 6 1
#> 4 2000-01-04 NA 6 7 NA 5 5
#> 5 2000-01-05 NA 3 NA NA 0 NA
#> 6 2000-01-06 NA 3 NA NA 0 NA
#> 7 2000-01-07 5 7 NA 1 4 NA
#> 8 2000-01-08 1 9 NA 4 0 NA
#> 9 2000-01-09 4 0 NA 3 0 NA
#> 10 2000-01-10 0 0 NA 0 0 NA
all.equal(df5, df4)
#> [1] TRUE