如何按特定顺序合并不同的表?

时间:2018-02-20 14:45:54

标签: r dataframe merge

如果我有三个时间序列表,如:

# Example input 1: daily values V1/V2 for 2000-01-07 through 2000-01-10.
# The first line of the text is the header; with stringsAsFactors = FALSE
# the Date column is kept as plain character strings.
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0", 
              header = TRUE, stringsAsFactors = FALSE)

# Example input 2: the longest series, covering every day from 2000-01-01
# through 2000-01-10.
df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0", 
              header = TRUE, stringsAsFactors = FALSE)

# Example input 3: values for 2000-01-01 through 2000-01-04 only.
df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5", 
              header = TRUE, stringsAsFactors = FALSE)

我怎样才能创建一个表 df4,使其从所有表中最早的日期开始?其中先依次排列每个表的第二列(V1),再依次排列每个表的第三列(V2)。请注意,如果某个表中不存在某个日期,则用 NA 填充该表对应的列。

# Desired result: one row per day from 2000-01-01 through 2000-01-10.
# Column order is Date, then the V1 column of df1, df2 and df3, then the V2
# column of df1, df2 and df3. A cell is NA when the corresponding table has
# no row for that date.
df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2  df3_V2
2000-01-01 NA 1 4 NA 1 3
2000-01-02 NA 0 4 NA 0 0
2000-01-03 NA 4 6 NA 6 1
2000-01-04 NA 6 7 NA 5 5
2000-01-05 NA 3 NA NA 0 NA
2000-01-06 NA 3 NA NA 0 NA
2000-01-07 5 7 NA 1 4 NA
2000-01-08 1 9 NA 4 0 NA
2000-01-09 4 0 NA 3 0 NA
2000-01-10 0 0 NA 0 0 NA", 
              header = TRUE, stringsAsFactors = FALSE)

2 个答案:

答案 0 :(得分:0)

您只需创建日期向量并合并数据框即可:

# Example inputs: three daily series that cover different, partly
# overlapping date ranges.
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
2000-01-08 1 4
2000-01-09 4 3
2000-01-10 0 0",
                  header = TRUE, stringsAsFactors = FALSE)

df2 <- read.table(text = " Date V1 V2
2000-01-01 1 1
2000-01-02 0 0
2000-01-03 4 6
2000-01-04 6 5
2000-01-05 3 0
2000-01-06 3 0
2000-01-07 7 4
2000-01-08 9 0
2000-01-09 0 0
2000-01-10 0 0",
                  header = TRUE, stringsAsFactors = FALSE)

df3 <- read.table(text = " Date V1 V2
2000-01-01 4 3
2000-01-02 4 0
2000-01-03 6 1
2000-01-04 7 5",
                  header = TRUE, stringsAsFactors = FALSE)

# read.table() leaves Date as character; convert it so that min(), max()
# and seq() work on real calendar dates.
df1$Date <- as.Date(df1$Date)
df2$Date <- as.Date(df2$Date)
df3$Date <- as.Date(df3$Date)

# Make a vector of all dates between the lowest and highest
tdate <- seq(
  from = min(df1$Date, df2$Date, df3$Date),
  to = max(df1$Date, df2$Date, df3$Date),
  by = "day"
)

# Prefix every value column with the name of its table (V1 -> df1_V1, ...)
# so the columns stay identifiable after merging, instead of relying on
# merge()'s automatic .x/.y suffixes.
tables <- list(df1 = df1, df2 = df2, df3 = df3)
prefixed <- Map(function(tbl, prefix) {
  is_value <- names(tbl) != "Date"
  names(tbl)[is_value] <- paste(prefix, names(tbl)[is_value], sep = "_")
  tbl
}, tables, names(tables))

# Left-join each table onto the complete date vector; dates that a table
# does not contain are filled with NA.
df4 <- Reduce(
  function(acc, tbl) merge(acc, tbl, by = "Date", all.x = TRUE),
  prefixed,
  data.frame(Date = tdate)
)

# Put the columns in the requested order: the V1 column of every table
# first, then the V2 column of every table.
value_names <- setdiff(names(df1), "Date")
col_order <- c(
  "Date",
  as.vector(outer(names(tables), value_names, paste, sep = "_"))
)
df4 <- df4[, col_order]
df4
#>          Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
#> 1  2000-01-01     NA      1      4     NA      1      3
#> 2  2000-01-02     NA      0      4     NA      0      0
#> 3  2000-01-03     NA      4      6     NA      6      1
#> 4  2000-01-04     NA      6      7     NA      5      5
#> 5  2000-01-05     NA      3     NA     NA      0     NA
#> 6  2000-01-06     NA      3     NA     NA      0     NA
#> 7  2000-01-07      5      7     NA      1      4     NA
#> 8  2000-01-08      1      9     NA      4      0     NA
#> 9  2000-01-09      4      0     NA      3      0     NA
#> 10 2000-01-10      0      0     NA      0      0     NA

答案 1 :(得分:0)

使用dplyr可以轻松实现。


# Same example inputs as in the question. The data lines inside the text
# are indented here; read.table() splits on whitespace by default, so the
# leading spaces do not affect the parsed values.
df1 <- read.table(text = " Date V1 V2
2000-01-07 5 1
                  2000-01-08 1 4
                  2000-01-09 4 3
                  2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

# Longest series: every day from 2000-01-01 through 2000-01-10.
df2 <- read.table(text = " Date V1 V2
                  2000-01-01 1 1
                  2000-01-02 0 0
                  2000-01-03 4 6
                  2000-01-04 6 5
                  2000-01-05 3 0
                  2000-01-06 3 0
                  2000-01-07 7 4
                  2000-01-08 9 0
                  2000-01-09 0 0
                  2000-01-10 0 0", 
                  header = TRUE, stringsAsFactors = FALSE)

# Shortest series at the start of the range: 2000-01-01 through 2000-01-04.
df3 <- read.table(text = " Date V1 V2
                  2000-01-01 4 3
                  2000-01-02 4 0
                  2000-01-03 6 1
                  2000-01-04 7 5", 
                  header = TRUE, stringsAsFactors = FALSE)

# Expected result from the question; used at the end of this answer as the
# reference that the computed data frame is compared against.
df4 <- read.table(text = " Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2  df3_V2
                  2000-01-01 NA 1 4 NA 1 3
                  2000-01-02 NA 0 4 NA 0 0
                  2000-01-03 NA 4 6 NA 6 1
                  2000-01-04 NA 6 7 NA 5 5
                  2000-01-05 NA 3 NA NA 0 NA
                  2000-01-06 NA 3 NA NA 0 NA
                  2000-01-07 5 7 NA 1 4 NA
                  2000-01-08 1 9 NA 4 0 NA
                  2000-01-09 4 0 NA 3 0 NA
                  2000-01-10 0 0 NA 0 0 NA", 
                  header = TRUE, stringsAsFactors = FALSE)

library(dplyr)

# Step 1: combine the three data frames with `full_join`. A full join keeps
# every Date that occurs in any of the inputs and fills the columns of a
# table with NA wherever that table has no row for the date. The join key
# is the Date column only; the clashing V1/V2 names of the first two tables
# receive the suffixes .x and .y, while those of the third stay unchanged.
df5 <- Reduce(
  function(left, right) full_join(left, right, by = "Date"),
  list(df1, df2, df3)
)

# Step 2: a full join does not sort its result, so order the rows by
# increasing Date.
df5 <- arrange(df5, Date)

df5
#>          Date V1.x V2.x V1.y V2.y V1 V2
#> 1  2000-01-01   NA   NA    1    1  4  3
#> 2  2000-01-02   NA   NA    0    0  4  0
#> 3  2000-01-03   NA   NA    4    6  6  1
#> 4  2000-01-04   NA   NA    6    5  7  5
#> 5  2000-01-05   NA   NA    3    0 NA NA
#> 6  2000-01-06   NA   NA    3    0 NA NA
#> 7  2000-01-07    5    1    7    4 NA NA
#> 8  2000-01-08    1    4    9    0 NA NA
#> 9  2000-01-09    4    3    0    0 NA NA
#> 10 2000-01-10    0    0    0    0 NA NA

# Step 3: `select` both reorders and renames in one call, giving the
# column layout of the expected answer, df4 (all V1 columns, then all V2
# columns, each prefixed with the name of its source table).
df5 <- select(
  df5,
  Date,
  df1_V1 = V1.x, df2_V1 = V1.y, df3_V1 = V1,
  df1_V2 = V2.x, df2_V2 = V2.y, df3_V2 = V2
)

df5
#>          Date df1_V1 df2_V1 df3_V1 df1_V2 df2_V2 df3_V2
#> 1  2000-01-01     NA      1      4     NA      1      3
#> 2  2000-01-02     NA      0      4     NA      0      0
#> 3  2000-01-03     NA      4      6     NA      6      1
#> 4  2000-01-04     NA      6      7     NA      5      5
#> 5  2000-01-05     NA      3     NA     NA      0     NA
#> 6  2000-01-06     NA      3     NA     NA      0     NA
#> 7  2000-01-07      5      7     NA      1      4     NA
#> 8  2000-01-08      1      9     NA      4      0     NA
#> 9  2000-01-09      4      0     NA      3      0     NA
#> 10 2000-01-10      0      0     NA      0      0     NA

# Confirm that the computed table equals the expected one.
all.equal(df5, df4)
#> [1] TRUE