如何基于R中的其他列合并列

时间:2020-01-27 11:17:38

标签: r dataframe

我想把多组 product/price 列(product、price、product2、price2……)合并(堆叠)成一组 product 和 price 列,并为堆叠后的每一行重复对应的 country 和 date 值。

我的数据:

# Sample data: one row per country/date, carrying up to three product/price
# pairs in numbered columns. NA marks a missing product or price.
df1 <- data.frame(
  country  = c("USA", "USA", "ITALY"),
  date     = c("10/19", "10/19", "11/19"),
  product  = c("A", "B", "A"),
  price    = c(100, 200, 150),
  product2 = c("B", "A", NA),
  price2   = c(50, 100, NA),
  product3 = c("B", "A", "C"),
  price3   = c(30, NA, 10)
)

> df1
  country  date product price product2 price2 product3 price3
1     USA 10/19       A   100        B     50        B     30
2     USA 10/19       B   200        A    100        A     NA
3   ITALY 11/19       A   150     <NA>     NA        C     10

我期望的结果:

  country  date product price
1     USA 10/19       A   100
2     USA 10/19       B   200
3   ITALY 11/19       A   150
4     USA 10/19       B    50
5     USA 10/19       A   100
6     USA 10/19       B    30
7     USA 10/19       A    NA
8   ITALY 11/19       C    10

2 个答案:

答案 0 :(得分:3)

您可以使用 tidyr 中的 pivot_longer:

library(dplyr)
library(tidyr)

# Stack every product/price pair into a single product and price column,
# repeating country and date for each stacked row.
#
# names_pattern splits each column name into its non-digit stem (used as the
# output column name via ".value") and its optional numeric suffix ("num").
# Splitting with names_sep = "\\d" instead would discard the digit itself, so
# product2 and product3 would both reduce to ("product", "") and could no
# longer be told apart, while "product"/"price" would have no separator at all.
pivot_longer(df1,
             cols = -c(country, date),
             names_to = c(".value", "num"),
             names_pattern = "(\\D+)(\\d*)") %>%
  select(-num) %>%
  # Drop only the pairs that have no product; a known product with a missing
  # price (USA, 10/19, A, NA) is part of the expected result, so na.omit()
  # would remove too much here.
  filter(!is.na(product))

# Result for the df1 defined in the question (on R < 4.0 the string columns
# are factors and print as <fct> instead of <chr>):
# A tibble: 8 x 4
#  country date  product price
#  <chr>   <chr> <chr>   <dbl>
#1 USA     10/19 A         100
#2 USA     10/19 B          50
#3 USA     10/19 B          30
#4 USA     10/19 B         200
#5 USA     10/19 A         100
#6 USA     10/19 A          NA
#7 ITALY   11/19 A         150
#8 ITALY   11/19 C          10

答案 1 :(得分:0)

此问题与您的问题有关(原帖此处为一个链接,其地址在本文中已丢失)。

以下是您问题的几种解决方案:

简单的方法:

# Stack the first and second product/price pairs under common column names
# (product, price), then drop every row that contains an NA.
na.omit(
  bind_rows(
    select(df1, country, date, product, price),
    select(df1, country, date, product = product2, price = price2)
  )
)

更具可扩展性:

# All columns except the first two (country, date) hold product/price pairs.
nm1 <- names(df1)[-(1:2)]
# Group the pair columns by their numeric suffix: "" for product/price,
# "2" for product2/price2, and so on. Each group becomes one stacked chunk.
split(nm1, sub("\\D+", "", nm1)) %>%
  purrr::map_df(~ df1 %>%
    # all_of() states explicitly that .x is a character vector of column
    # names; passing a bare external vector to select() is deprecated.
    select(country, date, all_of(.x)) %>%
    # Columns 3 and 4 are this group's product and price, in that order;
    # give them common names so the chunks can be row-bound.
    rename(product = 3, price = 4)) %>%
  # Drop every row that contains an NA in any column.
  na.omit()

对于您编辑的问题:

library(dplyr)
library(purrr)
# Data from the edited question, with a third product/price pair.
# stringsAsFactors = FALSE keeps the text columns as character vectors.
df1 <- data.frame(
  country = c("USA", "USA", "ITALY"),
  date = c("10/19", "10/19", "11/19"),
  product = c("A", "B", "A"),
  price = c(100, 200, 150),
  product2 = c("B", "A", NA),
  price2 = c(50, 100, NA),
  product3 = c("B", "A", "C"),
  price3 = c(30, NA, 10),
  stringsAsFactors = FALSE
)

# All columns except the first two (country, date) hold product/price pairs.
nm1 <- names(df1)[-(1:2)]
# Group the pair columns by their numeric suffix ("" / "2" / "3") so that
# each product column travels with its matching price column.
split(nm1, sub("\\D+", "", nm1)) %>%
  purrr::map_df(~ df1 %>%
    # all_of() states explicitly that .x is a character vector of column
    # names; passing a bare external vector to select() is deprecated.
    select(country, date, all_of(.x)) %>%
    # Columns 3 and 4 are this group's product and price, in that order;
    # give them common names so the chunks can be row-bound.
    rename(product = 3, price = 4)) %>%
  # Keep rows with a known product even when the price is NA; only pairs
  # with no product at all are dropped.
  filter(!is.na(product))

结果:

  country  date product price
1     USA 10/19       A   100
2     USA 10/19       B   200
3   ITALY 11/19       A   150
4     USA 10/19       B    50
5     USA 10/19       A   100
6     USA 10/19       B    30
7     USA 10/19       A    NA
8   ITALY 11/19       C    10