我有一份过去20年美国与朝鲜之间进出口贸易的数据,其中许多列名是月份名称:进口月份形如IJAN,出口月份形如EJAN。因此我尝试使用两次gather(),把数据整理成正确的tidy data格式。
这是我最初的tibble:
# A tibble: 26 x 29
year CTY_CODE CTYNAME IJAN IFEB IMAR IAPR IMAY IJUN IJUL IAUG ISEP IOCT INOV IDEC IYR EJAN EFEB EMAR EAPR EMAY EJUN EJUL EAUG ESEP
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1992 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.100 0. 0. 0. 0. 0. 0. 0.
2 1993 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
3 1994 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.200 0. 0. 0.
4 1995 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 6.60 0. 0. 4.20 0. 0. 0.200
5 1996 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.400 0. 0. 0. 0. 0.100
6 1997 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.100 0. 2.00 0. 0.300
7 1998 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 4.00 0. 0. 0.100 0. 0.300 0.
8 1999 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.100 0. 0. 0.300 0. 1.10 0.500 0.500
9 2000 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.100 0. 0. 0.100 2.50 0.100 0. 0. 0. 0. 0. 0.100 0.
10 2001 5790 Korea, N~ 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
# ... with 16 more rows, and 4 more variables: EOCT <dbl>, ENOV <dbl>, EDEC <dbl>, EYR <dbl>
我第一次使用gather()来处理进口月份的列,它可以正常工作:
# First gather(): collapse every column matched by contains("I") into
# key/value pairs named month / ImportAmount, and overwrite USNKTrade with the
# longer result.
USNKTrade <- USNKTrade %>% gather(contains("I"), key="month", value="ImportAmount")
结果如下:
# A tibble: 338 x 18
year CTY_CODE CTYNAME EJAN EFEB EMAR EAPR EMAY EJUN EJUL EAUG ESEP EOCT ENOV EDEC EYR month ImportAmount
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
1 1992 5790 Korea, North 0. 0.100 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.100 IJAN 0.
2 1993 5790 Korea, North 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2.00 0. 2.00 IJAN 0.
3 1994 5790 Korea, North 0. 0. 0. 0. 0. 0.200 0. 0. 0. 0. 0. 0. 0.200 IJAN 0.
4 1995 5790 Korea, North 0. 0. 6.60 0. 0. 4.20 0. 0. 0.200 0. 0.500 0.100 11.6 IJAN 0.
5 1996 5790 Korea, North 0. 0. 0. 0.400 0. 0. 0. 0. 0.100 0. 0. 0. 0.500 IJAN 0.
6 1997 5790 Korea, North 0. 0. 0. 0. 0.100 0. 2.00 0. 0.300 0. 0.100 0. 2.50 IJAN 0.
7 1998 5790 Korea, North 0. 0. 4.00 0. 0. 0.100 0. 0.300 0. 0. 0. 0. 4.40 IJAN 0.
8 1999 5790 Korea, North 0. 0.100 0. 0. 0.300 0. 1.10 0.500 0.500 0.500 0.600 7.70 11.3 IJAN 0.
9 2000 5790 Korea, North 2.50 0.100 0. 0. 0. 0. 0. 0.100 0. 0. 0. 0. 2.70 IJAN 0.
10 2001 5790 Korea, North 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.500 0.500 IJAN 0.
# ... with 328 more rows
但是当我尝试对出口数据执行相同的操作时:
USNKTrade <- USNKTrade %>% gather(starts_with("E"), key="month", value="ExportAmount")
我得到的是一个大约4000行的tibble,而实际上我想要的tibble只应是前一个的两倍大(这样每个进口月份和出口月份都有自己的一行)。
非常感谢您的帮助!
答案 0 :(得分:0)
这是一种方法。使用starts_with()代替contains(),因为有些月份名称中的E或I可能不只出现在首字母位置。在第二次gather时,请注意我已取消选择新创建的列ImportAmount和ImportMonth,以及列year。
library(tidyr)  # provides gather() and starts_with(), and re-exports %>%

# Small reproducible example: one row per year, with export (E*) and import
# (I*) amounts for two months. rnorm() is unseeded, so values differ per run.
year <- 1992:2001
EJAN <- rnorm(10)
IJAN <- rnorm(10)
EFEB <- rnorm(10)
IFEB <- rnorm(10)
df <- data.frame(year, EJAN, IJAN, EFEB, IFEB)

# Gather the import columns (prefix "I") first, then the export columns
# (prefix "E"), so each key/value pair is labelled with the trade direction it
# actually holds. The second gather explicitly deselects year and the freshly
# created Import* columns so they are never treated as measure columns.
# Note: the result holds one row per year x import month x export month
# combination (10 x 2 x 2 = 40 rows here), i.e. months are cross-joined.
df %>%
  gather(starts_with("I"), key = ImportMonth, value = ImportAmount) %>%
  gather(starts_with("E"), key = ExportMonth, value = ExportAmount,
         -ImportAmount, -ImportMonth, -year)
# Illustrative output (actual numbers vary between runs):
#    year ImportMonth ImportAmount ExportMonth ExportAmount
# 1  1992        IJAN      0.94967        EJAN      -1.1528
# 2  1993        IJAN      0.86506        EJAN       0.1165
# 3  1994        IJAN     -0.05108        EJAN       0.2553
# 4  1995        IJAN     -0.50873        EJAN      -0.8516
# 5  1996        IJAN      0.50614        EJAN      -0.3014
# 6  1997        IJAN      1.73527        EJAN       1.4017
# 7  1998        IJAN     -0.71507        EJAN       0.8019
# 8  1999        IJAN     -0.32709        EJAN       1.7179
# 9  2000        IJAN     -1.07364        EJAN      -1.2478
# 10 2001        IJAN     -1.83764        EJAN      -1.0491
# ...
# 40 2001        IFEB      0.76118        EFEB       1.2181
答案 1 :(得分:0)
虽然OP未提供可重现的数据,但问题似乎在于OP对同一个data.frame连续(递归地)执行了两次gather。
# The columns whose names contain "I" are gathered into rows by this command;
# the result is assigned back to USNKTrade, exactly as in the question.
USNKTrade <- USNKTrade %>% gather(contains("I"), key="month", value="ImportAmount")
# The second gather now runs on the already-lengthened data, so every existing
# row is repeated once per "E" column, which multiplies the row count instead
# of merely doubling it.
USNKTrade <- USNKTrade %>% gather(starts_with("E"), key="month", value="ExportAmount")
可行的解决方案可能是:
# Reshape the import and export columns separately from the ORIGINAL wide
# table, then join the two long tables on the shared keys so every
# year/month pair ends up on one row carrying both amounts.
USNKTrade %>%
  select(year, CTY_CODE, CTYNAME, starts_with("I")) %>%
  gather(starts_with("I"), key = "month", value = "ImportAmount") %>%
  # Drop the direction prefix (IJAN -> JAN); otherwise the import labels could
  # never equal the export labels and the join would return no rows.
  mutate(month = sub("^I", "", month)) %>%
  inner_join(
    USNKTrade %>%
      select(year, CTY_CODE, CTYNAME, starts_with("E")) %>%
      gather(starts_with("E"), key = "month", value = "ExportAmount") %>%
      mutate(month = sub("^E", "", month)),  # EJAN -> JAN
    by = c("year", "CTY_CODE", "CTYNAME", "month")
  )
# NOTE(review): IYR / EYR are gathered too and show up as month "YR"; they look
# like yearly totals, so filter them out if only monthly rows are wanted.
方法是:首先仅选择ImportAmount所需的列,然后应用gather;接着仅选择ExportAmount所需的列,同样应用gather;最后合并这两组结果。连接时假设"year", "CTY_CODE", "CTYNAME", "month"为key列。