我有一个具有以下形式的数据集:
Year ... X1 X2 ... XN ... Y1 Y2 ... Y5 ...
2006 ...
2007
...
2016
目前我以每个 Y 分别作为因变量、以各个 X 作为自变量,逐一进行回归。
我想将此数据集转换为面板数据集,以便可以运行固定效应面板回归。
您知道如何将数据集转换为所需格式吗?
如果有帮助的话,下面贴出数据集的一部分:
structure(list(Year = c(2006, 2007, 2008, 2009, 2010, 2011, 2012,
2013, 2014, 2015, 2016), X1 = c(NA, 6231989.16,
6286192.8, 7997940.88, 5964272.33, 2220471.25, 1161886.38, 1854724.67,
7414435.45, 1030764.86, 1760876.07), X2 = c(NA,
16033423.97, 14591392.59, 10807666.03, 10568403.25, 9895997.3,
7783115.74, 9609331.42, 13195226.51, 9840290.11, 10612093.19),
Y2 = c(NA, NA, NA, 26041118.06,
18038215.91, 19174941.38, 15250404.65, 19670622.34, 19969051.53,
13454512.28, 17033742.37),
Y1 = c(NA, 51860962.74, 38081542.65, 24057388.46, 24340687.5,
27960591.55, 25526505.72, 31599623.65, 38597641.61, 48611516.44,
45851933.17), Y3 = c(NA, 30898514.64, 34234806.16,
38595099.38, 41654402.22, 41895856.36, 45906588.53, 58857032.54,
68599527.69, 69905755.6, 63085613.98
)), row.names = c(NA, -11L), class = c("data.table",
"data.frame"), .internal.selfref = <pointer: 0x0000000004601ef0>, sorted = "Year")
答案 0 :(得分:1)
这里分两步完成。请注意,转换之后所有组都带有相同的一组 X 变量。
library(tidyverse)
library(plm)
# Reshape the wide data (one column per Y series) into a long panel with one
# row per (year, group) pair. The X columns are kept as they are, so every
# group carries the same X values for a given year.
#
# names_prefix = "Y" strips the leading "Y" from the former column names
# while reshaping, so `group` holds only the series id ("2", "1", "3") and no
# separate substr() clean-up pass is needed. pivot_longer() is used instead of
# gather(), which tidyr documents as superseded.
df.panel <- df %>%
  pivot_longer(
    cols = -c(year, starts_with("X")),
    names_to = "group",
    names_prefix = "Y",
    values_to = "Y"
  ) %>%
  arrange(year) %>%
  # pivot_longer() returns a tibble; convert back so df.panel stays a plain
  # data.frame, which is what gather() produced for this data.frame input.
  as.data.frame()

glimpse(df.panel) # have a look at df.panel
数据
# Example data: one row per year (2006-2016), two X columns and three Y
# columns. Leading NA values are kept exactly as in the original excerpt.
df <- data.frame(
  year = 2006:2016,
  X1 = c(
    NA, 6231989.16, 6286192.8, 7997940.88, 5964272.33, 2220471.25,
    1161886.38, 1854724.67, 7414435.45, 1030764.86, 1760876.07
  ),
  X2 = c(
    NA, 16033423.97, 14591392.59, 10807666.03, 10568403.25, 9895997.3,
    7783115.74, 9609331.42, 13195226.51, 9840290.11, 10612093.19
  ),
  Y2 = c(
    NA, NA, NA, 26041118.06, 18038215.91, 19174941.38,
    15250404.65, 19670622.34, 19969051.53, 13454512.28, 17033742.37
  ),
  Y1 = c(
    NA, 51860962.74, 38081542.65, 24057388.46, 24340687.5, 27960591.55,
    25526505.72, 31599623.65, 38597641.61, 48611516.44, 45851933.17
  ),
  Y3 = c(
    NA, 30898514.64, 34234806.16, 38595099.38, 41654402.22, 41895856.36,
    45906588.53, 58857032.54, 68599527.69, 69905755.6, 63085613.98
  )
)