我正在尝试对以下数据集计算自相关,以学习一些时间序列分析,然后按个体使用块自助法(block bootstrap)来估计标准误:
这是数据集:
https://www.dropbox.com/s/z066lnxetz9uaf6/health.csv?dl=0
这是我用来计算相关系数(cor)的代码:
# Lag-1 to lag-4 autocorrelation of first-differenced log income, computed
# within each individual (ID) so that no lag crosses from one person to the next.
# Requires dplyr (%>%, filter, arrange, group_by, mutate, ungroup, lag).
library(dplyr)

# Check for duplicates: drop rows that are exact copies of an earlier row.
health.d <- health.d[!duplicated(health.d), ]

# log(0 + 1) = 0, so zero incomes stay finite.
health.d$lnincome <- log(health.d$Income + 1)

# Keep rows with sex == 1 and married == 0. filter() discards rows where a
# condition is NA; logical `[` subsetting would turn them into all-NA rows.
health.d <- health.d %>%
  filter(sex == 1, married == 0)

# First difference and its lags for each individual.
# Rows are sorted by ID and year BEFORE differencing, and every lag is taken
# inside group_by(ID). dplyr::lag is spelled out because stats::lag does not
# shift a plain vector, which would silently make every difference zero.
# NOTE(review): lag(., k) shifts by k rows, which equals k years only if each
# individual has one row per consecutive year -- confirm against the data.
health.d <- health.d %>%
  arrange(ID, year) %>%
  group_by(ID) %>%
  mutate(
    Dy = lnincome - dplyr::lag(lnincome, 1),
    lag1DY = Dy,
    lagDY_s1 = dplyr::lag(Dy, 1),
    lagDY_s2 = dplyr::lag(Dy, 2),
    lagDY_s3 = dplyr::lag(Dy, 3),
    lagDY_s4 = dplyr::lag(Dy, 4)
  ) %>%
  ungroup()

# Remove NA from Dy (the first observation of every individual).
health.d <- health.d[!is.na(health.d$Dy), ]

# Autocorrelation
# `time` keeps its original name but is an integer index of the individual
# (one value per ID), not a time index.
health.d <- health.d %>%
  mutate(time = as.numeric(interaction(ID, drop = TRUE)))

# Remove NA from lag: keep only rows where the difference and all four lags are
# observed, so the four correlations below use the same sample.
lag_cols <- c("lag1DY", "lagDY_s1", "lagDY_s2", "lagDY_s3", "lagDY_s4")
health.d <- health.d[complete.cases(health.d[, lag_cols]), ]

# Return a plain data.frame, as the original transform() step did.
health.d <- as.data.frame(health.d)

cor(health.d$lag1DY, health.d$lagDY_s1)
cor(health.d$lag1DY, health.d$lagDY_s2)
cor(health.d$lag1DY, health.d$lagDY_s3)
cor(health.d$lag1DY, health.d$lagDY_s4)
结果:
> cor(health.d$lag1DY, health.d$lagDY_s1)
[1] -0.05593212
> cor(health.d$lag1DY, health.d$lagDY_s2)
[1] -0.1033625
> cor(health.d$lag1DY, health.d$lagDY_s3)
[1] -0.0804236
> cor(health.d$lag1DY, health.d$lagDY_s4)
[1] -0.1235624
这些结果似乎是错误的,因为不同时期的收入之间应该存在高度相关性,但我无法弄清楚自己哪里做错了。
编辑:我已更新我的代码以包含我已达到的当前结果。这些似乎不正确,但(1)我不知道正确的数字,(2)我不知道我的代码在哪里错了。我发布了我目前的结果,希望有人可以纠正我:)
另外,能否就如何用块自助法(block bootstrap)估计标准误提供一些帮助?
提前致谢。
答案 0 :(得分:2)
您只需要使用 stats 包中的 acf 函数。它可以按您指定的任意多个滞后阶数计算自相关。
# Autocorrelation function (lags 0-5) of first-differenced log income.
library(stats) # for the use of "acf" function
library(dplyr) # for %>%, group_by, mutate, ungroup and lag

# Drop exact duplicate rows.
health.d <- health.d[!duplicated(health.d), ]
health.d$lnincome <- log(health.d$Income + 1)
health.d <- health.d[(health.d$sex == 1 & health.d$married == 0), ]

# First difference for each individual. dplyr::lag is spelled out because
# stats::lag does not shift a plain vector; ungroup() keeps later steps from
# silently operating per ID.
health.d <- health.d %>%
  group_by(ID) %>%
  mutate(Dy = lnincome - dplyr::lag(lnincome, 1)) %>%
  ungroup()

# plot = FALSE: the figure is drawn once, by plot() below, with its title.
# na.pass keeps the NA that starts each individual's Dy series.
# NOTE(review): acf() treats Dy as one pooled series, so lags of 2 or more can
# pair observations from different individuals -- confirm this is intended.
acf.results <- acf(
  health.d$Dy,
  lag.max = 5,
  type = "correlation",
  plot = FALSE,
  na.action = na.pass
)
plot(acf.results, main = "Auto-correlation")
这将绘制出 acf 的参数(lag.max = 5)所指定的 5 个滞后阶数的自相关图。
如果您想访问acf结果,可以使用:
print(acf.results)
您将得到如下输出:
Autocorrelations of series ‘health.d$Dy’, by lag
0 1 2 3 4 5
1.000 -0.225 0.016 -0.030 -0.002 0.002