Question

我有两个变量矩阵：lagcolmean（自变量）和 Dropcolmax（因变量）。

head(lagcolmean)
        MSFT       AAPL      GOOGL
2 -0.2130120  0.8115149 -0.2093960
3 -0.1824693 -0.3352958 -0.1845572
4  0.2175819 -0.6885582  0.9421018
head(Dropcolmax)
       MSFT     AAPL    GOOGL
1 0.3837303 3.871109 6.618858
2 0.6723379 4.369627 7.070757
3 1.1848480 2.565165 2.079593

我使用这个公式来回归这两个变量：

# Fit one simple linear regression for each column name of Dropcolmax and
# collect the fitted lm objects in a list.
# In an R formula the left-hand side is the response, so each model regresses
# lagcolmean[[x]] on Dropcolmax[[x]].
# NOTE(review): the description above labels lagcolmean as the independent
# variable, which is the opposite of this formula -- confirm the intended
# direction.
fit <- lapply(names(Dropcolmax), function(x){
  lm(lagcolmean[[x]] ~ Dropcolmax[[x]])
})

这样我得到了三个截距和三个系数，但没有任何 t 值。现在我想报告这三个截距和系数的平均值，以及相应的 t 统计量。

Answer 1

我会逐步修改您的函数，以展示可以得到的各种输出。在我看来，broom 包能让事情简单很多：它为每个模型提取有用的信息，您可以把这些信息合并成一个大的数据框，然后利用相应的列和筛选条件计算所需的任何内容。

library(broom)

# Rebuild the example data from the question as two 3 x 3 data frames.
# The row labels shown in the question's head() output are not included here,
# so the rows of the two tables are paired purely by position.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned, whereas `TRUE` is a reserved word.
lagcolmean <- read.table(text = "
MSFT       AAPL      GOOGL
-0.2130120  0.8115149 -0.2093960
-0.1824693 -0.3352958 -0.1845572
0.2175819 -0.6885582  0.9421018", sep = "", header = TRUE)

Dropcolmax <- read.table(text = "
       MSFT     AAPL    GOOGL
0.3837303 3.871109 6.618858
0.6723379 4.369627 7.070757
1.1848480 2.565165 2.079593", sep = "", header = TRUE)


# Fit one regression per column name. Each list element is a bare lm object,
# so printing it shows only the call and the coefficient estimates.
fit1 <- lapply(
  names(Dropcolmax),
  function(x) lm(lagcolmean[[x]] ~ Dropcolmax[[x]])
)

fit1

# [[1]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Coefficients:
#   (Intercept)  Dropcolmax[[x]]  
# -0.4819           0.5657  
# 
# 
# [[2]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Coefficients:
#   (Intercept)  Dropcolmax[[x]]  
# -1.4706           0.3886  
# 
# 
# [[3]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Coefficients:
#   (Intercept)  Dropcolmax[[x]]  
# 1.4289          -0.2371  


# Same regressions, but each list element is the summary() of the fit, which
# adds residuals, standard errors, t values, p-values and R-squared.
fit2 <- lapply(names(Dropcolmax), function(x) {
  model <- lm(lagcolmean[[x]] ~ Dropcolmax[[x]])
  summary(model)
})

fit2

# [[1]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Residuals:
#   1        2        3 
# 0.05179 -0.08095  0.02916 
# 
# Coefficients:
#                 Estimate Std. Error t value Pr(>|t|)
# (Intercept)      -0.4819     0.1430  -3.370    0.184
# Dropcolmax[[x]]   0.5657     0.1750   3.233    0.191
# 
# Residual standard error: 0.1004 on 1 degrees of freedom
# Multiple R-squared:  0.9127,  Adjusted R-squared:  0.8253 
# F-statistic: 10.45 on 1 and 1 DF,  p-value: 0.191
# 
# 
# [[2]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Residuals:
#   1       2       3 
# 0.7777 -0.5628 -0.2149 
# 
# Coefficients:
#                 Estimate Std. Error t value Pr(>|t|)
# (Intercept)      -1.4706     2.7482  -0.535    0.687
# Dropcolmax[[x]]   0.3886     0.7465   0.521    0.694
# 
# Residual standard error: 0.9838 on 1 degrees of freedom
# Multiple R-squared:  0.2132,  Adjusted R-squared:  -0.5735 
# F-statistic: 0.271 on 1 and 1 DF,  p-value: 0.6944
# 
# 
# [[3]]
# 
# Call:
#   lm(formula = lagcolmean[[x]] ~ Dropcolmax[[x]])
# 
# Residuals:
#   1         2         3 
# -0.069114  0.062857  0.006258 
# 
# Coefficients:
#                  Estimate Std. Error t value Pr(>|t|)  
# (Intercept)      1.42885    0.13717  10.417   0.0609 .
# Dropcolmax[[x]] -0.23707    0.02398  -9.884   0.0642 .
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 0.09363 on 1 degrees of freedom
# Multiple R-squared:  0.9899,  Adjusted R-squared:  0.9797 
# F-statistic:  97.7 on 1 and 1 DF,  p-value: 0.06419


# Same regressions again, but tidy() (from broom) converts each fit into a
# data frame with one row per term: estimate, std.error, statistic, p.value.
fit3 <- lapply(names(Dropcolmax), function(x) {
  model <- lm(lagcolmean[[x]] ~ Dropcolmax[[x]])
  tidy(model)
})

fit3

# [[1]]
#              term   estimate std.error statistic   p.value
# 1     (Intercept) -0.4818854 0.1430076 -3.369648 0.1836572
# 2 Dropcolmax[[x]]  0.5657314 0.1750100  3.232566 0.1909950
# 
# [[2]]
#              term   estimate std.error  statistic   p.value
# 1     (Intercept) -1.4705816 2.7481718 -0.5351127 0.6872022
# 2 Dropcolmax[[x]]  0.3886215 0.7464919  0.5205971 0.6944294
# 
# [[3]]
#              term   estimate  std.error statistic    p.value
# 1     (Intercept)  1.4288545 0.13717192 10.416523 0.06092962
# 2 Dropcolmax[[x]] -0.2370706 0.02398423 -9.884433 0.06418790


# Stack the per-model coefficient tables row-wise into one data frame.
dt <- do.call(rbind, fit3)

dt

#              term   estimate  std.error  statistic    p.value
# 1     (Intercept) -0.4818854 0.14300765 -3.3696477 0.18365719
# 2 Dropcolmax[[x]]  0.5657314 0.17501003  3.2325659 0.19099495
# 3     (Intercept) -1.4705816 2.74817181 -0.5351127 0.68720222
# 4 Dropcolmax[[x]]  0.3886215 0.74649192  0.5205971 0.69442940
# 5     (Intercept)  1.4288545 0.13717192 10.4165232 0.06092962
# 6 Dropcolmax[[x]] -0.2370706 0.02398423 -9.8844330 0.06418790


# As above, but tag every coefficient row with the column name it was fitted
# on, so rows stay identifiable once the tables are stacked together.
fit4 <- lapply(names(Dropcolmax), function(x) {
  model <- lm(lagcolmean[[x]] ~ Dropcolmax[[x]])
  coef_table <- tidy(model)
  data.frame(name = x, coef_table)
})

fit4

# [[1]]
#   name            term   estimate std.error statistic   p.value
# 1 MSFT     (Intercept) -0.4818854 0.1430076 -3.369648 0.1836572
# 2 MSFT Dropcolmax[[x]]  0.5657314 0.1750100  3.232566 0.1909950
# 
# [[2]]
#   name            term   estimate std.error  statistic   p.value
# 1 AAPL     (Intercept) -1.4705816 2.7481718 -0.5351127 0.6872022
# 2 AAPL Dropcolmax[[x]]  0.3886215 0.7464919  0.5205971 0.6944294
# 
# [[3]]
#    name            term   estimate  std.error statistic    p.value
# 1 GOOGL     (Intercept)  1.4288545 0.13717192 10.416523 0.06092962
# 2 GOOGL Dropcolmax[[x]] -0.2370706 0.02398423 -9.884433 0.06418790


# Stack the labelled per-model tables row-wise into one data frame.
dt <- do.call(rbind, fit4)

dt

#    name            term   estimate  std.error  statistic    p.value
# 1  MSFT     (Intercept) -0.4818854 0.14300765 -3.3696477 0.18365719
# 2  MSFT Dropcolmax[[x]]  0.5657314 0.17501003  3.2325659 0.19099495
# 3  AAPL     (Intercept) -1.4705816 2.74817181 -0.5351127 0.68720222
# 4  AAPL Dropcolmax[[x]]  0.3886215 0.74649192  0.5205971 0.69442940
# 5 GOOGL     (Intercept)  1.4288545 0.13717192 10.4165232 0.06092962
# 6 GOOGL Dropcolmax[[x]] -0.2370706 0.02398423 -9.8844330 0.06418790


# Average the intercept estimates across the three models.
# (The console output is kept as a comment so the script stays runnable.)
mean(dt$estimate[dt$term == "(Intercept)"])
# [1] -0.1745375

# Average the slope estimates: every row whose term is not the intercept.
mean(dt$estimate[dt$term != "(Intercept)"])
# [1] 0.2390941

截距的平均值和回归结果的系数t stat

1 个答案: