查找选定列中多个点的斜率

时间:2018-07-04 19:58:56

标签: r regression linear-regression lm

给出以下数据框:

# Example data frame: 6 rows, one column per year label from -5 to 5.
structure(
  list(
    `-5` = c(0, 1, 0, 0, 9, 22),
    `-4` = c(1, 3, 0, 0, 1, 17),
    `-3` = c(1, 3, 0, 0, 0, 12),
    `-2` = c(1, 3, 0, 0, 2, 10),
    `-1` = c(0, 0, 0, 4, 3, 9),
    `0` = c(0, 1, 0, 2, 2, 21),
    `1` = c(0, 1, 1, 7, 1, 21),
    `2` = c(1, 0, 1, 2, 1, 10),
    `3` = c(0, 9, 0, 6, 1, 12),
    `4` = c(0, 2, 0, 5, 0, 18),
    `5` = c(0, 0, 0, 3, 0, 23)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

#  -5 -4 -3 -2 -1  0  1  2  3  4  5
#1  0  1  1  1  0  0  0  1  0  0  0
#2  1  3  3  3  0  1  1  0  9  2  0
#3  0  0  0  0  0  0  1  1  0  0  0
#4  0  0  0  0  4  2  7  2  6  5  3
#5  9  1  0  2  3  2  1  1  1  0  0
#6 22 17 12 10  9 21 21 10 12 18 23

我希望R给我-5:-1列的每一行中所有数据点的斜率。基本上,基于这5个数据点的线性回归趋势线的斜率。然后是列1:5的所有数据点的第二个斜率。年份0被忽略。

基本上就是这样(使用Excel计算的最后两列):

# Expected result: the original columns plus, for every row, the least-squares
# slope over years -5..-1 ("Negative Years") and over years 1..5
# ("Positive Years"). Row 2 of "Negative Years" is -0.2: for the values
# (1, 3, 3, 3, 0) on x = -5..-1, Sxy = -2 and Sxx = 10.
structure(
  list(
    `-5` = c(0, 1, 0, 0, 9, 22),
    `-4` = c(1, 3, 0, 0, 1, 17),
    `-3` = c(1, 3, 0, 0, 0, 12),
    `-2` = c(1, 3, 0, 0, 2, 10),
    `-1` = c(0, 0, 0, 4, 3, 9),
    `0` = c(0, 1, 0, 2, 2, 21),
    `1` = c(0, 1, 1, 7, 1, 21),
    `2` = c(1, 0, 1, 2, 1, 10),
    `3` = c(0, 9, 0, 6, 1, 12),
    `4` = c(0, 2, 0, 5, 0, 18),
    `5` = c(0, 0, 0, 3, 0, 23),
    `Negative Years` = c(0, -0.2, 0, 0.8, -1.1, -3.3),
    `Positive Years` = c(-0.1, 0, -0.3, -0.5, -0.3, 1.2)
  ),
  row.names = c(NA, 6L),
  class = "data.frame"
)

#  -5 -4 -3 -2 -1  0  1  2  3  4  5 Negative Years Positive Years
#1  0  1  1  1  0  0  0  1  0  0  0            0.0           -0.1
#2  1  3  3  3  0  1  1  0  9  2  0           -0.2            0.0
#3  0  0  0  0  0  0  1  1  0  0  0            0.0           -0.3
#4  0  0  0  0  4  2  7  2  6  5  3            0.8           -0.5
#5  9  1  0  2  3  2  1  1  1  0  0           -1.1           -0.3
#6 22 17 12 10  9 21 21 10 12 18 23           -3.3            1.2

3 个答案:

答案 0 :(得分:5)

这就是统计学家(不是数据科学家)会做的。

设您的数据框为dat。

# Fit every row of dat at once as a multi-response linear model ("mlm"):
# each original row becomes one response column, regressed on time with a
# separate slope for the negative-year and the positive-year group.
Y <- t(dat)  # response matrix: one row per year, one column per row of dat
t <- -5:5    # time stamps; the name `t` is kept so the labels stay "t" / "t:id+"
# Group index. Year 0 is NA so that lm() leaves it out (with the default
# na.action). The levels are fixed explicitly: for a plain character vector the
# reference level is picked by locale-dependent sorting, and in the C locale
# "+" sorts before "-", which would silently swap the two slope columns.
id <- factor(c(rep("-", 5), NA, rep("+", 5)), levels = c("-", "+"))
fit <- lm(Y ~ t * id)  # mlm
# "t" is the slope of the "-" group; "t:id+" is the slope difference
# ("+" minus "-"). Selecting by name avoids relying on coefficient order.
m <- coef(fit)[c("t", "t:id+"), ]
m["t:id+", ] <- m["t:id+", ] + m["t", ]  # reverse contrast: slope of "+" group
round(t(m), 2)

#     t t:id+
#1  0.0  -0.1
#2 -0.2   0.0
#3  0.0  -0.3
#4  0.8  -0.5
#5 -1.1  -0.3
#6 -3.3   1.2

将列名称更改为所需的名称。

答案 1 :(得分:2)

# Split the transposed data by the sign of the year label (-1, 0, 1). Within
# each group, regress every original row of dat on its years and keep the
# slope, rounded to 2 decimals.
a <- by(
  data.frame(t(dat)),
  sign(as.numeric(names(dat))),
  function(x) {
    yrs <- as.numeric(rownames(x))
    slopes <- vapply(x, function(y) coef(lm(y ~ yrs))[[2]], numeric(1))
    round(unname(slopes), 2)
  }
)

# a[-2] drops the middle group (sign 0); the two remaining slope vectors are
# bound as new columns next to the original data.
cbind(
  dat,
  do.call(cbind, setNames(a[-2], c("Negative Years", "Positive Years")))
)

  -5 -4 -3 -2 -1  0  1  2  3  4  5 Negative Years Positive Years
1  0  1  1  1  0  0  0  1  0  0  0            0.0           -0.1
2  1  3  3  3  0  1  1  0  9  2  0           -0.2            0.0
3  0  0  0  0  0  0  1  1  0  0  0            0.0           -0.3
4  0  0  0  0  4  2  7  2  6  5  3            0.8           -0.5
5  9  1  0  2  3  2  1  1  1  0  0           -1.1           -0.3
6 22 17 12 10  9 21 21 10 12 18 23           -3.3            1.2

使用tidyverse:

library(tidyverse)
# Compute one slope per original row of dat and per sign of the year label,
# then append both slope columns to dat.
# Transpose first: each original row becomes a column, years become row names.
data.frame(t(dat))%>%
  # Move the year labels from the row names into a column named x.
  rownames_to_column("x")%>%
  # The labels are text at this point; turn them into numbers.
  mutate(x=as.numeric(x))%>%
  # Long format: one record per (x, col, val).
  gather(col,val,-x)%>%
  # Year 0 belongs to neither group.
  filter(x!=0)%>%
  # One group per original row (col) and sign of the year (s).
  group_by(col,s=sign(x))%>%
  # Slope of val on x within each group, rounded to 2 decimals.
  summarise(u=round(coef(lm(val~x))[2],2))%>%
  # Widen by col, drop the first column (presumably s), and transpose back so
  # that each original row of dat is a row again.
  spread(col,u)%>%{data.frame(t(.[-1]))}%>%
  setNames(c("Negative Years","Positive Years"))%>%
  # Append the two slope columns to the original data.
  cbind(dat,.)

 -5 -4 -3 -2 -1  0  1  2  3  4  5 Negative Years Positive Years
1  0  1  1  1  0  0  0  1  0  0  0            0.0           -0.1
2  1  3  3  3  0  1  1  0  9  2  0           -0.2            0.0
3  0  0  0  0  0  0  1  1  0  0  0            0.0           -0.3
4  0  0  0  0  4  2  7  2  6  5  3            0.8           -0.5
5  9  1  0  2  3  2  1  1  1  0  0           -1.1           -0.3
6 22 17 12 10  9 21 21 10 12 18 23           -3.3            1.2

答案 2 :(得分:1)

使用tidyverse软件包的解决方案。假设dat是原始数据框,dat2是最终输出。

library(tidyverse)

# Build dat2: dat plus one rounded regression slope per row for the negative
# column labels and one for the positive column labels.
dat2 <- dat %>%
  # Tag every row with its position so the slopes can be joined back later.
  rowid_to_column() %>%
  # Long format; convert = TRUE turns the column labels into numbers.
  gather(Column, Value, -rowid, convert = TRUE) %>%
  # Column 0 belongs to neither group.
  filter(Column != 0) %>%
  mutate(Sign = ifelse(Column > 0, "Positive", "Negative")) %>%
  # One nested data frame per (rowid, Sign) pair.
  group_by(rowid, Sign) %>%
  nest() %>%
  # Fit Value ~ Column on each nested data frame, then take the slope
  # (second coefficient) rounded to one decimal.
  mutate(
    LM = map(data, function(d) lm(Value ~ Column, data = d)),
    Slope = map_dbl(LM, function(fit) round(coef(fit)[[2]], digits = 1))
  ) %>%
  select(rowid, Sign, Slope) %>%
  # Wide format: one "Negative" and one "Positive" column.
  spread(Sign, Slope) %>%
  # Attach the slopes to the original data, then drop the helper id.
  left_join(x = rowid_to_column(dat), y = ., by = "rowid") %>%
  select(-rowid)
dat2
#   -5 -4 -3 -2 -1  0  1  2  3  4  5 Negative Positive
# 1  0  1  1  1  0  0  0  1  0  0  0      0.0     -0.1
# 2  1  3  3  3  0  1  1  0  9  2  0     -0.2      0.0
# 3  0  0  0  0  0  0  1  1  0  0  0      0.0     -0.3
# 4  0  0  0  0  4  2  7  2  6  5  3      0.8     -0.5
# 5  9  1  0  2  3  2  1  1  1  0  0     -1.1     -0.3
# 6 22 17 12 10  9 21 21 10 12 18 23     -3.3      1.2