无论如何,有什么方法可以使R代码更有效?

时间:2019-08-03 19:49:54

标签: r function functional-programming

我正在应用一个生育力模型,要执行该过程,需要保存每个生育强度的矩阵,根据其顺序,我将其称为mati。在这种情况下,i =(1、2、3,... n) 下面的数据框是如何显示我的数据的一个示例。我的实际数据框有525行和10列("AGE" "year" "mat1" "mat2" "mat3" "mat4" "mat5" "mat6" "mat7" "mat8")。

year <- c(rep(1998:2001, 4))
Age <- c(rep(15:18, 4))
mat1 <- c(rep(0.01, 16))
mat2 <- c(rep(0.012, 16))
mat3 <- c(rep(0.015, 16))
mat <- data.frame(year, Age, mat1, mat2, mat3)

mat

   year Age mat1  mat2  mat3
1  1998  15 0.01 0.012 0.015
2  1999  16 0.01 0.012 0.015
3  2000  17 0.01 0.012 0.015
4  2001  18 0.01 0.012 0.015
5  1998  15 0.01 0.012 0.015
6  1999  16 0.01 0.012 0.015
7  2000  17 0.01 0.012 0.015
8  2001  18 0.01 0.012 0.015
9  1998  15 0.01 0.012 0.015
10 1999  16 0.01 0.012 0.015
11 2000  17 0.01 0.012 0.015
12 2001  18 0.01 0.012 0.015
13 1998  15 0.01 0.012 0.015
14 1999  16 0.01 0.012 0.015
15 2000  17 0.01 0.012 0.015
16 2001  18 0.01 0.012 0.015

要执行以获得最终的数值矩阵,我已经执行了下面的代码,但是需要很长时间。

##mat1###

library(dlyr)
library(tidyr)

mat1 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat1) %>% 
  spread(year, mat1) 

names(mat1)[c(2:6)] <- paste0("year ", names(mat1[2:6])) #alter colnames
mat1[ ,1] <- paste0("age ", mat1[,1]) #alter the row from column "age"

mat_oe1 <- data.matrix(mat1[2:6])
dimnames(mat_oe1) <- list(c(mat1[,1]),
                          c(names(mat1[2:6])))
#Saving as txt to read i the model
write.table(mat_oe2, file = "mat_oe1.txt", sep = "\t",
            row.names = T, col.names = T)

##mat2
mat2 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat2) %>% 
  spread(year, mat2) 

names(mat2)[c(2:6)] <- paste0("year ", names(mat2[2:6])) #alter colnames
mat2[ ,1] <- paste0("age ", mat2[,1]) #alter the row from column "age"

mat_oe2 <- data.matrix(mat2[2:6])
dimnames(mat_oe2) <- list(c(mat1[,1]),
                          c(names(mat1[2:6])))
#Saving as txt to read i the model
write.table(mat_oe2, file = "mat_oe2.txt", sep = "\t",
            row.names = T, col.names = T)

##mat3
mat3 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat3) %>% 
  spread(year, mat3) 

names(mat3)[c(2:6)] <- paste0("year ", names(mat3[2:6])) #alter colnames
mat3[ ,1] <- paste0("age ", mat3[,1]) #alter the row from column "age"

mat_oe3 <- data.matrix(mat3[2:6])
dimnames(mat_oe3) <- list(c(mat3[,1]),
                          c(names(mat3[2:6])))
#Saving as txt to read i the model
write.table(mat_oe3, file = "mat_oe3.txt", sep = "\t",
            row.names = T, col.names = T)  

我使用spread是因为我需要以下格式的数据:

mat1 

     1998        1999       2000       2001
15   0.01        0.01       0.01       0.01
16   0.01        0.01       0.01       0.01
17   0.01        0.01       0.01       0.01
18   0.01        0.01       0.01       0.01

我也开始写一个循环,但是我已经停留在第一行了。

mat_list <- list()
for(i in names(mat[,3:7])) {
  mat_list[[i]] <- data.frame(
                      spread(
                        select(mat, AGE, year, mat[[paste0("mat",i)]]), year, mat[[paste0("mat", i)]])) 

应用上面的代码后,我获得了以下结果:

view(mat1)
        year 1998  year 1999  year 2000  year 2001
age 15   0.01        0.01       0.01       0.01
age 16   0.01        0.01       0.01       0.01
age 17   0.01        0.01       0.01       0.01
age 18   0.01        0.01       0.01       0.01


view(mat2)
        year 1998  year 1999    year 2000    year 2001
age 15   0.012        0.012       0.012       0.012
age 16   0.012        0.012       0.012       0.012
age 17   0.012        0.012       0.012       0.012
age 18   0.012        0.012       0.012       0.012


view(mat3)
        year 1998  year 1999    year 2000    year 2001
age 15   0.015        0.015       0.015       0.015
age 16   0.015        0.015       0.015       0.015
age 17   0.015        0.015       0.015       0.015
age 18   0.015        0.015       0.015       0.015

3 个答案:

答案 0 :(得分:1)

我相信您想先gather然后spread数据。这使您可以分两步执行所有操作。

library(dplyr)
library(tidyr)

mat %>%
  gather(key, value, -year, -Age)%>%
  spread(year, value)%>%
  group_split(key)

[[1]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat1          0.01         0.01         0.01         0.01
2    16 mat1          0.01         0.01         0.01         0.01
3    17 mat1          0.01         0.01         0.01         0.01
4    18 mat1          0.01         0.01         0.01         0.01

[[2]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat2         0.012        0.012        0.012        0.012
2    16 mat2         0.012        0.012        0.012        0.012
3    17 mat2         0.012        0.012        0.012        0.012
4    18 mat2         0.012        0.012        0.012        0.012

[[3]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat3         0.015        0.015        0.015        0.015
2    16 mat3         0.015        0.015        0.015        0.015
3    17 mat3         0.015        0.015        0.015        0.015
4    18 mat3         0.015        0.015        0.015        0.015

或者您也可以在基础上完成

mats <- reshape(data = data.frame(year = mat$year,Age = mat$Age,  stack(mat, select = c('mat1', 'mat2', 'mat3')))
        , idvar = c('Age', 'ind'), timevar = c('year'), direction = 'wide')

mat_list <- split(mats, mats$ind)

mat_list

$mat1
  Age  ind values.1998 values.1999 values.2000 values.2001
1  15 mat1        0.01        0.01        0.01        0.01
2  16 mat1        0.01        0.01        0.01        0.01
3  17 mat1        0.01        0.01        0.01        0.01
4  18 mat1        0.01        0.01        0.01        0.01

$mat2
   Age  ind values.1998 values.1999 values.2000 values.2001
17  15 mat2       0.012       0.012       0.012       0.012
18  16 mat2       0.012       0.012       0.012       0.012
19  17 mat2       0.012       0.012       0.012       0.012
20  18 mat2       0.012       0.012       0.012       0.012

$mat3
   Age  ind values.1998 values.1999 values.2000 values.2001
33  15 mat3       0.015       0.015       0.015       0.015
34  16 mat3       0.015       0.015       0.015       0.015
35  17 mat3       0.015       0.015       0.015       0.015
36  18 mat3       0.015       0.015       0.015       0.015

数据 我略微更改了您的数据,以使每个ID组合都是唯一的。

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)

答案 1 :(得分:1)

扩大科尔的答案。

mat %>%
    gather("mat", "val", -year, -Age) %>%
    mutate(Age=paste("age",Age), year=paste("year",year)) %>%
    group_by(mat) %>%
    group_map(~spread(., year, val))

purrr :: group_map将函数应用于每个组,并返回一个列表,其中每个列表元素是应用于每个组的函数的结果。

# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15        0.01        0.01        0.01        0.01
2 age 16        0.01        0.01        0.01        0.01
3 age 17        0.01        0.01        0.01        0.01
4 age 18        0.01        0.01        0.01        0.01

[[2]]
# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15       0.012       0.012       0.012       0.012
2 age 16       0.012       0.012       0.012       0.012
3 age 17       0.012       0.012       0.012       0.012
4 age 18       0.012       0.012       0.012       0.012

[[3]]
# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15       0.015       0.015       0.015       0.015
2 age 16       0.015       0.015       0.015       0.015
3 age 17       0.015       0.015       0.015       0.015
4 age 18       0.015       0.015       0.015       0.015

这是使用Cole稍作修改的数据。

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)

答案 2 :(得分:0)

首先重塑为长形

#add unique id to your data
mat$id=1:nrow(mat)
#reshape to long by mat
long1 = reshape_toLong(data = mat,id = "id",j = "all123",value.var.prefix = "mat")
#delet id column
long2=long1[,-1]

第二次重塑到宽幅

#reshape wide by year
wide=reshape_toWide(data = long2,id = "all123",j = "year",value.var.prefix = "mat")

最后获取数据

mat1

wide[wide$all123==1,]
   Age all123 mat1998 mat1999 mat2000 mat2001
1   15      1    0.01    0.01    0.01    0.01
4   16      1    0.01    0.01    0.01    0.01
8   17      1    0.01    0.01    0.01    0.01
12  18      1    0.01    0.01    0.01    0.01

mat2

wide[wide$all123==2,]
   Age all123 mat1998 mat1999 mat2000 mat2001
3   15      2   0.012   0.012   0.012   0.012
5   16      2   0.012   0.012   0.012   0.012
7   17      2   0.012   0.012   0.012   0.012
11  18      2   0.012   0.012   0.012   0.012

mat3

wide[wide$all123==3,]
   Age all123 mat1998 mat1999 mat2000 mat2001
2   15      3   0.015   0.015   0.015   0.015
6   16      3   0.015   0.015   0.015   0.015
9   17      3   0.015   0.015   0.015   0.015
10  18      3   0.015   0.015   0.015   0.015

在使用reshape_toLongreshape_toWide函数之前,您需要使用以下命令从我的github onetree安装yikeshu0611软件包

devtools::install_github("yikeshu0611/onetree")
library(onetree)

注意:您提供的数据有问题,因此我使用Cole更改的数据

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)