获取数据框中的子列表

时间:2019-02-08 11:45:02

标签: r list dataframe apply

我想从数据框中的列表里获取子列表。我搜索了很久,但没有找到合适的方法。

这里处理的不是一个完整的列表(例如map()示例中使用的got_chars),而是数据框中的列表。

最后一点:请记住,我的问题是关于数据框中的子列表,而不是关于boxplot.stats()。即使不使用boxplot.stats(),从数据框中获取列表也可能很有用。

这是我的可重现示例(reprex):

library(tidyverse)
library(stats)
library(DBI)

# Throwaway in-memory SQLite database that receives a copy of mtcars.
con <- dbConnect(
  RSQLite::SQLite(),
  ":memory:"
)
dbWriteTable(con, "mtcars", mtcars)

# The query prefixes the cylinder count with the text 'cyl' and keeps disp.
data_in <- dbGetQuery(
  con,
  "SELECT 'cyl'||cyl as cyl,disp FROM mtcars"
)

data_in
#>       cyl  disp
#> 1  cyl6.0 160.0
#> 2  cyl6.0 160.0
#>  [...snip..snip ...]
#> 28 cyl4.0  95.1
#> 29 cyl8.0 351.0
#> 30 cyl6.0 145.0
#> 31 cyl8.0 301.0
#> 32 cyl4.0 121.0




#' Box-plot statistics for one row of the grouped data frame
#'
#' @param data A single row (named list or vector), as handed over by
#'   `apply(..., MARGIN = 1)`.
#' @param ind Name of the element that holds the numeric values to summarise.
#' @param code Name of the element that holds the group label.
#' @return The list produced by `boxplot.stats()` (`stats`, `n`, `conf`,
#'   `out`) extended with `quartile1`, `quartile3`, `stats_list` and the group
#'   label stored under the name given in `code`.
boxplot.stats.fct <- function(data, ind, code) {
  r <- boxplot.stats(unlist(data[ind]), coef = 1.5, do.conf = TRUE)

  # `stats` is ordered: lower whisker, lower hinge, median, upper hinge,
  # upper whisker.
  # This works, but the goal is to do the extraction on the returned value
  # instead of presetting it here.
  r$quartile1 <- r$stats[2]
  # The upper hinge is element 4; element 3 is the median.
  r$quartile3 <- r$stats[4]

  # Copy of the whole vector; picking a single element out of it later,
  # outside this function, is the part that does not behave as hoped.
  r$stats_list <- r$stats

  # Carry the group label along so each result row can be identified.
  r[code] <- as.character(data[code])

  r
}


# One row per cyl value; FUN = t keeps all disp values of a group together in
# a single cell instead of reducing them to a summary.
data_transposed <- aggregate(
  disp ~ cyl,
  data = data_in,
  FUN = t
)
data_transposed
#>      cyl
#> 1 cyl4.0
#> 2 cyl6.0
#> 3 cyl8.0
#>                                                                                               disp
#> 1                           108.0, 146.7, 140.8, 78.7, 75.7, 71.1, 120.1, 79.0, 120.3, 95.1, 121.0
#> 2                                                  160.0, 160.0, 258.0, 225.0, 167.6, 167.6, 145.0
#> 3 360.0, 360.0, 275.8, 275.8, 275.8, 472.0, 460.0, 440.0, 318.0, 304.0, 350.0, 400.0, 351.0, 301.0




 # Run the helper on every row (MARGIN = 1), stack the returned lists with
 # rbind and turn the result into a data frame.
 data.stats.out <- as.data.frame(
   do.call(
     rbind,
     apply(data_transposed, MARGIN = 1, FUN = boxplot.stats.fct, "disp", "cyl")
   )
 )

 data.stats.out
#>                                  stats  n                conf out
#> 1 71.10, 78.85, 108.00, 120.65, 146.70 11 88.08698, 127.91302    
#> 2    145.0, 160.0, 167.6, 196.3, 225.0  7  145.9222, 189.2778 258
#> 3    275.8, 301.0, 350.5, 400.0, 472.0 14    308.695, 392.305    
#>   quartile1 quartile3                           stats_list    cyl
#> 1     78.85       108 71.10, 78.85, 108.00, 120.65, 146.70 cyl4.0
#> 2       160     167.6    145.0, 160.0, 167.6, 196.3, 225.0 cyl6.0
#> 3       301     350.5    275.8, 301.0, 350.5, 400.0, 472.0 cyl8.0

这样可以工作,但前提是我已经在函数内部用r$stats[2]把数据提取出来了。

 # Uses the quartile columns that were already extracted inside the function.
 data.stats.out %>%
   mutate(
     toto1 = .$quartile1,
     toto2 = .$quartile3
   ) %>%
   select(cyl, toto1, toto2)
#>      cyl toto1 toto2
#> 1 cyl4.0 78.85   108
#> 2 cyl6.0   160 167.6
#> 3 cyl8.0   301 350.5

这样不行:.$stats_list[2] 选中的是列表列的第2个元素(即第2行的整个向量),它被重复填充到每一行,所以各行的结果完全相同:

 # Attempt that does NOT give the wanted result: `[2]` / `[3]` index the list
 # column itself, so they pick the whole stats vector of row 2 / row 3 rather
 # than the 2nd / 3rd value of each row, and that one element is repeated on
 # every row.
 data.stats.out %>%
   mutate(
     toto1 = .$stats_list[2],
     toto2 = .$stats_list[3]
   ) %>%
   select(cyl, toto1, toto2)
#>      cyl                             toto1
#> 1 cyl4.0 145.0, 160.0, 167.6, 196.3, 225.0
#> 2 cyl6.0 145.0, 160.0, 167.6, 196.3, 225.0
#> 3 cyl8.0 145.0, 160.0, 167.6, 196.3, 225.0
#>                               toto2
#> 1 275.8, 301.0, 350.5, 400.0, 472.0
#> 2 275.8, 301.0, 350.5, 400.0, 472.0
#> 3 275.8, 301.0, 350.5, 400.0, 472.0

 # Close the in-memory SQLite connection opened at the top of the script.
 dbDisconnect(con)

由 reprex 包(v0.2.1)于 2019-02-08 创建

我想在函数外部选取列表的第n个元素,而不是在函数内部。

1 个答案:

答案 0 :(得分:0)

我找到了解决问题的方法:在apply()中调用unlist :)

library(tidyverse)
library(stats)
library(DBI)

# Throwaway in-memory SQLite database that receives a copy of mtcars.
con <- dbConnect(
  RSQLite::SQLite(),
  ":memory:"
)
dbWriteTable(con, "mtcars", mtcars)

# The query prefixes the cylinder count with the text 'cyl' and keeps disp.
data_in <- dbGetQuery(
  con,
  "SELECT 'cyl'||cyl as cyl,disp FROM mtcars"
)

# Show only the first five rows as an extract.
data_in[1:5, ]
#>      cyl disp
#> 1 cyl6.0  160
#> 2 cyl6.0  160
#> 3 cyl4.0  108
#> 4 cyl6.0  258
#> 5 cyl8.0  360


#' Box-plot statistics for one row of the grouped data frame
#'
#' @param data A single row (named list or vector), as handed over by
#'   `apply(..., MARGIN = 1)`.
#' @param ind Name of the element that holds the numeric values to summarise.
#' @param code Name of the element that holds the group label.
#' @return The list produced by `boxplot.stats()` with the group label added
#'   under the name given in `code`.
boxplot.stats.fct <- function(data, ind, code) {
  values <- unlist(data[ind])
  result <- boxplot.stats(values, coef = 1.5, do.conf = TRUE)

  # Quartiles are deliberately not preset here (previously stats[2] and
  # stats[4]); individual elements are picked from `stats` after the call.

  # Carry the group label along so each result row can be identified.
  result[code] <- as.character(data[code])

  return(result)
}


# One row per cyl value; FUN = t keeps all disp values of a group together in
# a single cell.
data_transposed <- aggregate(disp ~ cyl, data = data_in, FUN = t)

# Run the helper on every row (MARGIN = 1), stack the returned lists with
# rbind and turn the result into a data frame.
data.stats.out <- as.data.frame(
  do.call(
    rbind,
    apply(data_transposed, MARGIN = 1, FUN = boxplot.stats.fct, "disp", "cyl")
  )
)

# Short alias used in the rest of the example.
D <- data.stats.out
D
#>                                  stats  n                conf out    cyl
#> 1 71.10, 78.85, 108.00, 120.65, 146.70 11 88.08698, 127.91302     cyl4.0
#> 2    145.0, 160.0, 167.6, 196.3, 225.0  7  145.9222, 189.2778 258 cyl6.0
#> 3    275.8, 301.0, 350.5, 400.0, 472.0 14    308.695, 392.305     cyl8.0


#' Unlist one column of a data frame row by row
#'
#' @param data A data frame.
#' @param code Name of the column to flatten.
#' @return Whatever `apply()` builds from the per-row `unlist()` results; in
#'   the example this is a vector for a one-value-per-row column and a matrix
#'   with one column per row of `data` for the `stats` column.
unlista <- function(data, code) {
  picked <- data[code]
  apply(X = picked, MARGIN = 1, FUN = unlist)
}


# This is the wanted result: the nth element of each per-row list is selected
# only here, at the very end. In this example: the 2nd and 4th entries of
# `stats`.
the_df_list_dynamic_answer <- data.frame(
  cyl       = unlista(D, "cyl"),         # the author notes D["cyl"] as an alternative
  quartile1 = unlista(D, "stats")[2, ],  # 2nd entry of stats for every row
  quartile3 = unlista(D, "stats")[4, ]   # 4th entry of stats for every row
)
the_df_list_dynamic_answer
#>      cyl quartile1 quartile3
#> 1 cyl4.0     78.85    120.65
#> 2 cyl6.0    160.00    196.30
#> 3 cyl8.0    301.00    400.00

# Close the in-memory SQLite connection opened at the top of the script.
dbDisconnect(con)

由 reprex 包(v0.2.1)于 2019-02-12 创建