Question

我正在尝试开发一个基本的R闪亮应用程序，但面临着处理速度的问题。程序如下，我需要读取约500K行的csv文件 - ＆gt;将文件拆分为更小的段 - ＆gt;计算每个细分的新功能并显示结果。以下是我的UI.R和Server.R
UI.R

library(shiny)
library(shinyBS)
library(shinycssloaders)
library(DT)


shinyUI(fluidPage( 
mainPanel(
#UI for choosing the file to input
fileInput("file1", label = (" Choose Drivecycle Data "),multiple = F),

#UI for showing the number of Rows in original dataset 
fluidRow(
  column(8, h4(helpText("Number of rows input dataset"))),
  column(3,verbatimTextOutput("totrows", placeholder = TRUE))),

#UI for showing the number of segments the data set had been split into
fluidRow(
  column(8, h4(helpText("Number of segmentations"))),
  column(3,verbatimTextOutput("totseg", placeholder = TRUE))),

fluidRow(
  column(8, downloadButton("subtablednld", label = 'Downloadcsv'))
),

tabsetPanel(

  #UI to show the original data set in First tab
  tabPanel("Table",icon = icon("table"),withSpinner(DT::dataTableOutput('table'), 
                                                    type = getOption("spinner.type", default = 8) )),

  #UI to show the  features of the segments of the orginal dataset in Second Tab
  tabPanel("Feature Table",icon = icon("table"),withSpinner(DT::dataTableOutput('table1'), 
                                                            type = getOption("spinner.type", default = 8) )),


),style = 'width:1000px;height"3000px'
)
)
)

Server.R

library(shiny)
library(earth)
library(tidyr)

options(shiny.maxRequestSize=300*1024^2) #increase the max upload file size 
to 30 MB
options(shiny.trace=TRUE)

# Define server logic required to draw a histogram
shinyServer(function(input, output) {

#Function to input data set using UI 
dataframe <- reactive( {

###  Create a data frame reading data file to be used by other functions..
inFile <- input$file1

data1 <- read.csv(inFile$datapath, header = TRUE)

 })

 #Display the input dataset
 observeEvent(input$file1,output$table <- renderDataTable({dataframe()}))

 #Show the number of rows in the input dataset
 observeEvent(input$file1,output$totrows<- renderText({nrow(dataframe())}))

 #Split the data set
 Splitfile <- function(){
 split(dataframe(), (seq(nrow(dataframe()))-1) %/% 200)
 }

 #Show the number of segments the data has been split into
 observeEvent(input$file1,output$totseg <-renderText({length(Splitfile())}))

  #Acceleration calculation function
  Acceleration <- function(){
  c <- lapply(1:length(Splitfile()), function(i)
  {

   acceleration <- c(0,diff(Splitfile()[[i]]$Vehicle.Speed)/2)


    })
 Splitfile <- mapply(cbind, Splitfile(), "acceleration" = c, SIMPLIFY = F)
 Splitfile
 }

 #Calculating Features 

  CaclFeatures <- function(){  
  FileFeatures <- lapply(1:length(Acceleration()), function(i){

   Velocity_mean <-round(mean(Acceleration()[[i]]$Vehicle.Speed),digits = 3)

   Variance_Velocity      <-round(var(Acceleration()[[i]]$Vehicle.Speed)*
                                    ((length(Acceleration( 
                               [[i]]$Vehicle.Speed)-1)/length(Acceleration() 
                               [[i]]$Vehicle.Speed))
                                   ,digits = 3)

      c(Velocity_mean,
        Variance_Velocity)

    })
     FileFeatures<- as.data.frame(do.call(rbind, FileFeatures))
     names(FileFeatures)[names(FileFeatures) == 'V1'] <- "Velocity_Mean"
     names(FileFeatures)[names(FileFeatures) == 'V2'] <- "Variance_Velocity"
    }

    #Display the table containing all features of all the segments
      output$table1 <- renderDataTable({
         CaclFeatures()},options = list(scrollX = TRUE))


    #Print to csv
       output$subtablednld <- downloadHandler(

         filename = function(){

              paste("dataset-", ".csv", sep = "")
           },

       content = function(file){

      write.csv(CaclFeatures(), file ,row.names = FALSE)
        }
         )

      })

如果我读取大约2k行的csv文件但是如果我读取的数据集超过2k则不起作用，该应用程序正常工作，它既不会出错也不会崩溃。旋转器保持旋转但无法显示结果。此外，在常规R script中使用相同的逻辑可以正常处理大于500k的大数据集，而不是计算22个新功能。
目前，我使用的是8gb RAM i5 Processor系统。有没有办法提高计算速度，当我的任务管理器Rstudio仅在47% - 52%内存使用时检查时，我没有其他进程在R studio

以外运行

编辑：可以使用下面的代码创建示例数据，
drive <- as.data.frame(sample(1:50, 500000, replace = T))

Answer 1

您的整个计算似乎取决于输入data.frame中的某些结构属性，因此我无法在合理的时间内生成一个工作示例，只需对您的代码进行细微更改。

但是，您的代码评估是一种非常棒的性能。

以Acceleration为例。 WITHIN 您的lapply，您致电Splitfile()，这是常规功能。假设分割数量约为2500，您将此功能称为 2500次。操作split(dataframe(), (seq(nrow(dataframe()))-1) %/% 200)在我的计算机上大约需要2秒钟，因此您等待5000秒，而Splitfiles()的结果始终相同。然后，在CalcFeatures内，您在每个Acceleration()循环内再次调用lapply四次。这使得大约等待时间为5 000 * 2 500 * 4 = 50 000 000秒或578天。

您可能会对reactive的概念感到困惑，其中函数调用只返回当前值并且重新评估是隐式的。

所以你要么：

在功能开始时调用昂贵的函数。
- 使用Acceleration开始files <- Splitfiles()，然后使用files。
- 使用CalcFeatures开始acc <- Acceleration()，然后使用acc。
将您的功能变为被动功能。
- Splitfiles <- reactive({ ... dataframe() ... })
- Acceleration <- reactive({ ... Splitfiles() ... })
- CalcFeature <- reactive({ ... Acceleration() ... })

这两个概念的混合并不是更好。坚持任何一个。

提高Rshiny的计算速度

1 个答案: