我正在尝试分析6个非常大的CSV文件(至少相对于我们的计算资源而言非常大)。我编写了一个用来分析所有这些文件的脚本,但运行时总是收到错误提示:“无法读取文件Z:〜/ AC2561图像的设置/ EMT1.csv:没有足够的存储空间来处理此命令。”我已经使用rm()删除不再需要的变量;如果我自己逐个修改索引,脚本可以“手动”地逐个文件运行,但放进for循环就无法运行。
我将来需要在大量文件上运行此脚本,因此让for循环(或其他替代方案)能够正常工作会非常有用。
#loading necessary packages
library(dplyr)
library(readr)
# Summarise every CSV file in the working directory, one file at a time, so
# that only a single file's data is held in memory at any moment.

# Making list of file names (6 csv files in this case).
# `pattern` is a regular expression, not a glob: "\\.csv$" matches names that
# end in ".csv", whereas "*.csv" is not anchored to the file extension.
temp <- list.files(pattern = "\\.csv$")

# Only these columns are parsed (the original file has 68 columns); all other
# columns are skipped at read time instead of being loaded and then dropped.
# NOTE(review): `Dye 4 Positive` is read but never used below. If the count is
# meant to include only Dye 4 positive cells, add that condition to the
# filter; otherwise this column can be removed to save more memory.
needed_cols <- cols_only(
  `Dye 2 Positive` = col_guess(),
  `Dye 3 Positive` = col_guess(),
  `Dye 4 Positive` = col_guess(),
  `Dye 4 Cytoplasm Intensity` = col_guess()
)

# Count the cells of one FITC / CD45 subset and average their Dye 4 cytoplasm
# intensity.
#   cells     - data frame holding the columns listed in `needed_cols`
#   fitc_flag - value `Dye 2 Positive` must equal (1 = positive, 0 = negative)
#   cd45_flag - value `Dye 3 Positive` must equal (1 = positive, 0 = negative)
# Returns a one-row tibble with `Total_GD2_count` and `MFI`.
# NOTE(review): the Dye 2 = FITC / Dye 3 = CD45 mapping is taken from how the
# FITCPos_* subsets were originally filtered - confirm against the assay.
summarise_gate <- function(cells, fitc_flag, cd45_flag) {
  cells %>%
    filter(
      `Dye 2 Positive` == fitc_flag,
      `Dye 3 Positive` == cd45_flag
    ) %>%
    summarise(
      Total_GD2_count = n(),
      MFI = mean(`Dye 4 Cytoplasm Intensity`)
    )
}

# One slot per file, named after the file, allocated up front.
summary_data <- vector("list", length(temp))
names(summary_data) <- temp

for (i in seq_along(temp)) {
  df <- read_csv(temp[i], col_types = needed_cols)

  # Each of these is a distinct subset of cells. The FITCNeg_* subsets now
  # really select `Dye 2 Positive` == 0 (previously all four subsets filtered
  # on == 1, so three of them were identical).
  # `[[<-` stores the whole list of four summaries in the file's slot; `[<-`
  # would keep only the first summary and warn about the length mismatch.
  summary_data[[i]] <- list(
    FITCPos_CD45Neg = summarise_gate(df, 1, 0),
    FITCPos_CD45Pos = summarise_gate(df, 1, 1),
    FITCNeg_CD45Neg = summarise_gate(df, 0, 0),
    FITCNeg_CD45Pos = summarise_gate(df, 0, 1)
  )

  # Drop the file's data before the next file is read.
  rm(df)
}