第二次迭代时Cran R循环错误 - 获取所有NAs

时间:2018-04-27 16:45:10

标签: r for-loop fread read.csv

我有一个遍历目录中每个文件的循环。它在一个文件上工作正常,但只要目录中有2个或更多文件,第二个(或更多)输出就是所有NA。

我尝试从read.csv切换到fread,我尝试将.csv转换为.txt,我尝试了不同的选择特定列的方法(例如,保持,选择),但我总是得到NAs第二次通过循环。它不是第二个文件,因为如果删除第一个文件,第二个文件将被完美处理。

不确定它是否在.csv的末尾,或者它是否将rownames添加到第二个文件或什么。谢谢!

filenames <- list.files()
n_filenames <- length(filenames)
SSRT_cb1_pre <- data.frame(matrix(ncol = 4, nrow = n_filenames)) 
cols <- c(13, 23, 24, 25, 28, 29, 31, 32)
for (i in 1:n_filenames) {
  print(filenames)
  dt_pre <- fread(filenames[i], header=T, sep=",", select=cols, 
        stringsAsFactors=F, na.strings=c("NA", "", "."))
  dt_pre$RT <- as.numeric(dt_pre$rt)
  data_real_pre <- subset(dt_pre, SSTBlocks.thisRepN>=0)
  data_corr_pre <- subset(data_real_pre, corr == 1)
  data_corr_pre_RTmean <- aggregate(RT ~ P, data = data_corr_pre, 
       FUN=mean, na.rm=TRUE) 
  data_corr_pre_SSDmean <- aggregate(SSD ~ P, data = data_corr_pre, 
   FUN = mean, na.rm = TRUE) 
  pre_sub <- data_corr_pre_RTmean[i,1]
  preMeanRT <- data_corr_pre_RTmean[i,2]
  preMeanSSD  <- data_corr_pre_SSDmean[i,2]   
  SSRT_cb1_pre[i, 1] <- i
  SSRT_cb1_pre[i, 2] <- pre_sub
  SSRT_cb1_pre[i, 3] <- preMeanRT
  SSRT_cb1_pre[i, 4] <- preMeanSSD
}
SSRT_cb1_pre

以下给出了这个输出:

输出:

SSRT_cb1_pre

i sub1     preRT    preSSD
1 1  301 0.4877872 0.2580645
2 2   NA        NA        NA
比ABO更新的代码
filenames <- list.files()
n_filenames <- length(filenames)
n_rows <- n_filenames/2
SSRT_cb1_pre <- data.frame(matrix(ncol = 4, nrow = n_filenames)) # for output
colnames(SSRT_cb1_pre) <- c("i","sub1", "preRT", "preSSD")
cols <- c(13, 23, 24, 25, 28, 29, 31, 32)
colsnames <- c("SSTBlocks.thisRepN", "SSD", "corr", "rt", "sess", "CB", "P", "expName")

for (i in 1:n_filenames) {

  print(filenames)
  dt_pre <- fread(filenames[i], header=T, sep=",", select=colsnames, stringsAsFactors=F, na.strings=c("NA", "", "."))
  dt_pre$RT <- as.numeric(dt_pre$rt)
  data_real_pre <- subset(dt_pre, SSTBlocks.thisRepN>=0)
  data_corr_pre <- subset(data_real_pre, corr == 1)
  data_corr_pre_RTmean <- data_corr_pre[, mean(RT, na.rm=T), by = P] #suggested by Yannis Vassiliadis Stackoverflow as alt to aggregate 
  data_corr_pre_SSDmean <- data_corr_pre[, mean(SSD, na.rm=T), by = P] 
  # values to collect from each file
  pre_sub <- data_corr_pre_RTmean[i, 1]
  preMeanRT <- data_corr_pre_RTmean[i, 2]
  preMeanSSD  <- data_corr_pre_SSDmean[i, 2]   
 # output for values - should iterate through 
  SSRT_cb1_pre[i, 1] <- i
  SSRT_cb1_pre[i, 2] <- pre_sub
  SSRT_cb1_pre[i, 3] <- preMeanRT
  SSRT_cb1_pre[i, 4] <- preMeanSSD

}

SSRT_cb1_pre
class(data_corr_pre_RTmean)
class(data_corr_pre_SSDmean)

这给出了输出:

[1] "301_1_PsychoPy_SST_Pretest_2.csv" "303_1_PsychoPy_SST_Pretest.csv"  
[1] "301_1_PsychoPy_SST_Pretest_2.csv" "303_1_PsychoPy_SST_Pretest.csv"  
Warning messages:
1: In as.numeric(dt_pre$rt) : NAs introduced by coercion
2: In as.numeric(dt_pre$rt) : NAs introduced by coercion
> 
> SSRT_cb1_pre
  i sub1     preRT    preSSD
1 1  301 0.4877872 0.2580645
2 2   NA        NA        NA
> class(data_corr_pre_RTmean)
[1] "data.table" "data.frame"
> class(data_corr_pre_SSDmean)
[1] "data.table" "data.frame"

1 个答案:

答案 0 :(得分:0)

如果没有可重复的例子,我无法测试它,但我认为你应该改为代码

pre_sub <- data_corr_pre_RTmean[i,1]
preMeanRT <- data_corr_pre_RTmean[i,2]
preMeanSSD  <- data_corr_pre_SSDmean[i,2] 

pre_sub <- data_corr_pre_RTmean[,1]
preMeanRT <- data_corr_pre_RTmean[,2]
preMeanSSD <- data_corr_pre_SSDmean[,2]