我使用 readMat 命令从 MATLAB 的 .mat 文件导入大量数据。数据的布局如下:struct$ERP: 200x256x16,struct$TGT: 200x1,
以及其他一些我打算丢弃的无关内容。TGT 变量是分类目标,第三维是每个目标的试验索引。
每个受试者(subject)的文件夹中都有七个或更多这样的文件。我想构建一个高效的数据结构,使我能够快速处理单个试验(沿第三维的切片),同时跟踪目标变量,并且符合 R 的惯用风格。
对于单个受试者的文件,我可以用一种比较笨拙的方式完成这些步骤:
# Load one subject's first .mat file and unroll it into a long data frame:
# one row per (trial, sample) pair, one column per channel, plus the target.
library(R.matlab)  # library() errors if the package is missing; require() only returns FALSE

subdirs <- list.dirs(".")
filelist <- list.files(path = ".", full.names = FALSE, pattern = "^.*\\.mat$",
                       ignore.case = TRUE, recursive = TRUE,
                       include.dirs = FALSE)

# Read every file once; `filedatas` was referenced below but never created.
filedatas <- lapply(filelist, function(x) readMat(con = x))

# First file of the first subject, as a data frame wrapping the `eeg` struct.
sub1t1src <- as.data.frame(filedatas[[1]]$eeg)

erp1 <- sub1t1src[1, 1]$ERP
erp1 <- aperm(erp1, c(2, 3, 1))  # data is permuted differently than I would like

# Stack the slices of the third dimension on top of each other, giving a
# (dim1 * dim3) x dim2 matrix.
erp1r <- apply(erp1, 2, rbind)
erp1rdf <- as.data.frame(erp1r)

# Read the targets from the source object; the original read them from
# `sub1t1df`, which does not exist until the final line of this script.
tgt1 <- sub1t1src[2, 1]$TGT
tgt1 <- as.factor(tgt1)

# Repeat each target once per row of its trial (256 for the data described).
tgt1r <- rep(tgt1, each = dim(erp1)[1])
sub1t1df <- cbind(erp1rdf, tgt1r)
这样我得到一个数据框:每个试验被展开成多行,并且 TGT 变量在每一行上重复。这是一种明智的做法吗?还是有更好的方式来组织数据,使其更接近原始的 MATLAB 结构?
最重要的是,如果这是一种还算可行的方法,我怎样才能以 apply 系列函数的“正确”方式遍历 filelist 中的每个文件,并把取自 subdirs 的简短受试者名称之类的内容作为新列添加到数据中?
示例数据(.mat 文件是二进制的,所以这里只给出一个受试者经过裁剪后的输出):
# Trimmed example inputs standing in for the real directory listing.
subdirs <- c(".", "./s1", "./s2")
filelist <- c("s1/file1.dat.mat","s1/file2.dat.mat","s2/file1.dat.mat","s2/file2.dat.mat")
# for some reason subdirs are still in the output of filelist
# (each entry above keeps its "s1/" or "s2/" directory prefix)

# Trimmed stand-in for one imported file: a data frame with a single list
# column named "1.1" and rows "ERP", "TGT" and "FS".
#   ERP: 2 x 2 x 4 numeric array (16 values)
#   TGT: 4 x 1 integer matrix of 0/1 targets
#   FS:  1 x 1 matrix holding 256
sub1t1src <- structure(list(`1.1` = structure(list(ERP = structure(c(-10.5069999694824,
-13.585000038147, -6.21299982070923, -11.6659994125366, -16.5679988861084,
-17.1949996948242, -26.390998840332, -13.6799993515015, -0.759999990463257,
-7.58099985122681, 23.5789985656738, -2.07099986076355, -7.3149995803833,
-2.33699989318848, -18.1070003509521, -21.9639987945557),
Csingle = TRUE, .Dim = c(2L,
2L, 4L)), TGT = structure(c(1L, 0L, 0L, 1L), .Dim = c(4L, 1L)), FS = structure(256,
.Dim = c(1L, 1L))), .Names = c("ERP", "TGT", "FS"))),
.Names = "1.1", row.names = c("ERP", "TGT", "FS"
), class = "data.frame")
答案 0 :(得分:1)
参考 Roland 的建议,我想下面是一种用 for 循环实现的(不太地道的)做法。由于对象太大,我把它们拆分开来,每个受试者目录单独保存。
# For every subject sub-directory: read all .mat files, stack their ERP arrays
# and target vectors, and save the result as one .Rdata file inside that
# directory. Objects are saved per subject because they are too large to keep
# together.
library(R.matlab)  # readMat()
library(abind)     # abind()

subdirs <- list.dirs(".")

# list.dirs(".") returns "." itself first, so skip element 1.
# seq_along()[-1] is empty when there are no sub-directories, whereas
# 2:length(subdirs) would count down as c(2, 1).
for (this_subd in seq_along(subdirs)[-1]) {
  subdir <- subdirs[this_subd]

  # Look for all mat-files in that directory. The dot is escaped so only a
  # literal ".mat" extension matches; full.names = TRUE prepends the subdir.
  filelist <- list.files(path = subdir, full.names = TRUE,
                         pattern = "^.*\\.mat$",
                         ignore.case = TRUE, recursive = TRUE,
                         include.dirs = FALSE)

  # Nothing to do for directories without .mat files.
  if (length(filelist) == 0L) {
    next
  }

  # Read every file of this subject.
  filedatas <- lapply(filelist, function(x) readMat(con = x))

  # Collect the ERP array and the targets of EVERY file. The original loop
  # started at index 2 and silently dropped the first file.
  # NOTE(review): array(..., dim = ) only reinterprets the dimensions, it does
  # not permute them. If ERP really arrives as 200 x 256 x 16, this probably
  # needs aperm(x, c(1, 3, 2)) instead -- confirm against the real data.
  erp_list <- lapply(filedatas, function(d) {
    array(d$eeg[, , 1]$ERP, dim = c(200, 16, 256))
  })
  tgt_list <- lapply(filedatas, function(d) d$eeg[, , 1]$TGT)

  # Bind once instead of growing inside a loop from an NA-filled placeholder.
  # The old placeholder removal kept the placeholder's dimensions, so array()
  # recycled real values into 200 bogus trials and targets.
  erpdata <- abind(erp_list, along = 1)
  targets <- do.call(rbind, tgt_list)

  # Permute the data into samples X channels X trials
  erpdata <- aperm(erpdata, c(3, 2, 1))
  targets <- as.factor(targets)  # convert to categorical variable

  # e.g. "./s1" -> "./s1/s1unifieddata.Rdata"; basename() keeps the path valid
  # for nested directories too.
  save(erpdata, targets,
       file = file.path(subdir,
                        paste0(basename(subdir), "unifieddata.Rdata")))

  # cleanup to save memory
  rm(erpdata, targets, erp_list, tgt_list, filedatas)
}