我有一个 .asc 文件 'test.asc',它由长度和内容各不相同的行组成。
my name is blalala
This is my home and I live in here
12 13 10 is he
he is my brother 12 13 14
如何将文件内容导入 MATLAB 元胞数组,使文件中的每一行按空格分隔符拆分成单词,并对应元胞数组中的一行?
resultCellarray={
'my' 'name' 'is' 'blalala' [] [] [] [] []
'This' 'is' 'my' 'home' 'and' 'I' 'live' 'in' 'here'
'12' '13' '10' 'is' 'he' [] [] [] []
'he' 'is' 'my' 'brother' '12' '13' '14' [] []
}
我尝试先将每一行读入一个元胞:
content = textread('test.asc','%s','delimiter','\n','whitespace','');
然后按照 "separating cell array into several columns MATLAB" 中的方法将每个元胞拆分成多列,但文件很大时需要花费很多时间。最快的方法是什么?
答案 0 :(得分:1)
此代码运行速度非常快(在0.2秒内拆分1M个字符):
% Split a text file into a cell array with one row per line and one word
% per column. Rows shorter than the longest line are padded with [].
% Tabs are treated as spaces, '\r' is dropped, runs of spaces are collapsed
% and leading/trailing spaces on a line are ignored.
%
%generate random file
% w=[10,13,32*ones(1,10),97:122,97:122];
% FILE_LENGTH=10*1000*1000;mytext=char(w(randi(length(w),1,FILE_LENGTH)));
% fileID = fopen('z:\mytest.asc','w');fprintf(fileID,'%s',mytext);fclose(fileID);
clear
tic
%settings
Filename='z:\test.asc';
LineDelimiter=newline; % char(10)
WordDelimiter=' ';
%read file
[fid,errmsg]=fopen(Filename,'r');
if fid<0
    % fail here with a clear message instead of inside fread
    error('Cannot open file "%s": %s',Filename,errmsg);
end
text=fread(fid,'*char')'; % whole file as one row vector of chars
fclose(fid);
%fix text
text(text==char(9))=WordDelimiter; % replace tab with space
text(text==char(13))=[];           % remove '\r'
if isempty(text)
    % empty file (or only '\r'): nothing to split, and text(end) would error
    Output=cell(0,0);
    toc
    return
end
if text(end)~=LineDelimiter, text(end+1)=LineDelimiter; end % add eol if needed
IdxWords=find(text==WordDelimiter);
text(IdxWords(diff(IdxWords)==1))=[]; % collapse runs of spaces into one
% After collapsing, a space is "leading" if it is the first character or
% follows an eol, and "trailing" if it precedes an eol. Dropping those
% avoids spurious empty words at the start/end of a line.
IsSpace=(text==WordDelimiter);
IsEol=(text==LineDelimiter);
text(IsSpace & ([true,IsEol(1:end-1)] | [IsEol(2:end),false]))=[];
%count words per line
IdxNewline=find(text==LineDelimiter);
NumOfLines=length(IdxNewline); % 2 eol = 2 lines
IdxWords=find(text==WordDelimiter|text==LineDelimiter); % end of every word
% The rank of each eol inside IdxWords is the cumulative number of word
% terminators up to that line; differencing gives the per-line count.
WordsPerLine=diff([0,find(text(IdxWords)==LineDelimiter)]);
MaxWords=max(WordsPerLine);
%split
Output=cell(NumOfLines,MaxWords);
pos=1;   % start index of the current word in text
iword=0; % index into IdxWords of the current word terminator
for i=1:NumOfLines
    for j=1:WordsPerLine(i)
        iword=iword+1;
        if IdxWords(iword)>pos
            Output{i,j}=text(pos:IdxWords(iword)-1);
        end % else: blank line, leave the cell as [] like the padding
        pos=IdxWords(iword)+1;
    end
end
toc
% disp(Output)