我有一个很大的元胞数组(cell array),其中每个元素都是由空格分隔的 3 个数字组成的字符串(下面是 150 万行中的前 5 行)。
LocationCell =
'3926.611 -1534.095 26.324'
'4122.978 882.279 -67.495'
'4078.042 1072.946 60.384'
'4047.521 -1182.700 10.520'
'4188.222 -468.615 -57.303'
数组太大,需要几个小时才能循环并提取数字。
% Question's original approach: parse one cell per iteration with textscan.
% NOTE: "1.5million" is a placeholder for the real element count, not valid MATLAB syntax.
for n = 1: (1.5million)
% LocationNumbers is not preallocated in this snippet, so it grows on every iteration.
% textscan returns a cell array, so each stored row is a cell row rather than numbers.
LocationNumbers(n,:) = textscan( LocationCell{n}, '%f %f %f ');
end
有没有办法可以在没有循环的情况下做到这一点?
期望的输出:
LocationNumbers =
3926.611, -1534.095, 26.324;
4122.978, 882.279, -67.495;
4078.042, 1072.946, 60.384;
4047.521, -1182.700, 10.520;
4188.222, -468.615, -57.303
答案 0 :(得分:1)
之所以可能需要几个小时,是因为你没有对输出数组进行预分配(preallocation),这迫使 MATLAB 不断寻找新的连续内存块,而不是从一开始就分配大小合适的内存块,从而造成显著的性能损失。先预分配,再改用 sscanf(它直接输出正确的数据类型,即数值数组;而 textscan 输出的是元胞数组),可以大大缩短运行时间:
% Sample input: every cell holds three space-separated numbers as text.
LocationCell = {
    '3926.611 -1534.095 26.324';
    '4122.978 882.279 -67.495';
    '4078.042 1072.946 60.384';
    '4047.521 -1182.700 10.520';
    '4188.222 -468.615 -57.303'
    };

% Allocate the whole numeric result up front so it is never regrown
% inside the loop.
rowCount = numel(LocationCell);
LocationNumbers = zeros(rowCount, 3);

% sscanf returns the three parsed values as a numeric column vector;
% transpose it so each string fills one 1-by-3 row of the result.
for row = 1:rowCount
    LocationNumbers(row, :) = sscanf(LocationCell{row}, '%f %f %f').';
end
在 R2018a 中得到如下计时结果(完整的计时代码见下文):
Timing Results
n cells: 125000
================
Original: 6.638
regex: 3.840
strsplit: 11.957
sscanf: 0.958
% Benchmark driver: times the four parsing strategies on the same input.

% Tile the five sample rows 25000 times to get a 125000-by-1 cell array.
sampleRows = {'3926.611 -1534.095 26.324'; ...
              '4122.978 882.279 -67.495'; ...
              '4078.042 1072.946 60.384'; ...
              '4047.521 -1182.700 10.520'; ...
              '4188.222 -468.615 -57.303'};
LocationCell = repmat(sampleRows, 25000, 1);

% Time each candidate with timeit; every call receives the same input.
tOriginal = timeit(@() thing1(LocationCell));   % textscan loop
tRegex    = timeit(@() thing2(LocationCell));   % regexp tokens
tStrsplit = timeit(@() thing3(LocationCell));   % cellfun + strsplit
tSscanf   = timeit(@() thing4(LocationCell));   % preallocated sscanf loop

% Report the number of cells followed by one timing per strategy.
reportFormat = ['Timing Results\n', ...
                'n cells: %u\n', ...
                '================\n', ...
                'Original: %0.3f\n', ...
                'regex: %0.3f\n', ...
                'strsplit: %0.3f\n', ...
                'sscanf: %0.3f\n'];
fprintf(reportFormat, numel(LocationCell), ...
        tOriginal, tRegex, tStrsplit, tSscanf)
function out = thing1(in)
% THING1  Baseline parser: call textscan on each cell inside a loop.
%   OUT = THING1(IN) brace-indexes each element of IN, parses it with the
%   format '%f %f %f ' and stores the cell row returned by textscan as
%   row k of OUT, so OUT is a cell array with one row per element of IN.
%
%   OUT is not preallocated and therefore grows on every iteration; this
%   mirrors the loop that the timing report labels "Original", so the
%   growth is kept as-is.
total = numel(in);
k = 1;
while k <= total
    out(k, :) = textscan(in{k}, '%f %f %f ');
    k = k + 1;
end
end
function out = thing2(in)
% THING2  Parse number triples with a single vectorised regexp call.
%   OUT = THING2(IN) applies one regular expression to every element of
%   the cell array IN, captures three tokens per element, stacks the
%   token rows vertically and converts them with str2double.
%
%   The pattern only recognises numbers written as an optional minus
%   sign, digits, a decimal point and more digits, separated by one or
%   more spaces.
triplePattern = '(\-?\d+\.\d+)[ ]+(\-?\d+\.\d+)[ ]+(\-?\d+\.\d+)';

% 'tokens' with 'once' gives one 1-by-3 cell of captured text per input.
tokenRows = regexp(in, triplePattern, 'tokens', 'once');

% Stack the per-element token rows into an N-by-3 cell, then convert.
out = str2double(vertcat(tokenRows{:}));
end
function out = thing3(in)
% THING3  Parse space-separated numbers by splitting each string.
%   OUT = THING3(IN) splits every element of the cell array IN on spaces,
%   stacks the resulting token rows vertically and converts them with
%   str2double, giving one numeric row per element of IN.
%
%   Leading and trailing whitespace is stripped before splitting.
%   Without that, a string such as '1 2 3 ' produces an extra empty
%   token, which either breaks vertcat (rows of different lengths) or
%   adds a spurious NaN column to the result.

% strtrim accepts a cell array of character vectors and trims each one.
trimmed = strtrim(in);

S = cellfun(@(x) strsplit(x, ' '), trimmed, 'UniformOutput', false);

% Every row must contain the same number of tokens for vertcat to work.
S = vertcat(S{:});
out = str2double(S);
end
function out = thing4(in)
% THING4  Parse number triples with sscanf into a preallocated matrix.
%   OUT = THING4(IN) creates a numel(IN)-by-3 matrix of zeros and fills
%   row k with the values sscanf reads from IN{k} using the format
%   '%f %f %f '.
rowCount = numel(in);

% Allocate the result once so the loop only writes into existing rows.
out = zeros(rowCount, 3);

for row = 1:rowCount
    % sscanf returns a numeric column vector; transpose it to fill the
    % 1-by-3 row slice.
    out(row, :) = sscanf(in{row}, '%f %f %f ').';
end
end
答案 1 :(得分:0)
这应该可以解决问题:
% Sample input: each cell holds three space-separated decimal numbers.
C = {'3926.611 -1534.095 26.324'; ...
     '4122.978 882.279 -67.495'; ...
     '4078.042 1072.946 60.384'; ...
     '4047.521 -1182.700 10.520'; ...
     '4188.222 -468.615 -57.303'};

% One capture group per number: optional minus sign, digits, a decimal
% point and more digits; the groups are separated by one or more spaces.
triplePattern = '(\-?\d+\.\d+)[ ]+(\-?\d+\.\d+)[ ]+(\-?\d+\.\d+)';

% A single regexp call handles every cell and yields a 1-by-3 cell of
% token text for each element of C.
tokenRows = regexp(C, triplePattern, 'tokens', 'once');

% Stack the token rows into an N-by-3 cell and convert it to double.
% The statement is left without a semicolon so that M is displayed.
M = str2double(vertcat(tokenRows{:}))
或者,您也可以不使用正则表达式,而改用传统的字符串拆分方式:
% Sample input: each cell holds three space-separated numbers as text.
C = {
'3926.611 -1534.095 26.324';
'4122.978 882.279 -67.495';
'4078.042 1072.946 60.384';
'4047.521 -1182.700 10.520';
'4188.222 -468.615 -57.303'
};
% Strip leading/trailing whitespace first: a string such as '1 2 3 ' would
% otherwise produce an extra empty token, making the rows differ in length
% (vertcat error) or adding a spurious NaN column.
% Then split the cell elements on spaces...
S = cellfun(@(x)strsplit(x,' '),strtrim(C),'UniformOutput',false);
% Flatten the result into an N-by-3 cell of number strings...
S = vertcat(S{:});
% Convert the cell matrix to double (no semicolon, so M is displayed)...
M = str2double(S)