use strict;
use Time::HiRes qw[gettimeofday tv_interval];
# Inputs: both values are arbitrary for this discussion.
my $start_index = 100 + int(rand(50));
my $duration    = 75;    # assumed to always be several times the size of the dataset
my $hash        = {};
my @dataset     = qw(foo bar baz qux bob joe sue tom);

# Split the duration into whole passes over the dataset plus a remainder.
my $partial = $duration % @dataset;
my $full    = ($duration - $partial) / @dataset;

# "Full" phase: every word gets one key per whole pass, and consecutive keys
# for the same word are one dataset length apart.
my $start = [gettimeofday()];
for my $position (0 .. $#dataset) {
    my $word = $dataset[$position];
    my $key  = $start_index + $position;
    for my $pass (1 .. $full) {
        $hash->{$key} = $word;
        $key += @dataset;
    }
}
print " full ", tv_interval($start), " secs\n";
$start = [gettimeofday()];

# "Partial" phase: the leftover keys after the last whole pass take the
# first $partial words of the dataset.
my $tail_base = $start_index + @dataset * $full;
for my $position (0 .. $partial - 1) {
    $hash->{ $tail_base + $position } = $dataset[$position];
}
print " part ", tv_interval($start), " secs\n";
$start = [gettimeofday()];
当数据集和持续时间大得多时,上面“完整”(full)循环中的逻辑需要 60~120 秒才能执行完。是否有更高效的方法可以得到相同的结果?
编辑:
为了让大家对数据集的规模有个概念:这项性能优化是针对一个信号处理程序的。
答案 0 :(得分:0)
看起来唯一可以做的优化,就是把 scalar(@dataset) 的计算移到循环之外:
# Element count of @dataset, taken once (an array assigned to a scalar yields its length).
my $data_set_size = @dataset;
并在循环内部:
# Key for this word on the current pass. Every term is already an integer
# ($start_index comes from int(), $index and $_ are range values), so no
# rounding is needed; calling floor() would additionally require the POSIX module.
my $i = $start_index + $index + ($data_set_size * $_);
答案 1 :(得分:0)
这是解决方案:
use strict;
use warnings;
use Time::HiRes qw[gettimeofday tv_interval];
# Return the numerically smaller of two values; on a tie the first argument
# is returned. The ($$) prototype is kept so call sites parse as before.
sub min ($$) {
    my ($first, $second) = @_;
    return $first > $second ? $second : $first;
}
# Inputs shared by the code below; both values are arbitrary for this discussion.
my $start_index = 100 + int(rand(50));
my $duration    = 75;    # assumed to always be several times the size of the dataset

# Baseline: assign the hash one key at a time with nested loops, timing the
# whole passes ("full") and the trailing remainder ("part") separately.
{
    my @words      = qw(foo bar baz qux bob joe sue tom);
    my $word_count = scalar @words;
    my %word_at;

    # Number of leftover keys after the whole passes, and the number of whole passes.
    my $leftover = $duration % $word_count;
    my $rounds   = ($duration - $leftover) / $word_count;

    my $timer = [gettimeofday()];
    for my $position (0 .. $#words) {
        my $base = $start_index + $position;
        # Keys for one word are spaced one dataset length apart.
        $word_at{ $base + $word_count * $_ } = $words[$position] for 0 .. $rounds - 1;
    }
    print " full: ", tv_interval($timer), " secs\n";
    $timer = [gettimeofday()];

    # The remainder continues right after the last whole pass.
    my $tail_base = $start_index + $word_count * $rounds;
    $word_at{ $tail_base + $_ } = $words[$_] for 0 .. $leftover - 1;
    print " part: ", tv_interval($timer), " secs\n";
    $timer = [gettimeofday()];

    # Debug aid: dump the result in key order.
    #print "$_ => $word_at{$_}\n" for sort { $a <=> $b } keys %word_at;
}
#############
print "\n\n";
#############
# Slice-based variant: assign runs of consecutive keys with a hash slice
# instead of setting one key per loop iteration.
{
    my $words      = [qw(foo bar baz qux bob joe sue tom)];
    my $word_count = scalar @$words;
    my %word_at;
    my $todo   = $duration;    # keys that still have to be assigned
    my $offset = 0;            # dataset position where the next run starts (arbitrary)

    my $timer = [gettimeofday()];
    while ($todo) {
        # A run ends at the end of the dataset or when no keys are left,
        # whichever comes first.
        my $run       = min($todo, $word_count - $offset);
        my $first_key = $start_index + $duration - $todo;
        my $last_key  = $first_key + $run - 1;

        # Debug aid: show the key range and the dataset range of this run.
        #print "$run $first_key..$last_key $offset..".($offset+$run-1)."\n";
        @word_at{ $first_key .. $last_key } = @$words[ $offset .. $offset + $run - 1 ];

        $todo  -= $run;
        $offset = ($offset + $run) % $word_count;
    }
    print " time: ", tv_interval($timer), " secs\n";
    $timer = [gettimeofday()];

    # Debug aid: dump the result in key order.
    #print "$_ => $word_at{$_}\n" for sort { $a <=> $b } keys %word_at;
}