perl高效数组连接到哈希

时间:2011-10-30 15:52:16

标签: arrays performance perl hash concatenation

use strict;
use Time::HiRes qw[gettimeofday tv_interval];

# Fill %$hash so that $duration consecutive integer keys, beginning at
# $start_index, cycle through the words of @dataset; each phase is timed.
my $start_index = 100 + int(rand(50));    # arbitrary for this discussion
my $duration    = 75;                     # also arbitrary, but assumed to be several times the dataset size

my @dataset = qw(foo bar baz qux bob joe sue tom);
my $hash    = {};

# Split $duration into whole passes over @dataset plus a leftover tail.
my $partial = $duration % scalar(@dataset);
my $full    = ($duration - $partial) / scalar(@dataset);

# "full" phase: every word is stored once per complete pass.
my $start = [gettimeofday()];
for my $slot (0 .. $#dataset) {
    my $word      = $dataset[$slot];
    my $first_key = $start_index + $slot;
    for my $pass (0 .. $full - 1) {
        $hash->{ $first_key + scalar(@dataset) * $pass } = $word;
    }
}
print "  full " . tv_interval($start) . " secs\n";

# "part" phase: the first $partial words get one more key each, placed
# right after the last complete pass.
$start = [gettimeofday()];
for my $slot (0 .. $partial - 1) {
    $hash->{ $start_index + $slot + scalar(@dataset) * $full } = $dataset[$slot];
}
print "  part " . tv_interval($start) . " secs\n";
$start = [gettimeofday()];

当使用(大得多的)数据集和持续时间来运行时,上述“full”循环中的逻辑需要 60~120 秒才能执行完。是否有更高效的方法可以得到相同的结果?

编辑:
为了让大家对数据集的规模有个概念:这个性能优化是针对一个信号处理程序的。

2 个答案:

答案 0 :(得分:0)

看起来你唯一能做的优化,就是把 scalar(@dataset) 的计算移到循环之外:

# Count the elements once, ahead of the loops, instead of on every iteration.
my $data_set_size = @dataset;    # an array assigned to a scalar yields its element count

并在循环内部:

# Plain integer arithmetic: every operand here is an integer, so the sum
# needs no rounding (floor() would additionally require loading POSIX).
my $i = $start_index + $index + ($data_set_size * $_);

答案 1 :(得分:0)

这是解决方案:

use strict;
use warnings;
use Time::HiRes qw[gettimeofday tv_interval];

# Return the numerically smaller of two values; when they compare equal
# (or the comparison is false for any other reason) the first one is returned.
# The ($$) prototype is kept so existing call sites parse exactly as before.
sub min ($$) {
    my ($first, $second) = @_;
    return $first > $second ? $second : $first;
}

my $start_index = 100 + int(rand(50));    # arbitrary for this discussion
my $duration    = 75;                     # also arbitrary, but assumed to be several times the dataset size

# Nested-loop version: one hash store per key, timed in two phases.
{
    my @dataset = qw(foo bar baz qux bob joe sue tom);
    my $hash    = {};

    # Split $duration into whole passes over @dataset plus a leftover tail.
    my $partial = $duration % scalar(@dataset);
    my $full    = ($duration - $partial) / scalar(@dataset);

    # "full" phase: every word is stored once per complete pass.
    my $start = [gettimeofday()];
    for my $slot (0 .. $#dataset) {
        my $word      = $dataset[$slot];
        my $first_key = $start_index + $slot;
        for my $pass (0 .. $full - 1) {
            $hash->{ $first_key + scalar(@dataset) * $pass } = $word;
        }
    }
    print "  full: " . tv_interval($start) . " secs\n";

    # "part" phase: the first $partial words get one more key each, placed
    # right after the last complete pass.
    $start = [gettimeofday()];
    for my $slot (0 .. $partial - 1) {
        $hash->{ $start_index + $slot + scalar(@dataset) * $full } = $dataset[$slot];
    }
    print "  part: " . tv_interval($start) . " secs\n";
    $start = [gettimeofday()];

    #print "$_ => $hash->{$_}\n" foreach (sort {$a <=> $b} keys %$hash);
}

#############
print "\n\n";
#############

# Slice version: instead of one store per key, copy the dataset into the
# hash one contiguous run at a time using hash and array slices.
{
    my $dataset = [qw(foo bar baz qux bob joe sue tom)];
    my $hash    = {};

    my $remaining = $duration;    # number of keys still to be written
    my $phase     = 0;            # arbitrary; index in @$dataset where the next run begins
    my $start     = [gettimeofday()];
    while ($remaining) {
        # A run ends at the end of the dataset or when no keys are left,
        # whichever comes first.
        my $chunk_size = min($remaining, scalar(@$dataset) - $phase);

        my $first_key  = $start_index + $duration - $remaining;
        my $last_key   = $first_key + $chunk_size - 1;
        my $last_word  = $phase + $chunk_size - 1;

        #print "$chunk_size   $first_key..$last_key   $phase..$last_word\n";
        @{$hash}{ $first_key .. $last_key } = @{$dataset}[ $phase .. $last_word ];

        $remaining -= $chunk_size;
        # Wrap around to the start of the dataset once its end is reached.
        $phase = ($phase + $chunk_size) % scalar(@$dataset);
    }
    print "  time: " . tv_interval($start) . " secs\n";
    $start = [gettimeofday()];

    #print "$_ => $hash->{$_}\n" foreach (sort {$a <=> $b} keys %$hash);
}