Perl Parser使用Hash

时间:2013-12-14 12:32:07

标签: perl perl-data-structures

我是perl的新手,对数据结构还不太熟悉。我一直在编写一个文本解析器,用来从文本文件中提取信息并将其存储到数据库中。常规模式现在还可以,但是我刚注意到,我用作哈希键的“时间”并不是唯一的,因为(文本文件中的)多个更新可能发生在同一时刻。结果哈希中出现了重复项,这对我来说是不可接受的。所以我想再添加一个键,比如一个唯一的计数器,但我不知道该如何实现。于是我尝试添加另一个键“{$recordcnt}”作为计数器,你会看到它附加在所有哈希语句上。我删除了计数器的递增语句(也许是我没有正确实现)。

另外,如果你查看我代码中的print语句块(最后一节),我正在尝试打印数组@nodes_and_index的值,它包含节点和索引两列,我想把这两列分开显示,但是打印出来的并不是期望的结果。这只是一个测试,因为我打算把这些数据分别写入数据库。

那么,我是不是把“{$recordcnt}”放错了地方?如果是的话,怎样才能在使用时间作为键的同时,让哈希中的每个条目都是唯一的?谢谢你的阅读。

这是我的数据示例:

TIME: 11/01/13 14:30:24
FROM: 10.255.9.4 AS172193
TO: 10.255.9.10 AS676767
ASPATH: 172193 19601 14835 1286 577 4097 2841 14735 9486 573 10633 4488
NEXT_HOP: 10.255.9.126
ANNOUNCE
  10.44.193.0/24

TIME: 11/01/13 14:30:24
FROM: 10.255.9.4 AS172193
TO: 10.255.9.10 AS676767
ASPATH: 172193 19601 14835 4758 2379 10721 10787 7830 17777 4875 4488
NEXT_HOP: 10.255.9.126
ANNOUNCE
  10.44.193.0/24

TIME: 11/01/13 14:30:25
FROM: 10.255.9.4 AS172193
TO: 10.255.9.10 AS676767
ASPATH: 172193 19601 14835 4758 2379 10721 10787 7830 17777 16480 9486 573 10633 4488
NEXT_HOP: 10.255.9.126
ANNOUNCE
  10.44.193.0/24

TIME: 11/01/13 14:30:25
FROM: 10.255.9.4 AS172193
TO: 10.255.9.10 AS676767
ASPATH: 172193 19601 19602 3252 3665 2315 2379 10721 7311 12934 4875 4488
NEXT_HOP: 10.255.9.125
ANNOUNCE
  10.44.193.0/24

TIME: 11/01/13 14:30:34
FROM: 10.255.9.4 AS172193
TO: 10.255.9.10 AS676767
ASPATH: 172193 19601 19602 3252 3665 2315 2379 3725
NEXT_HOP: 10.255.9.125
ANNOUNCE
  10.44.193.0/24

这是我的完整代码:

#!/usr/bin/perl -w
use strict;
use warnings;

# Parse every *.hr file under $Dir into %hash. Entries are addressed as
# $hash{$record_id}{$recordcnt}{field}, where $record_id is built from the
# second and third whitespace-separated tokens of a TIME line.
my %hash;
my $Dir = "/root/updates/processed/";
# Only referenced by the commented-out debug line inside the read loop.
my $exit = 0;
# Second-level hash key. NOTE(review): nothing in this script increments it,
# and it is reset to 0 on every TIME line, so it is always 0.
my $recordcnt = 0 ;
opendir(DIRECTORY, $Dir) or die $!;

while (my $file = readdir(DIRECTORY)) { 

    # Skip directory entries whose name does not end in ".hr".
    unless ($file=~/\.hr$/){next;}


    # This inner $file shadows the loop variable and holds the full path.
    # $Dir already ends in "/", so the path contains "//".
    my $file = $Dir."/".$file;
    print "$file\n";

    open (IN, $file) or die "error reading file: ", $file,"\n";

    # Fields of the record currently being collected.
    my $record_id = "";
    #my $recordcnt = 0 ;
    # NOTE(review): $type is never assigned a non-empty value in this script
    # (there is no TYPE branch below).
    my $type = "";
    my $peer_ip = "";
    my $peer_as = "";
    my $local_ip = "";
    my $local_as = "";
    my $next_hop = "";
    my @nodes_and_index = ();
    my @withdraw_prefix = ();
    my @announce_prefix = ();



    while (<IN>) {          

        #$exit++; last if ($exit==5);

        if (/^TIME/) {

            # A TIME line starts a new record: store whatever non-empty
            # fields were collected for the previous record, then reset.
            if ($type) {$hash{$record_id}{$recordcnt}{'type'} = $type;}
            if ($peer_ip) {$hash{$record_id}{$recordcnt}{'peer_ip'} = $peer_ip;}
            if ($peer_as) {$hash{$record_id}{$recordcnt}{'peer_as'} = $peer_as;}
            if ($local_ip) {$hash{$record_id}{$recordcnt}{'local_ip'} = $local_ip;}
            if ($local_as) {$hash{$record_id}{$recordcnt}{'local_as'} = $local_as;}
            if ($next_hop) {$hash{$record_id}{$recordcnt}{'next_hop'} = $next_hop;}

            if (@nodes_and_index) {push @{$hash{$record_id}{$recordcnt}{'nodes_and_index'}}, @nodes_and_index;}  
            if (@withdraw_prefix) {push @{$hash{$record_id}{$recordcnt}{'withdraw_prefix'}}, @withdraw_prefix;}
            if (@announce_prefix) {push @{$hash{$record_id}{$recordcnt}{'announce_prefix'}}, @announce_prefix;}

            $peer_as = "";
            $peer_ip = "";
            $type = "";
            $local_ip = "";
            $local_as = "";
            $next_hop = "";
            $record_id = "";
            # NOTE(review): resetting the counter here means two records with
            # the same TIME value still map to the same hash slot.
            $recordcnt = 0;
            @nodes_and_index = ();
            @withdraw_prefix = ();
            @announce_prefix = ();


            # "TIME: <date> <time>" becomes the record id "<date>_<time>".
            my @time = split '\s', $_;
            $record_id = $time[1]."_".$time[2]; 

        } elsif (/^FROM/) {
            # Second token is the peer IP, third is the peer AS with "AS" removed.
            my @from_tmp = split '\s', $_;
            $peer_ip = $from_tmp[1];
            $peer_as = $from_tmp[2];
            $peer_as =~ s/AS//;

        } elsif (/^TO/) {
            # Second token is the local IP, third is the local AS with "AS" removed.
            my @to_tmp = split '\s', $_;
            $local_ip = $to_tmp[1];
            $local_as = $to_tmp[2];
            $local_as =~ s/AS//;
            #print "$local_ip\n"; 

        } elsif (/^ASPATH/) {

            my @nodes_tmp = split '\s', $_;
                # Drop the leading "ASPATH:" token.
                shift @nodes_tmp;       
            my $index = 0;

            # Stored flat: node, 1-based position, node, position, ...
            foreach my $node (@nodes_tmp) {
                  $index++;
                  push @nodes_and_index, ($node , $index);
             }  

        }elsif (/^NEXT_HOP/) {

            my @next_hop_tmp = split '\s', $_;
            $next_hop = $next_hop_tmp[1];  

        }elsif (/^WITHDRAW/) {
            # Read the following lines for as long as they start with a space.
            # The first line that does not is consumed by this inner loop and
            # is not examined by the if/elsif chain above.
            while (<IN>) {       
                     last if !/^ +/;     
                     # NOTE(review): there is no `push` here, so this statement
                     # has no effect and @withdraw_prefix stays empty.
                     @withdraw_prefix, $_ ;             
                 }


        }elsif (/^ANNOUNCE/) {
                # Same line-consuming loop as the WITHDRAW branch.
                while (<IN>) {        
                        last if !/^ +/;    
                push @announce_prefix, $_;

                 }  
            }


        # NOTE(review): despite the comment below, this runs at the end of
        # every pass of the line loop once $record_id is set, not only for the
        # last record. Each pass appends the current array contents again, so
        # the stored arrays accumulate repeated copies.
        if ($record_id) { # handle last result
            $hash{$record_id}{$recordcnt}{'peer_as'}    = $peer_as;
            $hash{$record_id}{$recordcnt}{'peer_ip'}    = $peer_ip;
            $hash{$record_id}{$recordcnt}{'local_as'}   = $local_as;
            $hash{$record_id}{$recordcnt}{'local_ip'}   = $local_ip;
            $hash{$record_id}{$recordcnt}{'next_hop'}   = $next_hop;

            push @{$hash{$record_id}{$recordcnt}{'nodes_and_index'}} ,@nodes_and_index;  
            push @{$hash{$record_id}{$recordcnt}{'withdraw_prefix'}} ,@withdraw_prefix;
            push @{$hash{$record_id}{$recordcnt}{'announce_prefix'}} ,@announce_prefix;

        }
    }
    close IN;
}  
# Print every entry of %hash in sorted key order.
my @arraystuff;
my @separated;
# NOTE(review): these two arrays are never cleared, so they keep growing
# across all records printed below.
my @iindex=();
my @ppath=();
foreach (sort keys %hash) {

    # $_ is the record id (the first-level hash key).
    print $_, "\n";
    # The second-level key is whatever value $recordcnt holds after parsing.
    print "\t $hash{$_}{$recordcnt}{'peer_ip'}\n";
    print "\t $hash{$_}{$recordcnt}{'peer_as'}\n";
    print "\t $hash{$_}{$recordcnt}{'local_ip'}\n";
    print "\t $hash{$_}{$recordcnt}{'local_as'}\n"; 
    print "\t $hash{$_}{$recordcnt}{'next_hop'}\n";

    @arraystuff = @{$hash{$_}{$recordcnt}{'nodes_and_index'}};
    # NOTE(review): the parsing loop pushes node and index as separate,
    # alternating elements, so each element here is a single token. split
    # therefore yields one field and $separated[1] is undefined.
    foreach (@arraystuff) {
         @separated = split('\s', $_);
         push @iindex, $separated[1];
         push @ppath, $separated[0];
         # Both running lists are printed on every pass, without a newline.
         print "\t index: @iindex";
         print "\t path: @ppath";
        }


    # The array fields are interpolated space-separated.
    print "\t node index : @{$hash{$_}{$recordcnt}{'nodes_and_index'}}\n";    
    print "\t withdraw_prefix: @{$hash{$_}{$recordcnt}{'withdraw_prefix'}}\n"; 
    print "\t announce: @{$hash{$_}{$recordcnt}{'announce_prefix'}}\n"; 
}

=============================================== ===========================================

Foibs建议的新版本

#!/usr/bin/perl -w

use strict;
use warnings;


# Second attempt: collect each record into a hash reference ($tmphash) and
# append it to @datasetarray instead of using a nested hash.
my @datasetarray;
# "\/" inside a double-quoted string is just "/".
my $Dir = "/root/updates\/processed/";
# Only referenced by the commented-out debug line inside the read loop.
my $exit = 0;  

opendir(DIRECTORY, $Dir) or die $!;

while (my $file = readdir(DIRECTORY)) { 

    # Skip directory entries whose name does not end in ".hr".
    unless ($file=~/\.hr$/){next;}
    #unless ($file=~/\.txt$/){next;}

    # This inner $file shadows the loop variable and holds the full path.
    my $file = $Dir."/".$file;
    print "$file\n";

    open (IN, $file) or die "error reading file: ", $file,"\n";

    # Fields of the record currently being collected.
    my $record_id = "";
    my $type = "";
    my $peer_ip = "";
    my $peer_as = "";
    my $local_ip = "";
    my $local_as = "";
    my $next_hop = "";
    my @nodes_and_index = ();
    my @withdraw_prefix = ();
    my @announce_prefix = ();


    # Hash reference that receives the fields of one record.
    my $tmphash = {};

    while (<IN>) {          

        #$exit++; last if ($exit==5);

        if (/^TIME/) {


            # A TIME line starts a new record: copy the non-empty scalar
            # fields of the previous record into $tmphash.
            if ($type) {$tmphash->{'type'} = $type;}
            if ($peer_ip) {$tmphash->{'peer_ip'} = $peer_ip;}
            if ($peer_as) {$tmphash->{'peer_as'} = $peer_as;}
            if ($local_ip) {$tmphash->{'local_ip'} = $local_ip;}
            if ($local_as) {$tmphash->{'local_as'} = $local_as;}
            if ($next_hop) {$tmphash->{'next_hop'} = $next_hop;}
               # NOTE(review): the three lines below lack the "@" dereference;
               # push needs an array, i.e. push @{$tmphash->{...}}, LIST.
               #if (@nodes_and_index) {push {$tmphash->{'nodes_and_index'}}, @nodes_and_index;}  
            #if (@withdraw_prefix) {push {$tmphash->{'withdraw_prefix'}}, @withdraw_prefix;}
            #if (@announce_prefix) {push {$tmphash->{'announce_prefix'}}, @announce_prefix;}

#The three commented lines above provide error, thus i don't know if i am implementing it the right way, since they are array and different from the others.

            $peer_as = "";
            $peer_ip = "";
            $type = "";
            $local_ip = "";
            $local_as = "";
            $next_hop = "";
            $record_id = "";
            @nodes_and_index = ();
            @withdraw_prefix = ();
            @announce_prefix = ();


            # "TIME: <date> <time>" becomes the record id "<date>_<time>".
            # NOTE(review): $record_id is never stored in $tmphash.
            my @time = split '\s', $_;
            $record_id = $time[1]."_".$time[2];


        } elsif (/^TYPE/) {
            my @type_tmp = split '\s', $_;
            $type = $type_tmp[1];

        } elsif (/^FROM/) {
            # Second token is the peer IP, third is the peer AS with "AS" removed.
            my @from_tmp = split '\s', $_;
            $peer_ip = $from_tmp[1];
            $peer_as = $from_tmp[2];
            $peer_as =~ s/AS//;

        } elsif (/^TO/) {
            # Second token is the local IP, third is the local AS with "AS" removed.
            my @to_tmp = split '\s', $_;
            $local_ip = $to_tmp[1];
            $local_as = $to_tmp[2];
            $local_as =~ s/AS//;

        } elsif (/^ASPATH/) {

            my @nodes_tmp = split '\s', $_;
                # Drop the leading "ASPATH:" token.
                shift @nodes_tmp;       
            my $index = 0;

            # Stored flat: node, 1-based position, node, position, ...
            foreach my $node (@nodes_tmp) {
                    $index++;
            push @nodes_and_index, ($node , $index); 
             }  

        }elsif (/^NEXT_HOP/) {

            my @next_hop_tmp = split '\s', $_;
            $next_hop = $next_hop_tmp[1];  

        }elsif (/^WITHDRAW/) {
            # Read the following lines for as long as they start with a space.
            # The first line that does not is consumed by this inner loop.
            while (<IN>) {       
                     last if !/^ +/;    
                     push @withdraw_prefix, $_ ;           

                 }


        }elsif (/^ANNOUNCE/) {

                 # Same line-consuming loop as the WITHDRAW branch.
                 while (<IN>) {        
                     last if !/^ +/;    
                     push @announce_prefix, $_;

                 }  

            }


        # NOTE(review): this runs at the end of every pass of the line loop
        # once $record_id is set, so a hash is pushed and replaced by an empty
        # one after each line, not once per record.
        if ($record_id) { # handle last result
            push @datasetarray, $tmphash;
            $tmphash = {};
        }
    }
    close IN; 
}  

# Print the scalar fields of every hash reference collected in @datasetarray.
foreach my $row (@datasetarray) {


    # NOTE(review): the loop variable is $row, so this loop does not set $_.
    print $_, "\n";                       #Time doesn't get printed
    print "\t $row->{'peer_ip'}\n";       #OK
    print "\t $row->{'peer_as'}\n";       #OK
    print "\t $row->{'local_ip'}\n";      #OK
    print "\t $row->{'local_as'}\n";      #OK
    print "\t $row->{'next_hop'}\n";      #OK
# NOTE(review): if these values are array references, interpolating them
# directly prints the reference itself; the elements need @{ ... }.
#   print "\t $row->{'nodes_and_index'}\n"; # Are these guys ok ? since they are arrays
#   print "\t $row->{'withdraw_prefix'}\n"; # Are these guys ok ? since they are arrays
#   print "\t $row->{'announce_prefix'}\n"; # Are these guys ok ? since they are arrays


}

=============================================== =============================

1 个答案:

答案 0 :(得分:1)

最简单的办法似乎是把$recordcnt放进键本身,例如 $record_id = $recordcnt.'_'.$time[1]."_".$time[2]; 并确保它永远不会在循环内被归零(你有一行 `$recordcnt = 0`,这是错的)。此外,我没有找到任何实际递增$recordcnt的地方。


然而,在我看来,使用散列数组(array of hashes)而不是单个散列会好得多。数组中的顺序与输入文件中的顺序相同,但您可以使用sort对其进行任意排序,而且不必折腾奇怪的计数器之类的东西。使用数组重写它并不困难。

首先,创建一个数组,将您的所有数据保存在脚本的开头(让我们称之为@myarray)。

在循环开始之前,创建一个散列引用(引用散列,更容易处理),它将包含一个对象。

my $tmphash = {};
while (<IN>) {    
......

现在把您代码中的$hash{$record_id}{$recordcnt}

替换为$tmphash->

(例如

if ($peer_ip) {$hash{$record_id}{$recordcnt}{'peer_ip'} = $peer_ip;}

现在变为

if ($peer_ip) {$tmphash->{'peer_ip'} = $peer_ip;}等等)

当您确定已经把整个对象收集到tmphash中时,只需把tmphash push到数组里,重新初始化tmphash,然后继续处理下一个对象。

# Append the finished record (a hash reference) to the end of the array.
push @myarray, $tmphash;
# Point $tmphash at a new, empty hash for the next record.
$tmphash = {};

全部完成!现在您需要做的就是遍历数组以打印数据

foreach my $row (@myarray) {
  print "\t $row->{'peer_ip'}\n";
  #... and so on

修改

我冒昧地修复了你的脚本。其中有一些小错误和一个主要的逻辑错误。我没有删除你的任何代码,只是注释掉了一些行,并添加了一些我自己的代码。我更改或添加的所有行都在行尾标有#~#~,因此您可以轻松跟踪它们并查看差异。

#!/usr/bin/perl -w

use strict;
use warnings;


# Parse every *.hr file in $Dir into @datasetarray: one hash reference per
# record, in the order the records are read. Each hash holds the non-empty
# scalar fields (type, peer_ip, peer_as, local_ip, local_as, next_hop), the
# non-empty array fields (nodes_and_index, withdraw_prefix, announce_prefix)
# and the record id under 'time'.
my @datasetarray;
my $Dir = "/root/updates/processed/";
# Only referenced by the commented-out debug line inside the read loop.
my $exit = 0;  

opendir(DIRECTORY, $Dir) or die $!;

while (my $file = readdir(DIRECTORY)) { 

    # Skip directory entries whose name does not end in ".hr".
    unless ($file=~/\.hr$/){next;}
    #unless ($file=~/\.txt$/){next;}

    # This inner $file shadows the loop variable and holds the full path.
    my $file = $Dir."/".$file;
    print "$file\n";

    # Three-argument open: the path is only ever opened for reading, whatever
    # characters the directory entry contains.
    open (IN, '<', $file) or die "error reading file: ", $file,"\n"; #~#~

    # Fields of the record currently being collected.
    my $record_id = "";
    my $type = "";
    my $peer_ip = "";
    my $peer_as = "";
    my $local_ip = "";
    my $local_as = "";
    my $next_hop = "";
    my @nodes_and_index = ();
    my @withdraw_prefix = ();
    my @announce_prefix = ();


    # Hash reference that receives the fields of one record.
    my $tmphash = {};

    # Labelled so the prefix branches below can `redo` it.
    LINE: while (<IN>) { #~#~

        #$exit++; last if ($exit==5);

        if (/^TIME/) {

            # A TIME line starts a new record: copy everything collected for
            # the previous record into $tmphash ...
            if ($type) {$tmphash->{'type'} = $type;}
            if ($peer_ip) {$tmphash->{'peer_ip'} = $peer_ip;}
            if ($peer_as) {$tmphash->{'peer_as'} = $peer_as;}
            if ($local_ip) {$tmphash->{'local_ip'} = $local_ip;}
            if ($local_as) {$tmphash->{'local_as'} = $local_as;}
            if ($next_hop) {$tmphash->{'next_hop'} = $next_hop;}
            # The array fields need the @{ ... } dereference so that push
            # receives an array (created on first use).
            if (@nodes_and_index) {push @{$tmphash->{'nodes_and_index'}}, @nodes_and_index;}  #~#~
            if (@withdraw_prefix) {push @{$tmphash->{'withdraw_prefix'}}, @withdraw_prefix;}  #~#~
            if (@announce_prefix) {push @{$tmphash->{'announce_prefix'}}, @announce_prefix;}  #~#~

            # ... and store it. $record_id is empty before the first TIME line
            # of a file, so nothing is stored at that point.
            if ($record_id) {  #~#~
                $tmphash->{'time'} = $record_id; #~#~
                push @datasetarray, $tmphash;#~#~
                $tmphash = {};#~#~
            } #~#~

            $peer_as = "";
            $peer_ip = "";
            $type = "";
            $local_ip = "";
            $local_as = "";
            $next_hop = "";
            $record_id = "";
            @nodes_and_index = ();
            @withdraw_prefix = ();
            @announce_prefix = ();


            # "TIME: <date> <time>" becomes the record id "<date>_<time>".
            my @time = split '\s', $_;
            $record_id = $time[1]."_".$time[2];


        } elsif (/^TYPE/) {
            my @type_tmp = split '\s', $_;
            $type = $type_tmp[1];

        } elsif (/^FROM/) {
            # Second token is the peer IP, third is the peer AS with "AS" removed.
            my @from_tmp = split '\s', $_;
            $peer_ip = $from_tmp[1];
            $peer_as = $from_tmp[2];
            $peer_as =~ s/AS//;

        } elsif (/^TO/) {
            # Second token is the local IP, third is the local AS with "AS" removed.
            my @to_tmp = split '\s', $_;
            $local_ip = $to_tmp[1];
            $local_as = $to_tmp[2];
            $local_as =~ s/AS//;

        } elsif (/^ASPATH/) {

            my @nodes_tmp = split '\s', $_;
            # Drop the leading "ASPATH:" token.
            shift @nodes_tmp;
            my $index = 0;

            # Stored flat: node, 1-based position, node, position, ...
            foreach my $node (@nodes_tmp) {
                $index++;
                push @nodes_and_index, ($node , $index);
            }

        } elsif (/^NEXT_HOP/) {

            my @next_hop_tmp = split '\s', $_;
            $next_hop = $next_hop_tmp[1];  

        } elsif (/^WITHDRAW/) {
            # Collect the following lines for as long as they start with a space.
            while (<IN>) {
                last if !/^ +/;
                push @withdraw_prefix, $_ ;
            }
            # The inner loop has already read the line that ended the list.
            # Run the if/elsif chain on that line instead of dropping it, so a
            # TIME or ANNOUNCE line directly after the prefixes is not lost.
            # $_ is undefined only when the inner loop stopped at end of file.
            redo LINE if defined $_; #~#~

        } elsif (/^ANNOUNCE/) {
            # Collect the following lines for as long as they start with a space.
            while (<IN>) {
                last if !/^ +/;
                push @announce_prefix, $_;
            }
            # Same as in the WITHDRAW branch: re-examine the terminating line.
            redo LINE if defined $_; #~#~

        }


        #if ($record_id) { # handle last result #~#~
       #     push @datasetarray, $tmphash;#~#~
       #     $tmphash = {};#~#~
       # }#~#~
    }
    close IN; 

    #insert the last element of the file
    # (no TIME line follows the last record, so it is stored here instead)
    if ($type) {$tmphash->{'type'} = $type;} #~#~
    if ($peer_ip) {$tmphash->{'peer_ip'} = $peer_ip;} #~#~
    if ($peer_as) {$tmphash->{'peer_as'} = $peer_as;} #~#~
    if ($local_ip) {$tmphash->{'local_ip'} = $local_ip;} #~#~
    if ($local_as) {$tmphash->{'local_as'} = $local_as;} #~#~
    if ($next_hop) {$tmphash->{'next_hop'} = $next_hop;}  #~#~
    if (@nodes_and_index) {push @{$tmphash->{'nodes_and_index'}}, @nodes_and_index;}  #~#~
    if (@withdraw_prefix) {push @{$tmphash->{'withdraw_prefix'}}, @withdraw_prefix;}  #~#~
    if (@announce_prefix) {push @{$tmphash->{'announce_prefix'}}, @announce_prefix;}  #~#~

    if ($record_id) {  #~#~
        $tmphash->{'time'} = $record_id; #~#~
        push @datasetarray, $tmphash;#~#~
        $tmphash = {};#~#~
    } #~#~
}  
# Release the directory handle once every entry has been processed.
closedir(DIRECTORY); #~#~

# Print every record collected in @datasetarray: the record id and scalar
# fields first, then each array field that was actually stored.
foreach my $row (@datasetarray) {


    #print $_, "\n";                       #Time doesn't get printed #~#~
    print "\t $row->{'time'}\n";       #~#~
    print "\t $row->{'peer_ip'}\n";       #OK
    print "\t $row->{'peer_as'}\n";       #OK
    print "\t $row->{'local_ip'}\n";      #OK
    print "\t $row->{'local_as'}\n";      #OK
    print "\t $row->{'next_hop'}\n";      #OK
#you can print array refs like this, just make a check that they are declared
   print "\t @{$row->{'nodes_and_index'}}\n" if ref $row->{'nodes_and_index'} eq 'ARRAY';#~#~
   print "\t @{$row->{'withdraw_prefix'}}\n" if ref $row->{'withdraw_prefix'} eq 'ARRAY';#~#~
   # Third array field: announce_prefix (this line used to repeat withdraw_prefix).
   print "\t @{$row->{'announce_prefix'}}\n" if ref $row->{'announce_prefix'} eq 'ARRAY';#~#~


}