我正在使用Parse :: RecDescent来解析Cisco IOS ACL中的行。 ACL用于大型网络的边缘路由器,因此它包含由政府设置的近8k线路。我循环遍历每一行并将值放入哈希值。虽然它是8k线,我仍然花费超过14秒来解析线路?这听起来合理吗?对我来说似乎很慢。使用散列和其他数据结构有一些开销吗?
样本输入:(约8k或类似)
deny ip 2.3.4.5 0.0.0.7 any log-input
deny ip 5.6.7.8 0.0.0.255 any log-input
deny ip host 9.10.11.12 any log-input
deny ip 13.14.15.16 0.0.31.255 any log-input
permit tcp host 17.18.19.20 host 21.22.23.24 eq bgp
permit icmp 25.26.0.0 0.0.255.255 27.28.0.0 0.0.255.255
这是我的整个解析器:
package AccessList::Parser;
use strict;
use warnings;
use Carp;
use Scalar::Util 'blessed';
use Parse::RecDescent;
our $VERSION = '0.05';
sub new {
my ($class) = @_;
my $self = { PARSER => undef, };
bless $self, $class;
$self->_init();
return $self;
}
sub _init {
my ($self) = @_;
$self->{PARSER} = Parse::RecDescent->new( $self->_grammar() );
}
sub parse {
my ( $self, $string ) = @_;
defined ($string) or confess "blank line received";
my $tree = $self->{PARSER}->startrule($string);
defined($tree) or confess "unrecognized line\n";
return visit($tree);
}
#
# Finished tests
#
sub visit {
my ($node) = @_;
my $Rule_To_Key_Map = {
"acl_action" => 1,
"acl_protocol" => 1,
"acl_src_ip" => 1,
"acl_src_port" => 1,
"acl_dst_ip" => 1,
"acl_dst_port" => 1,
"acl_remark" => 1
};
my $parent_key;
my $result;
# set s of explored vertices
my %seen;
#stack is all neighbors of s
my @stack;
push @stack, [ $node, $parent_key ];
my $key;
while (@stack) {
my $rec = pop @stack;
$node = $rec->[0];
$parent_key = $rec->[1]; #undef for root
next if ( $seen{$node}++ );
my $rule_id = ref($node);
if ( exists( $Rule_To_Key_Map->{$rule_id} ) ) {
$parent_key = $rule_id;
}
foreach my $key ( keys %$node ) {
next if ( $key eq "EOL" );
my $next = $node->{$key};
if ( blessed($next) ) {
if ( exists( $next->{__VALUE__} ) ) {
#print ref($node), " ", ref($next), " ", $next->{__VALUE__},"\n";
my $rule = ref($node);
my $token = $next->{__VALUE__};
$result->{$parent_key} = $token;
#print $rule, " ", $result->{$rule}, "\n";
}
push @stack, [ $next, $parent_key ];
#push @stack, $next;
}
}
}
return $result;
}
sub _grammar {
my ($self) = @_;
my $grammar = q{
<autotree>
startrule :
access_list EOL
| acl_remark EOL
| <error>
#
# access-lists
#
access_list : acl_action
acl_remark :
"remark" REMARKS
acl_action :
ACTIONS acl_protocol
#
# protocol options
#
acl_protocol :
PROTOCOL acl_src_ip
#
# access-list source IP addresses
#
acl_src_ip :
address acl_dst_ip
| address acl_src_port
#
# access-list source ports
#
acl_src_port :
port acl_dst_ip
#
# access-list destination IP address
#
acl_dst_ip :
address acl_dst_port
| address acl_options
| address CONNECTION_TYPE
| address LAYER3_OPTIONS
| IPRANGE
#
# access-list destination ports
#
acl_dst_port :
port acl_options
| acl_icmp_type acl_options
#
# icmp_types
#
acl_icmp_type :
ICMP_TYPE
#
# access-list options
#
acl_options :
acl_logging LAYER3_OPTIONS
| acl_logging
| EOL
| <error>
acl_logging :
"log-input"
| "log"
#
# IP address types
#
# "object" should be fine here because "object" can not
# be used to specify ports
address :
"host" IPADDRESS
| "host" NAME
| IPNETWORK
| WILDCARD_NETWORK
| ANY
#
# port types
#
port :
port_eq
| port_range
| port_gt
| port_lt
| port_neq
port_eq :
"eq" PORT_ID
port_range :
"range" PORT_RANGE
port_gt :
"gt" PORT_GT
port_lt :
"lt" PORT_LT
port_neq :
"neq" <error: neq is unsupported>
#
# Token Definitions
#
STRING :
/\S+/
DIGIT :
/\d+/
NAME :
/((^|\s[a-zA-Z])(\.|[0-9a-zA-Z_-]+)+)/
RULE_REF :
/\S+/
ANY:
"any"
IPADDRESS :
/((\d{1,3})((\.)(\d{1,3})){3})/
MASK :
/(((255\.){3}(255|254|252|248|240|224|192|128|0+))|((255\.){2}(255|254|252|248|240|224|192|128|0+)\.0)|((255\.)(255|254|252|248|240|224|192|128|0+)(\.0+){2})|((255|254|252|248|240|224|192|128|0+)(\.0+){3}))/
INVERSE_MASK :
/(0+|1|3|7|15|31|63|127|255)((\.)(255|127|63|31|15|7|3|1|0)){3}/
WILDCARD_NETWORK :
/((\d{1,3})((\.)(\d{1,3})){3}) (0+|1|3|7|15|31|63|127|255)((\.)(255|127|63|31|15|7|3|1|0)){3}/
IPNETWORK :
/((\d{1,3})((\.)(\d{1,3})){3}) (((255\.){3}(255|254|252|248|240|224|192|128|0+))|((255\.){2}(255|254|252|248|240|224|192|128|0+)\.0)|((255\.)(255|254|252|248|240|224|192|128|0+)(\.0+){2})|((255|254|252|248|240|224|192|128|0+)(\.0+){3}))/
IPRANGE :
/((\d{1,3})((\.)(\d{1,3})){3}) ((\d{1,3})((\.)(\d{1,3})){3})/
PROTOCOL :
/\d+/ | "ahp" | "eigrp" | "esp" | "gre" | "icmp" | "icmp6" | "igmp"
| "igrp" | "ip" | "ipinip" | "ipsec" | "nos" | "ospf" | "pcp"
| "pim" | "pptp" | "snp" | "tcp" | "udp" | "41"
GROUP_PROTOCOL :
"tcp-udp" | "tcp" | "udp"
ICMP_TYPE :
/\d+/ | "alternate-address" | "conversion-error" | "echo-reply" | "echo"
| "information-reply" | "information-request" | "mask-reply" | "mask-request"
| "mobile-redirect" | "parameter-problem" | "redirect" | "router-advertisement"
| "router-solicitation" | "source-quench" | "time-exceeded" | "timestamp-reply"
| "timestamp-request" | "traceroute" | "unreachable"
CONNECTION_TYPE:
"established"
LAYER3_OPTIONS:
"fragments" | "packet-too-big"
PORT_ID :
/\S+/
PORT_GT :
/\S+/
{
bless {__VALUE__=>"$item[1] 65535"}, $item[0]
}
PORT_LT :
/\S+/
{
bless {__VALUE__=>"1 $item[1]"}, $item[0]
}
PORT_RANGE :
/\S+ \S+/
ACTIONS :
"permit"
| "deny"
REMARKS :
/.*$/
LOG_LEVEL :
/\d+/ | "emergencies" | "alerts" | "critical" | "errors"
| "warnings" | "notifications" | "informational" | "debugging"
| "disable"
EOL :
/$/
};
return $grammar;
}
1;
答案 0 :(得分:2)
性能问题:
address
中的acl_dst_ip
,IPRANGE
中的acl_dst_ip
access_list
)功能问题:
remarkfoo
视为remark
。其他地方也有类似的错误。0.0.127.4 0.0.127.255
视为“从0.0.127.4到0.0.127.255”和“从0.0.0.0到0.0.127.255”。 (第一个发现胜利,所以它被视为“从0.0.0.0到0.0.127.255”。)甚至不应该在解析器中进行区分。我开始修改你的代码。 (完全没有测试)
# make_parser.pl
use strict;
use warnings;
use Parse::RecDescent qw( );
my $grammar = <<'__EOI__';
{
use strict;
use warnings;
use Socket qw( inet_aton );
my %protocol_names = map { $_ => 1 } qw(
ahp eigrp esp gre icmp icmp6 igmp
igrp ip ipinip ipsec nos ospf pcp
pim pptp snp tcp udp
);
my %protocol_group_names = map { $_ => 1 } qw(
tcp-udp tcp udp
);
my %icmp_type_names = map { $_ => 1 } qw(
alternate-address conversion-error echo-reply echo
information-reply information-request mask-reply mask-request
mobile-redirect parameter-problem redirect router-advertisement
router-solicitation source-quench time-exceeded timestamp-reply
timestamp-request traceroute unreachable
);
sub parse_ipv4_addr {
my ($addr) = @_;
return inet_aton($addr);
}
}
parse : <skip: qr/[ \t]*/> line(s) /\Z/ { $item[2] }
line : line_body /\n|\Z/ { $item[1] }
line_body : PERMIT <commit> permit_deny_args { [ $item[1], $item[3] ] }
| DENY <commit> permit_deny_args { [ $item[1], $item[3] ] }
| REMARK <commit> /[^\n]*/ { 0 }
| /[ \t]+/ { 0 }
permit_deny_args : protocol permit_deny_src permit_deny_dst { [ @item[1,2,3] ] }
permit_deny_src : addrs ports { [ @item[1, 2] ] }
permit_deny_dst : ...
addrs : HOST <commit> ( IPv4_ADDR | DOMAIN ) { [ host => $item[3] ] }
| IPv4_ADDR <commit> IPv4_ADDR { [ range => $item[1], $item[3] ] }
| ANY <commit> { [ any => ] }
ports : EQ <commit> IDENT { [ permit => $item[2], $item[2] ] }
| NEQ <commit> IDENT { [ deny => $item[2], $item[2] ] }
| GT <commit> IDENT { [ deny => 1, $item[2] ] }
| LT <commit> IDENT { [ deny => $item[2], 65535 ] }
| RANGE <commit> IDENT IDENT { [ permit => $item[2], $item[3] ] }
| { [ permit => 1, 65535 ] }
# Rules that match simply return what they match (i.e. no type info is returned).
PROTOCOL_NAME : IDENT { $protocol_names{$item[1]} ? $item[1] : undef }
DOMAIN : ...
IPv4_ADDR : /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+/ { parse_ipv4_addr($item[1]) }
# Keywords
REMARK : IDENT { $item[1] eq 'remark' ? $item[1] : undef }
PERMIT : IDENT { $item[1] eq 'permit' ? $item[1] : undef }
DENY : IDENT { $item[1] eq 'deny' ? $item[1] : undef }
ANY : IDENT { $item[1] eq 'any' ? $item[1] : undef }
EQ : IDENT { $item[1] eq 'eq' ? $item[1] : undef }
NEQ : IDENT { $item[1] eq 'neq' ? $item[1] : undef }
LT : IDENT { $item[1] eq 'lt' ? $item[1] : undef }
GT : IDENT { $item[1] eq 'gt' ? $item[1] : undef }
IDENT : /[a-zA-Z][a-zA-Z0-9_]*/
__EOI__
Parse::RecDescent->Precompile($grammar, 'Parser')
or die("Bad grammar\n");
运行上面的文件,然后你可以按如下方式使用解析:
# test.pl
use strict;
use warnings;
use Data::Dumper qw( Dumper );
use Parser qw( );
my $text = '...';
my $parser = Parser->new();
print(Dumper($parser->parse($text)));
答案 1 :(得分:0)
Parse :: RecDescent很慢。更重要的是,递归下降解析器往往很慢。获得速度大幅提升的最佳方法是切换到不同的解析器,例如像Parse::Yapp这样的LALR解析器。
如果您想尝试一种不那么激烈的方法,请参阅optimizing your grammars的指南。
答案 2 :(得分:-1)
如果您的应用程序运行缓慢,请听起来需要对其进行分析。如今,Devel::NYTProf是首选的探测器。