运行perl脚本以解析OpenStreetMap XML文件时出现“Out of memory”错误

时间:2013-01-27 21:06:52

标签: xml perl xml-parsing openstreetmap

我使用的是Debian GNU / Linux Squeeze。

当我尝试运行此脚本时,我收到一个停止运行脚本错误消息“Out of memory!”。

脚本:

use XML::Parser;
my $Filename = shift();

# Temporary data
my (%MainAttr,$Type,%Tags, @WaySegments);
# Stats
my %AllTags;
# Stored data
my (@Nodes, @Segments, @Ways, %Stats);

# Processing stage
#----------------------------------------------
my $P = new XML::Parser(Handlers => {Start => \&DoStart, End => \&DoEnd, Char => \&DoChar});
$P->parsefile($Filename);
printf STDERR "Creating output files\n";


# Combine way data into segments
#----------------------------------------------
if(open(WAYS,">ways.txt")){
  foreach my $Way (@Ways){
    #printf WAYS "Way: %s,%s\n", $Way->{"segments"}, $Way->{"name"};
    my @SubSegments = split(/,/,$Way->{"segments"});
    $Stats{"empty ways"}++ if(scalar(@SubSegments) < 1); 

    printf WAYS "Copying keys: %s to segments %s\n",
      join(",",keys(%$Way)),
      join(",",@SubSegments);

    # Each segment in a way inherits the way's attributes
    foreach my $Segment(@SubSegments){
      foreach my $Key(keys(%$Way)){
        $Segments[$Segment]{$Key} = $Way->{$Key}
      }
    }
  }
  close WAYS;
}

# Main output (segments)
#----------------------------------------------
if(open(OSM, ">osm.txt")){
  foreach my $Segment(@Segments){
    my $From = $Segment->{"from"};
    my $To = $Segment->{"to"};
    $Stats{"segments without endpoints"}++ if($From == 0 or $To == 0);
    printf OSM "%f,%f,%f,%f,%s,%s,%s\n",
      $Nodes[$From]{"lat"},
      $Nodes[$From]{"lon"},
      $Nodes[$To]{"lat"},
      $Nodes[$To]{"lon"},
      $Segment->{"class"},
      $Segment->{"name"},
      $Segment->{"highway"};
  }
  close OSM;
}

# Secondary output (named points)
#----------------------------------------------
if(open(POINTS, ">points.txt")){
  foreach my $Node(@Nodes){
    $Stats{"Nodes with zero lat/long"}++ if($Node->{"lat"} == 0 and $Node->{"lon"} == 0);

    if($Node->{"name"} || $Node->{"amenity"} || $Node->{"class"}){
      printf POINTS "%f,%f,%s,%s,%s\n",
        $Node->{"lat"},
        $Node->{"lon"},
        $Node->{"name"},
        $Node->{"amenity"},
        $Node->{"class"};
      }
  }
  close POINTS;
}

# Statistics output
#----------------------------------------------
if(open(STATS, ">stats.txt")){
  foreach(sort {$AllTags{$b} <=> $AllTags{$a}} keys(%AllTags)){
    printf STATS "* %d %s\n", $AllTags{$_}, $_;
  }
  printf STATS "\n\nStats:\n";
  foreach(keys(%Stats)){
    printf STATS "* %d %s\n", $Stats{$_}, $_;
  }
}
printf STDERR "Done\n";
exit;

# Function is called whenever an XML tag is started
#----------------------------------------------
sub DoStart()
{
  my ($Expat, $Name, %Attr) = @_;

  if($Name eq "node"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "n";
  }
  if($Name eq "segment"){
    undef %Tags;
    %MainAttr = %Attr;
    $Type = "s";
  }
  if($Name eq "way"){
    undef %Tags;
    undef @WaySegments;
    %MainAttr = %Attr;
    $Type = "w";
  }
  if($Name eq "tag"){
    # TODO: protect against id,from,to,lat,long,etc. being used as tags
    $Tags{$Attr{"k"}} = $Attr{"v"};
    $AllTags{$Attr{"k"}}++;
    $Stats{"tags"}++;
  }
  if($Name eq "seg"){
    push(@WaySegments, $Attr{"id"});
  }
}

# Function is called whenever an XML tag is ended
#----------------------------------------------
sub DoEnd(){
  my ($Expat, $Element) = @_;
  if($Element eq "node"){
    my $ID = $MainAttr{"id"};
    $Nodes[$ID]{"lat"} = $MainAttr{"lat"};
    $Nodes[$ID]{"lon"} = $MainAttr{"lon"};
    foreach(keys(%Tags)){
      $Nodes[$ID]{$_} = $Tags{$_};
    }
    $Stats{"named nodes"}++ if($Nodes[$ID]{"name"});
    $Stats{"tagged nodes"}++ if($MainAttr{"tags"});
    $Stats{"nodes"}++;
    #print "Node:".join(",",keys(%Tags))."\n" if(scalar(keys(%Tags))>0);
  }
  if($Element eq "segment"){
    my $ID = $MainAttr{"id"};
    $Segments[$ID]{"from"} = $MainAttr{"from"};
    $Segments[$ID]{"to"} = $MainAttr{"to"};
    foreach(keys(%Tags)){
      $Segments[$ID]{$_} = $Tags{$_};
    }
    $Stats{"tagged segments"}++ if($MainAttr{"tags"});
    $Stats{"segments"}++;
  }
  if($Element eq "way"){
    my $ID = $MainAttr{"id"};
    $Ways[$ID]{"segments"} = join(",",@WaySegments);
    foreach(keys(%Tags)){
      $Ways[$ID]{$_} = $Tags{$_};
    }    
    $Stats{"Ways"}++;
  }
}

# Function is called whenever text is encountered in the XML file
#----------------------------------------------
sub DoChar(){
  my ($Expat, $String) = @_;
}

终端:

root@Delta:~/Perl/Map# perl convert.pl map.osm
Out of memory!

root@Delta:~/Perl/Map# ulimit -a
core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 16382
max locked memory       (kbytes, -l) 64
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) unlimited
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

我需要使用该脚本将OpenStreetMap XML文档转换为文本文件。

1 个答案:

答案 0 :(得分:2)

我发现您使用数字元素ID作为@nodes@ways的索引。我的测试中@nodes的有效索引从1010888852开始,因此perl尝试创建超过十亿个标量值设置为undef以填充元素0 .. {{ 1}}。

将这些变量更改为哈希(以及1010888851,在我的测试中为空但以相同的方式使用)似乎解决了这个问题。

请检查这个程序,它纠正了我发现的错误,并用Perl编写(你似乎是一个C程序员)。

@segments