检测空的xml元素标记

时间:2014-04-03 08:03:37

标签: perl

我必须解析xml文档,解码特定元素的base64值并打印出这些字段和相应的解码值。一些元素没有值我想打印元素的名称和“无值”字符串或只是\ n,但不知怎的,我无法匹配空字符串''或未定义的值。 示例输入文件:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
  <datatype>Report</datatype>
  <reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
  <sequence></sequence>
  <calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
  <IP></IP>
  <called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
  <start>26/02/14 09:28:55</start>
  <settings></settings>
  <direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
  <result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
  <fax-tif></fax-tif>
  <fax-jpg></fax-jpg>
  <fax-png></fax-png>
  <audio-wav></audio-wav>
  <audio-mp3></audio-mp3>
  <sms></sms>
  <mms></mms>
</ums>

我的节目:

#!/usr/bin/perl -w

 use XML::Parser;
 use MIME::Base64;
 use feature qw/switch/;

 my $message;      # Hashref containing infos on a message
 while ($file = shift(@ARGV)) { 
 print "========================================================\n";
 print "file: $file \n";
 print "========================================================\n";
 my $parser = new XML::Parser ( Handlers => {   # Creates parser object
                              Start   => \&hdl_start,
                              End     => \&hdl_end,
                              Char    => \&hdl_char,
                            });
  $parser->parsefile($file);  
print "========================================================\n";
}
  sub hdl_start{
      my ($p, $elt, %atts) = @_;
      $atts{'_str'} = '';
      given ($elt) {
      when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/)) {
          $message = \%atts;
      }
      when (/^audio-mp3/) {
          $message = \%atts;
      }
      when (/^audio-mp3/) {
          $message = \%atts;
      }
      when (/^mms/) {
          $message = \%atts;
      }
    }  

  }

 sub hdl_end{
  my ($p, $elt) = @_;
  given ($elt) {
      when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/))  {
         print "$elt : " ;
         format_message($message) if $message && $message->{'_str'} =~ /\S/;
      }
      when (/^audio-mp3/) {
          print "audio content in $file\.mp3\n" ;
          format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
      }
     when (/^audio-wav/) {
          print "audio content in $file\.wav\n" ;
          format_wav($message) if $message && $message->{'_str'} =~ /\S/;
#             print "$Audiowav->{'_str'}";
          }
     when (/^mms/) {
          print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" ;
          format_mms($message) if $message && $message->{'_str'} =~ /\S/;
    }  

  }
}

  sub hdl_char {
      my ($p, $str) = @_;
      $message->{'_str'} .=$str;
  }

  sub hdl_def { }  # We just throw everything else

  sub format_message { # Helper sub to nicely format what we got from the XML and decode base64 values of the needed attributes
      my $atts = shift;
      $atts->{'_str'} =~ s/\n//g;
      if (!defined($atts->{'_str'})) { print "\n"}
  my $decoded = decode_base64($atts->{'_str'});
      print " $decoded\n";
#      if (!$decoded) {print "\n"}
      undef $message;
 }
sub format_mp3 { # decode base64 content and save to mp3 file - for the audio-mp3 tag
      my $atts = shift;
      open FILE, "> ./$file.mp3" or die $!;
  $atts->{'_str'} =~ s/\n//g; 
#     print "mp3: $atts $atts->{'_str'}\n";
  my $decoded = decode_base64($atts->{'_str'});
  print FILE $decoded;
  close FILE;
  undef $message;
 }
sub format_wav { # decode base64 content and save to wav file - for the audio-wav tag
      my $atts = shift;
      open FILE, "> ./$file.wav" or die $!;
  $atts->{'_str'} =~ s/\n//g; 
  print "wav: $atts $atts->{'_str'}\n";
  my $decoded = decode_base64($atts->{'_str'});
  print "$decoded\n";
  print FILE $decoded;
  close FILE;
  undef $message;
 }
sub format_mms { # decode mms base64 content and save to file - depending on the content further processing may be needed
      my $atts = shift;
      open FILE, "> ./$file.wav" or die $!;
  $atts->{'_str'} =~ s/\n//g; 
  print "wav: $atts $atts->{'_str'}\n";
  my $decoded = decode_base64($atts->{'_str'});
  print "$decoded\n";
  print FILE $decoded;
  close FILE;
  undef $message;
 }

我在format_message子例程中尝试了不同的匹配,我也尝试过在hdl_end中 - 任何想法?

提前致谢

2 个答案:

答案 0 :(得分:2)

使用XML::LibXML

use strict;
use warnings;

use XML::LibXML;

my $string = do { local $/; <DATA> };

my $dom = XML::LibXML->load_xml(string => $string);

for my $node ($dom->findnodes(q{//*})) {
    if ($node->textContent() eq '') {
        print $node->nodeName, "\n";
    }
}


__DATA__
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet href="ums_v1.xsl" type="text/xsl"?>
<ums>
  <datatype>Report</datatype>
  <reference><![CDATA[NDkxNzYwNDAyNjMwODAy]]></reference>
  <sequence></sequence>
  <calling-party><![CDATA[NDkxNzYwNDAyNjMwOA==]]></calling-party>
  <IP></IP>
  <called-party><![CDATA[NDk4OTI0NDI0Mzc0]]></called-party>
  <start>26/02/14 09:28:55</start>
  <settings></settings>
  <direction><![CDATA[Z2VzZW5kZXQ=]]></direction>
  <result><![CDATA[ZXJmb2xncmVpY2g=]]></result>
  <fax-tif></fax-tif>
  <fax-jpg></fax-jpg>
  <fax-png></fax-png>
  <audio-wav></audio-wav>
  <audio-mp3></audio-mp3>
  <sms></sms>
  <mms></mms>
</ums>

输出:

sequence
IP
settings
fax-tif
fax-jpg
fax-png
audio-wav
audio-mp3
sms
mms

答案 1 :(得分:0)

解决方案是测试结束处理程序中的空标记,例如:

sub hdl_end{
      my ($p, $elt) = @_;
      given ($elt) {
          when ((/^reference/) || (/^sequence/) || (/^calling-party/) || (/^called-party/) || (/^settings/) || (/^direction/) || (/^sms/) || (/^result/))  {
             print "$elt : " ;
             format_message($message) if $message && $message->{'_str'} =~ /\S/;
             print "no value\n" if $message && $message->{'_str'} !~ /\S/;
          }
          when (/^audio-mp3/) {
              print "audio content in $file\.mp3\n" if $message && $message->{'_str'} =~ /\S/;
              format_mp3($message) if $message && $message->{'_str'} =~ /\S/;
          }
         when (/^audio-wav/) {
              print "audio content in $file\.wav\n" if $message && $message->{'_str'} =~ /\S/ ;
              format_wav($message) if $message && $message->{'_str'} =~ /\S/;

          }
         when (/^mms/) {
              print "mms content in $file\.mms, depending on the mms content further processing may be needed\n" if $message && $message->{'_str'} =~ /\S/;
              format_mms($message) if $message && $message->{'_str'} =~ /\S/;
        }  

  }
}

所以if $ message-&gt; {'_ str'}!〜/ \ S /是我需要的......谢谢大家的努力!