Perl - 修改嵌套的多记录XML标记

时间:2013-01-11 18:51:52

标签: perl xml-parsing

我有一个包含多条记录的复杂XML文档,需要在Perl中解析它,并以另一种格式输出。

输入xml:

    <?xml version="1.0" encoding="UTF-8"?>
     <response>
     <result name="response" numFound="58582" start="0"> 
     <doc>
        <str name="body">Have a great time at this park!</str><int name="is_park_id">317851</int><str name="ss_image_thumb_small"/>
    <str name="title">Playground</str>    
     </doc>
     <doc>
<str name="body">Have a great time at this park!</str><int name="is_park_id">317851</int><str name="ss_image_thumb_small"/>
    <str name="title">Playground</str>    
     </doc>
     </result>
     </response> 

所需的输出格式为:

    <?xml version="1.0" encoding="iso-8859-1"?>
     <Feed>
     <Products>
     <Product>
<Description>Have a great time at this park!</Description><ExternalId>PF317851</ExternalId><ImageUrl/>
     <Name>Playground</Name>
</Product>
<Product>
<Description>Have a great time at this park!</Description><ExternalId>PF317851</ExternalId><ImageUrl/>
     <Name>Playground</Name>
</Product>
     </Products>
     </Feed>

<str name="body">应视为<Description>,<int name="is_park_id">应视为<ExternalId>,<str name="title">应视为<Name>。

您能帮助将XML格式化为新格式吗?

4 个答案:

答案 0 :(得分:1)

我刚刚在你的文件上尝试了XML::Simple。这是我的程序:

#! /usr/bin/env perl
#

use strict;
use warnings;
use feature qw(say);

use XML::Simple;
use Data::Dumper;

# Parse test.xml into a nested Perl data structure using XML::Simple's
# default options.
my $parsed = XMLin('test.xml');

# Print the resulting structure so its shape can be inspected.
say Dumper($parsed);

这是输出:

$VAR1 = {
          'result' => {
                      'name' => 'response',
                      'doc' => {
                               'int' => {
                                        'content' => '317851',
                                        'name' => 'is_park_id'
                                      },
                               'str' => {
                                        'body' => {
                                                  'content' => 'Have a great time at this park!'
                                                },
                                        'ss_image_thumb_small' => {},
                                        'title' => {
                                                   'content' => 'Playground'
                                                 }
                                      }
                             },
                      'numFound' => '58582',
                      'start' => '0'
                    }
        };

它完美地解析您的XML文件。

您是否了解Perl引用,以及如何在Perl中使用嵌套哈希(哈希的哈希)?这可能才是你的问题所在。

答案 1 :(得分:0)

我们走了:

#!/usr/bin/env perl
use 5.010;
use strict;
use warnings qw(all);

use Data::DPath qw(dpath);
use File::Slurp;
use XML::Hash::LX;

# Parse the input XML and store it as a nested Perl hash.
my $xml = xml2hash(read_file('input.xml'));

# Maps the "name" attribute of each input field to the output element name.
my $from_to = {
    body                 => 'Description',
    is_park_id           => 'ExternalId',
    ss_image_thumb_small => 'ImageUrl',
    title                => 'Name',
};

# Translate one bucket: for every known field, take the text of the first
# element in the parsed tree whose "name" attribute equals that field.
# Fields without a match (or without text) become the empty string.
my %result;
for my $field (keys %$from_to) {
    my $path = qq(//*[key eq "-name" and value eq "$field"]/../#text);

    # Use the explicit match() method instead of the overloaded smartmatch
    # operator (~~), which is experimental/deprecated in newer perls and
    # warns under "use warnings". match() returns the list of matches.
    my ($value) = dpath($path)->match($xml);

    $result{ $from_to->{$field} } = $value // '';
}

# Output the new XML document.
say hash2xml({
    Feed => {
        Products => {
            Product => \%result,
        },
    },
});

但是,使用XPath会更合适。

答案 2 :(得分:0)

我在Perl中使用XML::LibXML来满足我所有的XML需求。

use strict;
use warnings;

use XML::LibXML;

# Maps the "name" attribute of each input field to the output element name.
my %element_for = (
    body                 => 'Description',
    is_park_id           => 'ExternalId',
    ss_image_thumb_small => 'ImageUrl',
    title                => 'Name',
);

# Order in which the output elements are written inside each <Product>.
my @field_order = qw(body is_park_id ss_image_thumb_small title);

# One hashref per <doc>, keyed by the field's "name" attribute.
my @records;

# parse input

{
    my $dom = XML::LibXML->load_xml( location => 'input.xml' );

    # The <doc> elements are nested under <response>/<result>, not directly
    # under the root element.
    for my $doc_node ( $dom->findnodes('/response/result/doc') ) {

        my $record = {};

        # Select only child elements carrying a "name" attribute; this skips
        # the whitespace text nodes between fields, which have no attributes.
        for my $node ( $doc_node->findnodes('*[@name]') ) {
            $record->{ $node->getAttribute('name') } = $node->textContent;
        }

        push @records, $record;
    }
}

# create output

{
    # Encoding taken from the requested output format.
    my $dom = XML::LibXML::Document->new( '1.0', 'iso-8859-1' );

    # Elements are created by the document and must be attached explicitly.
    my $root_node = $dom->createElement('Feed');
    $dom->setDocumentElement($root_node);

    my $products_node = $dom->createElement('Products');
    $root_node->appendChild($products_node);

    for my $record ( @records ) {

        my $pnode = $dom->createElement('Product');
        $products_node->appendChild($pnode);

        for my $field ( @field_order ) {

            # Always emit the element so empty fields appear as e.g. <ImageUrl/>.
            my $child = $dom->createElement( $element_for{$field} );
            $pnode->appendChild($child);

            my $value = $record->{$field};
            next if !defined $value || $value eq '';

            # The requested output prefixes the park id with "PF".
            $value = "PF$value" if $field eq 'is_park_id';

            $child->appendText($value);
        }
    }

    # Passing 1 asks for indented (pretty-printed) output.
    print $dom->toString(1);
}

答案 3 :(得分:0)

这是XML::SAX / XML::Simple版本:

#!/usr/bin/env perl
package XML::SAX::Handler::Parks;
use strict;
use warnings qw(all);

use base qw(XML::SAX::Base);

# Construct a handler with empty parsing state.
#
# May be called as a class method (also from a subclass) or on an existing
# instance; the new object is blessed into the invocant's class instead of
# always into this package, so subclasses get objects of their own class.
#
# Returns a blessed hashref with these fields:
#   data     - text collected by characters() since the last start tag
#   from_to  - maps input field names to output element names
#   product  - fields of the record currently being read
#   products - finished records, one hashref per <doc>
#   this     - "name" attribute of the field currently being read
sub new {
    my ($class) = @_;

    # Keep working when invoked as a plain function without an invocant.
    $class = __PACKAGE__ if !defined $class;

    my %state = (
        data    => '',
        from_to => {
            body                 => 'Description',
            is_park_id           => 'ExternalId',
            ss_image_thumb_small => 'ImageUrl',
            title                => 'Name',
        },
        product  => {},
        products => [],
        this     => '',
    );

    return bless \%state, ref($class) || $class;
}

# SAX callback for an opening tag.
#
# Clears the collected text for every element. For <int> and <str>
# elements it also records the value of the "name" attribute in
# $self->{this}.
sub start_element {
    my $self    = shift;
    my $element = shift;

    $self->{data} = '';

    # Only <int> and <str> elements carry a field name.
    return if $element->{Name} !~ /^(?:int|str)$/x;

    for my $attribute (values %{$element->{Attributes}}) {
        next if $attribute->{Name} ne 'name';
        $self->{this} = $attribute->{Value};
    }

    return;
}

# SAX callback for a closing tag.
#
# On </int> or </str>: stores the collected text in the current product
# under the output name mapped from the current field name, then forgets the
# field name. Fields with no mapping are ignored.
# On </doc>: appends a copy of the current product to the products list and
# starts a fresh product.
#
# The text is stored only when a field element closes. Storing it on every
# closing tag would overwrite the last field of each record with its value
# plus the whitespace that precedes </doc>, and would use an undefined hash
# key whenever no mapped field is active.
sub end_element {
    my ($self, $el) = @_;
    my $name = $el->{Name};

    if ($name =~ /\A(?:int|str)\z/) {
        my $key = $self->{from_to}{ $self->{this} };
        $self->{product}{$key} = $self->{data} if defined $key;

        # The field is finished; later text must not be attributed to it.
        $self->{this} = '';
    }
    elsif ($name eq 'doc') {
        # Push a shallow copy so resetting the product does not affect it.
        push @{ $self->{products} }, { %{ $self->{product} } };
        $self->{product} = {};
    }

    return;
}

# SAX callback for character data: appends the chunk's text to the buffer
# in $self->{data}. A single text node may arrive in several chunks, hence
# the append rather than an assignment.
sub characters {
    my $self  = shift;
    my $chunk = shift;

    $self->{data} .= $chunk->{Data};

    return;
}
1;

package main;
use strict;
use warnings qw(all);

use XML::Simple;
use XML::SAX::PurePerl;

# Run the XML from the DATA section through the SAX handler; the handler
# collects one hashref per <doc> in its "products" list.
my $parks_handler = XML::SAX::Handler::Parks->new;
XML::SAX::PurePerl->new(Handler => $parks_handler)->parse_file(\*DATA);

# Structure to serialise: <Feed><Products><Product>...</Product></Products></Feed>
my %feed = (
    Products => {
        Product => $parks_handler->{products},
    },
);

# XML::Simple output options, passed through to XMLout unchanged.
my @xmlout_options = (
    KeyAttr  => [],
    NoAttr   => 1,
    RootName => 'Feed',
    XMLDecl  => 1,
);

print XMLout(\%feed, @xmlout_options);

__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<response>
  <result name="response" numFound="58582" start="0">
    <doc>
      <str name="body">Have a great time at this park!</str>
      <int name="is_park_id">317851</int>
      <str name="ss_image_thumb_small"/>
      <str name="title">Playground</str>
    </doc>
    <doc>
      <str name="body">hello world</str>
      <int name="is_park_id">12345</int>
      <str name="ss_image_thumb_small"/>
      <str name="title">Park</str>
    </doc>
  </result>
</response>