我有一个复杂的多记录 XML 文档,需要在 Perl 中解析它并输出为另一种格式。
输入xml:
<?xml version="1.0" encoding="UTF-8"?>
<response>
<result name="response" numFound="58582" start="0">
<doc>
<str name="body">Have a great time at this park!</str><int name="is_park_id">317851</int><str name="ss_image_thumb_small"/>
<str name="title">Playground</str>
</doc>
<doc>
<str name="body">Have a great time at this park!</str><int name="is_park_id">317851</int><str name="ss_image_thumb_small"/>
<str name="title">Playground</str>
</doc>
</result>
</response>
所需的输出格式为:
<?xml version="1.0" encoding="iso-8859-1"?>
<Feed>
<Products>
<Product>
<Description>Have a great time at this park!</Description><ExternalId>PF317851</ExternalId><ImageUrl/>
<Name>Playground</Name>
</Product>
<Product>
<Description>Have a great time at this park!</Description><ExternalId>PF317851</ExternalId><ImageUrl/>
<Name>Playground</Name>
</Product>
</Products>
</Feed>
其中,<str name="body"> 应映射为 <Description>,<int name="is_park_id"> 应映射为 <ExternalId>,<str name="title"> 应映射为 <Name>。
您能帮助将XML格式化为新格式吗?
答案 0 :(得分:1)
我刚刚用 XML::Simple 在你的文件上试了一下。这是我的程序:
#! /usr/bin/env perl
#
# Read test.xml with XML::Simple and print the resulting Perl data
# structure, so the shape of the parsed document can be inspected.
use strict;
use warnings;
use feature qw(say);
use XML::Simple;
use Data::Dumper;

# XMLin() is called with the file name only; no parser options are passed.
my $parsed = XMLin('test.xml');

# Dumper() renders the nested structure as Perl source text.
say Dumper($parsed);
这是输出:
$VAR1 = {
'result' => {
'name' => 'response',
'doc' => {
'int' => {
'content' => '317851',
'name' => 'is_park_id'
},
'str' => {
'body' => {
'content' => 'Have a great time at this park!'
},
'ss_image_thumb_small' => {},
'title' => {
'content' => 'Playground'
}
}
},
'numFound' => '58582',
'start' => '0'
}
};
它完美地解析您的XML文件。
您是否了解 Perl 引用,以及如何在 Perl 中使用"哈希的哈希"(嵌套哈希)?这可能才是你遇到的问题。
答案 1 :(得分:0)
代码如下:
#!/usr/bin/env perl
# Convert the <doc> records of input.xml into a
# <Feed><Products><Product>...</Product></Products></Feed> document.
use 5.010;
use strict;
use warnings qw(all);
use Data::DPath qw(dpath);
use File::Slurp;
use XML::Hash::LX;

# parse XML and store as a nested Perl hash
my $xml = xml2hash(read_file('input.xml'));

# from/to key mapping: value of the "name" attribute => output element name
my $from_to = {
    body                 => 'Description',
    is_park_id           => 'ExternalId',
    ss_image_thumb_small => 'ImageUrl',
    title                => 'Name',
};

# A repeated element is stored as an array reference and a single one as a
# plain hash reference, so normalise both shapes (and "no <doc> at all")
# into a list.
my $docs = $xml->{response}{result}{doc};
my @docs = !defined $docs       ? ()
         : ref $docs eq 'ARRAY' ? @{$docs}
         :                        ($docs);

# Translate every <doc> on its own.  Searching the whole document at once
# would return the first match only, i.e. a single product built from the
# first record.
my @products;
for my $doc (@docs) {
    my %product;
    for my $name (keys %{$from_to}) {
        # dpath(...)->match() is the plain method form of the overloaded
        # smartmatch operator; "~~" is deprecated in current Perl releases.
        my ($text) =
            dpath(qq(//*[key eq "-name" and value eq "$name"]/../#text))
            ->match($doc);

        # Empty elements such as <str name="..."/> have no text node.
        $product{ $from_to->{$name} } = $text // '';
    }
    push @products, \%product;
}

# output new XML; an array reference yields one <Product> per entry
say hash2xml({
    Feed => {
        Products => {
            Product => \@products,
        },
    },
});
但是,使用XPath会更合适。
答案 2 :(得分:0)
我在 Perl 中使用 XML::LibXML 来满足我所有的 XML 处理需求。
use strict;
use warnings;
use XML::LibXML;

# Convert the <doc> records of input.xml into a
# <Feed><Products><Product>...</Product></Products></Feed> document and
# print it on STDOUT.

# Output fields in the order they are written, each given as
# [ value of the source "name" attribute, output element name, text prefix ].
# The "PF" prefix on ExternalId follows the sample output in the question.
my @FIELDS = (
    [ body                 => 'Description', ''   ],
    [ is_park_id           => 'ExternalId',  'PF' ],
    [ ss_image_thumb_small => 'ImageUrl',    ''   ],
    [ title                => 'Name',        ''   ],
);

# Encoding declared in (and used for) the generated document.
my $OUTPUT_ENCODING = 'iso-8859-1';

my @records;

# parse input
{
    my $dom = XML::LibXML->load_xml( location => 'input.xml' );

    # <doc> elements live below <response>/<result>, not directly below
    # the root element, so select them by path.
    for my $doc_node ( $dom->findnodes('/response/result/doc') ) {
        my %record;

        # Select element children only.  childNodes() would also return
        # the whitespace text nodes between elements, and those have no
        # getAttribute() method.
        for my $node ( $doc_node->findnodes('./*[@name]') ) {
            $record{ $node->getAttribute('name') } = $node->textContent;
        }
        push @records, \%record;
    }
}

# create output
{
    my $dom = XML::LibXML::Document->new( '1.0', $OUTPUT_ENCODING );

    # Elements are created by the document and then attached explicitly;
    # a created node is not part of the tree until it is appended.
    my $root_node = $dom->createElement('Feed');
    $dom->setDocumentElement($root_node);

    my $products_node = $dom->createElement('Products');
    $root_node->appendChild($products_node);

    for my $record (@records) {
        my $pnode = $dom->createElement('Product');
        $products_node->appendChild($pnode);

        for my $field (@FIELDS) {
            my ( $source, $target, $prefix ) = @{$field};

            my $child = $dom->createElement($target);
            $pnode->appendChild($child);

            # A missing or blank source field becomes an empty element,
            # e.g. <ImageUrl/>, without a dangling prefix.
            my $value = $record->{$source};
            next if !defined $value || $value eq '';

            $child->appendText( $prefix . $value );
        }
    }

    # The argument 1 asks for indented output.
    print $dom->toString(1);
}
答案 3 :(得分:0)
这是 XML::SAX / XML::Simple 版本:
#!/usr/bin/env perl
package XML::SAX::Handler::Parks;
# SAX handler that collects the <int>/<str> fields of every <doc> element
# into a list of products.
#
# Handler state (keys of the blessed hash):
#   data     - character data collected since the last start tag
#   from_to  - map: value of the "name" attribute => output key
#   product  - fields of the <doc> currently being read
#   products - array reference of finished products (one hash per <doc>)
#   this     - "name" attribute of the field element currently open, or ''
use strict;
use warnings qw(all);
use base qw(XML::SAX::Base);

# Create a handler with empty state.
# Usually called as a class method; the invocant decides the class of the
# returned object, with this package as the fallback.
sub new {
    my ($class) = @_;
    return bless {
        data    => '',
        from_to => {
            body                 => 'Description',
            is_park_id           => 'ExternalId',
            ss_image_thumb_small => 'ImageUrl',
            title                => 'Name',
        },
        product  => {},
        products => [],
        this     => '',
    }, ref($class) || $class || __PACKAGE__;
}

# True when the element name is one of the field elements (<int>, <str>).
sub _is_field_element {
    my ($name) = @_;
    return $name =~ /\A(?:int|str)\z/ ? 1 : 0;
}

# Start tag: begin a fresh text buffer and, for a field element, remember
# the value of its "name" attribute.
sub start_element {
    my ($self, $el) = @_;
    $self->{data} = '';

    # Forget the previous field so that an element without a "name"
    # attribute cannot be stored under a stale name.
    $self->{this} = '';

    if (_is_field_element($el->{Name})) {
        for my $attr (values %{ $el->{Attributes} }) {
            if ($attr->{Name} eq 'name') {
                $self->{this} = $attr->{Value};
            }
        }
    }
    return;
}

# End tag: store a finished field, or close the current product at </doc>.
sub end_element {
    my ($self, $el) = @_;

    # Only the closing tag of a mapped field stores a value.  Storing on
    # every closing tag would let </doc> rewrite the last field with the
    # whitespace that follows it, and an unmapped name would give an
    # undefined hash key.
    my $name = $self->{this};
    if (_is_field_element($el->{Name}) && exists $self->{from_to}{$name}) {
        $self->{product}{ $self->{from_to}{$name} } = $self->{data};
    }

    # The field is complete; text up to the next start tag is not part of it.
    $self->{this} = '';
    $self->{data} = '';

    if ($el->{Name} eq 'doc') {
        # Push a shallow copy so the next <doc> starts from a clean hash.
        push @{ $self->{products} }, { %{ $self->{product} } };
        $self->{product} = {};
    }
    return;
}

# Character data: may arrive in several pieces, so append to the buffer.
sub characters {
    my ($self, $data) = @_;
    $self->{data} .= $data->{Data};
    return;
}

1;
package main;
# Driver: feed the XML in the DATA section through the Parks SAX handler
# and print the collected products as a <Feed> document.
use strict;
use warnings qw(all);
use XML::Simple;
use XML::SAX::PurePerl;

# The handler accumulates one hash per <doc> in $parks->{products}.
my $parks = XML::SAX::Handler::Parks->new;
XML::SAX::PurePerl->new(Handler => $parks)->parse_file(\*DATA);

# Structure handed to XMLout(): Feed (root) > Products > Product*.
my %feed = (
    Products => {
        Product => $parks->{products},
    },
);

# Serialisation options, passed to XMLout() unchanged.
my @xmlout_options = (
    KeyAttr  => [],
    NoAttr   => 1,
    RootName => 'Feed',
    XMLDecl  => 1,
);

print XMLout(\%feed, @xmlout_options);
__DATA__
<?xml version="1.0" encoding="UTF-8"?>
<response>
<result name="response" numFound="58582" start="0">
<doc>
<str name="body">Have a great time at this park!</str>
<int name="is_park_id">317851</int>
<str name="ss_image_thumb_small"/>
<str name="title">Playground</str>
</doc>
<doc>
<str name="body">hello world</str>
<int name="is_park_id">12345</int>
<str name="ss_image_thumb_small"/>
<str name="title">Park</str>
</doc>
</result>
</response>