我有以下test.xml:
<root>
<A title="A1">
<B title="B1">
<C title="C1">
<params>param=ABC1</params>
<params>param=ABC2</params>
</C>
</B>
</A>
<D title="D1">
<B title="B2">
<C title="C2">
<params>param=DBC1</params>
<params>param=DBC2</params>
</C>
</B>
</D>
</root>
我需要一个perl代码来解析它并将报告打印为:
NodeName, Attribute(s), Param(s)
A A1
B B1
C C1 param=ABC1 param=ABC2
D D1
B B2
C C2 param=DBC1 param=DBC2
我尝试过使用 getElementsByTagName('params')、getChildNodes 等方法,但都没有成功。另外,我一直使用的是 XML::DOM 模块。
以下是代码:
use XML::DOM;
# Driver for the question's attempt: parse test.xml with XML::DOM and walk it
# with ParseXML, starting from the document node.
# NOTE(review): the script has no `use strict; use warnings;`, so the problems
# in ParseXML below are not reported as clearly as they could be.
# NOTE(review): `new XML::DOM::Parser` is indirect-object syntax; the usual
# spelling is `XML::DOM::Parser->new`.
my $parser = new XML::DOM::Parser;
my $doc = $parser->parsefile("test.xml");
# File-level accumulator shared with ParseXML: filled when a <params> element
# is visited, printed and emptied when a non-root element is printed.
my @paramarray=();
ParseXML($doc,"");
# ParseXML($node, $indent)
# Recursive walk over an XML::DOM tree that prints one line per element
# (node name, title attribute, collected params).
#   $node   - the DOM node to visit
#   $indent - passed down to the recursive calls but never otherwise used
# This is the asker's non-working attempt; the NOTE(review) comments mark the
# spots that prevent it from compiling or from producing the wanted report.
sub ParseXML{
my $node = $_[0];
my $indent = $_[1];
my $title;
# NOTE(review): `null` is not a Perl keyword; the Perl test for "no value" is
# `defined` / plain truthiness.
if ($node == null) {
return;
}
my $type = $node->getNodeType();
if ($type == DOCUMENT_NODE) {
# For the document node, only its first child is visited.
ParseXML($node->getFirstChild(),"");
# NOTE(review): `break` does not leave a sub in Perl; `return` appears to be
# what was intended here.
break;
}
if ($type == ELEMENT_NODE) {
# NOTE(review): $numberAttributes, $loopIndex and $attribute are not declared
# with `my`, so they are package globals shared across the recursion.
$numberAttributes =0;
if ($node->getAttributes() !=null){
$numberAttributes = $node->getAttributes()->getLength();
}
# Scan the attribute list and remember the value of the "title" attribute.
for ($loopIndex =0; $loopIndex<$numberAttributes; $loopIndex++) {
$attribute = ($node->getAttributes())->item($loopIndex);
if($attribute->getNodeName() eq "title"){
$title = $attribute->getNodeValue();
}
}
if ($node->getNodeName() eq "params"){
# NOTE(review): this queries $doc, i.e. the whole document, so every <params>
# in the file is collected each time any single <params> element is visited.
foreach my $paramvar ($doc->getElementsByTagName("params")) {
foreach my $child ($paramvar->getChildNodes) {
push(@paramarray, $child->getData);
}
}
}
# NOTE(review): this is a separate `if`, not an `elsif`, so <params> elements
# are printed as well (with no title), and the params collected just above are
# printed on the <params> line itself rather than on the parent's line.
if ($node->getNodeName() ne "root") {
print $node->getNodeName. ", $title, @paramarray\n";
@paramarray=();
}
# NOTE(review): the next statement is missing its terminating semicolon, which
# is a syntax error.
my @childNodes = $node->getChildNodes()
# NOTE(review): `null` again; an array is already false when empty, so
# `if (@childNodes)` is sufficient.
if (@childNodes != null){
my $numberChildNodes = $#childNodes + 1;
my $loopIndex;
# Recurse into every child node in order.
for ($loopIndex =0; $loopIndex<$numberChildNodes; $loopIndex++) {
ParseXML($childNodes[$loopIndex],$indent);
}
}
}
if ($type == TEXT_NODE) {
# The text value is fetched but never used.
my $nodeText = $node->getNodeValue();
}
}
答案 0 :(得分:2)
以下是如何使用XML::Twig实现任务的示例代码:
use strict; use warnings;
use XML::Twig;
# Build the complete tree first and only then report on it.
# A twig_handlers callback runs when an element's closing tag has been parsed,
# so nested elements would be reported child-first (C, B, A, ...). Walking the
# finished tree instead yields the elements in document order (A, B, C, ...),
# which is the order the report asks for.
my $twig = XML::Twig->new;
$twig->parsefile('test.xml');

# Visit every element that carries a title attribute (the root included, if it
# has one) and print: tag name, title, then the trimmed text of each <params>
# child, separated by tabs.
for my $element ( $twig->root->descendants_or_self('*[@title]') ) {
    print join("\t",
        $element->gi,
        $element->att('title'),
        map { $_->trimmed_text } $element->findnodes('params')
    ), "\n";
}
答案 1 :(得分:1)
首先,始终以下面两行开头:
use strict;
use warnings;
这能帮你发现许多拼写错误和低级错误。你遇到的一个大问题是:null 不是 Perl 的关键字。Perl 使用 undef 和 defined 函数(不过在这种情况下你可能并不需要 defined,因为 undef 为假,而对象通常为真)。
下面是稍作清理后的代码版本。它仍然不能产生你要求的输出,但已经更接近了。
use strict;
use warnings;
use XML::DOM;
# Parse test.xml into an XML::DOM document. $doc and @paramarray are file-level
# lexicals because ParseXML reads $doc and fills/empties @paramarray.
my $doc = XML::DOM::Parser->new->parsefile("test.xml");
my @paramarray;

# Start the recursive walk at the document node.
ParseXML($doc, "");
# ParseXML($node, $indent)
# Recursively walks an XML::DOM tree and prints "name, title, params" for
# every element other than <root> and <params>.
#   $node   - the DOM node to visit; a false value is ignored
#   $indent - handed on unchanged to the recursive calls, not otherwise used
# Known limitation (kept on purpose, this is only a cleaned-up version of the
# question's code): visiting a <params> element collects the text of every
# <params> in the whole document into @paramarray, and that list is printed
# with the next printed element, so the report is not yet the requested one.
sub ParseXML {
    my ($node, $indent) = @_;
    return if not $node;

    my $type = $node->getNodeType();

    # The document node itself prints nothing; continue with its first child.
    if ($type == DOCUMENT_NODE) {
        ParseXML($node->getFirstChild(), "");
        return;
    }

    if ($type == ELEMENT_NODE) {
        # Look through the attributes for "title".
        my $title;
        my $attributes      = $node->getAttributes();
        my $attribute_count = $attributes ? $attributes->getLength() : 0;
        for my $index (0 .. $attribute_count - 1) {
            my $attribute = $attributes->item($index);
            $title = $attribute->getNodeValue()
                if $attribute->getNodeName() eq "title";
        }

        my $name = $node->getNodeName();
        if ($name eq "params") {
            # Collects from $doc (the whole document), not just from $node.
            for my $params_element ($doc->getElementsByTagName("params")) {
                for my $text_node ($params_element->getChildNodes) {
                    push @paramarray, $text_node->getData;
                }
            }
        }
        elsif ($name ne "root") {
            print($name . ", $title, @paramarray\n");
            @paramarray = ();
        }

        # The question's version of this statement was missing its semicolon.
        my @children = $node->getChildNodes();
        for my $child (@children) {
            ParseXML($child, $indent);
        }
    }

    if ($type == TEXT_NODE) {
        my $nodeText = $node->getNodeValue();
        # Were you planning on doing something here?
    }
}
答案 2 :(得分:1)
我使用XML::LibXML,所以这是一个使用该模块的解决方案。
use strict;
use warnings;
use XML::LibXML qw( );
# Parse test.xml, then report on every element that has a title attribute.
my $doc = XML::LibXML->new()->parse_file("test.xml");

foreach my $element ($doc->documentElement()->findnodes('//*[@title]')) {
    # Text of each <params> element selected relative to this element.
    my @param_texts;
    foreach my $param ($element->findnodes('params')) {
        push @param_texts, $param->textContent;
    }

    # Columns: node name, title attribute, space-separated params.
    printf(
        "%-10s %-11s %s\n",
        $element->nodeName(),
        $element->getAttribute('title'),
        join(' ', @param_texts),
    );
}
更新:仍然使用 XML::LibXML,但这次不使用 XPath,以便于转换为 XML::DOM。
use strict;
use warnings;
use XML::LibXML qw( XML_ELEMENT_NODE );
# find_params($node)
# Returns the text content of each direct child of $node that is an element
# named "params", in child order.
sub find_params {
    my ($node) = @_;

    my @param_elements =
        grep { $_->nodeType == XML_ELEMENT_NODE && $_->nodeName eq 'params' }
        $node->childNodes();

    my @params = map { $_->textContent() } @param_elements;
    return @params;
}
# visit($node)
# Depth-first walk: ignores anything that is not an element; for an element
# with a title attribute prints its name, title and params (via find_params);
# then visits each child node in turn.
sub visit {
    my ($node) = @_;
    return unless $node->nodeType == XML_ELEMENT_NODE;

    my $title_node = $node->getAttributeNode('title');
    if ($title_node) {
        my $name   = $node->nodeName();
        my $title  = $title_node->getValue();
        my $params = join(' ', find_params($node));
        printf("%-10s %-11s %s\n", $name, $title, $params);
    }

    foreach my $child ($node->childNodes()) {
        visit($child);
    }
}
# Parse test.xml and start the walk at the document's root element.
my $document = XML::LibXML->new()->parse_file("test.xml");
visit($document->documentElement());