我编写了一段用于抓取网站文章内容的测试代码,但代码在遇到无效的 HTML 属性时会失败。如何捕获这个无效属性异常?或者有没有其他解决办法?
这是我抓取文章内容的代码:
#!/usr/bin/perl -w
use HTML::LinkExtor;
use LWP::Simple;
use HTML::TreeBuilder::XPath;
use Term::ProgressBar;
use strict;
use warnings;

# Fetch one article page, extract its title, body and author with XPath,
# and print the result as a small XML document on STDOUT.

# Escape the five XML special characters so that scraped text cannot
# break the generated markup. An undefined value is treated as ''.
sub _xml_escape {
    my ($text) = @_;
    $text = '' unless defined $text;
    $text =~ s/&/&amp;/g;    # must run first, or later entities get re-escaped
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&apos;/g;
    return $text;
}

my $url = "http://www.totalpolitics.com/blog/159117/campaigning-to-keep-our-coastguards.thtml";
my $body_xpath = q{//div[@class="article-body"]};

# LWP::Simple::get returns undef on failure; stop early with a clear
# message instead of parsing nothing.
my $content = get $url;
die "Could not fetch $url\n" unless defined $content;

my $tree = HTML::TreeBuilder::XPath->new_from_content($content);
my $title = $tree->findvalue(q{//div[@id="article"]/h1});

# findnodes_as_string is the only call here that serialises nodes as XML.
# NOTE(review): the reported "invalid attribute name" error appears to be
# raised during that serialisation -- confirm against the installed
# modules. Trap the exception and fall back to HTML serialisation, which
# is acceptable because the body is emitted inside a CDATA section.
my $body = eval { $tree->findnodes_as_string($body_xpath) };
if (my $error = $@) {
    warn "XML serialisation of the article body failed, using HTML instead: $error";
    $body = join '', map { $_->as_HTML } $tree->findnodes($body_xpath);
}
$body = '' unless defined $body;

# A literal "]]>" would terminate the CDATA section early, so split it
# across two adjacent CDATA sections.
$body =~ s/\]\]>/]]]]><![CDATA[>/g;

# List context yields the matching nodes; the author is optional, so
# guard against an empty result instead of dereferencing undef.
my ($author_node) = $tree->findnodes(q{//div[@class="article-body"]/p/strong});
my $author = defined $author_node ? $author_node->getValue : '';

my $xml = join '',
    '<?xml version="1.0" encoding="UTF-8" ?>',
    '<nodes>',
    '<node>',
    '<url>', _xml_escape($url), '</url>',
    '<title>', _xml_escape($title), '</title>',
    '<description>', "<![CDATA[$body]]>", '</description>',
    '<author>', _xml_escape($author), '</author>',
    "</node>\n",
    '</nodes>';

# The XML declaration promises UTF-8, so encode the output accordingly.
# NOTE(review): assumes get() returned decoded characters -- confirm.
binmode STDOUT, ':encoding(UTF-8)';
print $xml;

# Release the parse tree explicitly once it is no longer needed.
$tree->delete;
运行时出现如下错误:
span has an invalid attribute name ' _fck_bookmark' at /home/getmizanur/perl5/lib/perl5/XML/XPathEngine.pm line 125
答案 0 :(得分:1)
看看下面的例子:
# Writes the line "something" to the currently selected output handle.
sub do_something {
    my $line = "something\n";
    print $line;
}
# Writes the line "something_else" to the currently selected output handle.
sub do_something_else {
    my $line = "something_else\n";
    print $line;
}
# Run the steps inside an eval block so that a fatal error is trapped
# instead of terminating the script.
eval {
    do_something();
    print 1 / 0;            # division by zero dies here ...
    do_something_else();    # ... so this call is never reached
};

# $@ is the special variable holding the error trapped by the most
# recent eval; it is empty when the block finished normally.
warn "Error occured: $@" if $@;