我一直在使用 Perl 成功抓取多个网站菜单中的链接,但它似乎不适用于下面这两个网站。
以下是我的两个自定义脚本:
这是第一个网站:
#!/usr/bin/perl
use strict;
use warnings ;
use URI;
use Web::Scraper;
use Encode;
use Data::Dumper;
# Collect the article links from every listing page of the "gisa1" section
# and write them, one absolute URL per line, to the output file.
my $out_file = 'threadUrl_uriminzokkiri_gisa1.txt';
open(my $out, '>', $out_file)
    or die "Cannot open $out_file for writing: $!";

# The scraper does not depend on the page number, so it is built only once.
#
# Note the '@' in the predicate: [@class="..."] tests the class attribute,
# whereas [class="..."] looks for a child element named <class> and so
# never matches any <div> on the page.
my $resultat = scraper {
    process '//div[@class="gisatitle"]', 'entree[]' => scraper {
        process '//a/@href', threadUrl => 'TEXT';
    };
};

for my $i (1 .. 42) {
    my $url = "http://www.uriminzokkiri.com/index.php?ptype=gisa1&pagenum=$i";
    my $res = $resultat->scrape( URI->new($url) );

    # Fall back to an empty list in case the page yielded no 'entree' key.
    for my $val (@{ $res->{entree} || [] }) {
        # Skip title blocks in which no link was found.
        next unless defined $val->{threadUrl};

        # The hrefs are written out prefixed with the site root.
        print {$out} Encode::encode(
            "utf8",
            "http://www.uriminzokkiri.com/" . $val->{threadUrl} . "\n"
        );
    }
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot close $out_file: $!";
这是第二个网站:
#!/usr/bin/perl
use strict;
use warnings ;
use URI;
use Web::Scraper;
use Encode;
use Data::Dumper;
# Collect the article links from every listing page of the "treatise" menu
# and write them, one per line, to the output file.
my $out_file = 'threadUrl_ournation-school_treatise.txt';
open(my $out, '>', $out_file)
    or die "Cannot open $out_file for writing: $!";

# The scraper does not depend on the page number, so it is built only once.
#
# Note the '@' in the predicate: [@class="..."] tests the class attribute,
# whereas [class="..."] looks for a child element named <class> and so
# never matches any <div> on the page.
my $resultat = scraper {
    process '//div[@class="cu_title_con"]', 'entree[]' => scraper {
        process '//a/@href', threadUrl => 'TEXT';
    };
};

for my $i (1 .. 11) {
    my $url = "http://www.ournation-school.com/index.php?menu=treatise&cno=&pagenum=$i";
    my $res = $resultat->scrape( URI->new($url) );

    # Fall back to an empty list in case the page yielded no 'entree' key.
    for my $val (@{ $res->{entree} || [] }) {
        # Skip title blocks in which no link was found.
        next unless defined $val->{threadUrl};

        # The href is written exactly as it appears in the page.
        print {$out} Encode::encode("utf8", "" . $val->{threadUrl} . "\n");
    }
}

# Buffered write errors only surface at close time, so check it.
close($out) or die "Cannot close $out_file: $!";
有谁知道问题出在哪里吗?能否给我一些建议?