use WWW::Mechanize;
use strict;
use warnings;
use LWP::Simple;
# Scrape product images from www.condortk.com into ./Images.
#
# Flow:
#   1. fetch the search-results page ($url);
#   2. follow every link found in a <div class="producto"> block;
#   3. on each of those pages follow every <div class="nombre2"> link;
#   4. on each product page locate the id="imgproducto" image and save it
#      under $image_dir using the last path segment of its src as file name.
my $ctime       = time();
my $Home_page   = 'www.condortk.com/';
my $output_file = "www.condortk.com-$ctime";    # not used in this part of the script
my $url         = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';
my $site_root   = "http://$Home_page";
my $image_dir   = 'Images';

# Turn an href/src value scraped from the HTML into an absolute URL.
# Values that already carry an http(s) scheme are returned unchanged.
# NOTE(review): everything else is assumed to be relative to the site root;
# verify against the site's markup.
sub absolute_url {
    my ($link) = @_;
    return $link if $link =~ m{\A https?:// }xi;
    $link =~ s{\A /+ }{}x;    # avoid a double slash after the host
    return $site_root . $link;
}

# With autocheck enabled a failed get() dies with a diagnostic, so the
# fetches below need no "or die" (get() returns a response object, which is
# always true and therefore useless as a success test).
my $m = WWW::Mechanize->new( autocheck => 1 );
$m->get($url);
my $Home_Con = $m->content;

# Built-in mkdir: no shell involved, and a failure is reported.
if ( !-d $image_dir ) {
    mkdir $image_dir or die "unable to create $image_dir: $!";
}

my $next = '';    # not used in this part of the script
my $page = '';    # not used in this part of the script

LISTING:
while ( $Home_Con =~ m/<div class="producto"><a href="([^"]*)"/igs ) {
    # Copy the capture right away; later matches overwrite $1.
    my $list_href = $1;
    $m->get( absolute_url($list_href) );
    my $list_content = $m->content;

    PRODUCT:
    while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^"]*)"/igs ) {
        my $product_href = $1;
        $m->get( absolute_url($product_href) );
        my $content1 = $m->content;

        next PRODUCT
            unless $content1 =~ m/id="imgproducto"><img src="([^"]*)"/is;
        my $img_name = $1;

        # File name = last path segment of the src, without query/fragment.
        my ($img_file) = $img_name =~ m{ ( [^/?\#]+ ) (?: [?\#] .* )? \z }xs;
        next PRODUCT unless defined $img_file;

        # getstore() needs a full URL (scheme included) as its first
        # argument and a local file path as its second.
        my $img_url   = absolute_url($img_name);
        my $save_path = "$image_dir/$img_file";
        my $status    = getstore( $img_url, $save_path );
        warn "failed to download $img_url: HTTP status $status\n"
            unless is_success($status);
    }
}
我正在尝试从一个网站下载并保存特定的图片，但不知道为什么图片始终保存不下来。用于匹配图片的正则表达式我认为是正确的，我怀疑问题出在 getstore 的调用上。希望能得到一些指导，谢谢。
答案 0 :(得分:2)
如果您要查找图片，请使用 WWW::Mechanize 提供的专门用于图片的方法（例如 find_image、find_all_images），它们可以帮助您找到所需的内容。例如：
# $mech is a WWW::Mechanize object (the script in the question calls it $m).
# NOTE(review): url_regex is matched against each image's URL - confirm that
# this pattern is what the image URLs on the target page actually look like.
my @images = $mech->find_all_images( url_regex => qr/productos-detalle\.php/ );
答案 1 :(得分:1)
这些正则表达式之所以能匹配，只是碰巧而已，并不健壮，请参阅代码中带有 “wrong regex” 字样的注释。此外，getstore(url, file) 的第一个参数必须是完整的 URL（例如 http://host.org/dir/file），第二个参数是本地保存路径（例如 path/to/file.jpg）。
这是一个工作版本:
use WWW::Mechanize;
use strict;
use warnings;
use LWP::Simple;
# Working version of the image scraper: walks the search results of
# www.condortk.com, follows each listing and product link, and stores every
# product's id="imgproducto" image under ./Images (already-saved files are
# skipped).
my $ctime       = time();
my $Home_page   = 'www.condortk.com/';
my $output_file = "www.condortk.com-$ctime";    # not used in this part of the script
my $url         = 'http://www.condortk.com/results.php?keyword=&Buscar.x=4&Buscar.y=7';
my $base_url    = "http://$Home_page";
my $image_dir   = 'Images';

my $m = WWW::Mechanize->new();
$m->get( $url ) or die "unable to get $url";
my $Home_Con = $m->content;

# Built-in mkdir: no shell involved, and a failure is reported.
if ( !-d $image_dir ) {
    mkdir $image_dir or die "unable to create $image_dir: $!";
}

my $next = '';    # not used in this part of the script
my $page = '';    # not used in this part of the script

# Fix for the "wrong regex": the href capture now stops at the closing
# quote ([^"]) instead of at the next '>' ([^>]).
while ( $Home_Con =~ m/<div class="producto"><a href="([^"]*?)"/igs ) {
    # Copy the capture right away; later matches overwrite $1.
    my $list_href = $1;
    #print "Loop 1 $list_href \n";

    # Fix for the "wrong url": the href is prefixed with the scheme and host.
    $m->get( $base_url . $list_href );
    my $list_content = $m->content;

    while ( $list_content =~ m/<div\s*class="nombre2"><a\s*href="([^"]*?)"/igs ) {
        my $product_href = $1;
        #print "Loop 2 $product_href \n";

        # The href is passed as found in the page.
        # NOTE(review): this relies on WWW::Mechanize resolving it against
        # the page it currently holds - confirm.
        $m->get( $product_href );
        my $content1 = $m->content;

        next unless $content1 =~ m/id="imgproducto"><img src="([^"]*?)"/is;
        my $img_name = $1;

        # Fix for the wrong url and 'save as' parameters: getstore() gets a
        # full URL and a path inside the image directory.
        next unless $img_name =~ m/\/\w+\/\w+\/(.*)$/is;
        my $basename = $1;
        my $get      = $base_url . $img_name;
        my $save     = "$image_dir/$basename";

        if ( -f $save ) {
            print "Skipping $save\n";
            next;
        }

        # $code is the HTTP status returned by getstore (200 on success).
        my $code = getstore( $get, $save );
        print "$get --> $save : $code\n";
    }
}
-
$ perl dl.img.pl
http://www.condortk.com/.../60504s_725x0.jpg --> Images/Images/60504s_725x0.jpg : 200
http://www.condortk.com/.../60508s_725x0.jpg --> Images/Images/60508s_725x0.jpg : 200
http://www.condortk.com/.../60501s_725x0.jpg --> Images/Images/60501s_725x0.jpg : 200
http://www.condortk.com/.../60020s_725x0.jpg --> Images/Images/60020s_725x0.jpg : 200