use strict;
use warnings;
$|++;    # Autoflush STDOUT so progress lines show up as soon as they are printed.
use WWW::Mechanize;
use File::Basename;

# Visit every absolute "http:" link found on the start page and, from each
# page reached, mirror every linked URL beginning with "http://store4" into
# a file in the current directory named after the URL's last path component.
my $m = WWW::Mechanize->new;
$m->get("http://www.despair.com/indem.html");

# Snapshot of the start page's links; element [0] of each entry is its URL.
my @top_links = @{$m->links};

for my $top_link_num (0..$#top_links) {
    # Skip links whose URL does not start with "http:".
    next unless $top_links[$top_link_num][0] =~ /^http:/;

    # follow_link's "n" counts links starting at 1, while $top_link_num is a
    # 0-based array index. Add 1 so the link followed is the one just tested.
    $m->follow_link( n => $top_link_num + 1 )
        or die "can't follow $top_link_num";
    print $m->uri, "\n";

    # The link list is computed once, before any download starts.
    for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
        my $local = basename $image;
        print " $image...", $m->mirror($image, $local)->message, "\n";
    }

    # NOTE(review): mirror() is inherited from LWP::UserAgent - confirm it
    # does not add entries to Mechanize's page history, because a single
    # back() is assumed to return to the start page.
    $m->back or die "can't go back";
}
谁能逐行给我解释一下这段代码吗?
答案 0 :(得分:3)
我先试着解释了前面的几行。
但是,您需要确保首先阅读并理解以下文档:
1)Perl Intro - 特别是变量范围部分
3)Perl Data Structures Cookbook
P.S. 正如埃里克在评论中所说,对于初学者来说,这段代码绝对不是一个很好的例子。它包含了太多并不简单的想法/概念/相互关联的环节。
use strict;
use warnings;
# "use strict" does not allow undeclared global variables or other unsafe
# constructs. You should ALWAYS code with "use strict; use warnings".
# See http://perldoc.perl.org/strict.html

$|++;
# Turn on autoflush on STDOUT filehandle.
# See "http://perldoc.perl.org/perlvar.html" for "$|" and other special variables.
# P.S. This "++" is a hack - it would be a lot more readable to do "$| = 1;"
# since $| only cares whether the value is zero or non-zero.

use WWW::Mechanize; # Load the module for getting web sites.
use File::Basename; # Load the module that provides basename(), used below
                    # to take the last component of a path or URL.

my $m = WWW::Mechanize->new; # Create new object via a constructor (new)

$m->get("http://www.despair.com/indem.html");
# Retrieve the contents of the URL.
# See http://search.cpan.org/dist/WWW-Mechanize/lib/WWW/Mechanize.pm
# for the module's documentation (aka POD)

my @top_links = @{$m->links};
# Declare a "@top_links" array,
# get the list of links on the above page (returns array reference)
# and de-reference that array reference and store it in @top_links array

for my $top_link_num (0..$#top_links) {
    # Loop over all integers between 0 and the last index of @top_links array
    # (e.g. if there were 3 links, loop over 0,1,2).
    # Assign the current loop value to $top_link_num variable

    next unless $top_links[$top_link_num][0] =~ /^http:/;
    # Go to next iteration of the loop unless the current link's URL is HTTP protocol.
    # Current link is the element of the array with current index -
    # $top_links[$top_link_num]
    # The link data is stored as an array reference,
    # with the link URL being the first element of the arrayref.
    # Therefore, $top_links[$top_link_num][0] - which is the shorthand
    # for $top_links[$top_link_num]->[0] as you learned
    # from reading Data Structures Cookbook I linked - is the URL.
    # To check if URL is HTTP protocol, we check if it starts with http:
    # via regular expression - see "http://perldoc.perl.org/perlre.html"

    $m->follow_link( n => $top_link_num + 1 )
        or die "can't follow $top_link_num";
    # Follow the link that was just tested, or abort with an error message.
    # follow_link's "n" parameter counts links starting at 1, while
    # $top_link_num is a 0-based array index - hence the "+ 1".

    print $m->uri, "\n";
    # Print the URL of the page we are on now.

    for my $image (grep m{^http://store4}, map $_->[0], @{$m->links}) {
        # Take the links of the current page, extract each link's URL
        # (element 0, via map), keep only the URLs that start with
        # "http://store4" (via grep) and loop over them.

        my $local = basename $image;
        # Local file name: the last path component of the URL.

        print " $image...", $m->mirror($image, $local)->message, "\n";
        # Download the URL into the local file and print the status
        # message of the response that mirror() returns.
    }

    $m->back or die "can't go back";
    # Return to the previous page so the next iteration starts from the top page.
    # NOTE(review): mirror() is inherited from LWP::UserAgent - confirm it
    # does not add entries to Mechanize's page history, because a single
    # back() is assumed to be enough here.
}