修改Perl下载脚本以解压缩文件

时间:2015-10-01 15:31:31

标签: perl

我有一个问题,我不知道如何解决它。我必须从亚马逊下载一些文件,我不能在Ruby中使用Open-Uri。

亚马逊为我提供了一个Perl脚本来下载这些文件,但我不知道Perl,我需要修改脚本。

use strict;

# Base URLs of the data-feed servers; one is picked below from --region.
my $NA_SERVER = "https://assoc-datafeeds-na.amazon.com";
my $EU_SERVER = "https://assoc-datafeeds-eu.amazon.com";
my $FE_SERVER = "https://assoc-datafeeds-fe.amazon.com";

my $CURL="/usr/bin/curl"; # Update this to an appropriate location of the curl executable

my ($feed_filename, $md5_filename, $user, $pass, $out_dir, $region) = parse_argv();

# Region code => server. parse_argv() has already upper-cased $region, so
# the keys only need to exist in upper case.
my %server_for_region = (
    "NA" => $NA_SERVER,
    "EU" => $EU_SERVER,
    "FE" => $FE_SERVER,
);

my $server = $server_for_region{$region};
unless(defined($server))
{
    # Any code other than NA/EU/FE is fatal.
    print "ERROR: Invalid region: $region\n";
    exit(1);
}

my $feed_names = read_feed_names($feed_filename);

my $old_md5_hash = read_md5($md5_filename); # Hash from feed name to MD5

my $new_md5_hash = fetch_new_md5($user, $pass, $server);

# Walk the requested feeds and download only those whose server-side MD5
# differs from the one recorded after the previous successful download.
FEED:
foreach my $feed_name (@$feed_names)
{
    my $feed_new_md5 = $new_md5_hash->{$feed_name};
    unless(defined($feed_new_md5) && $feed_new_md5 ne "")
    {
        print "ERROR: no md5 found for feed $feed_name, skipping it\n";
        next FEED;
    }

    my $feed_old_md5 = $old_md5_hash->{$feed_name};
    if(defined($feed_old_md5) && $feed_old_md5 eq $feed_new_md5)
    {
        print "$feed_name has the same md5 ($feed_new_md5), skipping download for it\n";
        next FEED;
    }

    print "$feed_name changed md5 from $feed_old_md5 to $feed_new_md5, downloading it\n";
    my $success = download_feed($user, $pass, $feed_name, $out_dir, $server);
    next FEED unless $success == 1;

    # Record the new checksum right away so that a later failure does not
    # cause this feed to be downloaded again on the next run.
    report_success($user, $pass, $feed_name, $server);
    $old_md5_hash->{$feed_name} = $feed_new_md5;
    save_md5($md5_filename, $old_md5_hash);
}


# download_feed($user, $pass, $feed_name, $out_dir, $server)
#
# Downloads one feed with curl into "$out_dir/$feed_name".
#
# Up to five attempts are made in total, sleeping $sleep_secs seconds
# between attempts; this is the same number of attempts as the original
# four-iteration retry loop plus its final fallback attempt.
#
# Returns 1 when curl exited with status 0, otherwise 0.
sub download_feed
{
    my ($user, $pass, $feed_name, $out_dir, $server) = @_;
    my $out_path = "$out_dir/$feed_name";

    # The command is kept as a list so system() runs curl directly instead of
    # through a shell: the '?' in the URL and any special characters in the
    # credentials or paths are then passed to curl verbatim.
    my @cmd = (
        $CURL, "--location", "--user", "$user:$pass", "-C", "-", "--digest", "-k",
        "$server/datafeed/getFeed?filename=$feed_name", "-o", $out_path,
    );
    my $cmd_text = join(" ", @cmd); # for log messages only

    my $success = 0;
    my $sleep_secs = 5;
    my $max_attempts = 5;
    unlink $out_path; # delete the file if it already exists

    # "-C -" lets curl resume a partial file, so a retry continues from
    # whatever the previous attempt managed to write.
    for my $attempt (1 .. $max_attempts)
    {
        system(@cmd);
        if($? == 0)
        {
            $success = 1;
            last;
        }

        my $failure = _describe_wait_status($?, "$!");
        if($attempt < $max_attempts)
        {
            print "Command [$cmd_text] failed ($failure), retrying after $sleep_secs seconds\n";
            sleep($sleep_secs);
        }
        else
        {
            print "ERROR: command [$cmd_text] failed ($failure). Skipping this file\n";
        }
    }

    if($success == 1)
    {
        # Download succeeded, and here I have to UNZIP
    }
    return $success;
}

# Turns a wait status ($?) into readable text. $os_error is the text of $!
# captured right after the call; it is only meaningful when the status is -1.
sub _describe_wait_status
{
    my ($status, $os_error) = @_;
    return "could not be started: $os_error" if $status == -1;
    return "killed by signal " . ($status & 127) if $status & 127;
    return "exit code " . ($status >> 8);
}


# report_success($user, $pass, $feed_name, $server)
#
# Tells the server, via its reportStatus URL, that $feed_name was downloaded
# successfully. Every failure here is only printed, never fatal.
sub report_success
{
    my ($user, $pass, $feed_name, $server) = @_;

    # List form: curl is started without a shell, so credentials and feed
    # names containing quotes or other shell metacharacters cannot break or
    # alter the command.
    my @cmd = (
        $CURL, "--user", "$user:$pass", "--digest", "-k",
        "$server/datafeed/reportStatus?success=1&filename=$feed_name",
    );
    my $cmd_text = join(" ", @cmd); # for log messages only

    my $cmd_out = "";
    if(open(my $curl_fh, "-|", @cmd))
    {
        local $/; # slurp the whole response in one read
        my $response = <$curl_fh>;
        $cmd_out = $response if defined($response);

        # close() on a pipe waits for curl and stores its status in $?.
        close($curl_fh);
        if($? != 0)
        {
            print "ERROR: command [$cmd_text] failed with exit code " . ($? >> 8) . "\n";
            # Suppress this error since it's not fatal
        }
    }
    else
    {
        print "ERROR: command [$cmd_text] failed: $!\n";
        # Suppress this error since it's not fatal
    }

    if($cmd_out =~ m/error/i)
    {
        print "ERROR: command [$cmd_text] returned error response - $cmd_out\n";
        # Suppress this error since it's not fatal
    }
}


# read_feed_names($feed_filename)
#
# Reads the list of feeds to download, one name per line, and returns a
# reference to an array of the lines with their trailing newline removed.
# Dies when the file cannot be opened.
sub read_feed_names
{
    my ($feed_filename) = @_;
    my @feeds;

    # Three-argument open: the file name comes from the command line and must
    # never be interpreted as an open mode or a piped command.
    open(my $feeds_fh, "<", $feed_filename) or die "could not open file $feed_filename: $!\n";
    while(my $line = <$feeds_fh>)
    {
        chomp($line);
        push(@feeds, $line);
    }
    close($feeds_fh);
    return \@feeds;
}


# read_md5($filename)
#
# Loads the stored checksums and returns a reference to a hash mapping feed
# name to MD5. Each line of the file is "<feed name>\t<md5>".
#
# - If the file does not exist it is created empty and an empty hash is
#   returned.
# - If any line does not split into exactly two tab-separated fields, the
#   file is treated as corrupt: it is truncated and an empty hash is
#   returned, so every feed is downloaded again.
#
# A hash reference is returned in every case (never undef). Dies when the
# file cannot be opened.
sub read_md5
{
    my ($filename) = @_;
    my $md5_hash = {};

    if(! -e $filename)
    {
        open(my $new_fh, ">", $filename) or die "could not open $filename: $!\n";
        close($new_fh);
        return $md5_hash;
    }

    # Else read the file's contents
    open(my $in_fh, "<", $filename) or die "could not open $filename: $!\n";
    while(my $line = <$in_fh>)
    {
        chomp($line);
        my @arr = split(/\t/, $line);
        if(scalar(@arr) != 2)
        {
            # Line is corrupted. Truncate entire file to clear corrupt contents
            close($in_fh);
            open(my $trunc_fh, ">", $filename) or die "could not open $filename: $!\n";
            close($trunc_fh);
            return {};
        }
        $md5_hash->{$arr[0]} = $arr[1];
    }
    close($in_fh);
    return $md5_hash;
}


# save_md5($filename, $md5_hash)
#
# Writes the feed-name => MD5 hash to $filename, one "<name>\t<md5>" line per
# entry. The data is first written to "$filename.<pid>" and then renamed over
# $filename, so a crash mid-write cannot leave a half-written checksum file.
#
# Dies when the temporary file cannot be opened or fully written; a failed
# rename is only reported, leaving the previous checksum file untouched.
sub save_md5
{
    my ($filename, $md5_hash) = @_;
    my $tmp_filename = $filename . "." . $$;

    open(my $out_fh, ">", $tmp_filename) or die "could not open $tmp_filename: $!\n";
    foreach my $key (keys(%$md5_hash))
    {
        my $value = $md5_hash->{$key};
        print {$out_fh} "$key\t$value\n";
    }

    # Buffered write errors (e.g. disk full) only surface at close; do not
    # replace the good file with a truncated one.
    if(!close($out_fh))
    {
        my $close_error = "$!";
        unlink $tmp_filename;
        die "could not write $tmp_filename: $close_error\n";
    }

    # rename() instead of shelling out to mv: no shell is involved, so paths
    # containing spaces or metacharacters work. Both paths are in the same
    # directory, so the rename never crosses a filesystem.
    if(!rename($tmp_filename, $filename))
    {
        print "ERROR: could not rename $tmp_filename to $filename: $!\n";
    }
    return;
}


# fetch_new_md5($user, $pass, $server)
#
# Asks the server's listFeeds URL (format=text) for the current feed list and
# returns a reference to a hash mapping feed name to MD5. Every output line
# must split into exactly four tab-separated fields; the first is used as
# the feed name and the third as its MD5.
#
# Exits the program with status 1 when curl cannot be started or when a line
# does not have four fields. A non-zero curl exit status after the output was
# read is reported but not fatal. Always returns a hash reference, possibly
# empty.
sub fetch_new_md5
{
    my ($user, $pass, $server) = @_;
    my $new_md5_hash = {}; # feed name to md5

    # List form: curl is run without a shell, so the '?' in the URL is not
    # subject to globbing and the credentials need no quoting.
    my @cmd = (
        $CURL, "--user", "$user:$pass", "--digest", "-k",
        "$server/datafeed/listFeeds?format=text",
    );
    my $cmd_text = join(" ", @cmd); # for log messages only

    my $list_fh;
    if(!open($list_fh, "-|", @cmd))
    {
        print "ERROR: could not execute command: [$cmd_text], error code: $!\n";
        exit(1);
    }

    while(my $line = <$list_fh>)
    {
        # Deliberately not chomped: split() drops trailing empty fields, and
        # the newline keeps the fourth field present even when it is empty.
        my @arr = split(/\t/, $line);
        if(scalar(@arr) != 4)
        {
            print "ERROR: Failed to list feeds\n";
            exit(1);
        }
        $new_md5_hash->{$arr[0]} = $arr[2];
    }

    # close() on a pipe waits for curl and stores its status in $?.
    close($list_fh);
    if($? != 0)
    {
        print "ERROR: command [$cmd_text] failed with exit code " . ($? >> 8) . ", feed list may be incomplete\n";
    }

    return $new_md5_hash;
}


# parse_argv()
#
# Reads the command-line options from @ARGV and returns the list
# ($feed_filename, $md5_filename, $user, $pass, $out_dir, $region).
#
# Each recognised option consumes the argument that follows it as its value;
# the region value is upper-cased. "--help" prints the usage text and exits
# with status 0. Unknown arguments are reported and skipped. If any of the
# six values is missing or empty, the usage text is printed and the program
# exits with status 1.
sub parse_argv
{
    # Option name => slot in %value_for.
    my %slot_of = (
        "--input"    => "feed_filename",
        "--md5-file" => "md5_filename",
        "--user"     => "user",
        "--pass"     => "pass",
        "--dir"      => "out_dir",
        "--region"   => "region",
    );
    my @return_order = ("feed_filename", "md5_filename", "user", "pass", "out_dir", "region");
    my %value_for;

    my $pos = 0;
    my $total = scalar(@ARGV);
    while($pos < $total)
    {
        my $flag = $ARGV[$pos];

        if($flag eq "--help")
        {
            usage_help();
            exit(0);
        }

        if(!exists($slot_of{$flag}))
        {
            print "Unrecognized argument ($flag), skipping it\n";
            $pos = $pos + 1;
            next;
        }

        # The value is whatever follows the option (undef when the option is
        # the last argument; that is caught by the check below).
        my $value = $ARGV[$pos + 1];
        $value = uc($value) if $flag eq "--region";
        $value_for{$slot_of{$flag}} = $value;
        $pos = $pos + 2;
    }

    my @missing = grep { !defined($value_for{$_}) || $value_for{$_} eq "" } @return_order;
    if(scalar(@missing) > 0)
    {
        usage_help();
        exit(1);
    }

    return @value_for{@return_order};
} #parse_argv()


# usage_help()
#
# Prints the list of required command-line arguments to the currently
# selected output handle.
sub usage_help
{
    my @help_lines = (
        "This program requires the following arguments -\n\n",
        "\t --input <filename>: path to filename containing list of feed names to be downloaded, one per line\n",
        "\t --md5-file <filename>: path to filename where md5 checksums will be stored\n",
        "\t --user <user>: user name for logging onto Associates S3 Proxy\n",
        "\t --pass <pass>: password for logging onto Associates S3 Proxy\n",
        "\t --dir <dir>: directory where feeds will be stored\n",
        "\t --region <region>: must be one of {NA,EU,FE}\n",
    );

    # One print per line, exactly as many print calls as before.
    foreach my $help_line (@help_lines)
    {
        print $help_line;
    }
}

我大概知道应该在哪里插入解压缩命令(见上面 download_feed 中的注释),但我不知道如何解压缩每个下载的文件……

任何提示?

1 个答案:

答案 0 :(得分:0)

先检查系统中是否安装了 unzip 命令;如果已安装,请尝试:

if($success == 1)
{
    # Download succeeded: extract the archive next to the downloaded file.
    #
    # The command is a list so system() runs unzip directly, without a shell;
    # paths containing spaces or shell metacharacters are passed through
    # unchanged. "-o" overwrites existing files without prompting (the script
    # is non-interactive) and "-d" extracts into $out_dir rather than the
    # current working directory.
    # NOTE(review): this assumes the feed is a .zip archive; a .gz feed would
    # need gunzip instead - confirm against the actual feed file names.
    my @unzip_cmd = ("unzip", "-o", "$out_dir/$feed_name", "-d", $out_dir);
    system(@unzip_cmd);
    if($? != 0)
    {
        print "ERROR: command [@unzip_cmd] failed with exit code " . ($? >> 8) . "\n";
    }
}