适用于Windows的Perl - 如何允许Gmail电子邮件的用户输入日期范围解决提取程序脚本

时间:2014-03-03 07:29:46

标签: perl

我从http://blog.mekk.waw.pl/archives/47-Scrap_email_addresses_from_GMail_inbox_or_other_folder.html

获得了这个perl脚本

我可以在Windows上使用ActiveState Perl运行它。

我以前从未在perl中编程,但现在我需要修改脚本以满足我的需要。

我需要允许用户传递日期范围,因此只会提取该日期范围内的电子邮件地址。例如,用户可以通过这种方式运行它以从2014年2月1日至2014年2月28日的消息中提取电子邮件地址:

c:> extractor.pl --login=abc@gmail.com --password = MyPassW --folder = INBOX --begindate = 20140201 --enddate = 20140228

我发现类似的脚本支持http://ulrith.livejournal.com/484386.html的日期范围,但我不知道如何将它们组合起来。

感谢您的帮助!

package Gmail::ExtractEmails;
use Moose;
use namespace::autoclean;
use Mail::IMAPClient;
use IO::Socket::SSL;
use Email::Address;
use Encode qw(decode encode);
use Text::CSV_XS;

with 'MooseX::Getopt';

has 'folder'  => (is => 'ro', isa => 'Str', default => "INBOX",
                  documentation => "GMail folder to scan (by default INBOX, use --list-folders to check which folders are available)");
has 'csv' => (is => 'ro', isa => 'Str', predicate => 'has_csv',
              documentation => "Name of created .csv file. Printing to stdout if not set");
has 'host'  => (is => 'ro', isa => 'Str', default => "imap.gmail.com",
                documentation => "GMail IMAP hostname (default imap.gmail.com, change if you are using some port mapping or tunelling)");
has 'port'  => (is => 'ro', isa => 'Int', default => 993,
                documentation => "GMail IMAP port (default 993, change if you are using some port mapping or tunelling)");
has 'verbose' => (is => 'rw', isa => 'Bool', default => 0);
has 'list-folders' => (is => 'rw', isa => 'Bool', default => 0, accessor => 'list_folders',
                       documentation => "Just print names of all known folders instead of running normally");
has 'login'   => (is => 'rw', isa => 'Str', required => 1,
                  documentation => "GMail username (either \"SomeBody\@gmail.com\", or \"SomeBody\")");
has 'password' => (is => 'rw', isa => 'Str', required => 1,
                   documentation => "GMail password");

has '_imap' => (is => 'ro', builder => '_build_imap', lazy => 1, init_arg => undef, predicate => '_has_imap');

sub DEMOLISH {
    my $self = shift;
    if($self->_has_imap) {
        $self->_imap->logout;
    }
}

sub _build_imap {
    my $self = shift;

    printf STDERR "Connecting to GMail as %s at %s:%s\n", $self->login, $self->host, $self->port
      if $self->verbose;

    my $socket = IO::Socket::SSL->new(
        Proto => 'tcp',
        PeerAddr => $self->host,
        PeerPort => $self->port);

    my $imap = Mail::IMAPClient->new(
        Socket => $socket,
        Server => $self->host,
        Port => $self->port,
        User => $self->login,
        Password => $self->password,
        Uid => 1,
       )
      or die "Gmail connection failed: $@\n";

    unless($imap->IsAuthenticated()) {
        #use Data::Dumper; print Dumper($imap->Report);
        die "Gmail authorization failed. Check your username and password.\n";
    }
    printf STDERR "... succesfully connected to GMail\n", $self->login
      if $self->verbose;

    return $imap;
}

sub run {
    my $self = shift;

    if($self->list_folders) {
        my $folders = $self->_imap->folders or die "Can't read folders list: " . $self->_imap->LastError . "\n";
        print "Known folders:\n    ", join("\n    ", @$folders), "\n";
        exit(0);
    }

    # Uniquifying emails. email -> label -> count
    my %emails;

    $self->_imap->select($self->folder);
    #my $messages = $self->_imap->fetch_hash("RFC822.HEADER"); # legacy
    #my $messages = $self->_imap->fetch_hash("BODY.PEEK[HEADER.FIELDS (FROM TO CC)]"); # all in one string,
    my $messages = $self->_imap->fetch_hash(
        "BODY.PEEK[HEADER.FIELDS (FROM)]",
        "BODY.PEEK[HEADER.FIELDS (TO)]",
        "BODY.PEEK[HEADER.FIELDS (CC)]"
       );

    foreach my $msg_id (keys %$messages) {
        my $msg_data = $messages->{$msg_id};
        foreach my $key (keys %$msg_data) {
            my @addresses = $self->get_addresses_from_email_field($msg_data->{$key});
            foreach my $a (@addresses) {
                #print STDERR "Found $a->{email} ($a->{label}) in $msg_id\n"
                #  if $self->verbose;
                $emails{ $a->{email} }->{ $a->{label} } += 1;
            }
        }
    }

    my $csv = Text::CSV_XS->new({
        binary => 1, always_quote => 1, auto_diag => 2,
    });

    my $csv_fh;
    if($self->has_csv) {
        open $csv_fh, ">:encoding(utf8)", $self->csv or die "Can't create " . $self->csv . ": $!\n";
    } else {
        open($csv_fh, ">>&STDOUT") or die "Can't rewrite stdout\n";
        binmode($csv_fh, ":encoding(utf8)");
    }

    $csv->combine("E-mail Address", "Name");
    print $csv_fh  $csv->string, "\n";

    foreach my $email (sort keys %emails) {
        $csv->combine($email, grep {$_} sort keys %{$emails{$email}});
        print $csv_fh $csv->string, "\n";
        #print $email, ": ", encode('utf8', join(", ", sort keys %{$emails{$email}})), "\n";
    }

    close $csv_fh or die "Can't save " .  $self->csv . ": $!\n";

    if($self->has_csv) {
        print "Saved to ", $self->csv, "\n"
          if $self->verbose;
    }
}

sub get_addresses_from_email_field {
    my ($self, $text) = @_;
    $text = decode('MIME-Header', $text);   # decode =?UTF-8?... and such
    $text =~ s/[ \r\n]*\Z//;   # strip trailing newlines
    $text =~ s/[ \r\n]+/ /;    # normalize separators to one space
    my @addresses;
    if($text =~ /\A(?:From|To|Cc|CC): *(.*)\Z/s) {
        @addresses = Email::Address->parse($1);
    }
    if($text && ! @addresses) {
        warn "Could not find any sensible address in the following email header:\n$text";
    }

    return map { { email => $_->address, label => $_->phrase || '' } } @addresses;
}

__PACKAGE__->meta->make_immutable;
1;

###########################################################################
# Main
###########################################################################

package main;
use Getopt::Long::Descriptive; # enforce sensible help
use Getopt::Long;
Getopt::Long::Configure("auto_help");

my $app = Gmail::ExtractEmails->new_with_options();
$app->run();

1 个答案:

答案 0 :(得分:0)

这是一个Perl脚本,可以满足您的要求:

http://www.athensfbc.com/public/scrape_addrs

./ scrape_addrs -S host / user / pwd -m mailbox -a after_date -b before_date

例如:

./ scrape_addrs       -S imap.gmail.com:993/user/password       -m收件箱        - 2014年2月24日       -b 28-feb-2014

输出如下:

========================================
日期26-Feb-2014 23:07:17 +0000
来自“Google+”< noreply-66910ff2@plus.google.com>
到rfs@gmail.com
cc tom@xyz.net
========================================