我从http://blog.mekk.waw.pl/archives/47-Scrap_email_addresses_from_GMail_inbox_or_other_folder.html
获得了这个perl脚本我可以在Windows上使用ActiveState Perl运行它。
我以前从未在perl中编程,但现在我需要修改脚本以满足我的需要。
我需要允许用户传递日期范围,因此只会提取该日期范围内的电子邮件地址。例如,用户可以通过这种方式运行它以从2014年2月1日至2014年2月28日的消息中提取电子邮件地址:
c:> extractor.pl --login=abc@gmail.com --password = MyPassW --folder = INBOX --begindate = 20140201 --enddate = 20140228
我发现类似的脚本支持http://ulrith.livejournal.com/484386.html的日期范围,但我不知道如何将它们组合起来。
感谢您的帮助!
package Gmail::ExtractEmails;
use Moose;
use namespace::autoclean;
use Mail::IMAPClient;
use IO::Socket::SSL;
use Email::Address;
use Encode qw(decode encode);
use Text::CSV_XS;
with 'MooseX::Getopt';
has 'folder' => (is => 'ro', isa => 'Str', default => "INBOX",
documentation => "GMail folder to scan (by default INBOX, use --list-folders to check which folders are available)");
has 'csv' => (is => 'ro', isa => 'Str', predicate => 'has_csv',
documentation => "Name of created .csv file. Printing to stdout if not set");
has 'host' => (is => 'ro', isa => 'Str', default => "imap.gmail.com",
documentation => "GMail IMAP hostname (default imap.gmail.com, change if you are using some port mapping or tunelling)");
has 'port' => (is => 'ro', isa => 'Int', default => 993,
documentation => "GMail IMAP port (default 993, change if you are using some port mapping or tunelling)");
has 'verbose' => (is => 'rw', isa => 'Bool', default => 0);
has 'list-folders' => (is => 'rw', isa => 'Bool', default => 0, accessor => 'list_folders',
documentation => "Just print names of all known folders instead of running normally");
has 'login' => (is => 'rw', isa => 'Str', required => 1,
documentation => "GMail username (either \"SomeBody\@gmail.com\", or \"SomeBody\")");
has 'password' => (is => 'rw', isa => 'Str', required => 1,
documentation => "GMail password");
has '_imap' => (is => 'ro', builder => '_build_imap', lazy => 1, init_arg => undef, predicate => '_has_imap');
sub DEMOLISH {
my $self = shift;
if($self->_has_imap) {
$self->_imap->logout;
}
}
sub _build_imap {
my $self = shift;
printf STDERR "Connecting to GMail as %s at %s:%s\n", $self->login, $self->host, $self->port
if $self->verbose;
my $socket = IO::Socket::SSL->new(
Proto => 'tcp',
PeerAddr => $self->host,
PeerPort => $self->port);
my $imap = Mail::IMAPClient->new(
Socket => $socket,
Server => $self->host,
Port => $self->port,
User => $self->login,
Password => $self->password,
Uid => 1,
)
or die "Gmail connection failed: $@\n";
unless($imap->IsAuthenticated()) {
#use Data::Dumper; print Dumper($imap->Report);
die "Gmail authorization failed. Check your username and password.\n";
}
printf STDERR "... succesfully connected to GMail\n", $self->login
if $self->verbose;
return $imap;
}
sub run {
my $self = shift;
if($self->list_folders) {
my $folders = $self->_imap->folders or die "Can't read folders list: " . $self->_imap->LastError . "\n";
print "Known folders:\n ", join("\n ", @$folders), "\n";
exit(0);
}
# Uniquifying emails. email -> label -> count
my %emails;
$self->_imap->select($self->folder);
#my $messages = $self->_imap->fetch_hash("RFC822.HEADER"); # legacy
#my $messages = $self->_imap->fetch_hash("BODY.PEEK[HEADER.FIELDS (FROM TO CC)]"); # all in one string,
my $messages = $self->_imap->fetch_hash(
"BODY.PEEK[HEADER.FIELDS (FROM)]",
"BODY.PEEK[HEADER.FIELDS (TO)]",
"BODY.PEEK[HEADER.FIELDS (CC)]"
);
foreach my $msg_id (keys %$messages) {
my $msg_data = $messages->{$msg_id};
foreach my $key (keys %$msg_data) {
my @addresses = $self->get_addresses_from_email_field($msg_data->{$key});
foreach my $a (@addresses) {
#print STDERR "Found $a->{email} ($a->{label}) in $msg_id\n"
# if $self->verbose;
$emails{ $a->{email} }->{ $a->{label} } += 1;
}
}
}
my $csv = Text::CSV_XS->new({
binary => 1, always_quote => 1, auto_diag => 2,
});
my $csv_fh;
if($self->has_csv) {
open $csv_fh, ">:encoding(utf8)", $self->csv or die "Can't create " . $self->csv . ": $!\n";
} else {
open($csv_fh, ">>&STDOUT") or die "Can't rewrite stdout\n";
binmode($csv_fh, ":encoding(utf8)");
}
$csv->combine("E-mail Address", "Name");
print $csv_fh $csv->string, "\n";
foreach my $email (sort keys %emails) {
$csv->combine($email, grep {$_} sort keys %{$emails{$email}});
print $csv_fh $csv->string, "\n";
#print $email, ": ", encode('utf8', join(", ", sort keys %{$emails{$email}})), "\n";
}
close $csv_fh or die "Can't save " . $self->csv . ": $!\n";
if($self->has_csv) {
print "Saved to ", $self->csv, "\n"
if $self->verbose;
}
}
sub get_addresses_from_email_field {
my ($self, $text) = @_;
$text = decode('MIME-Header', $text); # decode =?UTF-8?... and such
$text =~ s/[ \r\n]*\Z//; # strip trailing newlines
$text =~ s/[ \r\n]+/ /; # normalize separators to one space
my @addresses;
if($text =~ /\A(?:From|To|Cc|CC): *(.*)\Z/s) {
@addresses = Email::Address->parse($1);
}
if($text && ! @addresses) {
warn "Could not find any sensible address in the following email header:\n$text";
}
return map { { email => $_->address, label => $_->phrase || '' } } @addresses;
}
__PACKAGE__->meta->make_immutable;
1;
###########################################################################
# Main
###########################################################################
package main;
use Getopt::Long::Descriptive; # enforce sensible help
use Getopt::Long;
Getopt::Long::Configure("auto_help");
my $app = Gmail::ExtractEmails->new_with_options();
$app->run();
答案 0 :(得分:0)
这是一个Perl脚本,可以满足您的要求:
http://www.athensfbc.com/public/scrape_addrs
./ scrape_addrs -S host / user / pwd -m mailbox -a after_date -b before_date
例如:
./ scrape_addrs -S imap.gmail.com:993/user/password -m收件箱 - 2014年2月24日 -b 28-feb-2014
输出如下:
========================================
日期26-Feb-2014 23:07:17 +0000
来自“Google+”< noreply-66910ff2@plus.google.com>
到rfs@gmail.com
cc tom@xyz.net
========================================