我有剧本:
# N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
# N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
# N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
# N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
# N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
open( CONSULTS, "confile" );
@scons = <CONSULTS>;
close CONSULTS;
my %is_trade_id_unique;
foreach ( reverse sort consort @scons ) {
chomp;
#print $_. "\n";
if ( $_ =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
my ( $trade_id, $date, $month, $row_num ) = ( $1, $2, $3, $4 );
if ( !$is_trade_id_unique{$trade_id} ) {
print $_. "\n";
$is_trade_id_unique{$trade_id} = 1;
}
#print $_."\n";
}
}
#N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
sub consort {
$aa = $a;
$bb = $b;
# save our variables because our sort routine affects them. If I "chomp $a"
# that will actually change the line seen in the foreach loop that calls this.
chomp $aa;
chomp $bb;
$aa =~ s/^ *//;
$bb =~ s/^ *//;
my %months = (
FY => 0,
Jan => 1,
Feb => 2,
Mar => 3,
Apr => 4,
May => 5,
Jun => 6,
Jul => 7,
Aug => 8,
Sep => 9,
Oct => 10,
Nov => 11,
Dec => 12,
);
my ( $trade_id, $date, $month, $row_num );
my ( $btrade_id, $bdate, $bmonth, $brow_num );
if ( $aa =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
( $trade_id, $date, $month, $row_num ) = ( $1, $2, $months{$3}, $4 );
}
if ( $bb =~ m/(\w+_\d+_\w+)_(\d+)-([A-Za-z]{3})-2011_(\d+)[.]xml/i ) {
( $btrade_id, $bdate, $bmonth, $brow_num ) =
( $1, $2, $months{$3}, $4 );
}
$trade_id cmp $btrade_id
|| $month <=> $bmonth
|| $date <=> $bdate
|| $row_num <=> $brow_num;
}
我将此脚本转换为
#!/usr/bin/perl
use strict;
use warnings;
#use Smart::Comments;
use constant RegExp_parse_name => qr/(\w+)_(\d{2})-(\w{3})-(\d{4})_(\d+)/;
#qr/([A-Z0-9]+_\d+_[A-Z0-9]+)_(\d+)-([A-Z][a-z]{2})-(20\d{2})_(\d+)[.]xml/;
#create month hash
my @month = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/;
my %months;
foreach my $index ( 0 .. $#month ) { $months{ $month[$index] } = $index }
#generate tmp array for special sort
my @tmp_scons;
while ( my $str = <DATA> ) {
chomp($str);
my ( $trade_id, $date, $month, $year, $row_num ) =
$str =~ RegExp_parse_name;
$trade_id or next;
$month = $months{$month};
push @tmp_scons, [ "$trade_id:$year-$month-$date:$row_num", $str ];
}
my @scons = map $_->[1], sort { $a cmp $b } @tmp_scons;
### @tmp_scons:@tmp_scons
### @scons:@scons
### %months:%months
my %is;
foreach my $str (@scons) {
my ( $trade_id, $date, $month, $year, $row_num ) =
$str =~ RegExp_parse_name;
if ( !$is{$trade_id} ) {
print "$str\n";
}
$is{$trade_id}++;
#print "$str\n";
}
__DATA__
N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
但它没有正确排序问题是什么?
答案 0 :(得分:13)
在这一行:
my @scons = map $_->[1], sort { $a cmp $b } @tmp_scons;
您正在对转换后的数据进行排序,然后拉出原始数据。但是,在排序块中,当您编写$a cmp $b
时,您正在比较数组引用,因此perl正在执行类似'ARRAY(0x123412)' cmp 'ARRAY(0x234234)'
的操作,而不是查看转换后的数据,这是在该数组的第一个元素中
重写该行,如下所示:
my @scons = map $_->[1], sort { $a->[0] cmp $b->[0] } @tmp_scons;
您将正确地对转换后的值进行排序。
答案 1 :(得分:0)
#as a result
#!/usr/bin/env perl
######################################
# $URL: http://mishin.narod.ru $
# $Date: 2011-09-14 19:53:20 +0300 (Web, 14 Sep 2011) $
# $Author: mishin nikolay $
# $Revision: 1.02 $
# $Source: get_latest.pl $
# $Description: Sort trades and get latest $
##############################################################################
use strict;
use warnings;
use utf8;
use Data::Dumper;
use Carp;
use English qw(-no_match_vars);
our $VERSION = '0.01';
my $RGX_SHORT_MESS = qr/^(\w+)_(\d{2})-(\w{3})-(\d{4})_(\d+)/smo;
my $RGX_LONG_MESS = qr/^message[.](\w+)[.](\w+)_(\d{2})-(\w{3})-(\d{4})/smo;
#create month hash
my %months;
# two symbol for correct literal matching
@months{qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec )} =
( '00' .. '11' );
my ( $result, $index );
my $file = shift; #'file_names.txt';
open my $fh, q{<}, $file or croak "unable to open:$file $ERRNO";
process_data($fh); #my @file_names = <$fh>;
close $fh or croak "unable to close: $file $ERRNO";
sub process_data {
my ($fh) = @_;
while ( my $str = <$fh> ) {
chomp $str;
my $search_str = $str;
my $trade_id;
if ( $search_str =~ s/$RGX_SHORT_MESS/$4-$months{$3}-$2:$5/sm ) {
$trade_id = $1;
}
elsif ( $search_str =~ s/$RGX_LONG_MESS/$5-$months{$4}-$3:$1/sm ) {
$trade_id = $2;
}
else { next }
# so, from now we are search BIGGEST value & ignore less
next
if ( exists $index->{$trade_id}
&& ( $index->{$trade_id} gt $search_str ) );
$index->{$trade_id} = $search_str;
$result->{$trade_id} = $str;
}
# $result
foreach ( reverse sort keys %{$result} ) {
print $result->{$_} . "\n";
}
return;
}
__DATA__
N1089767N_7_SWOPT_03-Jul-2011_78919186.xml
N1089767N_7_SWOPT_25-Jun-2011_72745892.xml
N1089772L_9_SWOPT_03-Jul-2011_78979055.xml
N1089772L_9_SWOPT_20-Jul-2011_69380887.xml
N1089772L_9_SWOPT_29-Jun-2011_74754662.xml
message.110530033311A4259348AS26.A4259348AS_26_SWOPT_01-Jul-2011.xml
message.110530033311A4259348AS26.A4259348AS_26_SWOPT_31-May-2011.xml
A4259348AS_26_SWOPT_29-Jun-2011_74754662.xml