为什么我的Perl脚本会在Windows上生成包含大文件的损坏输出?

时间:2011-09-29 14:58:13

标签: perl delimiter linefeed

我是Perl的新手,我有一个非常奇怪的print问题。

Perl程序在Windows XP上运行。它首先执行一个SQL然后遍历结果并通过5个子例程输出到5个文件。这5个文件将被加载到数据库,因此它使用|作为分隔符。每个子例程都具有以下内容。

print outfile $array[field1] . '|' . $array[field2] . '|' . $array[field3] . "\n";

奇怪的是有时程序输出正常。有时,输出已损坏,例如某些点后换行符丢失,或者数组中的值不正确。

我想知道这是否与记忆有关。输出文件大小范围从500MB到9GB。该程序一次读取SQL一条记录的输出,并一次写一条记录。

这是完整的Perl脚本。

#!/usr/bin/perl

use DBI;
use DBD::Oracle;

# Constants:
use constant field0  =>  0;
use constant field1  =>  1;
use constant field2  =>  2;
use constant field3  =>  3;
use constant field4  =>  4;
use constant field5  =>  5;
use constant field6  =>  6;
use constant field7  =>  7;
use constant field8  =>  8;
use constant field9  =>  9;
use constant field10  => 10;
use constant field11  => 11;
use constant field12  => 12;
use constant field13  => 13;
use constant field14  => 14;
use constant field15  => 15;
use constant field16  => 16;
use constant field17  => 17;
use constant field18  => 18;
use constant field19  => 19;
use constant field20  => 20;
use constant field21  => 21;
use constant field22  => 22;
use constant field23  => 23;
use constant field24  => 24;
use constant field25  => 25;
use constant field26  => 26;
use constant field27  => 27;
use constant field28  => 28;
use constant field29  => 29;
use constant field30  => 30;
use constant field31  => 31;
use constant field32  => 32;
use constant field33  => 33;
use constant field34  => 34;
use constant field35  => 35;
use constant field36  => 36;
use constant field37  => 37;
use constant field38  => 38;
use constant field39  => 39;
use constant field40  => 40;
use constant field41  => 41;

# Capture Directory Path from Environment Variable:
my $DIRECTORY = $ENV{DATADIR};

# Process Counters:
my %fileCntr = (
    ccr1  => 0,
    ccr2  => 0,
    ccr3  => 0,
    ccr4  => 0,
    ccr5  => 0
   );

# Process Control Hashes:
my %xref = ();

# Process Control Variables:
my $diag = 0;
my $proc = 0;
my $ndcc = 0;
my $previous = "";

# Claims Extract array:
my @arr = ();
my $hdr = "";

# Accept/Parse DSS Connection String:
$ENV{PSWD} =~ /(.+)\/(.+)\@(.+)/;
my $USER = $1;
my $PASS = $2;
my $CONN = 'DBI:Oracle:' . $3;

# ALTER Date format:
my $ATL = qq(ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD');

# Database Connection:
my $dbh = DBI->connect( $CONN, $USER, $PASS, { RaiseError => 1, AutoCommit => 0 } );
  $dbh->do($ATL);   # Execute ALTER session.

my $SQL = qq(
 SELECT ... here is a big sql query
);

# Open OUTPUT file for CCR processing:
open OUT1, ">$DIRECTORY/ccr1.dat" or die "Unable to open OUT1 file: $!\n";
open OUT2, ">$DIRECTORY/ccr2.dat" or die "Unable to open OUT2 file: $!\n";
open OUT3, ">$DIRECTORY/ccr3.dat" or die "Unable to open OUT3 file: $!\n";
open OUT4, ">$DIRECTORY/ccr4.dat" or die "Unable to open OUT4 file: $!\n";
open OUT5, ">$DIRECTORY/ccr5.dat" or die "Unable to open OUT5 file: $!\n";

# Redirect STDOUT to log file:
open STDOUT, ">$DIRECTORY/ccr.log"   or die "Unable to open LOG file: $!\n";

# Prepare $SQL for execution:
my $sth = $dbh->prepare($SQL);
$sth->execute();

# Produce out files:
{
  local $, = "|";
  local $\ = "\n";

  while (@arr = $sth->fetchrow_array)
  {
    # Direct Write of CCR1&2 records:
    &BuildCCR12();

    # Write and Wipe CCR3 HASH Table:
    &WriteCCR3() unless ($arr[field0] == $previous);
    &BuildCCR3();

    # Loop processing for CCR4:
    &BuildCCR4();

    # Loop processing for CCR5:
    &BuildCCR5();
  }
}

# Print Record Counts for OUTPUT files:
foreach my $key (keys %fileCntr) { print "$key: " . $fileCntr{$key} . "\n"; }

# Terminate DB connection:
$sth->finish();
$dbh->disconnect();

# Close all output files:
close(OUT1); close(OUT2); close(OUT3);
close(OUT4); close(OUT5);

{
 # Reassign Output End-of-record across subroutine block:
 local $\ = "\n";

 sub BuildCCR12
 {
  # Write CCR1 Table:
  print OUT1 $arr[field6]  . '|' . $arr[field7]   . '|' . $arr[field5]   . '|' .
     $arr[field0]          . '|' . $arr[field8]   . '|' . $arr[field9]   . '|' .
     $arr[field10]         . '|' . $arr[field11]  . '|' . $arr[field12]  . '|' .
     $arr[field13]         . '|' . $arr[field2]   . '|' . $arr[field3]   . '|' .
     $arr[field40]         . '|' . $arr[field16];

  $fileCntr{ccr1}++;

  # Write CCR2 Table:
  unless ($arr[field17] eq '###########') {
            print OUT2 ++$ndcc . "|" .  $arr[field0]     . "|" . 
            $arr[field6]       . '|' . $arr[field7]      . '|' .
            $arr[field17]      . '|' . $arr[field19]     . '|' . $arr[field18] . '|' .
            $arr[field2]       . '|' . $arr[field3]      . '|' . $arr[field39];
            $fileCntr{ccr2}++;
            }
 }

 sub WriteCCR3
 {
  unless ($previous == "")
  {
   # Produce ccr3 from DISTINCT combo listing:
   foreach $key (keys %xref) { print OUT3 $xref{$key}; $fileCntr{ccr3}++; }
   %xref = ();
  }
 }

 sub BuildCCR3
 {
  # Spin off relationship:
  for (my $i = field8; $i <= field13; $i++)
  {
   unless ($arr[$i] == -1)
   {
    $xref{$arr[field0] . "|" . $arr[$i]} = $arr[field0] . "|" . $arr[$i];
   }
  }
   $previous = $arr[field0];
 }

 sub BuildCCR4
 {
  # Spin off relationship:
  for (my $i = field26; $i <= field37; $i++)
  {
   my $sak = $arr[field0] . $arr[field6] . $arr[field7] . $arr[$i];
   unless (($arr[$i] eq '#######') or ($arr[$i] eq '######')) {
                        print OUT4 ++$diag . '|' . $arr[field0] . '|' . 
                              $arr[field6] . '|' .
                              $arr[field7] . '|' . $arr[$i];
                    $fileCntr{ccr4}++;
                  }
  }
 }

 sub BuildCCR5
 {
  # Spin off field0/Procedure relationship:
  for (my $i = field20; $i <= field23; $i++)
  {
   my $sak = $arr[field0] . $arr[field6] . $arr[field7] . $arr[$i];
   unless ($arr[$i] eq '######' or $arr[$i] eq '####') {
                 print OUT5 ++$proc . '|' .  $arr[field0] . '|' . $arr[field6] . '|' .
                         $arr[field7]   . '|' . $arr[$i];
                 $fileCntr{ccr5}++;
               }
  }
 }
}

问题在于CCR3输出。在某一点之后,换行由于某种原因消失,并且数据被损坏,好像换行符吃了一些输出。从那一点开始,它变成1条连续线。

3260183|147845
3260183|78246
3260183|13898
3260183|184783
3260183|116315
3260183|184483262216|105843262217|1461703262217|175593262217|1360303262217

另一件事是这个程序将运行接近26个小时并且在循环通过sql时,有没有机会,数据可能搞砸了?但它仍然无法解释为什么突然换行不再起作用。

1 个答案:

答案 0 :(得分:4)

我试图减少混乱。首先,您定义的常量会产生很多混乱,而不是帮助提高可读性。如果你有像

这样的东西
use constant LICENSE_NO => 42;

我理解,但如果常量只是对应于整数数组索引,那么我没有看到这一点。

我还将所有打印放在一个单独的子例程中,并将错误检查添加到printclose语句中。

我没有声称这可以解决您的问题,但这是我开始实际调试的地方。这里可能会有一些拼写错误,所以请小心。

#!/usr/bin/perl

use warnings; use strict;
use DBI;
use File::Spec::Functions qw( catfile );

my @proc = qw(ccr1 ccr2 ccr3 ccr4 ccr5);

# Capture Directory Path from Environment Variable:
my $DIRECTORY = $ENV{DATADIR};

# Process Counters:
my %fileCntr = map { $_ => 0 } @proc;

# Process Control Hashes:
my %xref = ();

# Process Control Variables:
my $diag = 0;
my $proc = 0;
my $ndcc = 0;
my $previous = "";

# Claims Extract array:
my @arr = ();
my $hdr = "";

# Accept/Parse DSS Connection String:
my ($USER, $PASS, $CONN) = ($ENV{PSWD} =~ m{^(.+)/(.+)\@(.+)});

# ALTER Date format:
my $ATL = qq(ALTER SESSION SET NLS_DATE_FORMAT = 'YYYY-MM-DD');

# Database Connection:
my $dbh = DBI->connect(
    "DBI::Oracle:$CONN", $USER, $PASS,
    { RaiseError => 1, AutoCommit => 0 },
);

$dbh->do($ATL);   # Execute ALTER session.

my $SQL = qq(
    SELECT ... here is a big sql query
);

my %outh;

for my $proc ( @proc ) {
    my $fn = catfile $DIRECTORY, "$proc.dat";
    open $outh{ $proc }, '>', $fn
        or die "Cannot open '$fn' for writing: $!";
}

# Redirect STDOUT to log file:
open STDOUT, '>', catfile($DIRECTORY, 'ccr.log')
    or die "Unable to open LOG file: $!";

# Prepare $SQL for execution:
my $sth = $dbh->prepare($SQL);
$sth->execute();

# Produce out files:

while (my @arr = $sth->fetchrow_array) {
    # Direct Write of CCR1&2 records:
    BuildCCR12(\@arr);

    # Write and Wipe CCR3 HASH Table:
    WriteCCR3(\@arr) unless ($arr[0] == $previous);
    BuildCCR3(\@arr);

    # Loop processing for CCR4:
    BuildCCR4(\@arr);

    # Loop processing for CCR5:
    BuildCCR5(\@arr);
}

# Print Record Counts for OUTPUT files:
foreach my $key (keys %fileCntr) {
    printf "%s: %s\n", $key, $fileCntr{$key};
}

# Terminate DB connection:
$sth->finish();
$dbh->disconnect();

for my $proc (keys %outh) {
    close $outh{ $proc } or die "Cannot close filehandle for '$proc': $!";
}

sub print_to {
    my ($dest, $data) = @_;

    my $fh = $outh{$dest};

    print $fh join('|', @$data), "\n"
        or die "Error writing to '$dest' file: $!";

    $fileCntr{$dest}++;
    return;
}

sub BuildCCR12 {
    my ($arr) = @_;

    print_to(ccr1 =>
        [@{$arr}[6, 7, 5, 0, 8, 9, 10, 13, 2, 3, 40, 16]]);

    if ($arr->[17] ne '###########') {
        print_to(ccr2 =>
            [++$ndcc, @{ $arr }[0, 6, 7, 17, 19, 18, 2, 3, 39]]);
    }
    return;
}

sub WriteCCR3 {
    my ($arr) = @_;

    unless ($previous) {
        # Produce ccr3 from DISTINCT combo listing:

        print_to(ccr3 => [ keys %xref ]);
        %xref = ();
    }

    return;
}

sub BuildCCR3 {
    my ($arr) = @_;

    # Spin off relationship:

    for my $i (8 .. 13) {
        unless ($arr->[$i] == -1) {
            my $k = join '|', @{ $arr }[0, $i];
            $xref{ $k } = $k;
        }
    }
    $previous = $arr->[0];

    return;
}

sub BuildCCR4 {
    my ($arr) = @_;

    # Spin off relationship:

    for my $i (26 .. 37) {
        my $sak = join '|', @{ $arr }[0, 6, 7, $i];

        my $v = $arr->[$i];

        unless ( $v =~ /^#{6,7}\z/ ) {
            print_to(ccr4 => [++$diag, @{ $arr }[0, 6, 7, $v]]);
        }
    }
    return;
}

sub BuildCCR5 {
    my ($arr) = @_;

    # Spin off field0/Procedure relationship:

    for my $i (20 .. 23) {
        my $v = $arr[$i];
        my $sak = join('', @{ $arr }[0, 6, 7], $v);

        unless ($v eq '######' or $v eq '####') {
            print_to(ccr5 => [++$proc, @{ $arr }[0, 6, 7], $v]);
        }
    }

    return;
}