Question

我正在尝试弄清楚如何使用一段用 Perl 编写的代码，但我对 Perl 语法不太熟悉。有人能告诉我，读入 @metafilecache 的那个文件应该是什么格式吗？代码无法从该文件中读取采样率，而我不确定自己的文件格式错在哪里。下面是我认为相关的代码摘录：

# Positional command-line arguments: target network code, target station
# code, and the station directory (GetSampRate() reads metadata.txt from it).
my $tnet = $ARGV[0];
my $tsta = $ARGV[1];
my $stadir = $ARGV[2];

# Create the results directory if it does not exist yet.  "or" is required
# here: with "||" the die was attached to the (always true) directory name,
# so a failed mkdir went unnoticed.  mkdir reports its error in $!, not $?.
if ( ! -d "$targetdir" ) {
  mkdir "$targetdir" or die "Cannot create $targetdir: $!\n";
}

# The PDF bin base directory must already exist.
die "Cannot find PDF bin base dir: $pdfbinbase\n" if ( ! -d "$pdfbinbase" );

# Day-level targets: "loc.chan.year.day" => path of that day's .bin file
my %targetdays = ();
# Channels seen: "loc.chan" => 1
my %targetchan = ();

# Collect target files in the $pdfbinbase dir, limited by $changlob
foreach my $nsldir (glob("$pdfbinbase/{CHRYS}/$tnet.$tsta.*")) {
  next if ( ! -d "$nsldir" ); # Limit to directories

  # Extract network, station and location ID from directory name
  my ($net,$sta,$loc) = $nsldir =~ /\/(\w+)\.(\w+)\.([\w-]+)$/;

  # A name that is not of the form Net.Sta.Loc leaves the captures
  # undefined; skip it here instead of comparing undef values below.
  if ( ! defined $loc ) {
    print "Cannot parse Net.Sta.Loc from directory name: $nsldir\n";
    next;
  }

  if ( $net ne $tnet ) {
    print "Target network ($tnet) != network ($net)\n";
    next;
  }
  if ( $sta ne $tsta ) {
    print "Target station ($tsta) != station ($sta)\n";
    next;
  }

  foreach my $chandir (glob("$nsldir/$changlob")) {
    next if ( ! -d "$chandir" ); # Limit to directories

    # Extract channel code from directory name
    my ($chan) = $chandir =~ /.*\/([\w\d]+)$/;
    next if ( ! defined $chan ); # Not a channel directory name

    foreach my $yeardir (glob("$chandir/Y*")) {
      next if ( ! -d "$yeardir" ); # Limit to directories

      # Extract year from directory name (Y followed by exactly 4 digits)
      my ($year) = $yeardir =~ /^.*\/Y(\d{4,4})$/;
      next if ( ! defined $year ); # e.g. "Yold": would give a key without year

      foreach my $daybin (glob("$yeardir/D*.bin")) {
        next if ( ! -f "$daybin" ); # Limit to regular files

        # Extract day from file name (D followed by exactly 3 digits)
        my ($day) = $daybin =~ /^.*\/D(\d{3,3})\.bin$/;
        next if ( ! defined $day ); # Would give a key without day

        $targetdays{"$loc.$chan.$year.$day"} = $daybin;
        $targetchan{"$loc.$chan"} = 1;
      }
    }
  }
}
     # At verbosity above 1, list every collected target day in sorted key
     # order, followed by the number of targets.
     if ( $verbose > 1 ) {
       my @daykeys = sort keys %targetdays;

       print "Target days from PDF bin files:\n";
       print "Target day: $_ => $targetdays{$_}\n" for @daykeys;

       my $count = @daykeys;
       print "Targets: $count\n";
     }

 # Remove targets that have already been calculated by checking
 # results files against targets.
 foreach my $tchan ( keys %targetchan ) {
   my ($loc,$chan) = split (/\./, $tchan);

   # Generate target file name; a "--" location ID is left out of the name
   my $targetfile = ( $loc ne "--" ) ? "$targetdir/$prefix-$loc.$chan"
                                     : "$targetdir/$prefix-$chan";

   print "\nChecking target file for previous results: $targetfile\n"
     if ( $verbose );

   next if ( ! -f "$targetfile" );

   # Open result file and remove any targets that are included.
   # "or" (not "||") so that a failed open really skips this channel
   # instead of reading from an unopened handle.
   open( my $in, '<', $targetfile ) or next;

   while ( my $line = <$in> ) {
     next if ( $line =~ /^YEAR\.DAY/ ); # Header line

     # Result lines start with "<year>.<day>"; ignore anything else so
     # that no key is built from undefined values.
     my ($year,$day) = $line =~ /^(\d+)\.(\d+)/ or next;

     # Delete this target
     delete $targetdays{"$loc.$chan.$year.$day"};
   }

   close $in;
 }

  # At verbosity above 1, list the target days that are still left in
  # %targetdays, in sorted key order, followed by their number.
  if ( $verbose > 1 ) {
    my @remaining = sort keys %targetdays;

    print "Remaining target days:\n";
    print "Target day: $_ => $targetdays{$_}\n" for @remaining;

    my $count = @remaining;
    print "Remaining Targets: $count\n";
  }
 # Declared here; not referenced anywhere in the visible excerpt.
 my %targetfiles = ();

 # Calculate and store PDF mode for each target day
 # NOTE(review): the body of this loop continues past the end of the
 # excerpt; only the sample-rate lookup is visible here.
 TARGET: foreach my $tday (sort keys %targetdays) {
 # Keys of %targetdays have the form "loc.chan.year.day"
 my ($loc,$chan,$year,$day) = split (/\./, $tday);

 # Per-day working storage; presumably filled further down in the loop
 # (not visible in this excerpt) - TODO confirm.
 my %power = ();
 my %count = ();
 my @period = ();

 # Determine sampling rate
 my $samprate = GetSampRate ($tnet,$tsta,$loc,$chan);
 # NOTE(review): this print runs before the defined check below, so
 # $samprate may be undef when it is interpolated here.
 print "Samplerate for $tnet $tsta $loc $chan is: $samprate\n" if (     
 $verbose );
 # No sample rate available: skip this target day.  The four stations
 # listed below are skipped silently; any other station gets a message.
 if ( ! defined $samprate ) {
   if ( ($tsta eq "ECSD") || ($tsta eq "SFJ") || ($tsta eq "CASEE") ||     
     ($tsta eq "JSC") ){
     next;
   }
   else {
     # NOTE(review): the string literal below spans two source lines, so
     # the printed message contains a line break and the indentation
     # before the channel name - confirm whether that is intended.
     print "Cannot determine sample rate for channel 
     $tnet.$tsta.$loc.$chan\n";
     next;
   }
 }

这是子程序GetSampRate：

sub GetSampRate {    # GetSampRate (net,sta,loc,chan)

  # Return the highest sample rate listed for a channel in the station
  # metadata file, or undef when it cannot be determined.
  #
  # The file "$stadir/metadata.txt" is read on first use and its lines are
  # kept in @metafilecache.  A line belongs to the channel when it starts
  # with the source name "Net_Sta_Loc_Chan" (a "--" location ID gives an
  # empty Loc, i.e. "Net_Sta__Chan").  Lines are split on tabs and the rate
  # is taken from the eighth field.
  #
  # Arguments: network, station, location ID and channel codes.
  # Returns:   the largest numeric rate found; undef if the metadata file is
  #            missing or unreadable, or if no matching line carries a
  #            numeric rate in its eighth field.

  my ($net, $sta, $loc, $chan) = @_;

  my $samprate = undef;

  # Generate source name: Net_Sta_Loc_Chan
  my $srcname = "${net}_${sta}_";
  $srcname   .= ($loc eq "--") ? "_" : "${loc}_";
  $srcname   .= "$chan";

  # Load the metadata file only while the cache is still empty
  if ( ! @metafilecache ) {

    my $metafile = "$stadir/metadata.txt";

    if ( ! -f "$metafile" ) {
      print "GetSampRate(): Cannot find metadata file: $metafile\n";
      return undef;
    }

    # Open metadata file (3-arg open: the name is never parsed as a mode)
    my $mf;
    if ( ! open( $mf, '<', $metafile ) ) {
      print "GetSampRate(): Cannot open: $metafile\n";
      return undef;
    }

    # Read all lines in the metafilecache
    @metafilecache = <$mf>;

    close $mf;
  }

  # Read all lines starting with srcname into @lines.  \Q...\E makes the
  # name match literally even if a code contains regex metacharacters.
  my @lines = grep { /^\Q$srcname\E/ } @metafilecache;

  # Find maximum of sample rates for this channel
  foreach my $line ( @lines ) {
    my @fields = split(/\t/, $line);
    my $rate   = $fields[7];

    # Line has fewer than eight fields
    next if ( ! defined $rate );

    # Strip surrounding whitespace; the line's trailing newline is still
    # attached when the rate is the last field.
    $rate =~ s/^\s+|\s+$//g;

    # Ignore empty or non-numeric values instead of comparing them
    next if ( $rate !~ /^[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?$/ );

    $samprate = $rate if (!defined $samprate || $rate > $samprate);
  }

  return $samprate;
}

Answer 1

您展示的代码非常笨重。

就这段代码本身而言，该文件名为 $stadir/metadata.txt。至于 $stadir 的值我帮不上忙，因为它要么未定义，要么是在别处设置的全局变量——这不是一个好的设计
之后，@metafilecache = <MF>将整个文件加载到数组@metafilecache中，在每个元素的末尾留下一个尾随的换行符
然后 my @lines = grep { /^$srcname/ } @metafilecache 把 @metafilecache 中所有以 $srcname 所保存的字符串开头的行复制到 @lines 中。这里又用到了一个本不该使用的全局变量
接下来的 for 循环按制表符（"\t" 或 "\x09"）拆分每一行，并将 $rate 设为第八个字段（$fields[7]）。在每次迭代中，如果 $rate 的最新值大于当前保存的最大值，就会更新 $samprate

我希望有帮助

Perl行/分隔符文件格式化语法

1 个答案: