Perl如何使线程等到shell完成命令的执行

时间:2013-12-02 17:20:52

标签: multithreading perl shell

我有一个小脚本,它试图扫描目录以查找特定类型的文件。一旦填充哈希(其中键是目录名称和值,该目录中存在的文件数组)。

一旦完成,我试图通过生成一组线程(5固定数字)来存档这些目录。作业分配以固定大小的静态方式完成。

但是当我运行脚本时,我发现运行了更多的tar而不是5(和相同数量的gzip)。以下是执行相同操作的代码。

use strict;
use warnings;
use File::Find;
use Time::localtime;
use File::Copy 'cp';
use File::Copy 'mv';
use File::Path qw(make_path);
use threads;
use Thread::Queue;

# Package for debugging need to remove
use diagnostics;
use Data::Dumper;

sub CreateIndividualArchive {
    my $srcDir        = shift;
    my $destDir       = shift;
    my $arrayFileList = shift;
    my $pathDelimiter = "/";

    #my $tempFile = "dump.txt";
    my @chars = ( '0' .. '9', 'A' .. 'F' );
    my $len = 8;
    my $string;
    while ( $len-- ) { $string .= $chars[ rand @chars ] }
    my $tempFile = $string;

    #print "Temp fils is $tempFile\n";
    my $fh;
    my $fileName = $destDir . $pathDelimiter . $tempFile;
    open $fh, '>', $fileName or die "Cannot open $tempFile :$!";
    my $dirName = $arrayFileList->[0];
    chop($dirName);

    for ( @$arrayFileList[ 1 .. $#$arrayFileList ] ) {
        print $fh "$_\n";
    }
    close($fh);
    my $tarFileList = $destDir . $pathDelimiter . $tempFile;

    my $tarExt      = ".tar.gz";
    my $tarFileName = $destDir . $pathDelimiter . $dirName . $tarExt;
    my $cmd         = "tar -zcf $tarFileName -C $srcDir -T $tarFileList";

    #print "Cmd is $cmd\n";
    print "CMD = $cmd\n";

    my @tarOutput = `tar -zcf $tarFileName -C $srcDir -T $tarFileList 2>&1`;
    if ( $? == -1 ) {
        print "Archiving of the files fails : $!\n";
        unlink $tarFileName;
        return 0;
    }
    unlink $tarFileList;
    return 1;
}

sub Thread {
    my $hashParm        = shift;
    my %hashFileList    = %$hashParm;
    my $sourcePath      = shift;
    my $destinationPath = shift;
    my $keys;
    my $values;
    my @arrayValues;
    my @rowData  = ();
    my $totalKey = keys %hashFileList;
    my $pathDelimiter = "/";

    #print "In thread and total keys received $totalKey\n";

    while ( ( $keys, $values ) = each(%hashFileList) ) {
        push( @arrayValues, $keys . $pathDelimiter );
        my @row = ( $keys . $pathDelimiter, $keys . $pathDelimiter, 0 );
        push( @rowData, \@row );
        my @arrayParm = @{$values};
        foreach my $value (@arrayParm) {
            my $fileName = $$value[0];
            my $fileSize = $$value[1];
            push( @arrayValues, $fileName );
            my @row = ( $keys . $pathDelimiter, $fileName, $fileSize );
            push( @rowData, \@row );
        }

        #print "SourcePath $sourcePath and dest $destinationPath\n";
        my $error = CreateIndividualArchive( $sourcePath, $destinationPath,
            \@arrayValues );
        if ( $error eq 0 ) {
            print "Error while doing tar is $error\n";
        }
        @arrayValues = ();
        @rowData     = ();
    }
}
sub ScanDirWithPattern {
    my $sourcePath   = shift;
    my $hashFileList = shift;

    my $pathDelimiter = "/";
    my $pattern       = ".txt";
    if ( 0 eq opendir( DIR, $sourcePath ) ) {
        print "Failed to open directory $sourcePath\n";
        return 0;
    }
    my @dirList;
    if ( 0 eq ( @dirList = readdir(DIR) ) ) {
        print "Failed to read directory $sourcePath\n";
        closedir(DIR);
        return 0;
    }
    closedir(DIR);

    foreach my $dir (@dirList) {

        #print "Current directory is $dir\n";
        next if ( $dir eq "." or $dir eq ".." );
        my $currentDir = $sourcePath . $pathDelimiter . $dir;
        if ( -d $currentDir ) {
            if ( 0 eq opendir( DIR, $currentDir ) ) {
                    print "Failed to open directory $currentDir\n";
                return 0;
            }
            my @fileList;
            if ( 0 eq ( @fileList = readdir(DIR) ) ) {
                print "Failed to read directory $dir\n";
                closedir(DIR);
                return 0;
            }
            closedir(DIR);
            my @relativeFileArray;
            foreach my $file (@fileList) {
                next if ( $file eq "." or $file eq ".." );
                my $currentFile =
                  $sourcePath . $pathDelimiter . $dir . $pathDelimiter . $file;
                next if ( -d $currentFile );
                if ( -f $currentFile ) {
                    if ( $currentFile =~ /$pattern/i ) {
                        my $relativeFile     = $dir . $pathDelimiter . $file;
                        my $size             = -s $currentFile;
                        my @currentFileArray = ( $relativeFile, $size );
                            print "Inserting the $relativeFile in array\n";
                        push( @relativeFileArray, \@currentFileArray );
                    }
                }
            }
            $hashFileList->{$dir} = \@relativeFileArray;
        }
    }
    return 1;
}

sub Create {
    my ( $sourcePath, $destinationPath ) = @_;
    my $pathDelimiter = "/";
    my %hashFileList;
    my $folderName = "temp";
    my $error = ScanDirWithPattern( $sourcePath, \%hashFileList );

    if ( $error eq 0 ) {
            print "Error while scaning $sourcePath for files\n";
        return 0;
    }
    my $keys;
    my $values;
    my @arrayValues;
    my @rowData = ();
    my $totalKeys = keys %hashFileList;

    my $numThreads = 5;    #For the time being
    if ( $totalKeys le $numThreads ) {
        $numThreads = $totalKeys;
    }
    my $bucketSize = $totalKeys / $numThreads;
    my @keys       = keys %hashFileList;
    my @arrThreads;
    my $i = 0;
    my @arrHash;
    my $tempDir = $destinationPath . $pathDelimiter . $folderName;
    make_path($tempDir);
    $destinationPath = $tempDir;

    while ( my @keys2 = splice @keys, 0, $bucketSize ) {
        my %hash1;
        @hash1{@keys2} = @hashFileList{@keys2};
        push @arrHash, \%hash1;
    }
    for my $href (@arrHash) {
        my $t = threads->create( \&Thread, \%$href, $sourcePath,
            $destinationPath );
        push( @arrThreads, $t );
    }
    foreach (@arrThreads) {
        my $num = $_->join;

        #print "done with $num\n";
    }
}

my $srcDir = "";
my $destDir = "";
if ( @ARGV < 2 ) {
    die "$0 - Need source and destination directory\n"
    . "Usage: perl $0 src dest\n";
}
$srcDir = shift;
$destDir = shift;
Create($srcDir, $destDir);

以下是ps命令的输出。

[root@localhost trunk]# ps -eaf | grep "tar -zcf"
root      1994 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test1106.tar.gz -C /root/tests -T /root/dump//temp/BFA8D1F4
root      1998 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test636.tar.gz -C /root/tests -T /root/dump//temp/8BED4FA8
root      2021 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test2573.tar.gz -C /root/tests -T /root/dump//temp/044B2F61
root      2149 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test2563.tar.gz -C /root/tests -T /root/dump//temp/9657C48F
root      2150 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test1553.tar.gz -C /root/tests -T /root/dump//temp/71BE66D1
root      2152 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test1726.tar.gz -C /root/tests -T /root/dump//temp/1B2D081F
root      2200 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test493.tar.gz -C /root/tests -T /root/dump//temp/8932236E
root      2201 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test2274.tar.gz -C /root/tests -T /root/dump//temp/F42D8053
root      2300 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test1508.tar.gz -C /root/tests -T /root/dump//temp/573093A4
root      2301 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test431.tar.gz -C /root/tests -T /root/dump//temp/1A75C3EF
root      2353 28983  0 19:54 pts/0    00:00:00 tar -zcf /root/dump//temp/Test1088.tar.gz -C /root/tests -T /root/dump//temp/02CA6015
root      2368 25225  0 19:54 pts/1    00:00:00 grep tar -zcf
[root@localhost trunk]# ps -eaf | grep "gzip"
root      2208  2200  2 19:54 pts/0    00:00:01 gzip
root      2209  2149  1 19:54 pts/0    00:00:00 gzip
root      2210  2150  1 19:54 pts/0    00:00:01 gzip
root      2302  2301  0 19:54 pts/0    00:00:00 gzip
root      2303  2300  1 19:54 pts/0    00:00:00 gzip
root      2371  2353  3 19:54 pts/0    00:00:01 gzip
root      2384  2377  0 19:55 pts/0    00:00:00 gzip
root      2387  2386  0 19:55 pts/0    00:00:00 gzip
root      2499  2389  2 19:55 pts/0    00:00:00 gzip
root      2581  2509  0 19:55 pts/0    00:00:00 gzip
root      2663  2583  4 19:55 pts/0    00:00:00 gzip
root      2691 25225  0 19:55 pts/1    00:00:00 grep gzip
root      2700  2665  0 19:55 pts/0    00:00:00 gzip

现在我只创建了5个线程,我希望只有5个tar实例和5个gzip实例。但事实似乎并非如此。有任何想法来解决这个问题。

P.S。 请注意,实际数据不是文本文件,因此归档将需要一些时间才能完成操作。此脚本也可以使用超过1K的目录

1 个答案:

答案 0 :(得分:3)

我认为问题在于:

if ( $totalKeys le $numThreads ) {
    $numThreads = $totalKeys;
}

您正在使用字符串比较而不是数字比较。这意味着如果(例如),$totalKeys15$numThreads5,则会比较15以及确定$totalKeys实际上是较小的。这会导致$numThreads设置为更高的数字。

解决方案是对数字使用数字比较:

if ( $totalKeys < $numThreads ) { ...