我有一个小脚本,它试图扫描目录以查找特定类型的文件。一旦填充哈希(其中键是目录名称和值,该目录中存在的文件数组)。
一旦完成,我试图通过生成一组线程(5固定数字)来存档这些目录。作业分配以固定大小的静态方式完成。
但是当我运行脚本时,我发现运行了更多的tar而不是5(和相同数量的gzip)。以下是执行相同操作的代码。
use strict;
use warnings;
use File::Find;
use Time::localtime;
use File::Copy 'cp';
use File::Copy 'mv';
use File::Path qw(make_path);
use threads;
use Thread::Queue;
# Package for debugging need to remove
use diagnostics;
use Data::Dumper;
sub CreateIndividualArchive {
my $srcDir = shift;
my $destDir = shift;
my $arrayFileList = shift;
my $pathDelimiter = "/";
#my $tempFile = "dump.txt";
my @chars = ( '0' .. '9', 'A' .. 'F' );
my $len = 8;
my $string;
while ( $len-- ) { $string .= $chars[ rand @chars ] }
my $tempFile = $string;
#print "Temp fils is $tempFile\n";
my $fh;
my $fileName = $destDir . $pathDelimiter . $tempFile;
open $fh, '>', $fileName or die "Cannot open $tempFile :$!";
my $dirName = $arrayFileList->[0];
chop($dirName);
for ( @$arrayFileList[ 1 .. $#$arrayFileList ] ) {
print $fh "$_\n";
}
close($fh);
my $tarFileList = $destDir . $pathDelimiter . $tempFile;
my $tarExt = ".tar.gz";
my $tarFileName = $destDir . $pathDelimiter . $dirName . $tarExt;
my $cmd = "tar -zcf $tarFileName -C $srcDir -T $tarFileList";
#print "Cmd is $cmd\n";
print "CMD = $cmd\n";
my @tarOutput = `tar -zcf $tarFileName -C $srcDir -T $tarFileList 2>&1`;
if ( $? == -1 ) {
print "Archiving of the files fails : $!\n";
unlink $tarFileName;
return 0;
}
unlink $tarFileList;
return 1;
}
sub Thread {
my $hashParm = shift;
my %hashFileList = %$hashParm;
my $sourcePath = shift;
my $destinationPath = shift;
my $keys;
my $values;
my @arrayValues;
my @rowData = ();
my $totalKey = keys %hashFileList;
my $pathDelimiter = "/";
#print "In thread and total keys received $totalKey\n";
while ( ( $keys, $values ) = each(%hashFileList) ) {
push( @arrayValues, $keys . $pathDelimiter );
my @row = ( $keys . $pathDelimiter, $keys . $pathDelimiter, 0 );
push( @rowData, \@row );
my @arrayParm = @{$values};
foreach my $value (@arrayParm) {
my $fileName = $$value[0];
my $fileSize = $$value[1];
push( @arrayValues, $fileName );
my @row = ( $keys . $pathDelimiter, $fileName, $fileSize );
push( @rowData, \@row );
}
#print "SourcePath $sourcePath and dest $destinationPath\n";
my $error = CreateIndividualArchive( $sourcePath, $destinationPath,
\@arrayValues );
if ( $error eq 0 ) {
print "Error while doing tar is $error\n";
}
@arrayValues = ();
@rowData = ();
}
}
sub ScanDirWithPattern {
my $sourcePath = shift;
my $hashFileList = shift;
my $pathDelimiter = "/";
my $pattern = ".txt";
if ( 0 eq opendir( DIR, $sourcePath ) ) {
print "Failed to open directory $sourcePath\n";
return 0;
}
my @dirList;
if ( 0 eq ( @dirList = readdir(DIR) ) ) {
print "Failed to read directory $sourcePath\n";
closedir(DIR);
return 0;
}
closedir(DIR);
foreach my $dir (@dirList) {
#print "Current directory is $dir\n";
next if ( $dir eq "." or $dir eq ".." );
my $currentDir = $sourcePath . $pathDelimiter . $dir;
if ( -d $currentDir ) {
if ( 0 eq opendir( DIR, $currentDir ) ) {
print "Failed to open directory $currentDir\n";
return 0;
}
my @fileList;
if ( 0 eq ( @fileList = readdir(DIR) ) ) {
print "Failed to read directory $dir\n";
closedir(DIR);
return 0;
}
closedir(DIR);
my @relativeFileArray;
foreach my $file (@fileList) {
next if ( $file eq "." or $file eq ".." );
my $currentFile =
$sourcePath . $pathDelimiter . $dir . $pathDelimiter . $file;
next if ( -d $currentFile );
if ( -f $currentFile ) {
if ( $currentFile =~ /$pattern/i ) {
my $relativeFile = $dir . $pathDelimiter . $file;
my $size = -s $currentFile;
my @currentFileArray = ( $relativeFile, $size );
print "Inserting the $relativeFile in array\n";
push( @relativeFileArray, \@currentFileArray );
}
}
}
$hashFileList->{$dir} = \@relativeFileArray;
}
}
return 1;
}
sub Create {
my ( $sourcePath, $destinationPath ) = @_;
my $pathDelimiter = "/";
my %hashFileList;
my $folderName = "temp";
my $error = ScanDirWithPattern( $sourcePath, \%hashFileList );
if ( $error eq 0 ) {
print "Error while scaning $sourcePath for files\n";
return 0;
}
my $keys;
my $values;
my @arrayValues;
my @rowData = ();
my $totalKeys = keys %hashFileList;
my $numThreads = 5; #For the time being
if ( $totalKeys le $numThreads ) {
$numThreads = $totalKeys;
}
my $bucketSize = $totalKeys / $numThreads;
my @keys = keys %hashFileList;
my @arrThreads;
my $i = 0;
my @arrHash;
my $tempDir = $destinationPath . $pathDelimiter . $folderName;
make_path($tempDir);
$destinationPath = $tempDir;
while ( my @keys2 = splice @keys, 0, $bucketSize ) {
my %hash1;
@hash1{@keys2} = @hashFileList{@keys2};
push @arrHash, \%hash1;
}
for my $href (@arrHash) {
my $t = threads->create( \&Thread, \%$href, $sourcePath,
$destinationPath );
push( @arrThreads, $t );
}
foreach (@arrThreads) {
my $num = $_->join;
#print "done with $num\n";
}
}
my $srcDir = "";
my $destDir = "";
if ( @ARGV < 2 ) {
die "$0 - Need source and destination directory\n"
. "Usage: perl $0 src dest\n";
}
$srcDir = shift;
$destDir = shift;
Create($srcDir, $destDir);
以下是ps命令的输出。
[root@localhost trunk]# ps -eaf | grep "tar -zcf"
root 1994 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test1106.tar.gz -C /root/tests -T /root/dump//temp/BFA8D1F4
root 1998 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test636.tar.gz -C /root/tests -T /root/dump//temp/8BED4FA8
root 2021 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test2573.tar.gz -C /root/tests -T /root/dump//temp/044B2F61
root 2149 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test2563.tar.gz -C /root/tests -T /root/dump//temp/9657C48F
root 2150 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test1553.tar.gz -C /root/tests -T /root/dump//temp/71BE66D1
root 2152 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test1726.tar.gz -C /root/tests -T /root/dump//temp/1B2D081F
root 2200 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test493.tar.gz -C /root/tests -T /root/dump//temp/8932236E
root 2201 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test2274.tar.gz -C /root/tests -T /root/dump//temp/F42D8053
root 2300 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test1508.tar.gz -C /root/tests -T /root/dump//temp/573093A4
root 2301 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test431.tar.gz -C /root/tests -T /root/dump//temp/1A75C3EF
root 2353 28983 0 19:54 pts/0 00:00:00 tar -zcf /root/dump//temp/Test1088.tar.gz -C /root/tests -T /root/dump//temp/02CA6015
root 2368 25225 0 19:54 pts/1 00:00:00 grep tar -zcf
[root@localhost trunk]# ps -eaf | grep "gzip"
root 2208 2200 2 19:54 pts/0 00:00:01 gzip
root 2209 2149 1 19:54 pts/0 00:00:00 gzip
root 2210 2150 1 19:54 pts/0 00:00:01 gzip
root 2302 2301 0 19:54 pts/0 00:00:00 gzip
root 2303 2300 1 19:54 pts/0 00:00:00 gzip
root 2371 2353 3 19:54 pts/0 00:00:01 gzip
root 2384 2377 0 19:55 pts/0 00:00:00 gzip
root 2387 2386 0 19:55 pts/0 00:00:00 gzip
root 2499 2389 2 19:55 pts/0 00:00:00 gzip
root 2581 2509 0 19:55 pts/0 00:00:00 gzip
root 2663 2583 4 19:55 pts/0 00:00:00 gzip
root 2691 25225 0 19:55 pts/1 00:00:00 grep gzip
root 2700 2665 0 19:55 pts/0 00:00:00 gzip
现在我只创建了5个线程,我希望只有5个tar实例和5个gzip实例。但事实似乎并非如此。有任何想法来解决这个问题。
P.S。 请注意,实际数据不是文本文件,因此归档将需要一些时间才能完成操作。此脚本也可以使用超过1K的目录
答案 0 :(得分:3)
我认为问题在于:
if ( $totalKeys le $numThreads ) {
$numThreads = $totalKeys;
}
您正在使用字符串比较而不是数字比较。这意味着如果(例如),$totalKeys
为15
且$numThreads
为5
,则会比较1
和5
以及确定$totalKeys
实际上是较小的。这会导致$numThreads
设置为更高的数字。
解决方案是对数字使用数字比较:
if ( $totalKeys < $numThreads ) { ...