我有一系列目录名称和统计信息,例如所有者,修改日期,文件数,总文件大小等。当向大约2,700个现有目录名称的bash数组添加133个新目录名时,需要大约1/2小时排序。时间是不可接受的,如何改进(注意:* 这是一个自我回答的问题)。
我尝试创建一个关联数组以加快处理速度,但它不允许顺序读取键,只是一些随机哈希表未排序的顺序。
以下是相关的代码段:
#-------------------------------------------------------------------------------
BubbleSortDirsArr () {
cp ~/.bafmanDirs ~/.bafmanDirs~ #Make backup copy
IFS="|" read -ra DirsArr < ~/.bafmanDirs
DirsArrCnt=${#DirsArr[@]}
DirsArrUnsetCount=0
TotBump=0
Sorted=false
while [[ $Sorted == false ]]; do
Sorted=true
i=0 # TODO: Use HoldNdx to update progress
LastNdx=0
LastKey=""
EOF=false # fudge it
while [[ $i -lt $DirsArrCnt ]] ; do
DirsArr[$i]=false
CurrNdx=$i
CurrKey=$(echo "${DirsArr[$(( $i + 1 ))]}" | tr -dc '[:alnum:]/')
if [[ "$CurrKey" > "$LastKey" ]] || [[ "$CurrKey" == "$LastKey" ]]; then
LastNdx=$CurrNdx
LastKey="$CurrKey"
i=$(( $i + $OneDirArrCnt))
continue
fi
HoldNdx=$(( $CurrNdx + $OneDirArrCnt )) # When done we'll restart here.
if [[ $HoldNdx -ge $DirsArrCnt ]] ; then
echo "%%%%%%%%%%%%%%%%%%%%%%%% Hold>Last OVERRIDE: "
echo "HoldNdx: $HoldNdx CurrNdx: $CurrNdx LastNdx: $LastNdx"
echo "CurrKey: $CurrKey"
echo "LastKey: $LastKey"
HoldNdx=$CurrNdx # Curr= last array entry, restart at LastKey swapped
EOF=true # fudge it
fi
# If last index > curr index, we can't swap
if [[ $LastNdx -gt $CurrNdx ]] ; then
echo "ABORT!!! LastNdx: $LastNdx greater than CurrNdx: $CurrNdx"
echo "CurrKey: $CurrKey"
echo "LastKey: $LastKey"
exit
fi
Sorted=false # Restart sort from array top later
printf "= START ====================== Loop 3:"
echo " HoldNdx: $HoldNdx CurrNdx: $CurrNdx LastNdx: $LastNdx"
echo " CurrKey: $CurrKey"
echo " LastKey: $LastKey"
Bump=0
while [[ "$CurrKey" < "$LastKey" ]] || [[ "$CurrKey" == "$LastKey" ]] ; do
Bump=$(( $Bump + 1 ))
# Move Last entry to work array
WorkArr=()
j=$LastNdx
for (( c=1; c<=$OneDirArrCnt; c++ )); do WorkArr+=("${DirsArr[j++]}"); done ;
# Move current entry to last entry
j=$LastNdx
k=$CurrNdx
for (( c=1; c<=$OneDirArrCnt; c++ )); do DirsArr[j++]="${DirsArr[k++]}"; done ;
# Move work entry to what was current entry (swap)
j=$CurrNdx
k=0
for (( c=1; c<=$OneDirArrCnt; c++ )); do DirsArr[j++]="${WorkArr[k++]}"; done ;
CurrNdx=$LastNdx # Step back one entry
LastNdx=$(( $CurrNdx - $OneDirArrCnt ))
if [[ $LastNdx -ge 0 ]]; then
LastKey=$(echo "${DirsArr[$(( $LastNdx + 1 ))]}" | tr -dc '[:alnum:]/')
else
echo "+++++++++ Loop 3 Bumped: $Bump to top CurrNdx: $CurrNdx CurrKey: $CurrKey"
echo "WorkArr: ${WorkArr[*]}"
break
fi
done # CurrKey is now > LastKey, stop swapping
TotBump=$(( $TotBump + $Bump))
printf " - END ---------------------- Loop 3:"
echo " HoldNdx: $HoldNdx CurrNdx: $CurrNdx LastNdx: $LastNdx Bumped: $Bump"
echo " CurrKey: $CurrKey"
echo " LastKey: $LastKey"
i=$HoldNdx
done
echo "Loop 2 bottom: i: $i HoldNdx: $HoldNdx CurrNdx: $CurrNdx LastNdx: $LastNdx"
echo " CurrKey: $CurrKey"
echo " LastKey: $LastKey"
TotLoop=$(( $TotLoop + 1 ))
done
printf "* * * * * * * * * Loop 1 bottom - Total Loops: $TotLoop"
echo " Bumps: $TotBump * * * * * * * * *"
echo "${DirsArr[*]}" > ~/.bafmanDirs # "*" preserves separtator, "@" does not.
} ### BubbleDirsArr ()
以下是程序输出的最后几行:
= START ====================== Loop 3: HoldNdx: 24700 CurrNdx: 24700 LastNdx: 24690
CurrKey: /home/rick/cache/yelp/WebKitCache/Version10/Blobs
LastKey: /srv
- END ---------------------- Loop 3: HoldNdx: 24700 CurrNdx: 5580 LastNdx: 5570 Bumped: 1912
CurrKey: /home/rick/cache/yelp/WebKitCache/Version10/Blobs
LastKey: /home/rick/cache/yelp/WebKitCache/Version10
Loop 2 bottom: i: 24710 HoldNdx: 24700 CurrNdx: 24700 LastNdx: 24700
CurrKey: /srv
LastKey: /srv
Loop 2 bottom: i: 24710 HoldNdx: 24700 CurrNdx: 24700 LastNdx: 24700
CurrKey: /srv
LastKey: /srv
* * * * * * * * * Loop 1 bottom - Total Loops: 135 Bumps: 257158 * * * * * * * * *
real 29m27.445s
user 14m39.636s
sys 2m5.596s
答案 0 :(得分:1)
在许多情况下,Bash数组的速度非常慢。您需要调用外部排序程序。这是一段代码:
#-------------------------------------------------------------------------------
ExternalSortDirsArr () {
cp ~/.bafmanDirs ~/.bafmanDirs~ #Make backup copy
IFS="|" read -ra DirsArr < ~/.bafmanDirs
DirsArrCnt=${#DirsArr[@]}
DirPercent=0
DirLastPercent=0
DirCount=0
DirTotal=$(( $DirsArrCnt / $OneDirArrCnt ))
DirTotal=$(( $DirTotal + $DirTotal )) # Two passes
# Named FIFO pipes used between us and spawn-progress-log to avoid
# race conditions (flashing screen and keyboard lag) over 1/2 hour
YadNamedPipe="/tmp/bafman-yad-"$(date +%s) #seconds since EPOCH
mkfifo "$YadNamedPipe"
spawn-progress-log "$YadNamedPipe" \
"bafman - Born Again File Manager" \
"Four pass directory name external sort." &
# Create Keys Index
echo " "
echo "Create Keys-Index Pairs File"
> ~/.bafmanSort # Empty existing file.
time for (( i=0; i<$DirsArrCnt; i=i+$OneDirArrCnt )) ; do
CurrKey=$(echo "${DirsArr[$(( $i + 1 ))]}" | tr -dc '[:alnum:]/')
echo "$CurrKey|$i" >> ~/.bafmanSort
# Update progress display
DirPercent=$(( $DirCount * 100 / $DirTotal ))
DirCount=$(( $DirCount + 1 ))
if [[ "$DirPercent" -ne "$DirLastPercent" ]] ; then
echo "#$CurrKey" > "$YadNamedPipe" & # Update YAD log window
DirLastPercent=$DirPercent
echo "$DirPercent" > "$YadNamedPipe" & # Update YAD progress bar
fi
done
# Call external sort program
echo " "
echo "Sort Keys-Index Pairs File"
time sort -k1 -t"|" ~/.bafmanSort -o ~/.bafmanSort
# Strip out keys
echo " "
echo "Strip out keys leaving Sorted Indices"
time cut -f2 -d '|' ~/.bafmanSort > ~/.bafmanNdx
echo " "
echo "Rewrite DirsArr by Sorted Index"
> ~/.bafmanDirs # Empty existing file.
> ~/.bafmanLog # Empty existing file.
Second=""
time while read -r line; do
j=$(( $line + $OneDirArrCnt ))
for (( i=$line; i<j; i++ )); do
echo -n "$Second""${DirsArr[i]}" >> ~/.bafmanDirs
Second="|"
# Update progress display
DirPercent=$(( $DirCount * 100 / $DirTotal ))
DirCount=$(( $DirCount + 1 ))
if [[ "$DirPercent" -ne "$DirLastPercent" ]] ; then
# echo "#$CurrKey" > "$YadNamedPipe" & # Update YAD log window
DirLastPercent=$DirPercent
echo "$DirPercent" > "$YadNamedPipe" & # Update YAD progress bar
fi
done
done < ~/.bafmanNdx
echo " "
printf "* * * * * * * * * ExternalSortDirsArr -- "
echo " Total DirsArr elements: $DirsArrCnt Added: $DirsArrAddElementCount * * * * * * * * *"
echo "100" > $YadNamedPipe & # Signal close
rm -f $YadNamedPipe # Remove FIFO named pipe for IPC
} ### ExternalSortDirsArr ()
以下是基准时间显示。请注意1/2小时如何降至9秒:
Create Keys-Index Pairs File
real 0m3.899s
user 0m0.400s
sys 0m0.424s
Sort Keys-Index Pairs File
real 0m0.013s
user 0m0.012s
sys 0m0.000s
Strip out keys leaving Sorted Indices
real 0m0.001s
user 0m0.004s
sys 0m0.000s
Rewrite DirsArr by Sorted Index
real 0m1.407s
user 0m1.268s
sys 0m0.464s
* * * * * * * * * ExternalSortDirsArr -- Total DirsArr elements: 24710 Added: 134 * * * * * * * * *
Gtk-Message: GtkDialog mapped without a transient parent. This is discouraged.
real 0m9.667s
user 0m1.828s
sys 0m0.952s