我有一个包含以下行的bash脚本
for file in $diff_file_list
do
# replace any ? with $current_date and replace any % with $file
formatted_output_filename=$(echo $output_filename | sed "s|?|$current_date|g" | sed "s|%|_$file|g")
$pig_bin_dir/pig -param preceding=$hdfs_hadoop_pre_dir/$file -param current=$hdfs_hadoop_cur_dir/$file -param output_added=$hdfs_hadoop_delta_dir/${file}_added -param output_removed=$hdfs_hadoop_delta_dir/${file}_removed -param delimiter=$delimiter diff.pig
[ $? -ne 0 ] && die "diff of data between $previous_date and $current_date using pig failed. exiting `basename $0` script"
$hadoop_bin_dir/hadoop dfs -cat $hdfs_hadoop_delta_dir/${file}_added/* | gzip > $file_output_dir/${formatted_output_filename}_added.gz
$hadoop_bin_dir/hadoop dfs -cat $hdfs_hadoop_delta_dir/${file}_removed/* | gzip > $file_output_dir/${formatted_output_filename}_removed.gz
[ $? -ne 0 ] && die "there was a problem gzipping ${formatted_output_filename}. exiting `basename $0` script"
[ $post_diff_script ] && ./$post_diff_script $source $previous_date $current_date
done
我只希望它在文件非空时创建_removed.gz和_added.gz文件。 我试过在下面这样做但是我的脚本有问题吗?
for file in $diff_file_list
do
# replace any ? with $current_date and replace any % with $file
formatted_output_filename=$(echo $output_filename | sed "s|?|$current_date|g" | sed "s|%|_$file|g")
$pig_bin_dir/pig -param preceding=$hdfs_hadoop_pre_dir/$file -param current=$hdfs_hadoop_cur_dir/$file -param output_added=$hdfs_hadoop_delta_dir/${file}_added -param output_removed=$hdfs_hadoop_delta_dir/${file}_removed -param delimiter=$delimiter diff.pig
[ $? -ne 0 ] && die "diff of data between $previous_date and $current_date using pig failed. exiting `basename $0` script"
if [[ -s $hdfs_hadoop_delta_dir/${file}_added/* ]] ; then
echo "$hdfs_hadoop_delta_dir/${file}_added/* has data."
$hadoop_bin_dir/hadoop dfs -cat $hdfs_hadoop_delta_dir/${file}_added/* | gzip > $file_output_dir/${formatted_output_filename}_added.gz
$hadoop_bin_dir/hadoop dfs -cat $hdfs_hadoop_delta_dir/${file}_removed/* | gzip > $file_output_dir/${formatted_output_filename}_removed.gz
else
echo "$hdfs_hadoop_delta_dir/${file}_added/*is empty."
fi ;
[ $? -ne 0 ] && die "there was a problem gzipping ${formatted_output_filename}. exiting `basename $0` script"
[ $post_diff_script ] && ./$post_diff_script $source $previous_date $current_date
done
答案 0 :(得分:2)
if [[ -s file ]]
then
do_file_creation
fi
或
for f in dir/*
do
if [[ -s $f ]]
then
do_file_creation
fi
done
使用小写或大小写混合变量名称。
使用if
代替[[ ]] &&
使用缩进。
答案 1 :(得分:0)
我试图找到一个不添加循环的解决方案。但它不是那么干净:
if [[ -s `ls -S $hdfs_hadoop_delta_dir/${file}_added/* 2>/dev/null | head -1` ]] ; then
ls -S
按文件大小对文件进行排序,head -1
取最大值,然后if [[ -s
对其进行非零测试。
不幸的是,必须处理没有文件的情况。我用了2>/dev/null
。任何人都有更好的主意吗?