从文件中提取行块时,我遇到了一个小问题。请考虑以下两个文件:
File-1
1.20/abc/this_is_test_1
perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
exec perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp
perl/LRP/BaseLibs/close-MMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
this/or/that
File-2
exec 1.20/setup/testird
exec 1.20/sql/temp/Test3
exec 1.20/setup/testxyz
exec 1.20/sql/fondle_opr_sql_labels
exec 1.20/setup/testird
exec 1.20/sql/temp/NEWTest
exec 1.20/setup/testxyz
exec 1.20/sql/fondle_opr_sql_xfer
exec 1.20/setup/testird
exec 1.20/sql/set_sec_not_0
exec 1.20/setup/testpqr
exec 1.20/sql/sql_ba_statuses_on_mult
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess1
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess3
exec 1.20/setup/testird
exec 1.20/sql/sqlmenu_purr_labl
exec 1.20/sql/est_time_at_non_drp_plc
exec 1.20/sql/half_Brd_Supply_mix_single
exec 1.20/setup/testird
exec 1.20/sql/temp/Test
exec 1.20/setup/testird
exec 1.20/sql/temp/Test2
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM
exec 1.20/setup/testmiddle
exec 1.20/sql/collective_reads
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1
exec perl/LRP/SetupReq/abcDEF
exec perl/BaseLibs/launch_client("sqlC","LRP")
exec perl/LRP/LRP-perl-4.20/fireTrigger
现在,对于File-1中的每一行,我想从File-2中提取相关的行块。 File-2中的块定义如下
exec 1.20/setup/xxxxx
blah blah blah
blah blah blah
.
.
.
all lines till next setup line is found
例如
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1
或
exec perl/LRP/SetupReq/xxxxx
blah blah blah
blah blah blah
.
.
.
all lines till next setup line is found
例如
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM
到目前为止,我已设法通过以下 Shell 脚本从 File-2 中提取相关块:
#set -x
# Command-line arguments: $1 is the file listing the tests to look up,
# $2 is the batch file the blocks are extracted from.
FLBATCHLIST="$1"
BATCHFILE="$2"

# Scratch directory; whatever an earlier run left in it is removed.
TEMPDIR="/usr/tmp/tempBatchDir"
rm -rf "${TEMPDIR}"/*

# Results written by the script.
FAILEDBATCH="${TEMPDIR}/FailedBatch.test"
LOGFILE="${TEMPDIR}/log.txt"

# Intermediate files, all kept inside the scratch directory.
WORKFILE="${TEMPDIR}/failedTestList.txt"
CPBATCHFILE="${TEMPDIR}/orig.test"
TESTSETFILE="${TEMPDIR}/testset.txt"
TEMPFILE="${TEMPDIR}/temp.txt"
DIFFFILE="${TEMPDIR}/diff.txt"
#######################################
# Append to $FAILEDBATCH every block of the batch copy that contains the
# given test.
#
# A block starts at a line matching one of the setup patterns in the awk call
# below and runs up to, but not including, the next such line.  The batch
# copy is consumed block by block, so it is empty when the function returns.
#
# Globals:   CPBATCHFILE (read, emptied), TESTSETFILE and DIFFFILE (scratch),
#            FAILEDBATCH and LOGFILE (appended to),
#            TEMPDIR (test.txt is written there when the test is not found)
# Arguments: $1 - test name; used as a case-insensitive grep pattern that is
#                 anchored at the end of the line
# Outputs:   a "FATAL ERROR" line on stdout and in $LOGFILE when no block
#            contains the test
#######################################
createBatch ()
{
  local testname=$1
  local found=0
  local taken abstest

  # Delete leading and trailing whitespace from each line and drop blank
  # lines, so the batch copy contains only meaningful lines.
  sed -i 's/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$CPBATCHFILE"

  while [ -s "$CPBATCHFILE" ]
  do
    # Print from the top of the file up to, but excluding, the second setup
    # line: that is the first block still left in the batch copy.
    awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[rR]eq/{if(b) exit; else b=1}1' "$CPBATCHFILE" > "$TESTSETFILE"

    if grep -i -- "${testname}\$" "$TESTSETFILE" >> "$LOGFILE" 2>&1; then
      echo "test found" >> "$LOGFILE"
      cat "$TESTSETFILE" >> "$FAILEDBATCH"
      found=1
    fi

    # Remove the block just examined.  Skipping by position ("+N") stays
    # correct when the last line of the batch file has no trailing newline;
    # subtracting two "wc -l" counts went negative in that case and the loop
    # never terminated.
    taken=$(( $(wc -l < "$TESTSETFILE") ))
    # awk always emits at least one line for a non-empty file; bail out rather
    # than spin if it somehow produced nothing (for example awk failed).
    [ "$taken" -gt 0 ] || break
    tail -n "+$((taken + 1))" "$CPBATCHFILE" > "$DIFFFILE"
    mv "$DIFFFILE" "$CPBATCHFILE"
  done
  echo "$CPBATCHFILE is empty" >> "$LOGFILE"

  if [ "$found" -eq 0 ]; then
    printf '%s\n' "$testname" > "$TEMPDIR/test.txt"
    # The caller escapes special characters with backslashes; strip them again
    # so the message shows the plain test name.
    abstest=$(printf '%s\n' "$testname" | sed 's/\\//g')
    echo "FATAL ERROR: Test \"$abstest\" not found in batch" | tee -a "$LOGFILE"
  fi
}
####STARTS HERE####
mkdir -p "$TEMPDIR"
#cat $TEMPDIR/test.txt
#FLBATCHLIST="$TEMPDIR/test.txt"
# Normalise the list of tests: trim leading whitespace first so that an
# indented "exec" prefix is recognised, remove the prefix, then trim both ends
# and drop blank lines.
sed 's/^[[:space:]]*//;s/^[eE][xX][eE][cC]//;s/^[[:space:]]*//;s/[[:space:]]*$//;/^$/d' "$FLBATCHLIST" > "$WORKFILE"
# escaping special characters like '\' and '.' in the path names for better grepping
sed -i 's/\([\/\.\"]\)/\\\1/g' "$WORKFILE"
# Read the list line by line on a separate descriptor: a test name may contain
# spaces or glob characters, which "for x in $(cat file)" would split or
# expand, and the loop body keeps its own stdin.
while IFS= read -r fltest <&3
do
  printf '%s\n' "$fltest" >> "$LOGFILE"
  # createBatch consumes its input, so hand it a fresh copy for every test.
  cp "$BATCHFILE" "$CPBATCHFILE"
  createBatch "$fltest"
done 3< "$WORKFILE"
## Clean up
if [ -f "$FAILEDBATCH" ]; then
  # Convert the path separators to backslashes and deliver the result.
  sed -i 's/\//\\/g' "$FAILEDBATCH"
  cp "$FAILEDBATCH" .
else
  # Nothing was appended, so the file was never created.
  echo "No matching test blocks found; $FAILEDBATCH was not created" >&2
fi
此脚本的问题是
为File-1的每一行遍历File-2需要一些时间。我想知道是否有更好的解决方案,我只需要遍历File-2一次。
该脚本确实解决了我的问题,但我留下的文件中有重复的行块。我想知道有没有办法删除重复的行块。
这是我执行脚本时的输出
exec 1.20\setup\testinit
exec 1.20\abc\this_is_test_1
exec 1.20\abc\this_is_test_1
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess1
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess3
exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("Self Destruction System")
exec perl\LRP\BaseLibs\close-MMM
exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("Self Destruction System")
exec perl\LRP\BaseLibs\close-MMM
我尝试通过网络搜索我的答案,但无法找到符合我需求的答案。
给定File-1和File-2 这是我希望我的脚本输出 (我列出了我期望FILE-1中每行的输出)
For line "1.20/abc/this_is_test_1" in FILE-1
Output
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1
For line "perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2" in FILE-1
Output
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
For line "exec perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp" in FILE-1
Output
Do nothing, as there is no line matching this in FILE-2
For line "perl/LRP/BaseLibs/close-MMM" in FILE-1
Output
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM
For line "exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")" in FILE-1
Output
Do nothing, as it would generate the same block as line "perl/LRP/BaseLibs/close-MMM" in FILE-1 did
For Line "this/or/that" in FILE-1
Output
Do nothing, as there is no line matching this in FILE-2
所以我的最终输出应该类似于下面这样(块的顺序无关紧要):
exec 1.20/setup/testinit
exec 1.20/abc/this_is_test_1
exec 1.20/abc/this_is_test_1
exec perl/RRP/SetupReq/testdef_ijk
exec perl/RRP/RRP-1.30/JEDI/SetupReq/confAbvExp
exec perl/RRP/RRP-1.30/JEDI/JEDIExportSuccess2
exec perl/LRP/SetupReq/testird_LRP("LRP")
exec perl/BaseLibs/launch_client("LRP")
exec perl/LRP/LRP-classic-4.14/churrip/chorSingle
exec perl/LRP/BaseLibs/setupLRPMMMTab
exec perl/LRP/BaseLibs/launchMMM
exec perl/LRP/BaseLibs/launchLRPCHURRTA("TYRE")
#PAUSE Expand Churrip tree view & open all nodes
exec perl/LRP/LRP-classic-4.14/Corrugator/multipleSeriesWeb
exec perl/BaseLibs/ShutApp("Self Destruction System")
exec perl/LRP/BaseLibs/close-MMM
如果有人能给我一些关于如何进行的指示,真的很棒。是的,我忘了提,这不是一个功课问题:-)。
非常感谢
答案 0 :(得分:0)
如果行顺序无关紧要,您可以通过以下方式从文件中删除重复项:
sort filename | uniq
为了找出哪些行同时存在于两个文件中,我使用了一个 Perl 脚本来创建哈希(或者说关联数组)。首先扫描文件A,以每一行作为键加入哈希,并将值设置为1。然后对文件B执行相同的操作,但将值设置为2;如果键已经存在,则在原值上加2。这样每个文件只需遍历一次,最后:如果某个键的值为1,它只存在于文件A中;如果值为2,它只存在于文件B中;如果值为3,则它同时存在于两个文件中。
修改: 我在项目中找到了一些 Perl 代码,它完全按照我描述的方式工作。在这段代码中,我只关心两者的差异,但应该很容易根据您的需要进行修改:
# Record, for every item, which list(s) it occurs in, one bit per list:
# value 2 = present in @qlist, value 1 = present in @xlist, 3 = present in both.
# Bitwise OR is used instead of += so that an item occurring more than once in
# the same list still ends up as 1, 2 or 3; with += a duplicate would give 4,
# or turn a 1 into a 2 and be reported for the wrong list.
my %found;
foreach my $item (@qlist) { $found{$item} |= 2 };
foreach my $item (@xlist) { $found{$item} |= 1 };
foreach my $found (keys(%found))
{
    if ($found{$found} == 3)
    {
        # It's in both files. Not doing anything.
    }
    elsif ($found{$found} == 2)
    {
        print "$found found in the QC-list, but not the x-list.\n";
    }
    elsif ($found{$found} == 1)
    {
        print "$found found in the x-list, but not the QC-list.\n";
    }
}
答案 1 :(得分:0)
以下假设“setup”行对于每个块都是唯一的。我们使用这一行作为关联数组的关键,它跟踪我们已经打印过的块。
脚本的第一行把第一个文件读入名为 regex 的变量,该变量收集第一个文件中我们想要匹配的各行(惯用法 NR==FNR 表示当前文件内的行号等于已读取的所有文件的累计行号,也就是说,只有在读取参数列表中的第一个文件时才成立)。希望脚本的其余部分相当简单明了。
# Print every block of File-2 that contains one of the names listed in File-1.
# Blocks are printed in File-2 order, at most once per setup line (the setup
# line is assumed to identify its block).
awk 'NR==FNR {
       # First file: build one alternation of the wanted names.  Blank lines
       # are skipped (an empty alternative would match every line) and regex
       # metacharacters are escaped so that names such as f("x") or 1.20 are
       # matched literally.
       gsub(/^[[:space:]]+|[[:space:]]+$/, "")
       if ($0 == "") next
       gsub(/[][\\.^$*+?(){}|]/, "\\\\&")
       regex = regex sep $0; sep = "|"; next }
     # A setup line opens a new block: remember it as the label and restart
     # the collection.  The patterns are the ones used by the question, so a
     # path that merely contains a SetupReq directory does not split a block.
     /[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/ {
       label = $0; printing = 0; collected = nl = "" }
     { collected = collected nl $0; nl = RS }
     # First hit inside a block that has not been printed yet: emit everything
     # collected so far.  The current line is already part of it, so skip the
     # last rule to avoid printing that line twice.
     regex != "" && $0 ~ regex {
       if (!printed[label]) {
         printed[label] = printing = 1; print collected; next } }
     # Remaining lines of a block that is being printed.
     printing { print }' File-1 File-2
如果“setup”行不一定是唯一的,也许您可以改用 collected 的值作为键。
这应该(我希望)能够稳健地处理 File-1 中有多行匹配 File-2 中同一个块的情况。
我知道我在评论中暗示过一个 sed 解决方案,但事实证明这是一个用 awk 处理起来更自然的问题。当然,它也可以用 Perl 或 Python 完成,或者用您喜欢的任何其他语言。
答案 2 :(得分:0)
感谢@tripleee和@Jarmund提出的建议。根据您的意见,我终于能够找到问题的解决方案。我从关联数组中得到提示,为每个块创建唯一键,所以这就是我所做的
取文件-2并将每个块转换为单行
awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/{if(b) exit; else b=1}1' file-2 > $TESTSETFILE
cat $TESTSETFILE | sed ':a;N;$!ba;s/\n//g;s/ //g' >> $SINGLELINEFILE
现在此文件中的每一行都是唯一条目
也许这个解决方案不是最好的,但它比前一个快得多。
这是我的新剧本
# Command-line arguments: $1 is the file listing the tests to look up,
# $2 is the batch file the blocks are extracted from.
FLBATCHLIST="$1"
BATCHFILE="$2"

# Scratch directory below the current directory; whatever an earlier run left
# in it is removed.
TEMPDIR="./tempBatchdir"
rm -rf "${TEMPDIR}"/*

# Results written by the script.
FAILEDBATCH="${TEMPDIR}/FailedBatch.test"
LOGFILE="${TEMPDIR}/log.txt"

# Intermediate files, all kept inside the scratch directory.
WORKFILE="${TEMPDIR}/failedTestList.txt"
CPBATCHFILE="${TEMPDIR}/orig.test"
TESTSETFILE="${TEMPDIR}/testset.txt"
DIFFFILE="${TEMPDIR}/diff.txt"
SINGLELINEFILE="${TEMPDIR}/singleline.txt"
TEMPFILE="${TEMPDIR}/temp.txt"
#######################################
# Rewrite the batch copy as one record per block in $SINGLELINEFILE.
#
# Every block (a setup line plus the lines up to the next setup line, as
# selected by the awk call below) is appended as a single line with all
# newlines and spaces removed, followed by a separator line containing "**".
# The batch copy is consumed block by block and is empty on return.
#
# Globals:   CPBATCHFILE (read, emptied), TESTSETFILE and DIFFFILE (scratch),
#            SINGLELINEFILE and LOGFILE (appended to)
# Arguments: none
#######################################
convertSingleLine ()
{
  local taken

  # Delete leading and trailing whitespace from each line and drop blank lines.
  sed -i 's/^[[:space:]]*//;s/[[:space:]]*$//g;/^$/d' "$CPBATCHFILE"

  while [ -s "$CPBATCHFILE" ]
  do
    # Print from the top of the file up to, but excluding, the second setup
    # line: that is the first block still left in the batch copy.
    awk '/[Ss]etup.*[Tt]est/ || /perl\/[[:alpha:]]*\/[Ss]etup[Rr]eq/{if(b) exit; else b=1}1' "$CPBATCHFILE" > "$TESTSETFILE"

    # Join the block into one line without spaces.  tr treats a one-line block
    # like any other, whereas the sed "N" loop ends the script on a one-line
    # input before its substitutions run.
    {
      tr -d ' \n' < "$TESTSETFILE"
      printf '\n**\n'
    } >> "$SINGLELINEFILE"

    # Remove the block just converted.  Skipping by position ("+N") stays
    # correct when the last line of the batch file has no trailing newline;
    # subtracting two "wc -l" counts went negative in that case and the loop
    # never terminated.
    taken=$(( $(wc -l < "$TESTSETFILE") ))
    # awk always emits at least one line for a non-empty file; bail out rather
    # than spin if it somehow produced nothing (for example awk failed).
    [ "$taken" -gt 0 ] || break
    tail -n "+$((taken + 1))" "$CPBATCHFILE" > "$DIFFFILE"
    mv "$DIFFFILE" "$CPBATCHFILE"
  done
  echo "$CPBATCHFILE is empty" >> "$LOGFILE"
}
####STARTS HERE####
mkdir -p "$TEMPDIR"
# Normalise the list of tests: trim leading whitespace first so that an
# indented "exec" prefix is recognised, remove the prefix, then trim both ends
# and drop blank lines.
sed 's/^[[:space:]]*//;s/^[eE][xX][eE][cC]//;s/^[[:space:]]*//;s/[[:space:]]*$//;/^$/d' "$FLBATCHLIST" > "$WORKFILE"
# Backslash-escape '\', '/', '.' and '"' so the names can be used as grep patterns.
sed -i 's/\([\/\.\"]\)/\\\1/g' "$WORKFILE"
cp "$BATCHFILE" "$CPBATCHFILE"
convertSingleLine
# Read the list line by line on a separate descriptor: a test name may contain
# spaces or glob characters, which "for x in $(cat file)" would split or expand.
while IFS= read -r fltest <&3
do
  printf '%s\n' "$fltest" >> "$LOGFILE"
  # The single-line file has every space removed, so remove them from the
  # pattern as well; otherwise a name containing spaces could never match.
  pattern=$(printf '%s\n' "$fltest" | tr -d ' ')
  if grep -- "$pattern" "$SINGLELINEFILE" >> "$FAILEDBATCH"; then
    echo "TEST FOUND" >> "$LOGFILE"
  else
    # Strip the escaping backslashes again for the message.
    ABSTEST=$(printf '%s\n' "$fltest" | sed 's/\\//g')
    echo "FATAL ERROR: Test \"$ABSTEST\" not found in $BATCHFILE" | tee -a "$LOGFILE"
  fi
done 3< "$WORKFILE"
# Each block is one line here, so dropping repeated lines drops repeated blocks.
awk '!x[$0]++' "$FAILEDBATCH" > "$TEMPFILE"
mv "$TEMPFILE" "$FAILEDBATCH"
# Split the joined blocks back into lines: a newline goes in front of every
# "exec" and every "#".  NOTE(review): spaces inside a line were removed when
# the blocks were joined and are not restored here.
sed -i "s/exec/\\nexec /g;s/#/\\n#/g" "$FAILEDBATCH"
# Drop the empty first line created above and convert '/' to '\'.
sed -i '1d;s/\//\\/g' "$FAILEDBATCH"
这是输出
$ crflbatch file-1 file-2
FATAL ERROR: Test "perl/RRP/RRP-1.30/JEDI/CommonReq/confAbvExp" not found in file-2
FATAL ERROR: Test "this/or/that" not found in file-2
$ cat tempBatchdir/FailedBatch.test
exec 1.20\setup\testinit
exec 1.20\abc\this_is_test_1
exec 1.20\abc\this_is_test_1
exec perl\RRP\SetupReq\testdef_ijk
exec perl\RRP\RRP-1.30\JEDI\SetupReq\confAbvExp
exec perl\RRP\RRP-1.30\JEDI\JEDIExportSuccess2
exec perl\LRP\SetupReq\testird_LRP("LRP")
exec perl\BaseLibs\launch_client("LRP")
exec perl\LRP\LRP-classic-4.14\churrip\chorSingle
exec perl\LRP\BaseLibs\setupLRPMMMTab
exec perl\LRP\BaseLibs\launchMMM
exec perl\LRP\BaseLibs\launchLRPCHURRTA("TYRE")
#PAUSEExpandChurriptreeview&openallnodes
exec perl\LRP\LRP-classic-4.14\Corrugator\multipleSeriesWeb
exec perl\BaseLibs\ShutApp("SelfDestructionSystem")
exec perl\LRP\BaseLibs\close-MMM
$