使用 sed / awk 替换正则表达式匹配位置之后的字符

时间:2014-04-23 13:31:52

标签: regex bash awk sed

我在替换分隔符之间(包括分隔符本身)的字符时遇到了一些问题。例如,文件 sample.txt 的内容如下:

sample = 0;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
 end;
 ...
 sample = 1001;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
 end;

 sample = 1010;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
 end;
 ...

我希望把 sample 1001 的 test11 中的所有 1 都改为 0。新文本文件应如下所示:

sample = 0;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
 end;
 ...
 sample = 1001;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX00000000000000\
                      0000000000000000000000000000000000000"
         ...
 end;
 ...
 sample = 1010;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
 end;

问题是当我使用sed时:

# Within the lines from "sample = 1001;" through the next "end;", and there
# within the lines from one matching "test11" through the next one containing
# ";", replace every 1 with 0.
# The file name is an operand of sed, so it has to follow the closing quote of
# the script instead of being part of the quoted script text.
sed '/sample = 1001;/,/end;/ {/test11/,/;/ s/1/0/g}' sample.txt

它还会把测试名称里的数字一并替换,即 test11 被改成了 test00。

我还尝试以下列方式更改测试名称:

# Three expressions, each restricted to the "sample = 1001;" ... "end;" range:
#   1. rename test11 to the placeholder "tempname", which contains no digit 1;
#   2. from the "tempname" line through the next line containing ";",
#      replace every 1 with 0;
#   3. rename the placeholder back to test11.
# NOTE(review): no input file operand appears on this command line, although
# -i edits files in place; presumably sample.txt was intended. Confirm.
sed -i -e '/sample = 1001;/,/end;/ s/test11/tempname/' -e '/sample = 1001;/,/end;/ { /tempname/,/;/ s/1/0/g}' -e '/sample = 1001;/,/end;/ s/tempname/test11/'

但这太慢了(我需要在数千个样本中更改数千个测试)

你有什么想法吗?

4 个答案:

答案 0 :(得分:2)

先找到目标样本;找到之后,把需要修改的行的尾部分离出来,只在这一部分中将 1 替换为 0;如此重复,直到遇到下一个 test 行或 sample 行为止:

$ cat tst.awk
# Replace every "1" with "0" in the value of test "test11" that belongs to the
# block introduced by "sample = 1001;". All other lines are printed unchanged.
#
# State flags (unset, hence false, at start):
#   inSample - set when a "sample = 1001;" line is seen
#   inTest   - set when the `test "test11"` line is seen while inSample is set
#
# Leaving the target test: once inTest is set, any line that mentions "test"
# or "sample" clears both flags. This rule is listed first so it is evaluated
# before the rules below set inTest; on the `test "test11"` line itself inTest
# is still false here, so that line does not reset the state.
inTest && (/test|sample/)    { inSample=inTest=0 }
# Entering the wanted sample.
/sample = 1001;/             { inSample = 1 }
# Entering the wanted test inside that sample.
inSample && /test "test11"/  { inTest = 1 }
# While inside the target test, locate the value part at the end of the line:
# either a double quote followed by letters, digits or backslashes, or a run
# of digits followed by a double quote; an optional ";" may end the line.
inTest && match($0,/("[[:alnum:]\\]+|[[:digit:]]+");?$/) {
    # Everything before the match (e.g. the test name) is kept as is.
    head = substr($0,1,RSTART-1)
    # The matched value portion, from RSTART to the end of the line.
    tail = substr($0,RSTART)
    # Only the value portion has its 1s replaced.
    gsub(/1/,0,tail)
    $0 = head tail
}
# Emit every line, modified or not.
{ print }

$ awk -f tst.awk file
sample = 0;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
end;
...
sample = 1001;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX00000000000000\
                      00000000000000000000000000000000000000"
         ...
end;

sample = 1010;
        test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                      10101010101010101010101010101010101010";
         ...
        test "test11" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                      01010101010101010101010101010101010101"
         ...
end;
...

答案 1 :(得分:1)

这可能适合你(GNU sed):

# GNU sed; -r selects extended regular expressions.
# Within the lines from "sample = 1001" through the next "end", on the line
# containing `test "test11"`:
#   N        appends the following line to the pattern space;
#   :a ... ta  repeats the substitution for as long as it succeeds;
#   s/(= "[^"]*)1/\10/  rewrites a "1" that follows `= "` (with no other
#            double quote in between) as "0": \1 is the captured prefix and
#            the trailing 0 is the literal replacement digit.
# The match has to start at `= "`, so the digits in the name "test11", which
# come before the "=", are not touched.
sed -r '/sample = 1001/,/end/{/test "test11"/{N;:a;s/(= "[^"]*)1/\10/;ta}}' file

这会先把搜索范围缩小到 sample = 1001,再进一步缩小到 test11。然后它用一个循环,把 `=` 符号之后的所有 1 都改为 0。

答案 2 :(得分:0)

怎么样

# Within the lines from "sample = 1001;" through the next "end;", and there
# within the lines from one matching "test11" through the next one containing
# ";": first replace every 1 with 0 (which also turns the name into "test00"),
# then rewrite "test00" back to "test11".
sed '/sample = 1001;/,/end;/ {/test11/,/;/{s/1/0/g;s/test00/test11/}}' sample.txt

? ; - )

答案 3 :(得分:0)

花了一些时间对此进行编码,但它有效。

#!/bin/bash

# Print the expected command-line syntax on standard output and terminate the
# script with exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 -sample x -test y" \
        "Where x is the sample number y is the test number"
    exit 1
}

# Validate the command line: it must be exactly "-sample x -test y".
# The flag checks are OR-ed; the earlier "-a" form only rejected the call when
# BOTH flags were wrong, so e.g. "-sample 1 -foo 2" was accepted.
if [[ $# -ne 4 || $1 != "-sample" || $3 != "-test" ]]; then
    usage
fi
# x and y are used in arithmetic below, so they must be plain non-negative
# integers.
if [[ ! $2 =~ ^[0-9]+$ || ! $4 =~ ^[0-9]+$ ]]; then
    usage
fi

# 10# forces base 10 so that values such as 08 are not parsed as octal.
sample=$((10#$2))
test=$((10#$4))

input=list.txt
if [[ ! -r $input ]]; then
    printf '%s: cannot read %s\n' "$0" "$input" >&2
    exit 1
fi

# Join the whole file into a single line first. Every step below picks a
# record with head/tail by LINE number, so after each sed split one sample
# (and later one test) has to occupy exactly one line. On the raw multi-line
# file the line numbers would not correspond to sample or test positions.
flattened=$(tr '\n' ' ' < "$input")

# One sample per line. Line 1 holds whatever precedes the first "sample = ",
# so the sample at position x (counting from 0) ends up on line x+2.
sample_expanded=$(sed -e 's,sample = ,\
sample = ,g' <<< "$flattened")
sample_preceding=$(head -n "$((sample + 1))" <<< "$sample_expanded")
sample_actual=$(tail -n "+$((sample + 2))" <<< "$sample_expanded" | head -n 1)
sample_remaining=$(tail -n "+$((sample + 3))" <<< "$sample_expanded")

# Same idea inside the chosen sample: one test per line, with line 1 being the
# "sample = x;" header, so the test at position y is on line y+2.
test_expanded=$(sed -e 's,test "test,\
test "test,g' <<< "$sample_actual")
test_preceding=$(head -n "$((test + 1))" <<< "$test_expanded")
test_actual=$(tail -n "+$((test + 2))" <<< "$test_expanded" | head -n 1)
test_remaining=$(tail -n "+$((test + 3))" <<< "$test_expanded")

# Split the chosen test after "=" so the name (first line) and the value
# (last line) can be handled separately; only the value is rewritten, which
# keeps digits in the test name intact.
result_expanded=$(sed -e 's,=,=\
,g' <<< "$test_actual")
result_name=$(head -n 1 <<< "$result_expanded")
result_value=$(tail -n 1 <<< "$result_expanded" | tr 1 0) # Change 1s to 0s

# Reassemble. The unquoted expansions are intentional: word splitting squeezes
# every run of whitespace into a single space, which is the form the sed
# expressions below expect when they rebuild the line layout. Globbing is
# switched off meanwhile so that splitting cannot expand wildcard characters.
set -f
# shellcheck disable=SC2086
echo $sample_preceding $test_preceding $result_name $result_value $test_remaining $sample_remaining |
sed -e 's,sample = ,\
sample = ,g' \
-e's, test "test,\
    test "test,g' \
-e's,\\,\\\
                    ,g' \
-e's," end;," \
end;,g' > modified_list.txt
set +f

list.txt 的内容如下:

sample = 0;
    test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                    10101010101010101010101010101010101010";
    test "test1" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
    test "test2" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
end;
sample = 1;
    test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                    10101010101010101010101010101010101010";
    test "test1" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
    test "test2" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
end;
sample = 2;
    test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                    10101010101010101010101010101010101010";
    test "test1" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
    test "test2" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
end;
sample = 3;
    test "test0" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX010\
                    10101010101010101010101010101010101010";
    test "test1" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
    test "test2" = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX01010101010101\
                    01010101010101010101010101010101010101"
end;