删除重复行

时间:2016-03-18 14:46:46

标签: bash shell scripting sh

这应该很简单。我需要编写一个shell脚本来执行以下操作(抱歉,我是初学者):

假设我有一个文件:

act="1" bar="moreCrap" interesting="car" foo="I"
act="1" bar="that" interesting="car" foo="just"
act="1" bar="matters" interesting="truck" foo="need"
act="1" bar="to" interesting="sedan" foo="a"
act="1" bar="no" interesting="sedan" foo="regex "
act="1" bar="one" interesting="truck" foo="matcher"
act="1" bar="including" interesting="tricycle" foo="that"
act="1" bar="me" interesting="truck" foo="will"
act="1" bar="or" interesting="tricycle" foo="delete"
act="1" bar="even" interesting="tricycle" foo="repetitions"
act="2" bar="no" interesting="sedan" foo="regex "
act="2" bar="one" interesting="truck" foo="matcher"
act="2" bar="including" interesting="tricycle" foo="that"
act="2" bar="me" interesting="truck" foo="will"
act="2" bar="or" interesting="sedan" foo="delete"
act="2" bar="even" interesting="sedan" foo="repetitions"

我需要提取所有跟在 interesting= 后面的字符串;如果同一个值(例如 "sedan")连续重复出现四次,则应删除最后两次重复,以及该 act 对应的其余所有行。在这种情况下,输出应如下所示:

*[show_background=true]{
    background-image:url(:/background.png);
    /* ... */
}

谢谢大家。

2 个答案:

答案 0 :(得分:1)

下面这个 awk 单行命令(one-liner)可以做到:

# Split each line on the literal interesting=" so that $2 starts with the value;
# strip everything from the closing quote on, then print the line only if that
# value has appeared on fewer than three earlier lines.
awk -F'interesting="' '{ key = $2; sub(/".*/, "", key); if (seen[key]++ < 3) print }' file

测试你的输入:

kent$  awk -F'interesting="' '{t=$2;sub(/".*/,"",t)}a[t]++<3' f
foo="crap" bar="moreCrap" interesting="car" wow="I"
foo="that" bar="that" interesting="car" wow="just"
foo="really" bar="matters" interesting="truck" wow="need"
foo="does" bar="to" interesting="sedan" wow="a"
foo="not" bar="no" interesting="sedan" wow="regex "
foo="matter" bar="one" interesting="truck" wow="matcher"
foo="to" bar="including" interesting="tricycle" wow="that"
foo="me" bar="me" interesting="truck" wow="will"
foo="at" bar="or" interesting="tricycle" wow="delete"
foo="all" bar="even" interesting="tricycle" wow="repetitions"

答案 1 :(得分:1)

也许看起来有点笨拙,但它符合您的需求:

#!/bin/bash
#
# Filter whitespace-separated records of the form
#   act="N" bar="..." interesting="VALUE" foo="..."
# where field 1 identifies the act and field 3 carries the interesting value.
#
# Rule: within one act, when the same interesting value occurs on four
# consecutive records, the first two are kept, the third and fourth are
# dropped, and every later record of that act is dropped as well. Runs of
# three or fewer records pass through unchanged.
#
# Usage: script < input      or      script input-file...

#######################################
# Apply the four-in-a-row rule to the input.
# Arguments: optional input files; reads stdin when none are given
# Outputs:   surviving records on stdout, in input order
# Returns:   exit status of awk
#######################################
filter_repeated_runs() {
  awk '
    # Blank lines carry no act; ignore them everywhere.
    NF == 0 { next }

    # Records that belong to the act being suppressed are dropped.
    $1 == skipact { next }

    {
        if ($1 == act && $3 == interest) {
            rep++
        } else {
            # The previous run stopped at three records, so the held third
            # record is safe to emit before the new run begins.
            if (holding) {
                out[cnt++] = held
                holding = 0
            }
            rep = 1
            act = $1
            interest = $3
        }

        if (rep == 3) {
            # Undecided: this record survives only if no fourth repetition
            # follows. Hold it instead of reading ahead with getline, which
            # would consume (and lose) the following record.
            held = $0
            holding = 1
        } else if (rep == 4) {
            # Fourth repetition: discard the held third record and this one,
            # then suppress the remainder of this act.
            holding = 0
            skipact = $1
        } else {
            out[cnt++] = $0
        }
    }

    END {
        # Input ended directly after a run of exactly three records.
        if (holding) {
            out[cnt++] = held
        }
        for (i = 0; i < cnt; i++) {
            print out[i]
        }
    }
  ' "$@"
}

filter_repeated_runs "$@"