统计下划线(_)的最大数量,并在数量不足时补上额外的分号(;)

时间:2015-03-09 09:14:19

标签: bash awk

我有一个包含以下几个字段的文件

deme_Fort_Email_am;04/02/2015;Deme_Fort_Postal
deme_faible_Email_am;18/02/2015;deme_Faible_Email_Relance_am
equi_Fort_Email_am;23/02/2015;trav_Fort_Email_am
trav_Faible_Email_pm;18/02/2015;trav_Faible_Email_Relance_pm
trav_Fort_Email_am;12/02/2015;Trav_Fort_Postal
voya_Faible_Email_am;29/01/2015;voya_Faible_Email_Relance_am

目标是得到如下结果:

deme;Fort;Email;am;04/02/2015;Deme;Fort;Postal;;
deme;faible;Email;am;18/02/2015;deme;Faible;Email;Relance;am
equi;Fort;Email;am;23/02/2015;trav;Fort;Email;am;
trav;Faible;Email;pm;18/02/2015;trav;Faible;Email;Relance;pm
trav;Fort;Email;am;12/02/2015;Trav;Fort;Postal;;
voya;Faible;Email;am;29/01/2015;voya;Faible;Email;Relance;am

我想先统计所有行中下划线数量的最大值,然后把下划线改成分号;如果某一行的分号数少于所有行中的最大分号数,就在该行末尾补上额外的分号。

我考虑过使用 awk,但用下面的命令行我只能改动第一个字段之后的所有内容,而我的目标还包括补上额外的分号。

# Treat ";" as the field separator and replace every "_" with ";" in each field; no padding is added.
awk 'BEGIN{FS=OFS=";"} {for (i=1;i<=NF;i++) gsub(/_/,";", $i) } 1' file

注意:由于 awk 是逐行处理的,我不确定它能否做到这一点,但还是问一下以防万一。如果无法完成,请告诉我,我会尝试寻找其他方式。

感谢。

2 个答案:

答案 0 :(得分:2)

这是一个两遍扫描(two-pass)的解决方案。注意,运行 awk 时需要在命令行上把数据文件写两次:

$ cat mu.awk
# mu.awk -- turn every "_" into ";" and right-pad each line with ";" so that
# all lines end up with as many fields as the widest line.
# Usage: awk -f mu.awk FILE FILE   (the same data file is named twice)

BEGIN {
    FS = "_"    # split input lines on underscores
    OFS = ";"   # rejoin output lines with semicolons
}

# Pass 1 (first FILE argument): remember the largest field count.
NR == FNR {
    if (NF > max)
        max = NF
    next
}

# Pass 2 (second FILE argument): rewrite the line and pad it.
{
    $1 = $1                 # force $0 to be rebuilt using OFS
    missing = max - NF      # taken before $0 is reassigned below
    for (; missing > 0; missing--)
        $0 = $0 OFS         # append one empty trailing field
    print
}

$ awk -f mu.awk mu.txt mu.txt
deme;Fort;Email;am;04/02/2015;Deme;Fort;Postal;;
deme;faible;Email;am;18/02/2015;deme;Faible;Email;Relance;am
equi;Fort;Email;am;23/02/2015;trav;Fort;Email;am;
trav;Faible;Email;pm;18/02/2015;trav;Faible;Email;Relance;pm
trav;Fort;Email;am;12/02/2015;Trav;Fort;Postal;;
voya;Faible;Email;am;29/01/2015;voya;Faible;Email;Relance;am

BEGIN 块设置输入和输出的字段分隔符。

NR == FNR 块在第一遍读取文件时执行,记录最大字段数。

最后一个块在第二遍读取文件时执行。它先用输出字段分隔符重建该行,然后该行比最大字段数少几个字段,就在行尾追加几个 ;

修改

此版本针对更新后的问题,只处理第 7 个字段之后的字段:

$ cat mu2.awk
# mu2.awk -- in fields 8..NF of a ";"-separated file, replace every "_" with
# ";" and pad each line with trailing ";" so all lines gain the same number
# of columns.
# Usage: awk -f mu2.awk FILE FILE   (the same data file is named twice)

BEGIN { OFS=FS=";" }

# Return the number of "_" characters in fields 8..NF of the current record.
# Only these fields are counted because only they are rewritten below; a "_"
# in fields 1-7 stays as it is and must not influence the padding.
function tail_underscores(    i, copy, total) {
    for (i = 8; i <= NF; ++i) {
        copy = $i                       # work on a copy, leave $i untouched
        total += gsub(/_/, "", copy)    # gsub returns the number of replacements
    }
    return total + 0                    # numeric 0 when there is nothing to count
}

# First pass, find the max number of "_" after field 7
NR == FNR {
    count = tail_underscores()
    if (max < count) max = count
    next
}

# Second pass:
{
    # number of "_" this line is short of the max
    pad = max - tail_underscores()

    # replace "_" with ";" after field 7
    for (i = 8; i <= NF; ++i) gsub(/_/, ";", $i)

    # add an extra ";" for each "_" less than max
    while (pad-- > 0) $0 = $0 ";"
}1

$ awk -f mu2.awk mu2.txt mu2.txt
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;deme;Fort;Email;am;04/02/2015;Deme;Fort;Postal;;
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;deme;faible;Email;am;18/02/2015;deme;Faible;Email;Relance;am
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;equi;Fort;Email;am;23/02/2015;trav;Fort;Email;am;
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;trav;Faible;Email;pm;18/02/2015;trav;Faible;Email;Relance;pm
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;trav;Fort;Email;am;12/02/2015;Trav;Fort;Postal;;
xxx;x_x_x;xxx;xxx;x_x_x;xxx;xxx;voya;Faible;Email;am;29/01/2015;voya;Faible;Email;Relance;am

答案 1 :(得分:1)

这样应该可以:

# Single pass: split each line on "_", keep every field in memory, then print all rows
# joined with ";" and padded with empty fields up to the largest field count seen.
awk -F_ '{for (i=1;i<=NF;i++) a[NR FS i]=$i;c=NF>c?NF:c} END {for (j=1;j<=NR;j++) {for (i=1;i<c;i++) printf "%s;",a[j FS i];print a[j FS c]}}' file
deme;Fort;Email;am;04/02/2015;Deme;Fort;Postal;;
deme;faible;Email;am;18/02/2015;deme;Faible;Email;Relance;am
equi;Fort;Email;am;23/02/2015;trav;Fort;Email;am;
trav;Faible;Email;pm;18/02/2015;trav;Faible;Email;Relance;pm
trav;Fort;Email;am;12/02/2015;Trav;Fort;Postal;;
voya;Faible;Email;am;29/01/2015;voya;Faible;Email;Relance;am

工作原理:

awk -F_ '                               # Use "_" as the input field separator
    {for (i=1;i<=NF;i++)                # For every field of the current line ...
        a[NR FS i]=$i                   # ... store it in array "a", keyed by row (NR) and column (i)
    c=NF>c?NF:c}                        # Keep the largest field count seen so far in "c"
END {                                   # After the whole file has been read:
    for (j=1;j<=NR;j++) {               # Loop through all rows
        for (i=1;i<c;i++)               # Loop through columns 1 .. c-1
            printf "%s;",a[j FS i]      # Print the field followed by ";" (empty if the row is shorter)
        print a[j FS c]                 # Print the last column and end the line
        }
    }
' file                                  # Input file to read