使用AWK进行转置

时间:2012-07-12 08:31:04

标签: unix awk

  

可能重复:
  Transpose using AWK or Perl

您好,我想使用 AWK 得到如下格式的输出文件。我的输入文件是一个以空格分隔的文本文件。任何帮助我都将不胜感激。谢谢!

输入文件

id  quantity colour shape   size colour shape     size  colour  shape   size
1   10       blue   square  10   red    triangle   12   pink    circle  20
2   12       yellow pentagon 3   orange rectangle   4   purple   oval   6

期望输出

id  colour  shape    size
1   blue    square   10
1   red     triangle 12
1   pink    circle   20
2   yellow  pentagon  3
2   orange  rectangle 4
2   purple  oval      6

4 个答案:

答案 0 :(得分:2)

这是一个通用的写法,因此您可以按列名选择要输出的列。我在输入数据中加入了额外的列进行测试,这些列无论位于什么位置都不会被输出。

#!/usr/bin/awk -f
# Configuration: choose the output columns by name and give each a format.
BEGIN {
    # Names of the columns that make up one output group, in output order.
    col_list = "colour shape size"
    # One printf format per name above. The list is split on whitespace, so
    # a format cannot contain a literal space: write B ("blank") where one is
    # wanted (e.g. %6dB) and it is turned into a space when the line is
    # printed. Usually a field width in the format is all that is needed.
    col_fmt = "%-12s %-14s %5d"

    # Columns repeated on every output line. They may sit anywhere in the
    # input, but are printed together at the start of the line.
    repeat_fields["id"]
    # These formats are assigned one by one, so real spaces are fine here.
    repeat_fmt["id"] = "%4d "
    # More repeated columns can be added the same way:
    #repeat_fields["another"]
    #repeat_fmt["another"] = "%8s"

    # Index the formats by column name for the header rule.
    ncols = split(col_list, cols)
    split(col_fmt, fmts)
    k = 0
    while (++k <= ncols) {
        name = cols[k]
        col_names[name]
        forms[name] = fmts[k]
    }
}

# Header line of each input file: save the positions of the repeated and
# the selected columns, then print the output header.
FNR == 1 {
    # FNR == 1 matches once per input file, so drop the layout remembered
    # from a previous file; split("", arr) is the portable way to empty an
    # array.
    nrepeats = 0
    n = 0
    split("", repeat)
    split("", repeat_look)
    split("", rformats)
    split("", col_nums)
    split("", col_look)
    split("", formats)

    for (i = 1; i <= NF; i++) {
        if ($i in repeat_fields) {
            repeat[++nrepeats] = i
            repeat_look[i] = i
            rformats[i] = repeat_fmt[$i]
        }
        if ($i in col_names) {
            col_nums[++n] = i
            col_look[i] = i
            formats[i] = forms[$i]
        }
    }
    # print the header line: repeated columns first
    for (i = 1; i <= nrepeats; i++) {
        f = rformats[repeat[i]]
        sub("d", "s", f)    # header names are text, so print them with %s
        gsub("B", " ", f)   # B stands for a blank in the format
        printf f, $repeat[i]
    }
    # then one group of selected columns; never index past the columns that
    # were actually found in this header
    for (i = 1; i <= ncols && i <= n; i++) {
        f = formats[col_nums[i]]
        sub("d", "s", f)
        gsub("B", " ", f)
        printf f, $col_nums[i]
    }
    printf "\n"
    next
}

# Data lines: print one output row per complete group of selected columns,
# each row prefixed with the repeated columns.
{
    # Start every record clean so an incomplete group at the end of the
    # previous record cannot leak into this one.
    repeat_out = ""
    out = ""
    coln = 0

    # First pass: build the repeated prefix from the whole record, so the
    # repeated columns may sit anywhere, even after the first group.
    for (i = 1; i <= NF; i++) {
        if (i in repeat_look) {
            f = rformats[i]
            gsub("B", " ", f)   # B stands for a blank in the format
            repeat_out = repeat_out sprintf(f, $i)
        }
    }

    # Second pass: collect the selected columns and flush each full group.
    for (i = 1; i <= NF; i++) {
        if (i in col_look) {
            f = formats[i]
            gsub("B", " ", f)
            out = out sprintf(f, $i)
            if (++coln == ncols) {
                print repeat_out out
                out = ""
                coln = 0
            }
        }
    }
}

使用此修改后的输入数据:

no id  colour base   shape    size colour shape     size  colour  shape   size material
14 1   blue   twenty square   10   red    triangle   12   pink    circle  20   wool
23 2   yellow ninety pentagon 3    orange rectangle   4   purple  oval    6    cotton

输出是:

  id colour      shape          size
   1 blue        square           10
   1 red         triangle         12
   1 pink        circle           20
   2 yellow      pentagon          3
   2 orange      rectangle         4
   2 purple      oval              6

答案 1 :(得分:1)

见下文:

kent$  cat a
id  colour  shape   size colour shape     size  colour  shape   size
1   blue    square  10   red    triangle   12   pink    circle  20
2   yellow  pentagon 3   orange rectangle   4   purple   oval   6

kent$  awk 'NR==1{print "id colour shape size";next;}
# Line 1 is the input header; it is replaced by the fixed header above.
# Fields are always passed as printf arguments, never as the format
# string, so a "%" inside the data is printed literally.
{id=$1; printf "%s", id;
        for(i=2;i<=NF;i++){
                # after every third field end the row; if more fields follow, start the next row with the id
                printf "%s%s", FS, $i; if((i-1)%3==0)printf "%s", (NF!=i)?"\n"id:"\n"; }}' a
id colour shape size
1 blue square 10
1 red triangle 12
1 pink circle 20
2 yellow pentagon 3
2 orange rectangle 4
2 purple oval 6

如果你的系统上有 `column` 命令,可以把输出通过管道传给 column,让它看起来更整齐:

kent$  awk 'NR==1{print "id colour shape size";next;}
# Line 1 is the input header; it is replaced by the fixed header above.
# Fields are always passed as printf arguments, never as the format
# string, so a "%" inside the data is printed literally.
{id=$1; printf "%s", id;
        for(i=2;i<=NF;i++){
                # after every third field end the row; if more fields follow, start the next row with the id
                printf "%s%s", FS, $i; if((i-1)%3==0)printf "%s", (NF!=i)?"\n"id:"\n"; }}' a|column -t
id  colour  shape      size
1   blue    square     10
1   red     triangle   12
1   pink    circle     20
2   yellow  pentagon   3
2   orange  rectangle  4
2   purple  oval       6

答案 2 :(得分:0)

一种方式:

script.awk的内容:

# Header line of each input file: print its first four column names,
# tab-separated, then move on to the next line.
FNR == 1 {
    # Reset first: FNR == 1 matches again for every further input file, and
    # without this the previous file's names would be prepended.
    header = ""
    for ( i = 1; i <= 4; i++ ) {
        header = (header ? header "\t" : "") $i
    }
    printf "%s\n", header
    next
}

# Data lines: print one tab-separated row per group of three fields,
# each row prefixed with the id taken from field 1.
FNR > 1 {
    id = $1
    for ( i = 2; i <= NF; i += 3 ) {
        # Join fields i..i+2 from left to right so that no tab is left
        # dangling at the end of the row.
        line = $i
        for ( j = i + 1; j <= i + 2; j++ ) {
            line = line "\t" $j
        }
        # %s prints the id exactly as written; %d would print 0 for a
        # non-numeric id.
        printf "%s\t%s\n", id, line
    }
}

运行它(感谢 Kent 提供的 column -t 命令):

awk -f script.awk infile | column -t

使用以下输出:

id  colour  shape      size
1   blue    square     10
1   red     triangle   12
1   pink    circle     20
2   yellow  pentagon   3
2   orange  rectangle  4
2   purple  oval       6

答案 3 :(得分:0)

# Step 1 (sed): on every line except the first, replace each run of digits
# that is followed by blanks with the digits plus a newline, so the id and
# each group land on their own lines.
# NOTE(review): "\n" in the replacement and "\t" inside the bracket
# expression look like GNU sed extensions -- confirm before using other seds.
# Step 2 (awk): print the first four fields of line 1, remember a one-field
# line as the current id, and print every other line prefixed with that id.
sed -e '1! {s/\([0-9][0-9]*\)[ \t][ \t]*/\1\n/g;}' test.txt |awk -vOFS="\t" 'NR==1 {print $1,$2,$3,$4} NF==1 {id=$1} NR>1 && NF>1 {print id,$0}'

我本想全部用 sed 完成,但我对保持缓冲区(hold buffer)的用法还很生疏。无论如何,先在每个数字后插入一个换行符:

id  colour  shape   size colour shape     size  colour  shape   size
1
blue    square  10
red    triangle   12
pink    circle  20
2
yellow  pentagon 3
orange rectangle   4
purple   oval   6

并使用awk将id向下移动到以下行:

id  colour  shape   size
1   blue    square  10
1   red    triangle   12
1   pink    circle  20
2   yellow  pentagon 3
2   orange rectangle   4
2   purple   oval   6

应该在sed中完全可行