我想用 awk 对表的内容做透视(pivot),有人可以分享一下该怎么做吗?谢谢。
表1
FEATURE,TESTER,LICENSE_USED,PROGRAM,AREA
Low,T6712,23,Element01,FT1
High,T7911,54,Element03,FT2
Medium,E8123,48,Element02,FT3
High,F4309,54,Element02,PB1
Low,F4309,23,Element01,PB1
Low,T7911,23,Element04,FT1
High,E8123,54,Element05,FT2
Medium,F4309,48,Element01,PB1
预期的输出:FEATURE 的各个取值(High、Low、Medium)将变为列,列中的值取自 LICENSE_USED
TESTER,PROGRAM,AREA,High,Low,Medium
E8123,Element02,FT3,0,0,48
E8123,Element05,FT2,54,0,0
F4309,Element01,PB1,0,23,48
F4309,Element02,PB1,54,0,0
T6712,Element01,FT1,0,23,0
T7911,Element03,FT2,54,0,0
T7911,Element04,FT1,0,23,0
另一个输出,是否有可能像下面那样转置预期的输出?
TESTER,E8123,E8123,F4309,F4309,T6712,T7911,T7911
PROGRAM,Element02,Element05,Element01,Element02,Element01,Element03,Element04
AREA,FT3,FT2,PB1,PB1,FT1,FT2,FT1
High,0,54,0,54,0,54,0
Low,0,0,23,0,23,0,23
Medium,48,0,48,0,0,0,0
答案 0 :(得分:1)
$ awk '
BEGIN {
    # One comma serves as input separator, output separator and as the
    # glue between the parts of a compound array subscript.
    FS = OFS = SUBSEP = ","
}
NR > 1 {
    # Identify the data row by tester, program and area.
    group = $2 SUBSEP $4 SUBSEP $5
    # Record that this combination exists.
    groups[group]
    # Store LICENSE_USED under the combination plus its FEATURE.
    used[group, $1] = $3
}
END {
    print "TESTER", "PROGRAM", "AREA", "High", "Low", "Medium"
    # The key already reads tester,program,area because SUBSEP is a comma.
    for (group in groups) {
        # int() turns a feature that never occurred into 0.
        high = int(used[group, "High"])
        low = int(used[group, "Low"])
        medium = int(used[group, "Medium"])
        print group, high, low, medium
    }
}
' file
TESTER,PROGRAM,AREA,High,Low,Medium
F4309,Element02,PB1,54,0,0
E8123,Element02,FT3,0,0,48
T6712,Element01,FT1,0,23,0
T7911,Element03,FT2,54,0,0
F4309,Element01,PB1,0,23,48
E8123,Element05,FT2,54,0,0
T7911,Element04,FT1,0,23,0
如果您想要一个花哨的单行命令(one-liner),下面是去掉了空白和注释的版本:
# Compact form of the pivot script; the END loop variable is k, the same name the print statement uses.
awk 'BEGIN {FS=OFS=SUBSEP=","} NR>1{a[$2,$4,$5];b[$2,$4,$5,$1]=$3} END{print "TESTER","PROGRAM","AREA","High","Low","Medium";for(k in a) print k,0+b[k,"High"],0+b[k,"Low"],0+b[k,"Medium"]}' file
对于转置输出:
$ awk '
BEGIN {
    FS = OFS = ","
    # Rows 4-6 of the transposed table hold the FEATURE values. Each line
    # stores the row label in column 1 and maps the FEATURE name to its row.
    tr[cell[4, 1] = "High"  ] = 4
    tr[cell[5, 1] = "Low"   ] = 5
    tr[cell[6, 1] = "Medium"] = 6
}
{
    # Input line NR becomes output column NR; line 1 (the header) supplies
    # the labels of rows 1-3.
    cell[1, NR] = $2
    cell[2, NR] = $4
    cell[3, NR] = $5
}
NR > 1 {
    # Put LICENSE_USED into the row that belongs to this FEATURE.
    cell[tr[$1], NR] = $3
}
END {
    for (row = 1; row <= 6; ++row) {
        # Build the line in a fresh string rather than assigning to $col:
        # the fields of the last input record are still set in END and
        # would be printed after column NR when the file has fewer than
        # 5 lines.
        line = ""
        for (col = 1; col <= NR; ++col) {
            # int() turns a FEATURE cell that was never set into 0.
            if (row > 3 && col > 1)
                value = int(cell[row, col])
            else
                value = cell[row, col]
            line = line (col > 1 ? OFS : "") value
        }
        print line
    }
}' file
TESTER,T6712,T7911,E8123,F4309,F4309,T7911,E8123,F4309
PROGRAM,Element01,Element03,Element02,Element02,Element01,Element04,Element05,Element01
AREA,FT1,FT2,FT3,PB1,PB1,FT1,FT2,PB1
High,0,54,0,54,0,0,54,0
Low,23,0,0,0,23,23,0,0
Medium,0,0,48,0,0,0,0,48
按 AREA(即第 5 列)进行排序:
$ awk '
BEGIN {
    FS = OFS = ","
    # Rows 4-6 of the transposed table hold the FEATURE values. Each line
    # stores the row label in column 1 and maps the FEATURE name to its row.
    tr[cell[4, 1] = "High"  ] = 4
    tr[cell[5, 1] = "Low"   ] = 5
    tr[cell[6, 1] = "Medium"] = 6
}
{
    # Input line NR (counted across both inputs) becomes output column NR;
    # line 1 (the header) supplies the labels of rows 1-3.
    cell[1, NR] = $2
    cell[2, NR] = $4
    cell[3, NR] = $5
}
NR > 1 {
    # Put LICENSE_USED into the row that belongs to this FEATURE.
    cell[tr[$1], NR] = $3
}
END {
    for (row = 1; row <= 6; ++row) {
        # Build the line in a fresh string rather than assigning to $col:
        # the fields of the last input record are still set in END and
        # would be printed after column NR when the input has fewer than
        # 5 lines.
        line = ""
        for (col = 1; col <= NR; ++col) {
            # int() turns a FEATURE cell that was never set into 0.
            if (row > 3 && col > 1)
                value = int(cell[row, col])
            else
                value = cell[row, col]
            line = line (col > 1 ? OFS : "") value
        }
        print line
    }
}' <(head -n 1 file) <(tail -n +2 file | sort -t ',' -k 5) # <- 5th
TESTER,T6712,T7911,E8123,T7911,E8123,F4309,F4309,F4309
PROGRAM,Element01,Element04,Element05,Element03,Element02,Element02,Element01,Element01
AREA,FT1,FT1,FT2,FT2,FT3,PB1,PB1,PB1
High,0,0,54,54,0,54,0,0
Low,23,23,0,0,0,0,23,0
Medium,0,0,0,0,48,0,0,48
注意:如果您使用的 shell 不是 bash,导致上面最后一条命令无法运行,请放弃进程替换,改用命令组,例如:
# The brace group writes the header line first, then the remaining lines sorted on field 5, into one pipe read by awk.
{ head -n 1 file; tail -n +2 file | sort -t ',' -k 5; } | awk '...'
答案 1 :(得分:1)
这是原始输入内容:
FEATURE,TESTER,LICENSE_USED,PROGRAM,AREA
Low,T6712,23,Element01,FT1
High,T7911,54,Element03,FT2
Medium,E8123,48,Element02,FT3
High,F4309,54,Element02,PB1
Low,F4309,23,Element01,PB1
Low,T7911,23,Element04,FT1
High,E8123,54,Element05,FT2
Medium,F4309,48,Element01,PB1
好了,我们可以看看发生了什么事
$ tr , '\011' < data.txt | column -tR 3,5
FEATURE TESTER LICENSE_USED PROGRAM AREA
Low T6712 23 Element01 FT1
High T7911 54 Element03 FT2
Medium E8123 48 Element02 FT3
High F4309 54 Element02 PB1
Low F4309 23 Element01 PB1
Low T7911 23 Element04 FT1
High E8123 54 Element05 FT2
Medium F4309 48 Element01 PB1
这是预期的输出:
$ tr , '\011' < expected.txt | column -tR 2,3,4,5,6,7,8
TESTER E8123 E8123 F4309 F4309 T6712 T7911 T7911
PROGRAM Element02 Element05 Element01 Element02 Element01 Element03 Element04
AREA FT3 FT2 PB1 PB1 FT1 FT2 FT1
High 0 54 0 54 0 54 0
Low 0 0 23 0 23 0 23
Medium 48 0 48 0 0 0 0
看起来我们希望把三个列标题(TESTER、PROGRAM、AREA)转换为行标题,并把 Low、Medium 和 High 作为每个测试人员/元素/区域组合的行标题(按测试人员排序后就一目了然):
$ tr , '\011' < data.txt | column -tR 3 | sort -k2
Medium E8123 48 Element02 FT3
High E8123 54 Element05 FT2
Low F4309 23 Element01 PB1
Medium F4309 48 Element01 PB1
High F4309 54 Element02 PB1
Low T6712 23 Element01 FT1
Low T7911 23 Element04 FT1
High T7911 54 Element03 FT2
FEATURE TESTER LICENSE_USED PROGRAM AREA
我们可以很容易地看到Testers也可以在不同的Elements上工作,因此我们必须考虑到这一点:
# Pivot LICENSE_USED by FEATURE for every TESTER/PROGRAM/AREA combination
# and print the result transposed: one output line per header name.
# Input is comma-separated; line 1 is the header and is skipped.
BEGIN {
    FS=","
}
NR > 1 {
    # Append "FEATURE:LICENSE_USED," to the entry of this combination.
    data[$2,$4,$5] = data[$2,$4,$5] $1 ":" $3 FS
}
END {
    #construct the table
    for (tester_element_area in data) {
        split(tester_element_area, parts, SUBSEP)
        tester = parts[1]
        element = parts[2]
        area = parts[3]
        n = split(data[tester_element_area], d)
        # A feature that never occurs for this combination stays 0.
        template["High"] = 0
        template["Medium"] = 0
        template["Low"] = 0
        # The trailing FS yields one empty last piece; it only sets
        # template[""], which is never printed.
        for (i = 1; i <= n; i++) {
            split(d[i], license, ":")
            degree = license[1]
            value = license[2]
            template[ degree ] = value
        }
        # Every table row grows by one FS-prefixed cell per combination.
        table["TESTER"] = table["TESTER"] FS tester
        table["PROGRAM"] = table["PROGRAM"] FS element
        table["AREA"] = table["AREA"] FS area
        table["High"] = table["High"] FS template["High"]
        table["Medium"] = table["Medium"] FS template["Medium"]
        table["Low"] = table["Low"] FS template["Low"]
    }
    #print the table
    header[1] = "TESTER"
    header[2] = "PROGRAM"
    header[3] = "AREA"
    header[4] = "High"
    header[5] = "Low"
    header[6] = "Medium"
    for (i = 1; i <= 6; i++) {
        header_name = header[i]
        # Always print through "%s": a value used as the format itself
        # would have any % sequence in it interpreted by printf.
        printf "%s", header_name
        n = split(table[header_name], parts)
        # parts[1] is empty because every row starts with FS.
        for (j = 1; j <= n; j++) {
            if (j > 1) {
                printf "%s", FS
            }
            printf "%s", parts[j]
        }
        print ""
    }
}
让我们看看它返回什么:
$ awk -f prog.awk < data.txt | tr , '\011' | column -tR2,3,4,5,6,7,8
TESTER E8123 T7911 F4309 E8123 T6712 T7911 F4309
PROGRAM Element05 Element04 Element02 Element02 Element01 Element03 Element01
AREA FT2 FT1 PB1 FT3 FT1 FT2 PB1
High 54 0 54 0 0 54 0
Low 0 23 0 0 23 0 23
Medium 0 0 0 48 0 0 48
结果还不错,只是列的排列顺序不对,它们应该是排好序的。如果您愿意使用 GAWK,只需稍微改一下代码即可:
END {
# Copy the tester/program/area keys of data into cols, indexed 1, 2, ...
for (tester_element_area in data) {
cols[++i] = tester_element_area
}
# asort() is a gawk extension: it sorts the values of cols, renumbers
# them from 1 and returns how many there are.
m = asort(cols)
#construct the table
# Walk the keys in sorted order instead of the for-in order.
for (k = 1; k <= m; k++) {
tester_element_area = cols[k]
...
输出:
$ awk -f prog.awk < data.txt | tr , '\011' | column -tR2,3,4,5,6,7,8
TESTER E8123 E8123 F4309 F4309 T6712 T7911 T7911
PROGRAM Element02 Element05 Element01 Element02 Element01 Element03 Element04
AREA FT3 FT2 PB1 PB1 FT1 FT2 FT1
High 0 54 0 54 0 54 0
Low 0 0 23 0 23 0 23
Medium 48 0 48 0 0 0 0
更新:按区域排序
NR > 1 {
# AREA comes first in the key, so the sorted keys are ordered by area,
# then tester, then program.
data[$5,$2,$4] = data[$5,$2,$4] $1 ":" $3 FS
}
END {
# Copy the keys of data into cols, indexed 1, 2, ...
for (tester_element_area in data) {
cols[++i] = tester_element_area
}
# asort() is a gawk extension: it sorts the values of cols, renumbers
# them from 1 and returns how many there are.
m = asort(cols)
#construct the table
for (k = 1; k <= m; k++) {
tester_element_area = cols[k]
split(tester_element_area, parts, SUBSEP)
# The parts follow the new key order: area, tester, program.
area = parts[1]
tester = parts[2]
element = parts[3]
输出:
TESTER T6712 T7911 E8123 T7911 E8123 F4309 F4309
PROGRAM Element01 Element04 Element05 Element03 Element02 Element01 Element02
AREA FT1 FT1 FT2 FT2 FT3 PB1 PB1
High 0 0 54 54 0 0 54
Low 23 23 0 0 0 23 0
Medium 0 0 0 0 48 48 0