您好,我需要从非常庞大的日志文件生成报告。到目前为止,我用 awk 写的单行命令(one-liner)足以满足需求,但现在我卡住了。
我写了awk代码:
# One-line form of the report attempt: an if / else-if chain evaluated once per input line of DATA_FILE.
# Lines whose first field contains "-" (after a PCM-TSL or TERMID line set s) stop at the split() branch, so the print branch is never reached for them.
awk '{if ($1 == "GR" && $4 == "NGR") {c = $3; n = $6} else if ($1 == "NAME") {m = $3} else if ($1 == "PCM-TSL" || $1 == "TERMID") {s = "1"} else if (s == "1" && $1 ~ "-") {split ($1,pcm,"-")} else if (s == "1" && p == pcm[1]) {next} else if (c != "" && n != "" ) {p = pcm[1]; print c " " n " "m " " p }}' DATA_FILE
或者拆成多行:
awk '
# Report attempt: one if / else-if chain is evaluated per input line,
# so at most one branch runs for any given line.
{
if ($1=="GR" && $4=="NGR") {
# "GR : <gr> NGR : <ngr>" line: remember field 3 and field 6.
c=$3
n=$6}
else if ($1=="NAME") {
# "NAME : <name>" line: remember field 3.
m=$3}
else if ($1=="PCM-TSL" || $1=="TERMID") {
# Table header line: set the flag s.
s="1"}
else if (s=="1" && $1~"-") {
# Flag set and first field contains a dash: split it on "-" into pcm[].
# Nothing is printed here, and the two branches below are not evaluated
# for such a line because this branch already matched.
split ($1,pcm,"-")}
else if (s=="1" && p==pcm[1]) {
# Flag set and the saved value p equals pcm[1]: skip the line.
next}
else if (c!="" && n!="" ) {
# Reached only by lines that matched none of the branches above, once
# c and n are non-empty: save pcm[1] in p and print one output line.
p = pcm[1]
print c,n,m,p}
}' DATA_FILE
它以正确的格式显示数据,但每组 GR、NGR 和 NAME 只显示一条记录。
我需要显示:
GR NGR NAME PCM
200 D200E AAA1 4
200 D200E AAA1 8
200 D200E AAA1 ...
220 DA30E BBA1 1
220 DA30E BBA1 2
...
示例日志如下:
GROUP(S)
some unused data....
GR : 200 NGR : D200E
NAME : AAA1
some unused data....
DR1A : - DR2A : - ART1A : - ART2A : - ACT : NO
DR1B : - DR2B : - ART1B : - ART2B : - ACT : NO
DR1C : - DR2C : - ART1C : - ART2C : - ACT : NO
CIRCUIT(S)
PCM-TSL ORD CTRL HGR STATE LSI
4-0 1 X 1-1 ACT -
4-1 2 X 1-2 ACT -
4-2 3 X 1-3 ACT -
4-3 4 X 1-4 ACT -
4-4 5 X 1-5 ACT -
4-5 6 X 1-6 ACT -
4-6 7 X 1-7 ACT -
4-7 8 X 1-8 ACT -
4-8 9 X 1-9 ACT -
4-9 10 X 1-10 ACT -
4-10 11 X 1-11 ACT -
8-5 6 X 1-6 ACT -
8-6 7 X 1-7 ACT -
8-7 8 X 1-8 ACT -
8-8 9 X 1-9 ACT -
8-9 10 X 1-10 ACT -
8-10 11 X 1-11 ACT -
GROUP(S)
GR : 220 NGR : DA30E
NAME : BBA1
DR1A : - DR2A : - ART1A : - ART2A : - ACT : NO
DR1B : - DR2B : - ART1B : - ART2B : - ACT : NO
DR1C : - DR2C : - ART1C : - ART2C : - ACT : NO
some unused data....
CIRCUIT(S)
PCM-TSL ORD CTRL HGR STATE LSI
1-0 1 X 1-1 ACT -
1-1 2 X 1-2 ACT -
1-2 3 X 1-3 ACT -
1-3 4 X 1-4 ACT -
1-4 5 X 1-5 ACT -
1-5 6 X 1-6 ACT -
1-6 7 X 1-7 ACT -
1-7 8 X 1-8 ACT -
1-8 9 X 1-9 ACT -
1-9 10 X 1-10 ACT -
1-10 11 X 1-11 ACT -
2-5 6 X 1-6 ACT -
2-6 7 X 1-7 ACT -
2-7 8 X 1-8 ACT -
2-8 9 X 1-9 ACT -
2-9 10 X 1-10 ACT -
2-10 11 X 1-11 ACT -
more simmilar blocks
祝好,汤姆
答案 0 :(得分:2)
以下是我根据对您问题的理解给出的解决方案,文件 summary.awk :
# summary.awk -- print one "GR NGR NAME PCM" line for each run of equal
# PCM values found in the table of every group.
# Usage: awk -f summary.awk DATA_FILE

# A "GR : <gr> NGR : <ngr>" line starts a new group. last_pcm is cleared
# here so that a group whose first PCM equals the last PCM of the previous
# group is still reported instead of being treated as a duplicate.
$1 == "GR" && $4 == "NGR" {gr = $3; ngr = $6; last_pcm = ""}

# "NAME : <name>" line: remember the group name.
$1 == "NAME" { name = $3 }

# GROUP(S) marks the end of the table
$1 == "GROUP(S)" { in_table = 0 }

# PCM-TSL and TERMID mark the beginning of a table. Print the header
# once.
$1 == "PCM-TSL" || $1 == "TERMID" {
    in_table = 1
    if (!header_printed)
        printf "%-3s %-6s %-6s %-s\n", "GR", "NGR", "NAME", "PCM"
    header_printed = 1
}

# In a table, we locate the PCM value (the part of the first column before
# the first dash) and print it without duplication: the current PCM is
# compared to the one last found and a line is printed only when they
# differ. Note the pattern is unanchored, so it accepts any first column
# that contains a digit.
in_table && $1 ~ /[0-9][0-9-]*/ {
    split($1,pcm_tsl,"-")
    pcm = pcm_tsl[1]
    if (pcm != last_pcm)
        printf "%3d %-6s %-6s %d\n", gr, ngr, name, pcm
    last_pcm = pcm
}
我将脚本保存为 summary.awk ,将数据文件保存为DATA_FILE:
awk -f summary.awk DATA_FILE
这是输出:
GR NGR NAME PCM
200 D200E AAA1 4
200 D200E AAA1 8
220 DA30E BBA1 1
220 DA30E BBA1 2
$1 ~ /[0-9][0-9-]*/
要求第一列中含有一个数字,其后可以跟任意个数字或破折号。注意该模式没有锚定;若要严格表示“第一列必须以数字开头,后跟数字或破折号”,应写成 /^[0-9][0-9-]*$/。
答案 1 :(得分:1)
解决这类问题的一般方法是从输入构建一个数组,并在每次到达下一条记录时打印它。像这样(未经测试,但应该接近):
awk '
# Print one "GR NGR NAME PCM" line for every PCM collected for the current
# group, then clear the record so the next group starts empty.
# The header is printed only once, before the first group that has data.
# pcm, numPcm and k are local variables.
function prtRec(    pcm, numPcm, k) {
    # Skip the flush triggered by the very first GROUP(S) line, when no
    # "GR : ..." line has been read yet.
    if ("GR" in rec) {
        if (!hdrDone++) {
            print "GR", "NGR", "NAME", "PCM"
        }
        # rec["PCM"] is a space-separated list; default field splitting
        # ignores the trailing blank, so every element is a real value.
        numPcm = split(rec["PCM"], pcm)
        for (k = 1; k <= numPcm; k++) {
            print rec["GR"], rec["NGR"], rec["NAME"], pcm[k]
        }
    }
    split("", rec)      # or delete(rec) in gawk
    lastPcm = ""
}

# "KEY : VALUE" triples, e.g. "GR : 200 NGR : D200E": store each value
# under its key.
/:/ {
    for (i = 1; i <= NF; i += 3) {
        rec[$i] = $(i+2)
    }
}

# The "PCM-TSL ..." header line opens the circuit table.
/^PCM/ {
    inPcm = 1
}

# Table rows look like "4-0 1 X ...": keep the part before the dash and
# append it only when it differs from the previous row, so each run of
# equal PCM values is listed once. The header line itself does not match.
inPcm && $1 ~ /^[0-9]+-[0-9]+$/ {
    split($1, pcmTsl, /-/)
    if (pcmTsl[1] != lastPcm) {
        rec["PCM"] = rec["PCM"] pcmTsl[1] " "
        lastPcm = pcmTsl[1]
    }
}

# A GROUP(S) line closes the previous group: flush it.
/^[[:space:]]*GROUP\(S\)[[:space:]]*$/ { inPcm = 0; prtRec() }

# Flush the last group of the file.
END { prtRec() }
' file
答案 2 :(得分:1)
你可以尝试
awk -f ext.awk DATA_FILE
其中ext.awk
是
# ext.awk -- remember GR / NGR / NAME and the PCM values of every group
# while reading, then print the whole report from the END rule.

# "GR : <gr> NGR : <ngr>" opens a new group.
$1 == "GR" && $4=="NGR" {
    # A table was seen since the previous GR line: store how many PCM
    # values the previous group collected and restart the counter.
    if (in_tbl) {
        pcm_cnt[grp] = k
        k = 0
    }
    in_tbl = 0
    first_row = 1
    grp++
    gr_val[grp] = $3
    ngr_val[grp] = $6
}

# "NAME : <name>" belongs to the current group.
$1 == "NAME" {
    name_val[grp] = $3
    next
}

# Table header line: following rows are table rows.
$1 == "PCM-TSL" || $1 == "TERMID" {
    in_tbl = 1
    next
}

# Table row: the text before the first dash of column one is the PCM.
# The first row of a group is always stored; later rows are stored only
# when they differ from the value stored last.
in_tbl && $1 ~ "-" {
    split($1, part, "-")
    if (first_row || pcm_of[grp, k] != part[1])
        pcm_of[grp, ++k] = part[1]
    first_row = 0
}

# Close the last group and print header plus one line per stored PCM.
END {
    pcm_cnt[grp] = k
    printf "GR\tNGR\tNAME\tPCM\n"
    total = grp
    for (x = 1; x <= total; x++)
        for (y = 1; y <= pcm_cnt[x]; y++)
            print gr_val[x], ngr_val[x], name_val[x], pcm_of[x, y]
}