我在文件中有多个“表格”,例如:
col1, col2, col3, col4
1, 2, 3, 4
5, 6, 7, 8
col2, col3, col5
10, 11, 12
13, 14, 15
我想将这两个表合并为:
col1, col2, col3, col4, col5
1 , 2 , 3 , 4 ,
5 , 6 , 7 , 8 ,
, 10 , 11 , , 12
, 13 , 14 , , 15
(注意:多出来的空格只是为了便于阅读)
这似乎至少需要两遍扫描:一遍用于收集完整的列名列表,另一遍用于生成输出表。用awk可以做到这一点吗?如果不能,你会推荐什么其他工具?
答案 0 :(得分:2)
尝试一下:
代码:
$ cat s.awk
# s.awk - merge several "colN" tables from one file into a single wide table.
# Usage: awk -f s.awk file file
# The same file is named twice so that it is read in two passes.

# Pass 1 (NR==FNR only holds while the first copy is read):
# find the highest column number used in any header line.
NR==FNR{
    if (match($1, /^col/))
        for (i=1;i<=NF;i++){
            # "col12," -> 12: take everything after "col"; +0 forces a number,
            # which also drops the trailing comma and makes the test numeric.
            n=substr($i,4)+0
            if (n>maxIndex)
                maxIndex=n
        }
    next
}
# Pass 2, first line: print the merged header col1 .. col<maxIndex>.
FNR==1{
    for (i=1;i<=maxIndex;i++)
        header=(i==maxIndex)?header "col"i:header "col" i ", "
    print header
}
# Header line of a table: remember which input field carries each column number.
/^col[1-9]/{
    for (i in places)
        delete places[i]
    for (i=1;i<=NF;i++){
        n=substr($i,4)+0
        places[n]=i
    }
}
# Data line: emit one cell per merged column; columns missing from the
# current table become an empty cell (just the separator).
/^[0-9]/{
    s=""
    for (i=1;i<=maxIndex;i++)
        s=(i in places)? s $places[i] " " : s ", "
    print s
}
调用:
awk -f s.awk file file | column -t
输出:
col1, col2, col3, col4, col5
1, 2, 3, 4 ,
5, 6, 7, 8 ,
, 10, 11, , 12
, 13, 14, , 15
HTH Chris
答案 1 :(得分:1)
这是一个单遍(one-pass)的perl解决方案。它假定文件中的各个表之间至少有一个空行。
perl -00 -ne '
    # State shared by every paragraph; set up once before any input is read.
    BEGIN {
        %column2idx = ();    # column name  -> output column index
        @idx2column = ();    # output index -> column name, in first-seen order
        @lines      = ();    # one array per data row, indexed by output column
        $lineno     = 0;     # number of data rows stored so far
    }

    # With -00 each iteration receives one blank-line-separated table in $_.
    chomp;
    my @rows = split /\n/;

    # Header row: give every unseen column name the next free output index
    # and note, field by field, where this table writes its values.
    my @field_map;
    for my $name (split /, /, $rows[0]) {
        unless (exists $column2idx{$name}) {
            push @idx2column, $name;
            $column2idx{$name} = $#idx2column;
        }
        push @field_map, $column2idx{$name};
    }

    # Data rows: scatter each cell into its slot of the merged layout.
    for my $record (@rows[1 .. $#rows]) {
        my @cells = split /, /, $record;
        my @merged;
        $merged[ $field_map[$_] ] = $cells[$_] for 0 .. $#cells;
        $lines[$lineno++] = \@merged;
    }

    # After the last table: print the merged header, then every stored row
    # padded out to the full column count (unset cells print as empty).
    END {
        print join(", ", @idx2column), "\n";
        for my $stored (@lines) {
            print join(", ", map { $stored->[$_] } 0 .. $#idx2column), "\n";
        }
    }
' tables | column -t
输出
col1, col2, col3, col4, col5
1, 2, 3, 4,
5, 6, 7, 8,
, 10, 11, , 12
, 13, 14, , 15
答案 2 :(得分:1)
代码假定表格由空行分隔:
awk -F', *' '
# A blank line ends a table: the record after it is the next header.
!NF {
  header_nr = NR + 1
  next
}
# Header record: register unseen column names in first-seen order and
# remember which name each field position carries in the current table.
NR == 1 || NR == header_nr {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      cols[++c] = $f
    idx[f] = $f
  }
  next
}
# Data record: store every cell under (row number, column name).
{
  n++
  for (f = 1; f <= NF; f++)
    vals[n, idx[f]] = $f
}
# Finally print the merged header, then each row across all known columns;
# cells a table never supplied come out empty.
END {
  for (k = 1; k <= c; k++)
    printf "%s", (cols[k] (k < c ? OFS : RS))
  for (r = 1; r <= n; r++)
    for (k = 1; k <= c; k++)
      printf "%s", (vals[r, cols[k]] (k < c ? OFS : RS))
}' OFS=', ' tables
如果您将表格放在单独的文件中:
awk -F', *' '
# First record of each input file is the header of that table: register
# unseen column names in first-seen order and remember which name each
# field position carries for the rest of the file.
FNR == 1 {
  for (f = 1; f <= NF; f++) {
    if (!seen[$f]++)
      cols[++c] = $f
    idx[f] = $f
  }
  next
}
# Every other record is data: store each cell under (row number, column name).
{
  n++
  for (f = 1; f <= NF; f++)
    vals[n, idx[f]] = $f
}
# Finally print the merged header, then each row across all known columns;
# cells a table never supplied come out empty.
END {
  for (k = 1; k <= c; k++)
    printf "%s", (cols[k] (k < c ? OFS : RS))
  for (r = 1; r <= n; r++)
    for (k = 1; k <= c; k++)
      printf "%s", (vals[r, cols[k]] (k < c ? OFS : RS))
}' OFS=', ' file1 file2 [.. filen]