将行转换为多列的Shell脚本
输入CSV文件:
Driver Id,Driver Name,Measure Names,Measure Values
123,XYZ,Total Offers,10
123,XYZ,Driver Reject,0
123,XYZ,Driver Accept ,4
123,XYZ,Expired Offers,3
123,XYZ,Total Bookings,6
123,XYZ,Rider Cancels,2
123,XYZ,Driver Cancels,0
123,XYZ,Rider No-Show,0
123,XYZ,Completed Rides,4
124,PQR,Total Offers,2
124,PQR,Driver Reject,0
124,PQR,Driver Accept ,1
124,PQR,Expired Offers,1
124,PQR,Total Bookings,1
124,PQR,Rider Cancels,0
124,PQR,Driver Cancels,0
124,PQR,Rider No-Show,0
124,PQR,Completed Rides,1
需要输出:
Driver Id,Driver Name,Total Offers,Driver Reject,Driver Accept,Expired Offers,Total Bookings,Rider Cancels,Driver Cancels,Rider No-Show,Completed Rides
123,XYZ,10,0,4,3,6,2,0,0,4
124,PQR,2,0,1,1,1,0,0,0,1
我尝试使用awk,但结果不正确。
# Asker's original attempt, kept unchanged because it illustrates the problem.
# Why it does not produce the wanted table:
#   - every data row triggers its own printf, so each measure ends up on a
#     separate output line instead of being merged into one row per driver;
#   - the per-measure format strings separate fields with ";" while the
#     header line is printed with ",";
#   - the lookup table has the key "Driver Accept" (no trailing space) but the
#     sample data carries "Driver Accept " (with a trailing space), so P[$3]
#     has no format for those rows.
awk -F\, '
BEGIN{
P["Total Offers"]="%s;%s;%s;;;;;;;;;\n"
P["Driver Reject"]="%s;%s;;%s;;;;;;;;\n"
P["Driver Accept"]="%s;%s;;;%s;;;;;;;\n"
P["Expired Offers"]="%s;%s;;;;%s;;;;;;\n"
P["Total Bookings"]="%s;%s;;;;;%s;;;;;\n"
P["Rider Cancels"]="%s;%s;;;;;;%s;;;;\n"
P["Driver Cancels"]="%s;%s;;;;;;;%s;;;\n"
P["Rider No-Show"]="%s;%s;;;;;;;;%s;;\n"
P["Completed Rides"]="%s;%s;;;;;;;;;%s;\n"
}
FNR==1{
print "Driver Id,Driver Name,Total Offers,Driver Reject,Driver Accept,Expired Offers,Total Bookings,Rider Cancels,Driver Cancels,Rider No-Show,Completed Rides"
next
}
{
printf(P[$3],$1,$2,$4)
}
' sample1.csv
有人能帮我修正这个脚本,或者告诉我其他实现方法吗?在此先行致谢。
答案 0 :(得分:0)
假设您的 Input_file 与所示示例相同,并且您不要求输出行的顺序与输入一致,那么下面的命令可以帮到您。
# Collapse all measure values of each (Driver Id, Driver Name) pair into one
# comma-separated output row: "<id>,<name>,<value1>,<value2>,...".
# SUBSEP is set to "," on the command line (before Input_file is read) so the
# composite array key prints directly as "<id>,<name>".
# No header line is produced and the row order is whatever for-in yields.
awk -F, '
  # Skip the header line, then append the last field to the row collected for
  # this driver. Existence is checked with "in" instead of testing the stored
  # value for truthiness: a stored first value of 0 (or an empty value) is
  # false in awk and would otherwise be overwritten, silently losing a column.
  FNR > 1 {
    if (($1, $2) in a)
      a[$1, $2] = a[$1, $2] FS $NF
    else
      a[$1, $2] = $NF
  }
  END {
    for (i in a)
      print i FS a[i]
  }
' SUBSEP="," Input_file
答案 1 :(得分:0)
下面的脚本会保持输出顺序,并处理缺失的值(如果有的话):
awk '
BEGIN {
    # Input separator, output separator and array-subscript separator are all
    # a comma, so a key joined with OFS equals one joined with SUBSEP.
    FS = ","
    OFS = ","
    SUBSEP = ","
}

# First line of a file: start the header row with the two identifying column
# titles; the measure titles are appended later, as each one is first seen.
FNR == 1 {
    printf("%s%s%s", $1, OFS, $2)
    next
}

{
    driver = $1 SUBSEP $2

    # Remember each driver once, in order of first appearance.
    if (!(driver in seen)) {
        seen[driver]
        drivers[++ndrivers] = $1 OFS $2
    }

    # Remember each measure name once and extend the header row with it.
    if (!($3 in seen)) {
        seen[$3]
        measures[++nmeasures] = $3
        printf("%s%s", OFS, $3)
    }

    value[driver, $3] = $4
}

END {
    # Terminate the header row.
    print ""

    # One row per driver; a measure never recorded for a driver yields an
    # empty cell.
    for (d = 1; d in drivers; d++) {
        row = drivers[d]
        for (m = 1; m in measures; m++) {
            key = drivers[d] SUBSEP measures[m]
            row = row OFS ((key in value) ? value[key] : "")
        }
        print row
    }
}
' infile
<strong>解释</strong>
FS=OFS=SUBSEP=",";
- 将输入字段分隔符 FS、输出字段分隔符 OFS 以及内置变量 SUBSEP 都设置为逗号。在这个程序中,OFS 和 SUBSEP 至少必须相同,因为脚本用 indexkey = usr[u] SUBSEP names[n] 拼出访问数组 arr 的键,而 usr[u] 本身是用 OFS 拼接的。如果您的输入使用其他字段分隔符(比如管道符),请改用:
FS="|"; OFS=SUBSEP=","
如果是第一行,则打印前2个字段并转到下一行
FNR==1{
printf("%s%s%s",$1,OFS,$2);
next
}
由于您需要有序的输出,本程序使用了连续(按顺序编号)的数组 usr:
if(!(($1,$2) in tmp)){
usr[++u] = $1 OFS $2
tmp[$1,$2]
}
tmp 是以 $1 和 $2 为索引的数组;usr 是以变量 u 为索引、以 $1 和 $2 为值的数组。条件 if(!(($1,$2) in tmp)) 保证只有在该键此前不存在时才会记录它。
与上述类似,names 数组也是连续的,其值为 $3:
if(!($3 in tmp)){
names[++n] = $3;
tmp[$3]
printf("%s%s",OFS,$3)
}
arr[$1,$2,$3] = $4
数组 arr 的键由三个字段 $1,$2,$3 组成,值为 $4。
最后,在 END 块中循环遍历 usr 和 names 数组,构建 indexkey;如果数组 arr 中存在该 indexkey,就打印对应的数组值。
输入:
$ cat infile
Driver Id,Driver Name,Measure Names,Measure Values
123,XYZ,Total Offers,10
123,XYZ,Driver Reject,0
123,XYZ,Driver Accept ,4
123,XYZ,Expired Offers,3
123,XYZ,Total Bookings,6
123,XYZ,Rider Cancels,2
123,XYZ,Driver Cancels,0
123,XYZ,Rider No-Show,0
123,XYZ,Completed Rides,4
124,PQR,Total Offers,2
124,PQR,Driver Reject,0
124,PQR,Driver Accept ,1
124,PQR,Expired Offers,1
124,PQR,Total Bookings,1
124,PQR,Rider Cancels,0
124,PQR,Driver Cancels,0
124,PQR,Rider No-Show,0
124,PQR,Completed Rides,1
输出:
Driver Id,Driver Name,Total Offers,Driver Reject,Driver Accept ,Expired Offers,Total Bookings,Rider Cancels,Driver Cancels,Rider No-Show,Completed Rides
123,XYZ,10,0,4,3,6,2,0,0,4
124,PQR,2,0,1,1,1,0,0,0,1
答案 2 :(得分:0)
如果各行没有按所需字段排序,就必须使用关联数组。
$ awk -F, -v cols='Total Offers,Driver Reject,Driver Accept ,Expired Offers,Total Bookings,Rider Cancels,Driver Cancels,Rider No-Show,Completed Rides' '
# Split the requested column list once; col[1..ncols] fixes the output order.
BEGIN { ncols = split(cols, col) }

# Every line after the first: note the driver and store the value under
# (driver, measure name).
NR > 1 {
    id = $1 FS $2
    seen[id]
    val[id, $3] = $4
}

# One row per driver (for-in order is unspecified). Each cell is coerced to a
# number and printed with %d, so a measure that was never stored prints as 0.
END {
    for (id in seen) {
        printf "%s", id
        for (c = 1; c <= ncols; c++)
            printf "%s%d", FS, +val[id, col[c]]
        print ""
    }
}' file
124,PQR,2,0,1,1,1,0,0,0,1
123,XYZ,10,0,4,3,6,2,0,0,4
如果缺少某个度量行,脚本也能处理:对应的列会输出 0。
PS:请注意 "Driver Accept " 带有一个尾随空格,我保留了它。