我是 awk 新手,正在尝试用 awk 解析 LDIF 文件并将其转换为 CSV。如果属性值中不含逗号,下面的脚本工作正常;但如果属性值中含有逗号,该值就会在 CSV 中被拆分成多列。我需要帮助:当属性值包含逗号时,应如何用 awk 处理该字段?下面是我正在使用的 awk 脚本和示例数据。 AWK 脚本:
#!/bin/awk -f
# Convert LDIF user entries to CSV, one output row per entry.
#
# Each "attr:" rule remembers the value of one attribute of the current
# entry. The row for an entry is written when the next "dn" line is seen,
# and the END rule writes the row for the final entry.
#
# NOTE(review): values are taken from $2, the second field under the active
# field separator (with the default separator that is only the first word of
# the value), and they are printed without any CSV quoting, so a value that
# contains a comma spills into additional columns.
BEGIN {
    uid = ""
    omEntitytype = ""
    sn = ""
    givenName = ""
    initials = ""
    omUnit = ""
    departmentNumber = ""
    omCostCenter = ""
    title = ""
    omManager = ""
    omaffiliatedaccount = ""

    # One format shared by both output sites so every row has the same
    # eleven columns as the header (the per-entry row used to carry a stray
    # ",," that shifted everything after USER_LASTNAME by one column).
    rowFmt = "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"

    printf("USERID,USER_TYPE,USER_LASTNAME,USER_FIRSTNAME,USER_INITIAL,USER_UNIT,USER_DEPT,CHARGE_UNIT,USER_JOB_TITLE,SPONSOR_USERID,ACCOUNT_ID\n");
}

# Remember each attribute of interest for the entry being read.
/^uid:/ {uid=$2}
/^omEntitytype:/ {omEntitytype=$2}
/^sn:/ {sn=$2}
/^givenName:/ {givenName=$2}
/^initials:/ {initials=$2}
/^omUnit:/ {omUnit=$2}
/^departmentNumber:/ {departmentNumber=$2}
/^omCostCenter:/ {omCostCenter=$2}
/^title:/ {title=$2}
/^omManager:/ {omManager=$2}
/^omaffiliatedaccount:/ {omaffiliatedaccount=$2}

# A "dn" line starts a new entry: flush the previous one (if it had a uid)
# and clear every attribute so nothing carries over into the next row.
/^dn/{
    if(uid != "") printf(rowFmt,uid,omEntitytype,sn,givenName,initials,omUnit,departmentNumber,omCostCenter,title,omManager,omaffiliatedaccount);
    uid = ""
    omEntitytype = ""
    sn = ""
    givenName = ""
    initials = ""
    omUnit = ""
    departmentNumber = ""
    omCostCenter = ""
    title = ""
    omManager = ""
    omaffiliatedaccount = ""
}

# Flush the last entry, which has no following "dn" line to trigger it.
END{
    if(uid != "") printf(rowFmt,uid,omEntitytype,sn,givenName,initials,omUnit,departmentNumber,omCostCenter,title,omManager,omaffiliatedaccount)
}
传递给该脚本的示例 LDIF 数据:
dn: uid=sample1,o=people,dc=om,DC=COM
uid: sample1
omEntitytype: Contingent, off-Site
sn: sample1 name1
givenName: sample1
initials: P
omUnit: 07
departmentNumber: 123
omCostCenter: 10
title: Analyst
omaffiliatedaccount: 12345
dn: uid=sample2,o=people,dc=om,DC=COM
uid: sample2
omEntitytype: Contingent, On-Site
sn: sample2 name2
givenName: sample2
initials: P
omUnit: 07
departmentNumber: 123
omCostCenter: 10
title: PLAT MGR, ENGINE,HYD,ELECT,DRIVES
omaffiliatedaccount: 12345
在上面的示例数据中,出问题的字段是 omEntitytype 和 title:它们的值中含有 ',',因此在 CSV 中被拆分到了新的列。
非常感谢任何帮助,谢谢。
答案 0 :(得分:1)
请原谅我的无知,但你不能改用其他字符(例如 '/')来分隔多个标题(title)吗?
答案 1 :(得分:1)
每当数据中包含“名称 -> 值”对时,最好先构建一个保存这种映射的数组,然后按名称打印各个值:
$ cat tst.awk
# Convert blank-line-separated records of "name: value" lines to CSV.
#
# Every line containing a colon is stored in name2value[] under its name.
# A record is printed when a blank line (or end of input) is reached, with
# the columns listed in flds[] in that fixed order. Names absent from a
# record produce an empty field.
BEGIN {
    OFS = ","
    numFlds = split("uid omEntitytype sn givenName initials omUnit departmentNumber omCostCenter title omManager omaffiliatedaccount",flds)
    print "USERID,USER_TYPE,USER_LASTNAME,USER_FIRSTNAME,USER_INITIAL,USER_UNIT,USER_DEPT,CHARGE_UNIT,USER_JOB_TITLE,SPONSOR_USERID,ACCOUNT_ID"
}

# Attribute line: the name is everything before the first colon, the value
# is everything after it with leading white space removed.
/:/ {
    name = value = $0
    sub(/:.*/,"",name)
    sub(/[^:]*:[[:space:]]*/,"",value)
    name2value[name] = value
    haveData = 1
}

!NF { prtRec() }
END { prtRec() }

# Print the collected record as one CSV row and reset the collection.
# Does nothing when no attribute has been stored since the last row, so
# repeated or trailing blank lines do not produce rows of empty fields.
# i is declared as an extra parameter to keep it local to the function.
function prtRec(    i) {
    if (!haveData) return
    for (i=1; i<=numFlds; i++) {
        # Double any embedded double quote so the quoted field stays valid CSV.
        gsub(/"/,"\"\"",name2value[flds[i]])
        printf "\"%s\"%s", name2value[flds[i]], (i<numFlds?OFS:ORS)
    }
    delete name2value
    haveData = 0
}
$ awk -f tst.awk file
USERID,USER_TYPE,USER_LASTNAME,USER_FIRSTNAME,USER_INITIAL,USER_UNIT,USER_DEPT,CHARGE_UNIT,USER_JOB_TITLE,SPONSOR_USERID,ACCOUNT_ID
"sample1","Contingent, off-Site","sample1 name1","sample1","P","07","123","10","Analyst","","12345"
"sample2","Contingent, On-Site","sample2 name2","sample2","P","07","123","10","PLAT MGR, ENGINE,HYD,ELECT,DRIVES","","12345"
您实际上没有告诉我们希望如何处理逗号。上面的脚本用双引号把字段括了起来;如果这不是您想要的,只需修改 prtRec() 来执行您想要的操作即可,例如 gsub(/,/,";",name2value[flds[i]])。
请注意,如果您的输入数据中不缺少任何需要输出的字段,和/或您只希望字段按其在输入中出现的顺序输出,那么上面的脚本还可以更简单。
答案 2 :(得分:0)
您可以检查字段中是否存在 ",";如果存在,可以将其替换为另一个字符,或者用引号括起该字段(这是标准做法)。
下面是后一种做法的原型:
# Prototype: split the input on ":" and print it comma-separated, wrapping
# any field that itself contains a comma in double quotes.
printf '%s\n' "a:b,c:d" |
awk -F: -v q='"' -v OFS=, '{
  # Assigning to a field makes awk rebuild the record with OFS. Do it
  # unconditionally so a record without any comma is converted as well.
  $1 = $1
  for (i = 1; i <= NF; i++)
    if (index($i, ",")) $i = q $i q
}1'
a,"b,c",d
答案 3 :(得分:0)
我会这样做(需要GNU awk)
# Convert LDIF read from the pipeline to CSV rows (no header row is printed).
# Requires GNU awk for the three-argument form of match().
# NOTE(review): "generate LDIF" is presumably a placeholder for whatever
# command produces the LDIF text - confirm before running.
generate LDIF | gawk '
    # Return str as a CSV field. A value containing a comma or a double
    # quote is wrapped in double quotes, with every embedded double quote
    # doubled. Any other value is returned unchanged.
    function quote(str) {
        if (str ~ /[",]/) {
            gsub(/"/, "\"\"", str)
            str = "\"" str "\""
        }
        return str
    }

    # Print the current record as one CSV row, in fixed column order.
    # Records without a uid are skipped.
    function print_record() {
        if (data["uid"] != "")
            printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n",
                quote(data["uid"]),
                quote(data["omEntitytype"]),
                quote(data["sn"]),
                quote(data["givenName"]),
                quote(data["initials"]),
                quote(data["omUnit"]),
                quote(data["departmentNumber"]),
                quote(data["omCostCenter"]),
                quote(data["title"]),
                quote(data["omManager"]),
                quote(data["omaffiliatedaccount"])
    }

    # "name: value" line: remember the value under its attribute name.
    match($0, /^([^:]+): (.*)/, m) {data[m[1]] = m[2]}

    # A blank line ends a record: print it and start collecting afresh.
    NF == 0 {print_record(); delete data}

    # Print the final record, which may not be followed by a blank line.
    END {print_record()}
'
根据您的示例输入,输出为:
sample1,"Contingent, off-Site",sample1 name1,sample1,P,07,123,10,Analyst,,12345
sample2,"Contingent, On-Site",sample2 name2,sample2,P,07,123,10,"PLAT MGR, ENGINE,HYD,ELECT,DRIVES",,12345