我试图通过使用shell脚本代码来获取xml数据。当前Shell脚本代码是 @Copy⇒enter link description here
#!/bin/bash
xmlFile=$1
function parseXML() {
elemList=( $(cat $xmlFile | tr '\n' ' ' | XMLLINT_INDENT="" xmllint --format - | /bin/grep -e "</.*>$" | while read line; do \
echo $line | sed -e 's/^.*<\///' | cut -d '>' -f 1; \
done) )
totalNoOfTags=${#elemList[@]}; ((totalNoOfTags--))
suffix=$(echo ${elemList[$totalNoOfTags]} | tr -d '</>')
suffix="${suffix}_"
for (( i = 0 ; i < ${#elemList[@]} ; i++ )); do
elem=${elemList[$i]}
elemLine=$(cat $xmlFile | tr '\n' ' ' | XMLLINT_INDENT="" xmllint --format - | /bin/grep "</$elem>")
echo $elemLine | grep -e "^</[^ ]*>$" 1>/dev/null 2>&1
if [ "0" = "$?" ]; then
continue
fi
elemVal=$(echo $elemLine | tr '\011' '\040'| sed -e 's/^[ ]*//' -e 's/^<.*>\([^<].*\)<.*>$/\1/' | sed -e 's/^[ ]*//' | sed -e 's/[ ]*$//')
echo "$elemVal" #output
xmlElem="${suffix}$(echo $elem | sed 's/-/_/g')"
eval ${xmlElem}=`echo -ne \""${elemVal}"\"`
attrList=($(cat $xmlFile | tr '\n' ' ' | XMLLINT_INDENT="" xmllint --format - | /bin/grep "</$elem>" | tr '\011' '\040' | sed -e 's/^[ ]*//' | cut -d '>' -f 1 | sed -e 's/^<[^ ]*//' | tr "'" '"' | tr '"' '\n' | tr '=' '\n' | sed -e 's/^[ ]*//' | sed '/^$/d' | tr '\011' '\040' | tr ' ' '>'))
for (( j = 0 ; j < ${#attrList[@]} ; j++ )); do
attr=${attrList[$j]}
((j++))
attrVal=$(echo ${attrList[$j]} | tr '>' ' ')
attrName=`echo -ne ${xmlElem}_${attr}`
eval ${attrName}=`echo -ne \""${attrVal}"\"`
done
done
}
parseXML
而且,当前的XML结构是
<root>
<abc>
<name>zzz</name>
<email>zzz@gmail.com</email>
<phno>1234589</phno>
</abc>
<abc>
<name>aaaa</name>
<email>aa@gmail.com</email>
<phno>2456677</phno>
</abc>
</root>
当前输出
**
aaaa
aa@gmail.com
2456677
aaaa
aa@gmail.com
2456677
**
但我希望得到的输出结构是
**
zzz
zzz@gmail.com
1234589
aaaa
aa@gmail.com
2456677
**
答案 0 :(得分:1)
使用xmllint或xmlstarlet等XML解析器。
xmlstarlet sel -t -v /root/abc file
输出:
zzz zzz@gmail.com 1234589 aaaa aa@gmail.com 2456677