我有这个样本数据:
<products_database>
<product>
<id>##1234
<a name ="toy">toy</a>
<weight>5kg
<a href ="#block">block</a>
</product>
<product>
<id>##56789
<a name ="brick">brick</a>
<a name ="lego">lego</a>
<a name ="block">block</a>
<weight>2kg
<a href ="#toy">toy</a>
</product>
<product>
<id>##1357902
<a href ="#toy">toy</a>
<a href ="#brick">brick</a>
<weight>4kg
</product>
</product_database>
我想将href转换为:
<products_database>
<product>
<id>##1234
<a name ="toy">toy</a>
<weight>5kg
<..pd ##56789 #block>block</a>
</product>
<product>
<id>##56789
<a name ="brick">brick</a>
<a name ="lego">lego</a>
<a name ="block">block</a>
<weight>2kg
<..pd ##1234 #toy>toy</a>
</product>
<product>
<id>##1357902
<..pd ##1234 #toy>toy</a>
<..pd ##56789 #brick>brick</a>
<weight>4kg
</product>
也就是把每个 href 链接转换为这种专用标记:标记中的 id 取自某个“产品”,该产品的 `<a name>` 值与这个 `a href` 的值相同。我只能使用 JavaScript 和正则表达式,任何帮助将不胜感激。
答案 0 :(得分:1)
我试着弄清楚你到底需要做什么,并写了一个可能满足你需求的解决方案。
//var findProduct = /<product>\s+<id>(##\d+)\s+((?:<a name ="[^"]+">[^<]+<\/a>)+)\s+<weight>(\d+kg)\s+((?:<a href ="#[^"]+">[^<]+<\/a>)+)\s+<\/product>/g;
/**
 * Parses a products database and re-serializes it with every
 * `<a href ="#name">text</a>` link turned into a `<..pd ID #name>text</a>`
 * marker, where ID is the `<id>` of the product that declares
 * `<a name ="name">`.
 *
 * Each product is emitted as: id, weight, its name anchors, then its links,
 * one per line. A link whose target name is not declared by any product is
 * emitted unchanged instead of aborting the whole conversion.
 *
 * @param {string} source - Raw database text made of `<product>` sections.
 * @returns {string} The rebuilt database text.
 */
function convertProductLinks(source) {
  var findProduct = /<product>\s+([\W\w]+?)\s+<\/product>/g;
  var findTag = /<([\w]+)(?: (name|href) ="#?([^"]+)")?>([^<\n\r]+)/g;
  // Prototype-less dictionary: a link such as "#constructor" must not be
  // resolved through Object.prototype.
  var byName = Object.create(null);
  var products = [];
  var productMatch;
  var tagMatch;

  // Pass 1: collect every product and index it by the names it declares.
  // Links are resolved only afterwards because a link may point to a product
  // that appears later in the text.
  while ((productMatch = findProduct.exec(source)) !== null) {
    var product = { id: "", names: [], weight: "", links: [] };
    findTag.lastIndex = 0;
    while ((tagMatch = findTag.exec(productMatch[1])) !== null) {
      var attr = tagMatch[2];
      var anchor = { target: tagMatch[3], text: tagMatch[4] };
      switch (tagMatch[1]) {
        case "id":
          product.id = anchor.text;
          break;
        case "weight":
          product.weight = anchor.text;
          break;
        case "a":
          if (attr === "name") {
            product.names.push(anchor);
            byName[anchor.target] = product;
          } else if (attr === "href") {
            product.links.push(anchor);
          }
          break;
      }
    }
    products.push(product);
  }

  // Pass 2: serialize, resolving each link against the name index.
  return "<product_database>" + products.map(function (item) {
    var nameLines = item.names.map(function (anchor) {
      return "<a name =\"" + anchor.target + "\">" + anchor.text + "</a>";
    });
    var linkLines = item.links.map(function (anchor) {
      var owner = byName[anchor.target];
      if (owner === undefined) {
        // Unknown target: keep the original link rather than throwing.
        return "<a href =\"#" + anchor.target + "\">" + anchor.text + "</a>";
      }
      return "<..pd " + owner.id + " #" + anchor.target + ">" + anchor.text + "</a>";
    });
    // Joining all lines at once keeps the last name anchor and the first
    // link on separate lines.
    return ["<product><id>" + item.id, "<weight>" + item.weight]
      .concat(nameLines, linkLines, ["</product>"])
      .join("\n");
  }).join("\n") + "</product_database>";
}

// Apply the conversion to the #data textarea when running in a page.
if (typeof document !== "undefined") {
  var dataField = document.getElementById("data");
  dataField.value = convertProductLinks(dataField.value);
}
&#13;
<textarea id="data" cols=50 rows=30><products_database>
<product>
<id>##1234
<a name ="toy">toy</a>
<weight>5kg
<a href ="#block">block</a>
</product>
<product>
<id>##56789
<a name ="brick">brick</a>
<a name ="lego">lego</a>
<a name ="block">block</a>
<weight>2kg
<a href ="#toy">toy</a>
</product>
<product>
<id>##1357902
<a href ="#toy">toy</a>
<a href ="#brick">brick</a>
<weight>4kg
</product>
</product_database></textarea>
&#13;
答案 1 :(得分:1)
或者你可以使用下面这种(主要基于正则表达式的)方法,但我不确定它的容错性如何:
/**
 * Replaces every `<a href ="#name">` opening tag with `<..pd ID #name>`,
 * where ID is the `<id>` of the product that declares `<a name ="name">`.
 *
 * A single global replace cannot resolve every link, because one match
 * consumes the `<id>` it starts from (or ends at) together with everything
 * in between. The replacements are therefore repeated until the text stops
 * changing. Each successful replacement removes one `<a href ="#...">` tag,
 * so the loop always terminates. Links whose target name is not declared
 * anywhere are left untouched.
 *
 * @param {string} text - Database text containing `<product>` sections.
 * @returns {string} The text with all resolvable links converted.
 */
function resolveHrefMarkers(text) {
  // Case 1: the declaring <id>/<a name> appears BEFORE the href.
  // `(?:[\s\S](?!<id))*?` keeps the name within the same product as the id.
  var idBeforeHref = /<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")(.+?)("[\S\s]*?)<a href ="#\3">/g;
  // Case 2: the href appears BEFORE the declaring <id>/<a name>.
  var hrefBeforeId = /<a href ="#(.+?)">([\S\s]*?)<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")\1"/g;

  var current = text;
  var previous;
  do {
    previous = current;
    current = current
      .replace(idBeforeHref, '<id>$1$2$3$4<..pd $1 #$3>')
      .replace(hrefBeforeId, '<..pd $3 #$1>$2<id>$3$4$1"');
  } while (current !== previous);
  return current;
}

var a = '<products_database>\n<product>\n <id>##1234\n <a name ="toy">toy</a>\n <weight>5kg\n <a href ="#block">block</a>\n</product>\n<product>\n <id>##56789\n <a name ="brick">brick</a>\n <a name ="lego">lego</a>\n <a name ="block">block</a>\n <weight>2kg\n <a href ="#toy">toy</a>\n</product>\n<product>\n <id>##1357902\n <a href ="#toy">toy</a>\n <a href ="#brick">brick</a>\n <weight>4kg\n</product>\n</product_database>';
a = resolveHrefMarkers(a);
a;
这里用到了两个正则表达式:
1.) 第一个在 id 位于 href 之前时匹配
2.) 第二个在 id 位于 href 之后时匹配
必须循环执行替换,因为在一次全局替换中,同一个 id 不会被正则表达式匹配多次。
如果有不清楚的地方,请提问。