使用正则表达式和javascript将锚点hrefs转换为唯一标记

时间:2016-12-01 15:18:33

标签: javascript regex

我有这个样本数据:

<products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <a href ="#block">block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <a href ="#toy">toy</a>
</product>
<product>
          <id>##1357902
          <a href ="#toy">toy</a>
          <a href ="#brick">brick</a>
          <weight>4kg
</product>
</product_database>

我想将href转换为:

<products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <..pd ##56789 #block>block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <..pd ##1234 #toy>toy</a>
</product>
<product>
          <id>##1357902
          <..pd ##1234 #toy>toy</a>
          <..pd ##56789 #brick>brick</a>
          <weight>4kg
</product>
</product_database>

也就是把每个 <a href> 锚点转换为这种唯一标记:标记中的 id 取自这样一个 product——它包含的 <a name> 锚点的 name 与该 href 的值(去掉 # 后)相同。我只能使用 JavaScript 和正则表达式,任何帮助都将不胜感激。

2 个答案:

答案 0 :(得分:1)

我试着弄清楚你到底需要做什么,并写了一个可能满足你需求的解决方案。

&#13;
&#13;
// Matches one <product>...</product> element; group 1 is its inner text.
var findProduct = /<product>\s+([\W\w]+?)\s+<\/product>/g;
// Matches an opening tag plus the text that follows it on the same line:
//   1 = tag name, 2 = attribute ("name" | "href"), 3 = attribute value
//   (leading "#" stripped), 4 = text after the tag.
var findTag = /<([\w]+)(?: (name|href) ="#?([^"]+)")?>([^<\n\r]+)/g;

/**
 * Rebuilds the product database, turning every `<a href ="#x">` anchor into a
 * `<..pd ID #x>` marker, where ID is the id of the product that declares
 * `<a name ="x">`.
 *
 * The output is regenerated from the parsed data (id, weight, name anchors,
 * then links), so the original indentation and tag order are not preserved.
 * Links whose target is not declared by any product are emitted unchanged as
 * `<a href ="#x">` anchors instead of throwing.
 *
 * @param {string} data - Source text containing `<product>` elements.
 * @returns {string} The rebuilt `<product_database>` text.
 */
function convertProductLinks(data) {
  // Prototype-less map so that names such as "constructor" cannot resolve
  // to inherited Object.prototype members.
  var byName = Object.create(null);
  var products = [];

  // String#replace is used purely as a "for each match" iterator here; the
  // returned strings are discarded.
  data.replace(findProduct, function (_, tags) {
    var product = { id: "", names: [], weight: "", links: [] };
    tags.replace(findTag, function (_, tagName, attr, attrValue, tagValue) {
      switch (tagName) {
        case "id": product.id = tagValue; break;
        case "weight": product.weight = tagValue; break;
        case "a":
          if (attr === "name") {
            product.names.push(attrValue);
            byName[attrValue] = product;
          } else if (attr === "href") {
            // Keep the anchor's own text so it can be reproduced verbatim.
            product.links.push({ target: attrValue, text: tagValue });
          }
          break;
      }
    });
    products.push(product);
  });

  // Links are resolved only after every product has been parsed, so a link
  // may point at a name declared by a later product.
  var body = products.map(function (product) {
    var lines = ["<product><id>" + product.id, "<weight>" + product.weight];
    product.names.forEach(function (name) {
      lines.push("<a name =\"" + name + "\">" + name + "</a>");
    });
    product.links.forEach(function (link) {
      var owner = byName[link.target];
      lines.push(owner
        ? "<..pd " + owner.id + " #" + link.target + ">" + link.text + "</a>"
        : "<a href =\"#" + link.target + "\">" + link.text + "</a>");
    });
    lines.push("</product>");
    return lines.join("\n");
  }).join("\n");

  return "<product_database>" + body + "</product_database>";
}

// Browser entry point: convert the contents of the #data textarea in place.
// Guarded so the conversion function can also be loaded outside a browser.
if (typeof document !== "undefined") {
  var dataField = document.getElementById("data");
  dataField.value = convertProductLinks(dataField.value);
}
&#13;
  <textarea id="data" cols=50 rows=30><products_database>
<product>
         <id>##1234
         <a name ="toy">toy</a>
         <weight>5kg
         <a href ="#block">block</a>
</product>
<product>
          <id>##56789
          <a name ="brick">brick</a>
          <a name ="lego">lego</a>
          <a name ="block">block</a>
          <weight>2kg
          <a href ="#toy">toy</a>
</product>
<product>
          <id>##1357902
          <a href ="#toy">toy</a>
          <a href ="#brick">brick</a>
          <weight>4kg
</product>
</product_database></textarea>
&#13;
&#13;
&#13;

答案 1 :(得分:1)

或者您也可以使用下面这种(主要基于正则表达式的)方法,但我不确定它的容错性如何:

var a = '<products_database>\n<product>\n         <id>##1234\n         <a name ="toy">toy</a>\n         <weight>5kg\n         <a href ="#block">block</a>\n</product>\n<product>\n          <id>##56789\n          <a name ="brick">brick</a>\n          <a name ="lego">lego</a>\n          <a name ="block">block</a>\n          <weight>2kg\n          <a href ="#toy">toy</a>\n</product>\n<product>\n          <id>##1357902\n          <a href ="#toy">toy</a>\n          <a href ="#brick">brick</a>\n          <weight>4kg\n</product>\n</product_database>';

/**
 * Replaces every `<a href ="#x">` opening tag with `<..pd ID #x>`, where ID
 * is the `<id>` of the product that contains `<a name ="x">`. Everything else
 * in the text (layout, closing `</a>` tags) is left untouched. Anchors whose
 * target name is not declared anywhere stay as they are.
 *
 * @param {string} text - Source text with `<id>`, `<a name>` and `<a href>` tags.
 * @returns {string} The text with all resolvable href anchors converted.
 */
function convertHrefsToMarkers(text) {
  // Case 1: the product declaring the name (id + name anchor) comes BEFORE
  // the href. `(?:[\s\S](?!<id))*?` keeps the name search inside one product.
  var idBeforeHref = /<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")(.+?)("[\S\s]*?)<a href ="#\3">/g;
  // Case 2: the href comes BEFORE the product declaring the name.
  var hrefBeforeId = /<a href ="#(.+?)">([\S\s]*?)<id>(##\d+)((?:[\s\S](?!<id))*?<a name =")\1"/g;

  // One global pass cannot convert two hrefs that need the same id/name
  // span, because matches do not overlap. Repeat until a pass changes
  // nothing; every replacement removes one `<a href ="#`, so this terminates.
  var previous;
  do {
    previous = text;
    text = text
      .replace(idBeforeHref, '<id>$1$2$3$4<..pd $1 #$3>')
      .replace(hrefBeforeId, '<..pd $3 #$1>$2<id>$3$4$1"');
  } while (text !== previous);

  return text;
}

a = convertHrefsToMarkers(a);

a;

这里用到了两个正则表达式:

1.)匹配 id 出现在 href 之前的情况

2.)匹配 id 出现在 href 之后的情况

必须循环执行替换,因为在一次全局替换中,同一个 id 不会被匹配多次。

如果有不清楚的地方,请随时提问。