搜索字符串并捕获特定HTML属性的值

时间:2017-04-07 13:41:28

标签: javascript regex

我需要一个正则表达式来获取下面所有字符串中的 src 值,需要考虑单引号、双引号和无引号三种情况。

使用双引号:

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

使用单引号:

var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

没有引号:

var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

预期匹配/返回

anyFile.js

我有这个看起来很糟糕的解决方案:

// Extracts the script file name from the sample string `a` (declared
// elsewhere) using plain substring search and splitting. The three
// branches try a double-quoted, then single-quoted, then unquoted src.
// The result is left in `file`; it becomes "no match" when "src=" does not
// occur anywhere in `a`.
// Note: indexOf/split look for the marker text anywhere in the string, so
// the marker is also found inside longer attribute names that end in "src=".
var file = "";
var match = 'src="';
if(a.indexOf(match)>=0){
  // Keep the piece that follows the first occurrence of the marker.
  file = a.split(match);
  file = file[1];
  // Cut at the first `.js"` and put the ".js" extension back.
  file = file.split('.js"');
  file = file[0] + ".js";
}
else{
  // Same procedure for a single-quoted value.
  match = "src='";
  if(a.indexOf(match)>=0){
    file = a.split(match);
    file = file[1];
    file = file.split(".js'");
    file = file[0] + ".js";
  }
  else{
    // Last attempt: unquoted value, cut at the first ".js".
    match = "src=";
    if(a.indexOf(match)>=0){
      file = a.split(match);
      file = file[1];
      file = file.split('.js');
      file = file[0] + ".js";
    }
    else {
    // None of the three markers occurs in `a`.
    file = "no match";
    }
  }
}

但它可以匹配错误的src属性,所以我需要一个正则表达式。

非常感谢任何帮助。

解决方案(特别感谢Rajesh)

// Sample tags for getSrc, four per quoting style. Every one of them carries
// the same src value, "anyFile.js".

// Double-quoted attribute values.
var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';

// Single-quoted attribute values.
var e = "<script class='anyClass' src='anyFile.js' id='anyID'>";
var f = "<script class='anyClass' src='anyFile.js'>";
var g = "<script src='anyFile.js' id='anyID'>";
var h = "<script src='anyFile.js'>";

// Unquoted attribute values.
var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

/**
 * Logs the value of the `src` attribute found in an HTML tag string.
 *
 * Double-quoted, single-quoted and unquoted values are supported. The
 * attribute name has to start the string or follow whitespace, so an
 * attribute such as `data-src=` is not mistaken for `src=`. Quoted values
 * may contain spaces. When no `src` attribute with a non-empty value is
 * present, the text "no match" is logged instead.
 *
 * @param {string} str - Markup to search, e.g. `<script src="anyFile.js">`.
 * @returns {void} The result is only written to the console.
 */
function getSrc(str){
  // One alternative per quoting style; exactly one of the three capture
  // groups is filled on a successful match.
  const regex = /(?:^|\s)src\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+))/i;
  const found = str.match(regex);
  if(found === null){
    console.log("no match");
    return;
  }
  // Each group requires at least one character, so `||` safely picks the
  // group that took part in the match.
  console.log(found[1] || found[2] || found[3]);
}

// Run every sample string through getSrc; each call logs one line to the console.
[a,b,c,d,e,f,g,h,i,j,k,l].forEach(getSrc)

3 个答案:

答案 0 :(得分:1)

这是我想出来实现这一目标的。欢迎任何改进:

script src=["']?(\w+\.js)

https://regex101.com/r/6NCj94/2

或者您的脚本名称类似于jquery.min.js的情况:

script src=["']?(.*\.js)

https://regex101.com/r/6NCj94/3

答案 1 :(得分:1)

您可以尝试这样的事情:

逻辑:

  • 搜索模式src=,后跟单引号/双引号[可选],后跟任何非单/双引号。
  • 这会产生类似 src="anyfile.js 的内容。
  • 现在把开头部分替换掉,即 src= 以及其后的可选引号。

正则表达式

var a = '<script class="anyClass" src="anyFile.js" id="anyID">';
var b = '<script class="anyClass" src="anyFile.js">';
var c = '<script src="anyFile.js" id="anyID">';
var d = '<script src="anyFile.js">';
var i= "<script class=anyClass src=anyFile.js id=anyID>";
var j= "<script class=anyClass src=anyFile.js>";
var k= "<script src=anyFile.js id=anyID>";
var l= "<script src=anyFile.js>";

/**
 * Logs the value of the `src` attribute found in an HTML tag string.
 *
 * Works for double-quoted, single-quoted and unquoted values. `src` must be
 * a whole attribute name (at the start of the string or after whitespace),
 * so names like `data-src=` are skipped. If the string has no `src`
 * attribute with a non-empty value, "no match" is logged rather than
 * throwing.
 *
 * @param {string} str - Markup to search, e.g. `<script src=anyFile.js>`.
 * @returns {void} The result is only written to the console.
 */
function getSrc(str){
  // The value is captured directly, so no second pass is needed to strip
  // the leading `src=` and quote.
  const regex = /(?:^|\s)src\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+))/i;
  const result = str.match(regex);
  // String.prototype.match returns null when nothing matches; indexing it
  // unguarded would raise a TypeError.
  if(result === null){
    console.log("no match");
    return;
  }
  // Only one group is set and it is never empty, so `||` selects it.
  console.log(result[1] || result[2] || result[3]);
}

// Run each listed sample string through getSrc; each call logs one line to the console.
[a,b,c,d,i,j,k,l].forEach(getSrc)

答案 2 :(得分:1)

我更喜欢以正确的方式使用DOM:

# NOTE(review): this snippet is Ruby that builds an SQL sub-query string, not
# JavaScript, and it does not touch the DOM. It looks unrelated to the
# surrounding answer (possibly pasted in by mistake) — confirm against the
# original post before relying on it.
sub_query = Model.select(:column_name).where(condition).to_sql
Model.
  select("column_name(s)").
  where("column_name operator ALL (#{sub_query})")

完整的javascript代码:

// Print the src of every <script> element in the current document.
var scripts = document.getElementsByTagName('script');
for (let position = 0; position < scripts.length; position += 1) {
  console.log(scripts[position].src);
}

正则表达方式

// Three sample tags: double-quoted, single-quoted and unquoted src values.
var a = '<script src="anyFile1.js"><\/script>';
var b = '<script src=\'anyFile2.js\'><\/script>';
var c = '<script src=anyFile3.js><\/script>';

// Parse the markup inside a <div> that is never attached to the page, so the
// HTML parser deals with the quoting rules.
var html = document.createElement('div');
html.innerHTML = [a, b, c].join('');

// Print the src of each <script> element found in the container.
var scripts = html.getElementsByTagName('script');
for (let position = 0; position < scripts.length; position += 1) {
  console.log(scripts[position].src);
}

Live demo

<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?

完整的JS代码:

// Collect the src value of every <script ...> tag in `str` into `array`
// (both are expected to be defined by the surrounding code).
// Capture group 1 is the optional opening quote, group 2 the src value and
// group 3 the optional closing </script> tag.
// exec() is used in a loop instead of String.prototype.replace, so no
// replacement string is built and thrown away just to visit the matches.
const scriptSrcPattern = /<script[^>]+?src=(['"]?)([^\s>]+)\1[^>]*>(<\/script>)?/g;
let tagMatch;
while ((tagMatch = scriptSrcPattern.exec(str)) !== null) {
  array.push(tagMatch[2]);
}