获取HTML中元素的字符位置

时间:2017-06-12 23:33:00

标签: jquery html ruby parsing url

我有一个HTML字符串,并希望稍后用修改后的值替换每个锚点(a标签)的href属性值。为此,我想获取href属性值在HTML字符串中开始处的字符索引(以及结束处的字符索引),或者获取锚点本身在HTML字符串中开始处的字符索引(以及结束处的字符索引)。例如,如果我有字符串:

<html><head></head><body><a href='http://example.com'/></body></html>

我想编写一个返回[34, 51]的方法,即href值中第一个字符的索引和最后一个字符的索引。据我所知,jQuery无法给出选择器匹配结果在原始HTML字符串中的索引,其他库也没有提供获取这些信息的方法。

如果现有的JavaScript库无法做到这一点(在不自己编写新解析器的前提下),是否有其他语言(特别是Ruby)的库提供此功能?

5 个答案:

答案 0 :(得分:0)

这个例子对你有帮助吗?我根据自己回答过的另一个Stack Overflow问题整理出了这个答案。

<!DOCTYPE html>
<html lang="en">
<head>
    <title>Bootstrap Example</title>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    @*MAKE SURE YOU HAVE A reference to jquery here-I have it in my bundle*@
    <script type="text/javascript">
        // Once the DOM is ready, attach the click handler to the demo button.
        $(function () {
            // Swaps the image of the last data-src wrapper for the "large C" image,
            // so that C ends up being shown by the last two wrappers.
            function showLargeCInLastWrapper() {
                var wrappers = $("div[data-src]");
                var finalWrapper = wrappers[wrappers.length - 1];
                // First child of the wrapper (the <picture> in this page's markup),
                // then that element's first child (the <img>).
                var innerImage = finalWrapper.children[0].children[0];
                innerImage.srcset = "https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+C";
            }

            $("#aBtn").click(showLargeCInLastWrapper);
        });
    </script>
</head>
<body>
    <input type="button" id="aBtn" value="BtnTriggerInsteadOfOnLoad" />
    <div class="row">
        <div class="columns small-12">
            <div class="responsive-picture" data-src="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+A">
                <picture>
                    <!--[if IE 9]><video style="display: none;"><![endif]-->
                    <source media="(min-width: 64em)" srcset="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+A">
                    <source media="(min-width: 40em)" srcset="https://dummyimage.com/640x480/000/ffffff.jpg&text=meduim+A">
                    <source media="screen" srcset="https://dummyimage.com/320x240/000/ffffff.jpg&text=small+A">
                    <!--[if IE 9]></video><![endif]-->
                    <img alt="Placeholder Picture" src="transparent.gif">
                </picture>
            </div>
            <div class="responsive-picture" data-src="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+B">
                <picture>
                    <!--[if IE 9]><video style="display: none;"><![endif]-->
                    <source media="(min-width: 64em)" srcset="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+B">
                    <source media="(min-width: 40em)" srcset="https://dummyimage.com/640x480/000/ffffff.jpg&text=medium+B">
                    <source media="screen" srcset="https://dummyimage.com/320x240/000/ffffff.jpg&text=small+B">
                    <!--[if IE 9]></video><![endif]-->
                    <img alt="Placeholder Picture" src="transparent.gif">
                </picture>
            </div>
            <div class="responsive-picture" data-src="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+C">
                <picture>
                    <img alt="Placeholder Picture" src="transparent.gif" srcset="https://dummyimage.com/1024x768/000/ffffff.jpg&text=large+C">
                </picture>
            </div>
            @*I PUT THE SRC BACK ON THE DIV like it is supposed to be*@
            <div class="responsive-picture" data-src="https://dummyimage.com/1024x768/000/ff0000.jpg&text=large+D">
                <picture>
                    <img alt="Placeholder Picture" src="transparent.gif" srcset="https://dummyimage.com/1024x768/000/ff0000.jpg&text=large+D"> 
                </picture>
            </div>
        </div>
    </div>
</body>
</html>

答案 1 :(得分:0)

我不确定你为什么需要搜索一段HTML代码字符串,而不是直接使用DOM,但下面的函数可以满足你问题中的需求。如果字符串中包含多个锚标签,你需要编写一个与下面类似的递归函数。

var htmlString = "<html><head></head><body><a href='http://example.com'/></body></html>";

/**
 * Locates the value of the first href attribute in an HTML string.
 *
 * The value is taken to be everything between the first quote character
 * (single or double) that follows the text "href" and the next occurrence
 * of that same quote character.
 *
 * @param {string} string - HTML source to scan.
 * @returns {number[]} A two-element array [first, last] holding the indices
 *   of the first and last characters of the href value (both inclusive).
 *   Returns [-1, -1] when there is no "href", no opening quote after it, or
 *   no matching closing quote. An empty value yields last === first - 1.
 */
var getUrl = function (string) {
  var hrefStart = string.indexOf('href');
  if (hrefStart === -1) {
    return [-1, -1];
  }

  // Find the opening quote; accept either quote style.
  var openingQuote = -1;
  for (var i = hrefStart + 4; i < string.length; i++) {
    var ch = string.charAt(i);
    if (ch === "'" || ch === '"') {
      openingQuote = i;
      break;
    }
  }
  if (openingQuote === -1) {
    return [-1, -1];
  }

  // The value ends at the next occurrence of the same quote character.
  var closingQuote = string.indexOf(string.charAt(openingQuote), openingQuote + 1);
  if (closingQuote === -1) {
    return [-1, -1];
  }

  // Step inside the quotes so both indices point at value characters.
  return [openingQuote + 1, closingQuote - 1];
};

console.log(getUrl(htmlString));

答案 2 :(得分:0)

您也可以使用普通的javascript进行一些解析:

/**
 * Finds every quoted href attribute value in an HTML string.
 *
 * Each occurrence of the text "href=" that is immediately followed by a
 * single or double quote produces one entry. Occurrences whose value is
 * unquoted or has no matching closing quote are skipped.
 *
 * @param {string} inputHtml - HTML source to scan.
 * @returns {number[][]} One [openingQuoteIndex, closingQuoteIndex] pair per
 *   match, in document order. The value itself occupies the characters
 *   strictly between the two indices.
 */
function getHrefsPositions(inputHtml){

    var ATTRIBUTE = "href=";
    var results = [];
    var currentIndex = inputHtml.indexOf(ATTRIBUTE);

    while (currentIndex !== -1){
        var openingQuote = currentIndex + ATTRIBUTE.length;
        var quoteChar = inputHtml.charAt(openingQuote);
        var isQuoted = quoteChar === "'" || quoteChar === '"';
        var closingQuote = isQuoted ? inputHtml.indexOf(quoteChar, openingQuote + 1) : -1;

        if (closingQuote === -1){
            // Unquoted or unterminated value: skip it and keep scanning.
            currentIndex = inputHtml.indexOf(ATTRIBUTE, openingQuote);
            continue;
        }

        results.push([openingQuote, closingQuote]);
        // Resume at the next attribute, not at the next quote character.
        currentIndex = inputHtml.indexOf(ATTRIBUTE, closingQuote + 1);
    }

    return results;
}

<html>
<head>
<script>

/**
 * Finds every quoted href attribute value in an HTML string, shows the
 * result with alert(), and returns it.
 *
 * Each occurrence of the text "href=" that is immediately followed by a
 * single or double quote produces one entry. Occurrences whose value is
 * unquoted or has no matching closing quote are skipped.
 *
 * @param {string} inputHtml - HTML source to scan.
 * @returns {number[][]} One [openingQuoteIndex, closingQuoteIndex] pair per
 *   match, in document order. The value itself occupies the characters
 *   strictly between the two indices.
 */
function getHrefs(inputHtml){
	
	var ATTRIBUTE = "href=";
	var results = [];
	var currentIndex = inputHtml.indexOf(ATTRIBUTE);
	
	while (currentIndex !== -1){
		var openingQuote = currentIndex + ATTRIBUTE.length;
		var quoteChar = inputHtml.charAt(openingQuote);
		var isQuoted = quoteChar === "'" || quoteChar === '"';
		var closingQuote = isQuoted ? inputHtml.indexOf(quoteChar, openingQuote + 1) : -1;
		
		if (closingQuote === -1){
			// Unquoted or unterminated value: skip it and keep scanning.
			currentIndex = inputHtml.indexOf(ATTRIBUTE, openingQuote);
			continue;
		}
		
		results.push([openingQuote, closingQuote]);
		// Resume at the next attribute, not at the next quote character.
		currentIndex = inputHtml.indexOf(ATTRIBUTE, closingQuote + 1);
	}

	alert(results);
	return results;
}

// Demo: scan a document that contains two single-quoted anchors; the result is shown via alert().
getHrefs("<html><head></head><body><a href='http://example.com'/><a href='http://example.com'/></body></html>");
</script>

</head>

<body>

</body>
</html>

答案 3 :(得分:0)

为什么不直接使用jQuery替换href值,然后将生成的DOM转换为字符串?

// Point every anchor in the document at the new URL...
$('a').attr({ href: 'http://www.example.com' });
// ...then serialize the root <html> element, including its contents, back to a string.
var htmlString = $('html').get(0).outerHTML;
console.log(htmlString);

答案 4 :(得分:0)

以下正则表达式可能会有所帮助:

// Matches href=<quote>...<same quote>. Group 1 captures the opening quote and
// \1 requires the same character to close the value. Inside the value, the
// lookahead/backreference pair consumes an optional backslash together with
// the character after it, so a backslash-escaped quote does not end the match.
var pattern = /href=(["'])(?:(?=(\\?))\2.)*?\1/igm;
var html = "<html><head></head><body><a href='http://example.com'/></body></html>";
// Declared explicitly: assigning to an undeclared name throws in strict mode
// and ES modules, and creates an accidental global otherwise.
var match;

// With the g flag, exec() resumes from pattern.lastIndex on every call and
// returns null (resetting lastIndex to 0) once there are no more matches.
while ((match = pattern.exec(html)) !== null) {
  // match.index is where "href=" starts; lastIndex is one past the closing quote.
  console.log(match.index + ' ' + pattern.lastIndex);
}

// For the sample string the loop prints "28 53".
// 28 + 6 skips the six characters of href=' and lands on the first URL character.
console.log(html[28 + 6]);
// 53 - 1 is the closing quote; the last URL character is one earlier, at index 51.
console.log(html[53-1]);

部分取自“Return positions of a regex match() in Javascript?”,现已使用“Return positions of a regex match() in Javascript?”中的模式进行了更新。既然你也在使用Ruby,你也可以用Ruby运行相同或类似的正则表达式,并(我想)用你想要的文本替换匹配到的内容。