
时间:2018-05-31 17:03:35

标签: javascript jquery html performance parsing


$(document).ready(function() {
  /**
   * Reports whether the container selected by `id` holds at least one
   * <strong> whose text is found at offset 0..20 of the container's text.
   *
   * NOTE(review): the offset comes from indexOf(), i.e. the FIRST occurrence
   * of the <strong>'s text anywhere in the container text. If the same text
   * also appears earlier as plain text, a <strong> located much further down
   * is still accepted (the "false accept" shown in test_case_5).
   * NOTE(review): the closing brace of this function is missing from this
   * excerpt.
   */
  function thereIsImportantContent(id) {
    // Keep only the <strong> descendants that pass the offset test below.
    return $(id).find("strong").filter(function() {
      // Offset of the first occurrence of this <strong>'s text in the whole container text.
      var index = $(id).text().indexOf($(this).text());
      // -1 (not found) and offsets above 20 are rejected.
      return 0 <= index && index <= 20;
    }).length > 0;

  // specs code
  describe("thereIsImportantContent", function() {

    it("accept strong near head", function() {

    it("accept strong near head with children", function() {

    it("accept wrapped strong near head", function() {

    it("reject strong further down", function() {

    it("reject strong further down with copies near head", function() {

  // load jasmine htmlReporter
  (function() {
    var env = jasmine.getEnv();
    env.addReporter(new jasmine.HtmlReporter());
container {
  display: none;
<link rel="stylesheet" href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css">
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="test_case_1">
  <!-- strong content at the beginning -> accept -->
  <p>Some <strong>content</strong></p>
  <p>other text</p>
  <p>Hey look: <strong>content</strong>!</p>

<container id="test_case_2">
  <!-- handle strong with children correctly -->
  <strong>Hey look: <span> content!</span></strong>

<container id="test_case_3">
  <p>Hey <strong>content!</strong></p>

<container id="test_case_4">
  <p>other text</p>
  <!-- strong content but located further down -> reject -->
  <p>Hey look: <strong>content</strong>!</p>

<container id="test_case_5">
  <!-- same text as in strong below triggering false accept -->
  <p>Some content</p>
  <p>other text</p>
  <!-- strong content but located further down -> should eject -->
  <p>Hey look: <strong>content</strong>!</p>



  <p>Some <strong>content</strong></p>
  <p>other text</p>
  <p>Hey look: <strong>content</strong>!</p>




  ......   其他文字
  ......   嘿看:内容


container.find("strong").slice(0,10).filter(function () {
    var index = container.text().indexOf($(this).text());
    console.log("Testing: " + $(this).text(), " index: " + index);
    return 0 <= index && index <= 50



   <p>Some content</p>  <---position where the text "content" was found and
                             ^                             wrongly accepted       
   ...                       |Potentially important  
   <p>really long text</p>   |Element with text "content"
   ...                       |should be ignored as its far away
                             |from the start of the text
   <p>Hey look: <strong>content</strong>!</p>



3 个答案:

答案 0 :(得分:0)


  1. 使用深度优先搜索遍历DOM,并通过累加子元素的 element.textContent.length 来计算已处理的文本长度。
  2. 继续,直到没有剩余元素或文本已达到给定的长度限制。
  3. 在遍历过程中,缓存所有与给定标签名称/其他条件匹配的已扫描元素。
  4. $(document).ready(function() {
      /**
       * Walks the element tree starting at `element` and collects elements
       * whose tag name is listed in `selector`, stopping once the accumulated
       * text length reaches `maxDistance` or the walk leaves the start level.
       *
       * @param {Element} element - node the walk starts from.
       * @param {string} selector - comma-separated tag names (upper-cased and
       *   compared against element.tagName, so not a general CSS selector).
       * @param {number} [maxDistance=15] - walk stops once curLen reaches this.
       * @returns {Array} the `candidates` array.
       *
       * NOTE(review): this excerpt has lost lines. The statement that wraps the
       * "element"/"index" pair below, the statement announced by "skip rest of
       * current loop iteration", and several closing braces are not present.
       * Confirm against the original snippet before relying on it.
       */
      function findChildrenNearElementHead(element, selector, maxDistance = 15) {
        /* length of the text processed so far
       (that's the number of characters the user has already read when he arrives at the child element in question) */
        var curLen = 0;
        /* current depth in the DOM relative to the container element;
        if negative, we finished processing all elements in the container and should stop */
        var depth = 0;
        // look for children that match this selector (tag names are compared in upper case)
        selector = selector.toUpperCase().split(",");
        // the result
        var candidates = [];
        // traverse complete DOM in container in pre-order
        while (curLen < maxDistance && depth >= 0) {
          // if element matches selector
          if (selector.indexOf(element.tagName) > -1) {
            // add element to result array, together with the text offset at which it was reached
              "element": element,
              "index": curLen
            // increase current text length by length of element
            curLen += element.textContent.length;
          } else if (element.firstElementChild) {
            /* if this element is not matched by the selector
            and has children, dive in and look there for more elements */
            // begin with the first child element
            element = element.firstElementChild;
            // increase depth
            depth += 1;
            // skip rest of current loop iteration
          // increase current text length by length of element
          curLen += element.textContent.length;
          // element has no children, does it have siblings?
          if (element.nextElementSibling) {
            // yes it has -> continue there
            element = element.nextElementSibling;
            /* element has no siblings:
            go one layer up to the parent and look there for siblings */
          } else if (element.parentElement.nextElementSibling) {
            // select next sibling of parent as active element
            element = element.parentElement.nextElementSibling;
            // decrease depth as we just moved one layer up
            depth -= 1;
          } else {
            // no children, no siblings, nothing to do
        return candidates;
      // Returns true when findChildrenNearElementHead yields at least one
      // candidate for the tag list "h1,h2,strong" starting at `element`.
      // NOTE(review): the closing brace is missing from this excerpt.
      function thereIsImportantContent(element) {
        return findChildrenNearElementHead(element, "h1,h2,strong").length > 0;
      // specs code
      describe("findChildrenNearElementHead", function() {
        it("accept strong near head", function() {
        it("accept strong near head with children", function() {
        it("accept wrapped strong near head", function() {
        it("reject strong further down", function() {
        it("reject strong further down with copies near head", function() {
      // load jasmine htmlReporter
      (function() {
        var env = jasmine.getEnv();
        env.addReporter(new jasmine.HtmlReporter());
    container {
      display: none;
    <link href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css" rel="stylesheet" />
    <script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
    <script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
    <container id="test_case_1">
      <!-- strong content at the beginning -> accept -->
      <p>Some <strong>content</strong></p>
      <p>other text</p>
      <p>Hey look: <strong>content</strong>!</p>
    <container id="test_case_2">
      <!-- handle strong with children correctly -->
      <strong>Hey look: <span> content!</span></strong>
    <container id="test_case_3">
      <p>Hey <strong>content!</strong></p>
    <container id="test_case_4">
      <p>other text</p>
      <!-- strong content but located further down -> reject -->
      <p>Hey look: <strong>content</strong>!</p>
    <container id="test_case_5">
      <!-- same text as in strong below triggering false accept -->
      <p>Some content</p>
      <p>other text</p>
      <!-- strong content but located further down -> should eject -->
      <p>Hey look: <strong>content</strong>!</p>

    Plunker copy: https://embed.plnkr.co/YjLVxVHAWj0kDhoceRGK/



答案 1 :(得分:0)

如果我理解正确,基于你当前的解决方案,下面是用jQuery实现的等效写法。

我所做的更改位于 `// The jQuery way start.` 和 `// The jQuery way end.` 之间。


已合并OP的编辑(新增的HTML/测试用例),并把JS代码替换为新的实现 `_findEls()`,它应该能很好地完成工作。

$(document).ready(function() {
  /**
   * jQuery variant: recursively visits the children of `element`, adding
   * each visited child's text length to `curLen` while it is still below
   * `maxDistance`, and returns the `candidates` array.
   *
   * @param {Element} element - container whose children are scanned.
   * @param {string} selector - selector tested with jQuery's .is().
   * @param {number} [maxDistance=15] - children are only examined while
   *   curLen is below this value.
   * @returns {Array} the `candidates` array.
   *
   * NOTE(review): this excerpt has lost lines. The body of the matching
   * branch is empty here, the initial call to _findEls is not visible, and
   * closing braces are missing. Confirm against the original snippet.
   */
  function findChildrenNearElementHead(element, selector, maxDistance = 15) {
    // curLen: text length counted so far; candidates: the result array.
    let curLen = 0,
      candidates = [];
    // The jQuery way start.
    // Recursive helper: iterates over the direct children of `el`.
    function _findEls(el) {
      $(el).children().each(function() {
        // Only examine this child while the distance budget is not used up.
        if (curLen < maxDistance) {
          if ($(this).is(selector)) {
          } else if (this.firstElementChild /* it has children */ ) {
            /* only count text of element OR its children */
            return _findEls(this);

          // Count this child's text towards the processed length.
          curLen += $(this).text().length;

    // The jQuery way end.

    return candidates;

  // Returns true when findChildrenNearElementHead yields at least one
  // candidate for the selector "h1,h2,strong" under `element`.
  // NOTE(review): the closing brace is missing from this excerpt.
  function thereIsImportantContent(element) {
    return findChildrenNearElementHead(element, "h1,h2,strong").length > 0;

  // specs code
  describe("findChildrenNearElementHead", function() {

    it("accept strong near head", function() {

    it("accept strong near head with children", function() {

    it("accept wrapped strong near head", function() {

    it("reject strong further down", function() {

    it("reject strong further down with copies near head", function() {

  // load jasmine htmlReporter
  (function() {
    var env = jasmine.getEnv();
    env.addReporter(new jasmine.HtmlReporter());
container {
  display: none;
<link href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

<container id="test_case_1">
  <!-- strong content at the beginning -> accept -->
  <p>Some <strong>content</strong></p>
  <p>other text</p>
  <p>Hey look: <strong>content</strong>!</p>

<container id="test_case_2">
  <!-- handle strong with children correctly -->
  <strong>Hey look: <span> content!</span></strong>

<container id="test_case_3">
  <p>Hey <strong> content!</strong></p>

<container id="test_case_4">
  <p>other text</p>
  <!-- strong content but located further down -> reject -->
  <p>Hey look: <strong>content</strong>!</p>

<container id="test_case_5">
  <!-- same text as in strong below triggering false accept -->
  <p>Some content</p>
  <p>other text</p>
  <!-- strong content but located further down -> should eject -->
  <p>Hey look: <strong>content</strong>!</p>

答案 2 :(得分:-1)


$(document).ready(function() {

  var checkAllContainers = true; //change to false to verify containers individually
  if (checkAllContainers) {
    // verifying by parent container
    // container3 will be at position 200+ 
  } else {
    // verifying containers individually
    // this way your filter will check from begin, container3 will start at position 0

/**
 * Filters the <strong> descendants of `idElement` down to those whose text
 * occurs inside the first `end` characters of the container's text, reports
 * each check through functionXYZ, and gives the kept elements a limegreen
 * background.
 *
 * @param {string} idElement - value passed to $() to select the container.
 *
 * NOTE(review): closing braces are missing from this excerpt (the else
 * branch and the function itself are not closed here).
 */
function verifyStrongText(idElement) {
  $(idElement).find("strong").filter(function() {
    var start = 0; // start of the searched window (beginning of the text)
    var end = 88; // end of the searched window; change this number to expand or restrict the search range
    var strong_text = $(this).text();
    // index of the <strong>'s text inside the windowed container text
    // (relative to the substring, so it is measured from `start`)
    var index = $(idElement).text().substring(start, end).indexOf(strong_text);

    // check whether the index lies inside the stipulated range (not found = -1)
    var response = start <= index && index <= end;

    if (response) {
      // put your own actions here for text that was found inside the range
      functionXYZ("> " + $(this).text() + "(" + index + ") >> " +
        response + " == true (expected)   >> " +
        (response == true ? "OK" : "WRONG"));
    } else {
      functionXYZ("> " + $(this).text() + "(" + index + ") >> " +
        response + " == false(expected) >> " +
        (response == false ? "OK" : "WRONG"));

    // NOTE(review): this boolean only decides whether filter() keeps the
    // element; the filter() call itself is chained into .css() below, so its
    // result is a jQuery collection rather than a boolean.
    return response;

  }).css("background", "limegreen"); // optional visual effect to highlight the eligible words

//You can call ANY javascript function from your code
function functionXYZ(msg) {
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="main_container">
  <container id="my_container_1">
    <p>Some <strong>content 1</strong></p>
    <p><strong>First Headline</strong></p>
    <p>Hey look: <strong>other content 2</strong>!</p>

  <container id="my_container_2">
    <p>Something</p> content 3 ...
    <p><strong>Second Headline</strong></p>
    <p>Hey look: <strong>content 4</strong>!</p>

  <container id="my_container_3">
    <p>Some content 5</p>
    <p><strong>Third Headline</strong></p>
    <p>Hey look: <strong>content 6</strong>!</p>





  //Here you're slicing between 4 and 10 <p> elements
  $("#my_container").find("p").slice(4, 10).find("strong").filter(
    function() {
      var index = $("#my_container").text().indexOf($(this).text());
      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      return 0 <= index && index <= 10; //return is irrelevant and does nothing
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  <p>Some content</p>
  <p>Hey look: <strong>content</strong>!</p>
  <p>more content</p>
  <p><strong>Second Headline</strong></p>
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>


    function() {
      var start = 26;
      var limit = 69;
      var index = $("#my_container").text().indexOf($(this).text());
      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      console.log(start <= index && index <= limit ? "OK!" : "OUT OF RANGE!");
      return start <= index && index <= limit;
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  Some content
  Hey look: <strong>content</strong>!
  <p>more content</p>
  <strong>Second Headline</strong>
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>


    function() {
      var start = 2;
      // to find at the beginning of text use: var start = 0;
      var limit = 26;
      // to find until the end of text use: var limit = $("#my_container").text().length;

      var index = $("#my_container").text().substring(start, limit).indexOf($(this).text());
      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      console.log(start <= index && index <= limit ? "OK!" : "OUT OF RANGE!");
      if(start <= index && index <= limit) {
         alert("What i need to do?");
      return start <= index && index <= limit;

    }).css('background', 'limegreen')
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  Some content Hey look: <strong>content</strong>!
  <p>more content</p>
  <strong>Second Headline</strong> ...
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>