如何按元素到给定祖先元素开头的距离来过滤元素?

时间:2018-05-31 17:03:35

标签: javascript jquery html performance parsing

最小示例:预期行为由Jasmine测试指定:



$(document).ready(function() {
  // Reports whether any <strong> inside `id` has text that first occurs within
  // the first 20 characters of the container's text.
  // Known limitation: indexOf() locates the first occurrence of the text anywhere
  // in the container, not the position of the <strong> element itself.
  function thereIsImportantContent(id) {
    var MAX_INDEX = 20;
    var $root = $(id);

    var nearHead = $root.find("strong").filter(function() {
      var position = $root.text().indexOf($(this).text());
      // keep the element only when its text was found and lies inside the window
      return !(position < 0 || position > MAX_INDEX);
    });

    return nearHead.length > 0;
  }

  // specs code
  describe("thereIsImportantContent", function() {

    it("accept strong near head", function() {
      expect(thereIsImportantContent($("#test_case_1")[0])).toBeTruthy();
    });

    it("accept strong near head with children", function() {
      expect(thereIsImportantContent($("#test_case_2")[0])).toBeTruthy();
    });

    it("accept wrapped strong near head", function() {
      expect(thereIsImportantContent($("#test_case_3")[0])).toBeTruthy();
    });

    it("reject strong further down", function() {
      expect(thereIsImportantContent($("#test_case_4")[0])).toBeFalsy();
    });

    it("reject strong further down with copies near head", function() {
      expect(thereIsImportantContent($("#test_case_5")[0])).toBeFalsy();
    });
  });

  // load jasmine htmlReporter
  (function() {
    var env = jasmine.getEnv();
    env.addReporter(new jasmine.HtmlReporter());
    env.execute();
  }());
});
&#13;
container {
  display: none;
}
&#13;
<link rel="stylesheet" href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css">
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="test_case_1">
  <!-- strong content at the beginning -> accept -->
  <p>Some <strong>content</strong></p>
  ...
  <p>other text</p>
  ...
  <p>Hey look: <strong>content</strong>!</p>
</container>

<container id="test_case_2">
  <!-- handle strong with children correctly -->
  <strong>Hey look: <span> content!</span></strong>
</container>

<container id="test_case_3">
  <p>Test</p>
  <p>Hey <strong>content!</strong></p>
</container>

<container id="test_case_4">
  <p>Something</p>
  ...
  <p>other text</p>
  ...
  <!-- strong content but located further down -> reject -->
  <p>Hey look: <strong>content</strong>!</p>
</container>

<container id="test_case_5">
  <!-- same text as in strong below triggering false accept -->
  <p>Some content</p>
  ...
  <p>other text</p>
  ...
  <!-- strong content but located further down -> should eject -->
  <p>Hey look: <strong>content</strong>!</p>
</container>
&#13;
&#13;
&#13;

我的用例:作为尝试查找网站潜在标题的用户脚本的一部分:

假设我的网站看起来像:

<container>
  <p>Some <strong>content</strong></p>
  ...
  <p>other text</p>
  ...
  <p>Hey look: <strong>content</strong>!</p>
</container>

我正在寻找一种方法,找到可见文本开头附近看起来重要的元素(例如h1,h2,strong,...)。

上面的代码向用户显示如下内容:

  

一些内容
  ......   其他文字
  ......   嘿看:内容

我目前的方法是计算container.text().indexOf(elementOfInterest.text()),并只使用索引较小的那些元素......

// First attempt from the question: look at the first ten <strong> descendants and
// keep those whose text first occurs within the first 50 characters of the
// container's text. `container` is presumably a jQuery object — not defined here.
// NOTE: indexOf() reports the first occurrence of the text anywhere in the
// container, which is not necessarily the position of this <strong> element.
container.find("strong").slice(0,10).filter(function () {
    var index = container.text().indexOf($(this).text());
    console.log("Testing: " + $(this).text(), " index: " + index);
    return 0 <= index && index <= 50
});

但我意识到这只有在重要内容不会在早期的正常文本中出现时才有效。

例如:

<container>
   <p>Some content</p>  <---position where the text "content" was found and
                             ^                             wrongly accepted       
   ...                       |Potentially important  
   <p>really long text</p>   |Element with text "content"
   ...                       |should be ignored as its far away
                             |from the start of the text
   <p>Hey look: <strong>content</strong>!</p>
</container>

indexOf在第二行的普通文本中就找到了"content",因此位于后面的strong元素被错误地接受了。

问:如何高效地按HTML元素到给定祖先元素开头的距离(以字符数计)来过滤元素?

3 个答案:

答案 0 :(得分:0)

我意识到我从错误的方向解决了这个问题,现在提出了以下解决方案:

  1. 使用深度优先搜索遍历DOM,并通过累加子元素的element.textContent.length来计算已处理的文本长度。
  2. 继续,直到没有剩余元素或文本已达到给定的长度限制。
  3. 在此过程中,收集所有已扫描且与给定标签名称/其他条件匹配的元素。
  4. $(document).ready(function() {
      /**
       * Finds nodes inside `element` whose tag name is listed in `selector` and
       * that begin within the first `maxDistance` characters of the text a reader
       * would have seen before reaching them.
       *
       * The subtree is walked in document (pre-)order over ALL child nodes, so text
       * that sits directly next to elements (e.g. "Some " in
       * `<p>Some <strong>x</strong></p>`) is counted as well. Runs of whitespace
       * are collapsed to one character and whitespace-only text nodes count as 0,
       * so source indentation does not influence the distance.
       *
       * Differences from the previous implementation:
       *  - the text of a matched element is counted once, not twice;
       *  - the walk climbs as many levels as needed to find the next node;
       *  - the walk never leaves `element` (no stepping to outside siblings);
       *  - tag names in `selector` may be surrounded by spaces ("h1, h2").
       *
       * @param {Node} element - root of the subtree to scan; if it matches itself
       *   it is reported at index 0 and its subtree is not scanned further.
       * @param {string} selector - comma separated list of tag names, e.g. "h1,h2,strong".
       * @param {number} [maxDistance=15] - scanning stops once this many characters were counted.
       * @returns {Array<{element: Node, index: number}>} matches in document order;
       *   `index` is the number of characters counted before the match.
       */
      function findChildrenNearElementHead(element, selector, maxDistance = 15) {
        const ELEMENT_NODE = 1;
        const TEXT_NODE = 3;

        // normalise "h1, h2" -> ["H1", "H2"]; tag names are compared upper-cased
        const wantedTags = String(selector)
          .split(",")
          .map(function(name) {
            return name.trim().toUpperCase();
          })
          .filter(function(name) {
            return name.length > 0;
          });

        // number of characters of `text` after collapsing whitespace runs
        function readableLength(text) {
          const collapsed = String(text == null ? "" : text).replace(/\s+/g, " ");
          return collapsed === " " ? 0 : collapsed.length;
        }

        // the result
        const candidates = [];

        // characters already "read" when arriving at the current node
        let curLen = 0;

        // node currently being visited
        let node = element;

        while (node && curLen < maxDistance) {
          let descend = false;

          if (node.nodeType === ELEMENT_NODE) {
            if (wantedTags.indexOf(String(node.tagName).toUpperCase()) > -1) {
              candidates.push({
                "element": node,
                "index": curLen
              });

              // count the match's text exactly once and skip its subtree
              curLen += readableLength(node.textContent);
            } else {
              descend = true;
            }
          } else if (node.nodeType === TEXT_NODE) {
            curLen += readableLength(node.nodeValue);
          }

          if (descend && node.firstChild) {
            node = node.firstChild;
            continue;
          }

          // climb until a following sibling exists, but never above the root
          while (node && node !== element && !node.nextSibling) {
            node = node.parentNode;
          }
          node = node && node !== element ? node.nextSibling : null;
        }

        return candidates;
      }
    
      // True when at least one h1/h2/strong candidate is reported for `element`.
      function thereIsImportantContent(element) {
        var nearHead = findChildrenNearElementHead(element, "h1,h2,strong");
        return nearHead.length > 0;
      }
    
      // specs code
      describe("findChildrenNearElementHead", function() {
    
        it("accept strong near head", function() {
          expect(thereIsImportantContent($("#test_case_1")[0])).toBeTruthy();
        });
    
        it("accept strong near head with children", function() {
          expect(thereIsImportantContent($("#test_case_2")[0])).toBeTruthy();
        });
    
        it("accept wrapped strong near head", function() {
          expect(thereIsImportantContent($("#test_case_3")[0])).toBeTruthy();
        });
    
        it("reject strong further down", function() {
          expect(thereIsImportantContent($("#test_case_4")[0])).toBeFalsy();
        });
    
        it("reject strong further down with copies near head", function() {
          expect(thereIsImportantContent($("#test_case_5")[0])).toBeFalsy();
        });
      });
    
      // load jasmine htmlReporter
      (function() {
        var env = jasmine.getEnv();
        env.addReporter(new jasmine.HtmlReporter());
        env.execute();
      }());
    });
    container {
      display: none;
    }
    <link href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css" rel="stylesheet" />
    <script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
    <script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
    
    <container id="test_case_1">
      <!-- strong content at the beginning -> accept -->
      <p>Some <strong>content</strong></p>
      ...
      <p>other text</p>
      ...
      <p>Hey look: <strong>content</strong>!</p>
    </container>
    
    <container id="test_case_2">
      <!-- handle strong with children correctly -->
      <strong>Hey look: <span> content!</span></strong>
    </container>
    
    <container id="test_case_3">
      <p>Test</p>
      <p>Hey <strong>content!</strong></p>
    </container>
    
    <container id="test_case_4">
      <p>Something</p>
      ...
      <p>other text</p>
      ...
      <!-- strong content but located further down -> reject -->
      <p>Hey look: <strong>content</strong>!</p>
    </container>
    
    <container id="test_case_5">
      <!-- same text as in strong below triggering false accept -->
      <p>Some content</p>
      ...
      <p>other text</p>
      ...
      <!-- strong content but located further down -> should eject -->
      <p>Hey look: <strong>content</strong>!</p>
    </container>

    Plunker copy: https://embed.plnkr.co/YjLVxVHAWj0kDhoceRGK/

    虽然这正是我想要的,但考虑到我们有像jQuery这样的库,我对所涉及的代码量不满意。

    我希望有人能想出一个更紧凑的方法来实现这一目标。

答案 1 :(得分:0)

如果我理解正确,并且基于your current solution,这里有一个等效的方式在jQuery中执行。

我所做的更改位于 // The jQuery way start. 和 // The jQuery way end. 之间。

**更新**

与OP的编辑合并(添加了新的HTML /测试用例),我用新的代码替换了JS代码 - _findEls(),这应该可以很好地完成工作。

$(document).ready(function() {
  // Collects element children (recursively) that match `selector` while fewer
  // than `maxDistance` characters of element text have been counted.
  // Returns the matching DOM elements in visiting order.
  function findChildrenNearElementHead(element, selector, maxDistance = 15) {
    const candidates = [];
    let consumed = 0;

    // The jQuery way start.
    const scan = (parent) => {
      $(parent).children().each(function() {
        // budget used up: ignore the remaining children
        if (!(consumed < maxDistance)) {
          return;
        }

        const $child = $(this);
        const isMatch = $child.is(selector);

        if (isMatch) {
          candidates.push(this);
        } else if (this.firstElementChild) {
          // count the text of the nested elements instead of this wrapper
          scan(this);
          return;
        }

        consumed += $child.text().length;
      });
    };

    scan(element);
    // The jQuery way end.

    return candidates;
  }

  // True when at least one h1/h2/strong candidate is reported for `element`.
  function thereIsImportantContent(element) {
    const nearHead = findChildrenNearElementHead(element, "h1,h2,strong");
    return nearHead.length > 0;
  }

  // specs code
  describe("findChildrenNearElementHead", function() {

    it("accept strong near head", function() {
      expect(thereIsImportantContent($("#test_case_1")[0])).toBeTruthy();
    });

    it("accept strong near head with children", function() {
      expect(thereIsImportantContent($("#test_case_2")[0])).toBeTruthy();
    });

    it("accept wrapped strong near head", function() {
      expect(thereIsImportantContent($("#test_case_3")[0])).toBeTruthy();
    });

    it("reject strong further down", function() {
      expect(thereIsImportantContent($("#test_case_4")[0])).toBeFalsy();
    });

    it("reject strong further down with copies near head", function() {
      expect(thereIsImportantContent($("#test_case_5")[0])).toBeFalsy();
    });
  });

  // load jasmine htmlReporter
  (function() {
    var env = jasmine.getEnv();
    env.addReporter(new jasmine.HtmlReporter());
    env.execute();
  }());
});
container {
  display: none;
}
<link href="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.css" rel="stylesheet" />
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine.js"></script>
<script src="https://cdn.jsdelivr.net/jasmine/1.3.1/jasmine-html.js"></script>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>

<container id="test_case_1">
  <!-- strong content at the beginning -> accept -->
  <p>Some <strong>content</strong></p>
  ...
  <p>other text</p>
  ...
  <p>Hey look: <strong>content</strong>!</p>
</container>

<container id="test_case_2">
  <!-- handle strong with children correctly -->
  <strong>Hey look: <span> content!</span></strong>
</container>

<container id="test_case_3">
  <p>Test</p>
  <p>Hey <strong> content!</strong></p>
</container>

<container id="test_case_4">
  <p>Something</p>
  ...
  <p>other text</p>
  ...
  <!-- strong content but located further down -> reject -->
  <p>Hey look: <strong>content</strong>!</p>
</container>

<container id="test_case_5">
  <!-- same text as in strong below triggering false accept -->
  <p>Some content</p>
  ...
  <p>other text</p>
  ...
  <!-- strong content but located further down -> should eject -->
  <p>Hey look: <strong>content</strong>!</p>
</container>

答案 2 :(得分:-1)

根据您的代码段和最后评论查看另一个示例:

$(document).ready(function() {

  var checkAllContainers = true; //change to false to verify containers individually
  
  if (checkAllContainers) {
    // verifying by parent container
    // container3 will be at position 200+ 
    verifyStrongText("#main_container");
  } else {
    // verifying containers individually
    // this way your filter will check from begin, container3 will start at position 0
    verifyStrongText("#my_container_1");
    verifyStrongText("#my_container_3");
    verifyStrongText("#my_container_2");
  }
});

// Highlights every <strong> inside `idElement` whose text occurs within the
// leading window of the container's text, and reports one verdict line per
// <strong> through functionXYZ.
function verifyStrongText(idElement) {
  var start = 0; // window begins at the first character
  var end = 88; // change this number to widen or narrow the search window

  var eligible = $(idElement).find("strong").filter(function() {
    var strongText = $(this).text();

    // position of the <strong> text inside the window (-1 when it is absent)
    var index = $(idElement).text().substring(start, end).indexOf(strongText);
    var inRange = !(index < start || index > end);

    var head = "> " + strongText + "(" + index + ") >> " + inRange;
    if (inRange) {
      functionXYZ(head + " == true (expected)   >> " + (inRange == true ? "OK" : "WRONG"));
    } else {
      functionXYZ(head + " == false(expected) >> " + (inRange == false ? "OK" : "WRONG"));
    }

    // this boolean decides whether the element stays in the filtered set
    return inRange;
  });

  // optional visual effect to see eligible words
  eligible.css("background", "limegreen");
}

// Placeholder hook: any JavaScript function can be called from the filter code.
// This one writes `msg` to the console.
function functionXYZ(msg) {
  console.log(msg);
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="main_container">
  <container id="my_container_1">
    <p>Some <strong>content 1</strong></p>
    ...
    <p><strong>First Headline</strong></p>
    ...
    <p>Hey look: <strong>other content 2</strong>!</p>
  </container>

  <container id="my_container_2">
    <p>Something</p> content 3 ...
    <p><strong>Second Headline</strong></p>
    ...
    <p>Hey look: <strong>content 4</strong>!</p>
  </container>

  <container id="my_container_3">
    <p>Some content 5</p>
    ...
    <p><strong>Third Headline</strong></p>
    ...
    <p>Hey look: <strong>content 6</strong>!</p>
  </container>
</container>

您的代码已更正并使用了两个示例:

如果您想要使用切片,则需要将所有文本放在<p>之类的选择器中以对其进行切片。

  

切片功能不是文本定位器。

// Run the lookup once the DOM is ready. .ready() expects a function; previously
// the already-evaluated jQuery chain was passed, so the lookup ran immediately
// while the script was being evaluated and nothing was deferred.
$(document).ready(function() {
  // slice(4, 10) keeps the matched <p> elements at positions 4..9 (zero-based)
  $("#my_container").find("p").slice(4, 10).find("strong").filter(
    function() {
      var index = $("#my_container").text().indexOf($(this).text());
      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      // the filtered set is not used afterwards, so this result has no visible effect
      return 0 <= index && index <= 10;
    });
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  <p>Some content</p>
  <p>Hey look: <strong>content</strong>!</p>
  <p>more content</p>
  ...
  <p><strong>Second Headline</strong></p>
  ...
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>
</container>

但你可以用另一种方式做到这一点,检查过滤器功能内的条件:

// Run the lookup once the DOM is ready. .ready() expects a function; previously
// the already-evaluated jQuery chain was passed, so the lookup ran immediately
// while the script was being evaluated and nothing was deferred.
$(document).ready(function() {
  // highlight every <strong> whose text first occurs between `start` and `limit`
  $("#my_container").find("strong").filter(
    function() {
      var start = 26;
      var limit = 69;
      var index = $("#my_container").text().indexOf($(this).text());
      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      console.log(start <= index && index <= limit ? "OK!" : "OUT OF RANGE!");
      return start <= index && index <= limit;
    }).css('background','limegreen');
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  Some content
  Hey look: <strong>content</strong>!
  <p>more content</p>
  ...
  <strong>Second Headline</strong>
  ...
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>
</container>

其他例子,根据您的评论:

// Run the lookup once the DOM is ready. .ready() expects a function; previously
// the already-evaluated jQuery chain was passed, so the lookup ran immediately
// while the script was being evaluated and nothing was deferred.
$(document).ready(function() {
  $("#my_container").find("strong").filter(
    function() {
      var start = 2;
      // to find at the beginning of text use: var start = 0;
      var limit = 26;
      // to find until the end of text use: var limit = $("#my_container").text().length;

      // indexOf() on the substring is relative to `start`; shift it back so that
      // `index` is a position in the container text and can be compared with
      // `start` / `limit` (previously matches at relative 0..start-1 were rejected)
      var relative = $("#my_container").text().substring(start, limit).indexOf($(this).text());
      var index = relative === -1 ? -1 : relative + start;
      var inRange = start <= index && index <= limit;

      console.log("Testing: " + $(this).text(), " (Index: " + index + ")");
      console.log(inRange ? "OK!" : "OUT OF RANGE!");

      if (inRange) {
         alert("What i need to do?");
      }

      return inRange;

    }).css('background', 'limegreen');
});
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<container id="my_container">
  Some content Hey look: <strong>content</strong>!
  <p>more content</p>
  ...
  <strong>Second Headline</strong> ...
  <span>even more content</span>
  <p>Hey look: <strong>other content</strong>!</p>
  <span>even more content</span>
</container>