如何从JavaScript中的HTML字符串中提取div中的所有内容

时间:2011-10-05 14:07:58

标签: javascript jquery html-parsing

我有一个像这样的HTML字符串: -

// Sample markup used throughout the question. A quoted JavaScript string
// literal cannot span several source lines, so the markup is assembled from
// one string per line and joined with newlines.
var html = [
    '<div id="parent_div">',
    '            <div id="child_div">',
    '            <ul>',
    '             <li><img src="wow/img1.jpg" /><a href="http://wow.com">wow link</a></li>',
    '             <li><img src="wow/img2.jpg" /><a href="http://wow.com">wow link</a></li>',
    '            </ul>',
    '            </div>',
    '            </div>'
].join('\n');

如何提取<div id="parent_div">中的所有内容?

4 个答案:

答案 0 :(得分:1)

在jQuery中你可以使用 $(html).html();

在PHP中你可以使用像Simple HTML DOM这样的库。

答案 1 :(得分:0)

查看DOM或其他PHP XML库。

http://se.php.net/manual/en/book.dom.php

答案 2 :(得分:0)

String是一个字符串。使用正则表达式来解析它。

在JS方面:
请查看 replace 和 match 方法。
类似下面这样(未经测试):

// Use a regex literal, not a quoted string: match() turns a string argument
// into a RegExp, so the surrounding slashes would be matched literally and
// the pattern would never match.
// [\s\S] is used instead of "." so the capture can cross line breaks.
// The quantifier is still lazy, so the capture ends at the FIRST </div>.
var pattern = /<div id="parent_div">([\s\S]*?)<\/div>/;
var matches = html.match(pattern);
// match() returns null when nothing matched; fall back to an empty string.
var result = matches ? matches[1] : '';

这可能不起作用,因为它会在第一个</div>停止,但正则表达式是这里的方法,只需找到一个返回所需内容的表达式。

一般来说,我认为在PHP或JS变量中“硬编码”HTML是一个坏主意。恕我直言,最好使用像Mustache这样可以同时用于两端(客户端和服务器)的模板引擎。它能将HTML与任何类型的应用程序逻辑——无论是客户端(JS)还是服务器端(PHP)——完全分离。

答案 3 :(得分:0)

好的,我之前遇到过同样的问题:用ajax结果刷新某个div时,返回的是同一页面的完整HTML,而我只想取出指定id的div内容并设置到innerHTML。所以我写了一个JavaScript函数来做到这一点:

    //Extracts the inner HTML of the first <div> whose id attribute equals str_div_id.
    //
    //  str_html   - the HTML text to search
    //  str_div_id - the id value to look for; id='...' and id="..." are both accepted
    //
    //Returns the text between the matching opening tag and its own closing </div>,
    //with nested divs kept intact. Returns "" when str_html is empty or not a string,
    //or when no div has that id. If the matching div is never closed, everything
    //after its opening tag is returned.
    //
    //This is a lightweight tag scanner, not an HTML parser: a '>' inside an
    //attribute value or div-like text inside comments/scripts will confuse it.
    function ExtractFromDiv(str_html, str_div_id)
    {
        if (typeof str_html !== "string" || str_html === "") { return ""; }

        var str_wanted_id = String(str_div_id);

        //Matches every opening or closing div tag; group 1 is "/" for a closing tag.
        //The lookahead stops tags such as <divider> from being counted as divs.
        var re_div_tag = /<(\/?)div(?=[\s>\/])[^>]*>/gi;

        //Pulls the quoted value of the id attribute out of a single opening tag.
        var re_id_attr = /\sid\s*=\s*(?:'([^']*)'|"([^"]*)")/i;

        //Index just past the wanted div's opening tag, or -1 while still searching
        var int_content_start = -1;

        //Number of divs currently open, counting the wanted div itself
        var int_open_divs = 0;

        var arr_tag;
        while ((arr_tag = re_div_tag.exec(str_html)) !== null)
        {
            var boo_closing = (arr_tag[1] === "/");

            //Phase 1: still looking for the opening tag that carries the wanted id
            if (int_content_start === -1)
            {
                if (!boo_closing)
                {
                    var arr_id = re_id_attr.exec(arr_tag[0]);
                    if (arr_id !== null)
                    {
                        var str_found_id = (arr_id[1] !== undefined) ? arr_id[1] : arr_id[2];
                        if (str_found_id === str_wanted_id)
                        {
                            //lastIndex points just past the tag that was matched
                            int_content_start = re_div_tag.lastIndex;
                            int_open_divs = 1;
                        }
                    }
                }
                continue;
            }

            //Phase 2: inside the wanted div, so track nesting until it is closed
            int_open_divs += boo_closing ? -1 : 1;
            if (int_open_divs === 0)
            {
                return str_html.substring(int_content_start, arr_tag.index);
            }
        }

        //Either the id was never found, or the wanted div was never closed
        return (int_content_start === -1) ? "" : str_html.substring(int_content_start);
    }


    //You can try this function as follows in your code
    <script type="text/javascript">

        // Id of the div whose contents we want: the middle of three nested divs.
        var sample_div = "sammy the dog1";

        // Sample page; the body element is closed properly with </body>.
        var sample_html = "<html><body><div id='sammy the dog'><div   id='sammy the dog1'><div id='sammy the dog2'>crap crap</div></div> </div>                               </body></html>";

        // Expected result: <div id='sammy the dog2'>crap crap</div>
        var sample_read = ExtractFromDiv(sample_html, sample_div);

        alert( sample_read );

    </script>

///它满足了我的需求。祝编码愉快!