Question

我想结合使用Curl和Simple HTML DOM。

两者单独使用时都能正常工作。

我想先用 cURL 抓取一个网站，然后使用 Simple HTML DOM 按分页页码逐页解析其中的内部数据。

我正在使用此代码。

<?php
include 'simple_html_dom.php';

/**
 * Download a page with cURL and parse the response into a Simple HTML DOM object.
 *
 * If the transfer fails, a warning carrying the cURL error message is raised
 * and an empty document is returned, so callers always receive a DOM object.
 *
 * @param string $href URL to fetch; the same value is sent as the Referer header.
 * @return simple_html_dom Parsed document (empty when the transfer failed).
 */
function dlPage($href) {
    $curl = curl_init();

    curl_setopt_array($curl, array(
        // NOTE(review): TLS peer verification is disabled, which allows
        // man-in-the-middle tampering. Kept as-is to preserve behaviour;
        // enable it if the target hosts present valid certificates.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_HEADER         => false,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_URL            => $href,
        CURLOPT_REFERER        => $href,
        // Return the body as a string instead of printing it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.125 Safari/533.4",
    ));

    $str = curl_exec($curl);
    // The error text must be read before the handle is closed.
    $error = curl_error($curl);

    curl_close($curl);

    if ($str === false) {
        // Surface the failure instead of silently feeding `false` to the parser.
        trigger_error("dlPage: cURL request for {$href} failed: {$error}", E_USER_WARNING);
        $str = '';
    }

    // Create a DOM object
    $dom = new simple_html_dom();
    // Load HTML from a string
    $dom->load($str);

    return $dom;
}

// Entry URL, fetched once through the cURL-based dlPage() helper.
$url = 'http://example.com/';
// $data receives the parsed DOM returned by dlPage(); the loop shown below does not read it.
$data = dlPage($url);

// Debug aid: uncomment to print the fetched document.
// echo $data;

#######################################################

// Inclusive range of listing pages to crawl.
$startpage = 1;
$endpage = 3;

for ($p = $startpage; $p <= $endpage; $p++) {
    // Double quotes are required here: inside single quotes `$p` is not
    // interpolated and the literal URL ".../page/$p.html" would be requested.
    $html = file_get_html("http://example.com/page/{$p}.html");
    if (!$html) {
        // file_get_html() yields a falsy value when the page cannot be loaded;
        // skip it rather than calling find() on a non-object.
        continue;
    }

    // Follow every link found in the listing page's div#link container.
    foreach ($html->find('div#link a') as $link) {
        $linkHref = $link->href;

        // Load the linked detail page.
        $linkHtml = file_get_html($linkHref);
        if (!$linkHtml) {
            continue;
        }

        // Print the inner data of the detail page.
        foreach ($linkHtml->find('h1') as $title) {
            echo $title;
        }
        foreach ($linkHtml->find('div#data') as $description) {
            echo $description;
        }

        // Release the parsed document; Simple HTML DOM keeps circular
        // references that are otherwise not freed between iterations.
        $linkHtml->clear();
        unset($linkHtml);
    }

    $html->clear();
    unset($html);
}
?>

如何将其组合起来使其作为单个脚本运行？

结合 cURL 与 Simple HTML DOM 抓取带分页链接的页面

0 个答案: