我试图运行循环从2个不同的网站提取数据,但无法理解是什么问题?

时间:2017-05-28 20:34:35

标签: php html dom web-crawler extraction

无法运行循环从2个网站中提取数据任何人都可以帮助我吗? 我想从两个不同的网站提取数据,它是单独工作,但我想运行循环,以便我可以使用任何循环从两个网站一起提取数据,但不能理解我怎么能这样做,请指导。

<?php
require('db.php');
require('simple_html_dom.php');
$q = "SELECT * from links";
$r = mysqli_query($con, $q);
if($row = mysqli_fetch_array($r)){
    $linksrc = $row['link'];
    if(strpos($linksrc, 'http://health.hamariweb.com'!== false)){
        while($row = mysqli_fetch_array($r)){
            $linksrc = $row['link'];
            $curl = curl_init();
            curl_setopt_array($curl, array(
            CURLOPT_URL => $linksrc,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            ));
            $file = curl_exec($curl);
            $error = curl_error($curl);
            curl_close($curl);
            $dom = new simple_html_dom();
            $dom->load($file);
            $doctorDivs = $dom->find("#infinite-grid-images", 0)->children();
            $doctors = array();
            foreach($doctorDivs as $div){
                $doctor = array();
                $image = $doctor["image"] = $linksrc.$div->find('img', 0)->src;
                $link = $doctor["link"] = $linksrc.$div->find('a', 0)->href;
                $details = $div->find('table', 1)->find("tr");
                $name = $doctor["name"] = trim($details[0]->plaintext);
                $spec = $doctor["Spec"] = trim($details[1]->plaintext);
                $qua = $doctor["qua"] = trim($details[2]->plaintext);
                $doctors[] = $doctor;
                echo "
                    <div class='col-xs-4 col-sm-4' style='padding-top: 20px;'>
                        <h5><span style='width: 40px; text-align: center;' class='glyphicon glyphicon-user'></span> Full Name: <a href='$link' target='_blank'> $name </a></h5>
                        <h5><span style='width: 40px; text-align: center;' class='glyphicon glyphicon-star'></span> Specialization: $spec</h5>
                        <h5><span style='width: 40px; text-align: center;' class='fa fa-building'></span> Qualification: $qua</h5>
                        <a href='$linksrc'>Data Extracted From : $linksrc</a>
                  </div>
                ";
            }
        }
    }
    elseif(strpos($linksrc, 'https://www.marham.pk') !== false){
        while($row = mysqli_fetch_array($r)){
            $curl = curl_init();
            curl_setopt_array($curl, array(
            CURLOPT_URL => $linksrc,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_FOLLOWLOCATION => 1,
            CURLOPT_ENCODING => "",
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 30,
            CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
            ));
            $file = curl_exec($curl);
            $error = curl_error($curl);
            curl_close($curl);
            $dom = new simple_html_dom();
            $dom->load($file);
            $doctorDivs = $dom->find("#posts #doctor-list", 0)->children();
            $doctors = array();
            foreach($doctorDivs as $div){
                $doctor = array();

                $link = $doctor["link"] = $div->find('meta', 0)->content ."<br>";
                $image = $doctor["image"] = $div->find('img', 0)->src;
                $name = $doctor["name"] = $div->find('h2', 0)->plaintext;
                $qua = $doctor["qua"] = $div->find('p', 0)->plaintext;

                $details = $div->find('ul.entry-meta', 0)->find("li");
                $spec = $doctor["Spec"] = trim($details[0]->plaintext);
                $avail = $doctor["avail"] = trim($details[1]->plaintext);
                $address = $doctor["address"] = trim($details[2]->plaintext);
                $fee = $doctor["fee"] = trim($details[3]->plaintext);

                $doctors[] = $doctor;
                echo "
                    <div class='col-xs-4 col-sm-4' style='padding-top: 20px;'>
                        <img src='$image' style='height: 60px;'>
                        <h5><span style='width: 40px; text-align: center;' class='glyphicon glyphicon-user'></span> Full Name: <a href='$link' target='_blank'> $name </a></h5>
                        <h5><span style='width: 40px; text-align: center;' class='glyphicon glyphicon-star'></span> Specialization: $spec</h5>
                        <h5><span style='width: 40px; text-align: center;' class='fa fa-building'></span> Qualification: $qua</h5>
                        <h5><span style='width: 40px; text-align: center;' class='fa fa-clock'></span> Availability: $avail</h5>
                        <h5><span style='width: 40px; text-align: center;' class='fa fa-map-marker'></span> Address: $address</h5>
                        <h5><span style='width: 40px; text-align: center;' class='fa fa-money'></span> Fee: $fee</h5>
                        <a href='$linksrc'>Data Extracted From : $linksrc</a>
                  </div>
                ";
            }
        }
    }
}

&GT;

0 个答案:

没有答案