DOM Xpath wordpress抓取内容

时间:2013-10-09 23:56:24

标签: php wordpress plugins xpath

我有一个想要修改的插件,但我卡住了。下面是相关的 PHP 函数:

/**
 * Download a URL and return its body.
 *
 * Uses cURL when the extension is available (honouring the optional proxy
 * settings) and falls back to file_get_contents() otherwise.
 *
 * @param string $url       URL to download.
 * @param string $ua        User-Agent header sent with the cURL request.
 * @param string $proxy     Proxy address; an empty string disables proxying.
 * @param string $proxytype "socks" selects SOCKS5; any other value leaves cURL's default.
 * @param string $proxyuser Proxy credentials passed to CURLOPT_PROXYUSERPWD; empty to skip.
 *
 * @return string|array The response body on success, or an array with a single
 *                      "error" key (module / reason / message) on failure.
 */
function wpr_ezinemark_fetch($url, $ua, $proxy, $proxytype, $proxyuser) {
    if (!function_exists('curl_init')) {
        $html = @file_get_contents($url);
        if (!$html) {
            return array("error" => array(
                "module"  => "Article",
                "reason"  => "cURL Error",
                "message" => __("cURL is not installed on this server!","wprobot"),
            ));
        }
        return $html;
    }

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_USERAGENT, $ua);
    if ($proxy != "") {
        curl_setopt($ch, CURLOPT_PROXY, $proxy);
        if ($proxyuser) {
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $proxyuser);
        }
        if ($proxytype == "socks") {
            curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
        }
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 45);

    $html = curl_exec($ch);
    // Read the error details before closing, then always close so the handle
    // is released on the failure path as well.
    $errno = curl_errno($ch);
    $error = curl_error($ch);
    curl_close($ch);

    if (!$html) {
        return array("error" => array(
            "module"  => "Article",
            "reason"  => "cURL Error",
            "message" => __("cURL Error Number $url","wprobot").$errno.": ".$error,
        ));
    }
    return $html;
}

/**
 * Scrape video posts from the listing page and build post content from a template.
 *
 * Fetches the hard-coded listing URL, collects the links matched by
 * //div[@class='boxtitle']//h2/a, then downloads each linked page and extracts
 * its title, video section and resource links. Note that $keyword does not
 * influence which page is fetched; it is only substituted into the template.
 *
 * @param string $keyword   Replaces {keyword} / {Keyword} in the template (double quotes removed).
 * @param int    $num       Maximum number of posts to build.
 * @param int    $start     Absolute start offset; only its position within a block of 20 is used.
 * @param mixed  $optional  Unused by this function.
 * @param mixed  $comments  Unused by this function.
 * @param array  $options   Reads 'wpr_eza_striplinks', 'wpr_eza_split' and 'wpr_eza_splitlength'.
 * @param string $template  Post template with {article}, {authortext}, {keyword}, {Keyword},
 *                          {title} and {url} placeholders.
 * @param string $ua        User-Agent for the HTTP requests.
 * @param string $proxy     Proxy address; empty string for none.
 * @param string $proxytype "socks" for a SOCKS5 proxy.
 * @param string $proxyuser Proxy credentials; empty to skip.
 *
 * @return array Either a list of posts (each with "unique", "title" and "content"),
 *               or an array with a single "error" key describing the failure.
 */
function wpr_ezinemarkpost($keyword,$num,$start,$optional="",$comments="",$options,$template,$ua,$proxy,$proxytype,$proxyuser) {
    // Offset of $start inside its block of 20 results.
    $numb = (int) ($start - floor($start / 20) * 20);
    $search_url = "http://www.freewptube.com/demo4/";

    $html = wpr_ezinemark_fetch($search_url, $ua, $proxy, $proxytype, $proxyuser);
    if (is_array($html)) {
        return $html;
    }

    // Parse the listing page; @ silences libxml warnings about malformed HTML.
    $dom = new DOMDocument();
    @$dom->loadHTML($html);

    // Grab the links to the individual video pages.
    $xpath = new DOMXPath($dom);
    $paras = $xpath->query("//div[@class='boxtitle']//h2/a");

    $posts = array();
    $x = 0;

    if ($paras->length == 0) {
        $posts["error"]["module"] = "Article";
        $posts["error"]["reason"] = "No content";
        $posts["error"]["message"] = __("No (more) articles found. $search_url","wprobot");
        return $posts;
    }

    $end = $numb + $num;
    if ($end > $paras->length) {
        $end = $paras->length;
    }

    for ($i = $numb; $i < $end; $i++) {
        $para = $paras->item($i);

        if (empty($para)) {
            $posts["error"]["module"] = "Article";
            $posts["error"]["reason"] = "No content";
            $posts["error"]["message"] = __("No (more) articles found. $search_url","wprobot");
            return $posts;
        }

        $target_url = $para->getAttribute('href');

        $html = wpr_ezinemark_fetch($target_url, $ua, $proxy, $proxytype, $proxyuser);
        if (is_array($html)) {
            return $html;
        }

        // Parse the video page.
        $dom = new DOMDocument();
        @$dom->loadHTML($html);
        $pagexpath = new DOMXPath($dom);

        // Grab the title; a missing node is treated the same as an empty title.
        $titlenode = $pagexpath->query("//div[@class='textsection']/h2")->item(0);
        $title = $titlenode ? $titlenode->textContent : "";

        if (empty($title)) {
            $return = array();
            $return["error"]["module"] = "Article";
            $return["error"]["reason"] = "IncNum";
            $return["error"]["message"] = __("Video content skipped. ","wprobot");
            return $return;
        }

        // Grab the video section. saveXml(null) would serialise the whole
        // document, so a missing node yields an empty body instead.
        $videonode = $pagexpath->query("//div[@id='screen']/div[@class='videosection']")->item(0);
        $string = $videonode ? $dom->saveXml($videonode) : "";
        if (isset($options['wpr_eza_striplinks']) && $options['wpr_eza_striplinks']=='yes') {
            $string = wpr_strip_selected_tags($string, array('a'));
        }
        // Start from scratch for every post so bodies do not accumulate.
        $articlebody = $string. ' ';

        // Grab the resource box links.
        $links = $pagexpath->query("//div[@id='extras']//h4/a");
        $ressourcetext = "";
        for ($y = 0; $y < $links->length; $y++) {
            $ressourcetext .= $dom->saveXml($links->item($y));
        }

        $title = utf8_decode($title);

        // Split into pages.
        if (isset($options['wpr_eza_split']) && $options['wpr_eza_split'] == "yes") {
            $articlebody = wordwrap($articlebody, $options['wpr_eza_splitlength'], "<!--nextpage-->");
        }

        $post = $template;
        $post = wpr_random_tags($post);
        $post = str_replace("{article}", $articlebody, $post);
        $post = str_replace("{authortext}", $ressourcetext, $post);
        $noqkeyword = str_replace('"', '', $keyword);
        $post = str_replace("{keyword}", $noqkeyword, $post);
        $post = str_replace("{Keyword}", ucwords($noqkeyword), $post);
        $post = str_replace("{title}", $title, $post);
        $post = str_replace("{url}", $target_url, $post);
        if (function_exists("wpr_rewrite_partial")) {
            $post = wpr_rewrite_partial($post,$options);
        }
        if (function_exists("wpr_translate_partial")) {
            $post = wpr_translate_partial($post);
        }

        $posts[$x]["unique"] = $target_url;
        $posts[$x]["title"] = $title;
        $posts[$x]["content"] = $post;
        $x++;
    }
    return $posts;
}

我已经抓取到了标题和嵌入视频,但我还想抓取位于主页上的缩略图。怎样才能让缩略图显示在嵌入视频代码的上方?顺便说一句,这是一个 WordPress 插件,我正在修改它以供自己使用。

感谢

0 个答案:

没有答案