PHP脚本内存泄漏

时间:2011-04-06 17:57:11

标签: php memory-leaks

我知道这不是最精简的代码,但我已经尽可能地删减了。该脚本占用的内存越来越多,直到最终耗尽。我已尽可能使用 unset(),但似乎没有任何效果。脚本似乎总是在 MultiGet 函数中出错,但我不确定泄漏是否就发生在那里。任何意见都将不胜感激。

/**
 * Imports products from products.dbf (located next to this file).
 *
 * For every DBF record whose GRP value matches a row in tbl_categories,
 * the part number is queued. The queue is flushed in batches of 100 (and
 * once more after the last record): each batch is turned into product
 * URLs, fetched via MultiCrawl(), and every page that yields data from
 * _extract_more_info() is passed on to _parse_product().
 *
 * @return void
 */
public function Test()
{
    $batch_size = 100;
    $prod_file = dirname(__FILE__) . '/products.dbf';

    // The original read the record count and rows through an undefined $ci
    // variable; the DBF reader is the same object that was just loaded.
    $this->dbf->load($prod_file);
    $num_rec = $this->dbf->dbf_num_rec;

    $buffer = array();
    for ($i = 0; $i < $num_rec; $i++) {
        $row = $this->dbf->getRowAssoc($i);

        // NOTE(review): 'name' is the literal string 'DESCR', not a column
        // value - presumably $row['DESCR'] was intended; confirm the column
        // name before changing it. The value is not read in this method.
        $info = array('part_number' => $row['PART_NUM'],
                      'td_group_id' => $row['GRP'],
                      'name' => 'DESCR');

        $this->db->where('td_group_id', $info['td_group_id']);
        $result = $this->db->get('tbl_categories')->row_array();
        if (isset($result['id'])) {
            $info['category_id'] = $result['id'];
            $buffer[] = $info;
        }

        $is_last_record = ($i == $num_rec - 1);
        $pending = count($buffer);
        if ($pending == 0 || ($pending < $batch_size && !$is_last_record)) {
            // Nothing to flush yet (or nothing left to flush at the end).
            continue;
        }

        $url_buffer = array();
        foreach ($buffer as $queued) {
            $url_buffer[] = $this->_product_url($queued['part_number']);
        }

        // NOTE(review): MultiCrawl() is not visible in this part of the file;
        // the results are expected to be keyed by position in $url_buffer.
        $html_returns = $this->MultiCrawl($url_buffer);
        foreach ($html_returns as $url_index => $html) {
            $more_info = $this->_extract_more_info($html);
            if ($more_info) {
                $more_info['category_id'] = $buffer[$url_index]['category_id'];
                $more_info['td_part_number'] = $buffer[$url_index]['part_number'];
                $this->_parse_product($more_info);
            }
        }

        // Drop the fetched pages and the processed batch before the next round.
        unset($html_returns, $url_buffer);
        $buffer = array();
    }
}


/**
 * Fetches a list of URLs concurrently with curl_multi.
 *
 * At most 15 transfers are open at once. Every transfer that finishes is
 * removed from the multi handle and closed, whether it succeeded or not.
 * The original only released handles that answered with HTTP 200 and only
 * counted those towards the loop exit, so a single failed request leaked
 * its handle and kept the loop running forever.
 *
 * Handles are matched back to their URL by identity rather than by
 * effective URL, because CURLOPT_FOLLOWLOCATION can change the effective
 * URL and duplicate URLs would otherwise map to one slot.
 *
 * @param array $all_urls List of URLs, indexed 0..n-1.
 * @return array Response bodies of the requests that returned HTTP 200,
 *               keyed by the position of the URL in $all_urls.
 */
function MultiGet($all_urls)
{
    $return_buffer = array();
    $url_count = count($all_urls);
    if ($url_count == 0) {
        return $return_buffer;
    }

    $useragent = $this->_useragent;
    $cookie_file = $this->_cookie_file;

    $mh = curl_multi_init();

    $ch = array();          // open handles, keyed by position in $all_urls
    $max_connections = 15;
    $index = 0;             // next URL to start
    $open_connections = 0;
    $completed = 0;         // finished transfers, successful or not
    $running = 0;

    while ($completed < $url_count) {

        // Top the pool up to the connection limit.
        while ($open_connections < $max_connections && $index < $url_count) {
            $handle = curl_init($all_urls[$index]);
            curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
            curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
            curl_setopt($handle, CURLOPT_COOKIESESSION, false);
            // NOTE(review): certificate verification is disabled, as in the
            // original; enable it unless the target really requires this.
            curl_setopt($handle, CURLOPT_SSL_VERIFYHOST, false);
            curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($handle, CURLOPT_COOKIEJAR, $cookie_file);
            curl_setopt($handle, CURLOPT_COOKIEFILE, $cookie_file);
            curl_setopt($handle, CURLOPT_USERAGENT, $useragent);
            curl_multi_add_handle($mh, $handle);
            $ch[$index] = $handle;
            $open_connections++;
            $index++;
        }

        curl_multi_exec($mh, $running);
        if ($running > 0 && curl_multi_select($mh, 1.0) === -1) {
            // select() could not wait on the descriptors; avoid a busy loop.
            usleep(1000);
        }

        $drained = 0;
        while ($info = curl_multi_info_read($mh)) {
            $handle = $info['handle'];
            $curl_index = array_search($handle, $ch, true);
            $status = curl_getinfo($handle, CURLINFO_HTTP_CODE);

            if ($status == 200 && $curl_index !== false) {
                $return_buffer[$curl_index] = curl_multi_getcontent($handle);
            } else {
                echo "ERROR: $status\n";
            }

            // Always release the handle, otherwise failed transfers pile up.
            curl_multi_remove_handle($mh, $handle);
            curl_close($handle);
            if ($curl_index !== false) {
                unset($ch[$curl_index]);
            }
            $open_connections--;
            $completed++;
            $drained++;
        }

        // Safety net: nothing is running, nothing was reported and nothing
        // is left to start, so waiting any longer cannot make progress.
        if ($running == 0 && $drained == 0 && $index >= $url_count) {
            break;
        }
    }

    // Release anything still registered (only reachable via the safety net).
    foreach ($ch as $handle) {
        curl_multi_remove_handle($mh, $handle);
        curl_close($handle);
    }
    unset($ch);

    curl_multi_close($mh);
    unset($mh);

    return $return_buffer;
}



/**
 * Extracts the product fields from a product page.
 *
 * Fields are looked up one XPath query at a time through _xquery(). The
 * function returns null as soon as a required element is missing, or when
 * the price reads 'Req. Auth.'. The UPC and the additional-specs table are
 * optional and are stored as null when absent.
 *
 * NOTE(review): every lookup goes through _xquery(), which receives the raw
 * HTML each time, so one page is parsed once per field.
 *
 * @param string $html Raw HTML of the product page.
 * @return array|null Keys, in order: td_img_url, price, msrp,
 *                    manf_part_number, upc_part_number, manufacturer,
 *                    short_description, long_description, main_specs,
 *                    additional_specs; null when the page is unusable.
 */
private function _extract_more_info($html)
{
    $buffer = array();

    $node = $this->_query_first_node($html, "//img[@id='ctl00_cphMain_cntrlProductProfile_imgprodimage']");
    if (!$node) return null;
    $buffer['td_img_url'] = trim($node->getAttribute('src'));

    // Text fields in output order: key => array(XPath query, required?).
    $text_fields = array(
        'price'             => array("//span[@class='priceLarge']", true),
        'msrp'              => array("//span[@id='ctl00_cphMain_cntrlProductProfile_newLtFinalPrice']", true),
        'manf_part_number'  => array("//span[@id='ctl00_cphMain_cntrlProductProfile_newLTMRF']", true),
        'upc_part_number'   => array("//span[@id='ctl00_cphMain_cntrlProductProfile_newLblUPC']", false),
        'manufacturer'      => array("//td[@class='black_text_WUL']", true),
        'short_description' => array("//td[@class='textt' and @colspan='3']", true),
        'long_description'  => array("//div[@id='ctl00_cphMain_pnlMarketingDesc']//td[@class='textt']", true),
    );

    foreach ($text_fields as $key => $field) {
        list($query, $required) = $field;

        $node = $this->_query_first_node($html, $query);
        if (!$node) {
            if ($required) return null;
            $buffer[$key] = null;
            continue;
        }

        $buffer[$key] = trim($node->nodeValue);

        // Products whose price is hidden behind authorisation are skipped.
        if ($key === 'price' && $buffer[$key] === 'Req. Auth.') return null;
    }

    $table = $this->_query_first_node($html, "//table[@id='ctl00_cphMain_cntrlMainSpecs_dgSpecs']");
    if (!$table) return null;
    $buffer['main_specs'] = $this->_spec_table_to_array($table);

    $table = $this->_query_first_node($html, "//table[@id='ctl00_cphMain_cntrlExtSpecs_tblData']");
    $buffer['additional_specs'] = $table ? $this->_spec_table_to_array($table) : null;

    return $buffer;
}

/**
 * Runs one XPath query and returns its first match, passed through
 * _to_dom_node(), or null when nothing matched.
 */
private function _query_first_node($html, $query)
{
    $result = $this->_xquery($html, $query);
    return $result instanceof DOMNode ? $this->_to_dom_node($result) : null;
}

/**
 * Converts a two-column specification table into a caption => value array.
 *
 * Rows with no cells or more than two cells are ignored, as are rows whose
 * trimmed caption is falsy (empty or '0'). A single-cell row is stored with
 * a null value.
 */
private function _spec_table_to_array($table)
{
    $specs = array();
    foreach ($table->getElementsByTagName('tr') as $tr) {
        $columns = $tr->getElementsByTagName('td');
        $cell_count = $columns->length;
        if ($cell_count < 1 || $cell_count > 2) continue;

        $caption = trim($columns->item(0)->nodeValue);
        if (!$caption) continue;

        $specs[$caption] = $cell_count == 2 ? trim($columns->item(1)->nodeValue) : null;
    }
    return $specs;
}



/**
 * Parses an HTML string and evaluates an XPath query against it.
 *
 * @param string $html     HTML source to parse.
 * @param string $query    XPath expression.
 * @param bool   $allnodes When false (default) only the first match is
 *                         returned; otherwise the whole DOMNodeList.
 * @return DOMNode|DOMNodeList|null Null when $html is empty or cannot be
 *                         parsed, the query is invalid, or nothing matches.
 */
private function _xquery($html,$query,$allnodes = false){
    // loadHTML() rejects an empty source (a ValueError on PHP 8, which the
    // former @ operator would not have suppressed).
    if (!is_string($html) || $html === '') {
        return null;
    }

    // Collect parser diagnostics internally instead of silencing them with
    // @, and discard them right away so they do not pile up across calls.
    $previous = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if (!$loaded) {
        return null;
    }

    $xpath = new DOMXPath($dom);
    $nodeList = $xpath->query($query);

    // query() returns false for a malformed expression.
    if ($nodeList === false || $nodeList->length == 0) {
        return null;
    }

    return $allnodes ? $nodeList : $nodeList->item(0);
}

1 个答案:

答案 0 :(得分:1)

找到泄漏的策略?

  • 先确认确实存在泄漏(只处理 1/100 的数据时,内存是否仍然不释放?1/1000 呢?)
  • 考虑复杂度:如果 foo 是 O(n),bar 也是 O(n),而 bar 又调用 foo,那么总体复杂度可能变成 O(n * n)。
  • 实验:禁用程序的某些部分,直到它不再泄漏
乍一看,你是在抓取一系列网址。这些页面可能又包含更多网址,并通过 MultiCrawl 方法继续抓取。你确定其中不会出现循环吗?(文件夹遍历就不止一次坑过我:把 '.' 当作子文件夹来遍历会导致无限循环。)