即使在函数停止后,MySQL也会插入新行

时间:2013-03-28 01:56:20

标签: php mysql codeigniter

由于函数很长,您实际上可以跳过该函数,因为它们只从HTML标记中提取特定数据,请尝试通过为此规模的问题提供逻辑来提供帮助 我正在建立一个网站,可以深入到一个特定的网站,并提取几乎完整的数据库。实际的功能非常长,并且插入到4个不同的表中,估计总计60k行。我的问题是在函数执行完毕后,我可以看到新的行被添加到我的数据库中。我猜测MySQL的插入速度低于我的迭代速度。

控制器

public function start()
{
    // set the loop running for ever
    ini_set('MAX_EXECUTION_TIME', -1);  
    set_time_limit(0);          
    $lastId = $this->Kpkt_model->last_temp_id();
    if($lastId == 0 OR $lastId == '' OR $lastId == '0')
    {
        $lastId = 0;
    }
    else
    {
        $lastId = $this->Kpkt_model->last_temp_id();
    }
    // add 1 to start the process
    $i = $lastId+1;
    // set the errors to 0
    $errors = 0;

    while($errors < 25)
    {
        usleep(100);
        if($this->_scrap_all($i) == 'empty')
        {
            // its an empty record, add 1 to the errors
            $errors++;

        }
        else
        {
            $errors = 0;
        }
        $i++;
    }

}



function _scrap_all($i)
{

    $url = "mydesiredwebsite.com?PMJU_KOD=$i";

    $html = file_get_html($url);
    // then check if the html element exists to avoid trying to parse non-html

    $tag = $html->find('td[class=tdSecondtext1]');

    // now we need to remove all the redundant spaces
    $tag = preg_replace("/[[:blank:]]+/"," ",$tag);
    // lets sanitize the dirty string from the html special characters
    $tag = preg_replace("/&#?[a-z0-9]{2,8};/i","",$tag);

    $name =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[0])));

    $file = strip_tags(str_replace("\n","", str_replace("\r","", $tag[1])));
    $roc = strip_tags(str_replace("\n","", str_replace("\r","", $tag[2])));
    $address = strip_tags(str_replace("\n","", str_replace("\r","", $tag[3])));
    $city = strip_tags(str_replace("\n","", str_replace("\r","", $tag[4])));
    $postcode = strip_tags(str_replace("\n","", str_replace("\r","", $tag[5])));
    $district = strip_tags(str_replace("\n","", str_replace("\r","", $tag[6])));
    $state = strip_tags(str_replace("\n","", str_replace("\r","", $tag[7])));
    $telephone = strip_tags(str_replace("\n","", str_replace("\r","", $tag[8])));
    $fax = strip_tags(str_replace("\n","", str_replace("\r","", $tag[9])));
    $website = strip_tags(str_replace("\n","", str_replace("\r","", $tag[10])));
    $last_update = strip_tags(str_replace("\n","", str_replace("\r","", $tag[11])));

    // check to see if this array contains data or has returned from the error page
    if(strlen($name) < 4)
    {
        // here we can assume the project is empty by looking at it's file (unique id)

        return 'empty';
    }
    else
    {
        /*
         The project file is not empty, proceed to add to database
         we need to convert this number into a date format
         */
        $date = str_replace('/', '-', $last_update);

        $last_update = date('Y-m-d', strtotime($date));

        // begin insertion
        $this->Kpkt_model->insert_company_temp($name, $file, $roc, $address, $city, $postcode, $district, $state, $telephone, $fax, $website, $last_update);                            
        // echo "<span style='color:green'>company #<strong>$i</strong> added to database!</span><br/>"; 
        // lets search the details

        $links = $html->find('a[href^=DetailProjek.cfm]');
        if (sizeof($links > 0))
        {


            foreach($links as $key=>$link)
            {
                // eurika!
                $anchor = $link->getAttribute ( 'href' );
                // change the spaces to html notation
                $anchor = str_replace(' ', '%20', $anchor);

                $url = "mydesiredwebsite.com/$anchor";
                $html2 = file_get_html($url);

                $tag = $html2->find('td[class=tdSecondtext1]');

                // now we need to remove all the redundant spaces
                $tag = preg_replace("/[[:blank:]]+/"," ",$tag);
                // lets sanitize the dirty string from the html special characters
                $tag = preg_replace("/&#?[a-z0-9]{2,8};/i","",$tag);

                // this is our foreign key
                $developer = strip_tags(str_replace("\n","", str_replace("\r","", $tag[1])));

                // first batch
                $name =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[12])));
                $file =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[13])));
                $lot_no =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[14])));
                $state =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[15])));
                $housing_no =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[16])));
                $bank_name =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[17])));
                $license_no =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[18])));
                $license_expire =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[19])));
                $permit_no =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[20])));
                $permit_expire =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[21])));
                $land_status =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[22])));

                $date1 = str_replace('/', '-', $license_expire);
                $license_expire = date('Y-m-d', strtotime($date1));

                $date2 = str_replace('/', '-', $permit_expire);
                $permit_expire = date('Y-m-d', strtotime($date2));      

                $this->Kpkt_model->add_project_information_temp(
                    $developer,
                    $name,
                    $file,
                    $lot_no,
                    $state,
                    $housing_no,
                    $bank_name,
                    $license_no,
                    $license_expire,
                    $permit_no,
                    $permit_expire,
                    $land_status

                    );

                $project_id = $this->Kpkt_model->get_last_id();                     

                /* 
                    delete the first 23 items of the array
                    since we dont know how many rows of data are under here, we deduct the company details and
                    primary project details
                    sizeof(array) - 23 / 12 = the number of rows we need to insert
                    MD 30/01/2013
                */

                $newTags = array_slice($tag, 23);
                // separate them into batches of 12
                $newTags = array_chunk($newTags, 12);
                // now we iterate through the array and add the details in
                foreach($newTags AS $tag)
                {

                    $category =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[0])));
                    $type =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[1])));
                    $storey =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[2])));
                    $floorArea =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[3])));
                    $totalArea =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[4])));
                    $units =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[5])));
                    $tcf =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[6])));
                    $cf =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[7])));
                    $priceMax =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[8])));
                    $priceStandard =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[9])));
                    $priceMin =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[10])));
                    $progressReport =  strip_tags(str_replace("\n","", str_replace("\r","", $tag[11])));

                    $date1 = str_replace('/', '-', $tcf);
                    $tcf = date('Y-m-d', strtotime($date1));

                    $date2 = str_replace('/', '-', $cf);
                    $cf = date('Y-m-d', strtotime($date2)); 

                    $this->Kpkt_model->add_project_development_information_temp(
                        $developer,
                        $project_id,
                        $category,
                        $type,
                        $storey,
                        $floorArea,
                        $totalArea,
                        $units,
                        $tcf,
                        $cf,
                        $priceMax,
                        $priceStandard,
                        $priceMin,
                        $progressReport                                 
                        );
                }

                // clean the memory
                $sellingInfoLinks = $html2->find('a[href^=LaporanJualRumah.cfm]');
                $html->clear(); 
                unset($html2);
                unset($tag);
                unset($tags);


                if(sizeof($sellingInfoLinks > 0 ))
                {
                    foreach($sellingInfoLinks AS $key=>$selling)
                    {
                        // now we sift through the selling information
                        $anchor = $selling->getAttribute ( 'href' );
                        // change the spaces to html notation
                        $anchor = str_replace(' ', '%20', $anchor);

                        $url = "mydesiredwebsite.com/$anchor";
                        $html3 = file_get_html($url);

                        $tag = $html3->find('tr[bgcolor!=#fc6535] td div font');

                        // now we need to remove all the redundant spaces
                        $tag = preg_replace("/[[:blank:]]+/"," ",$tag);
                        // lets sanitize the dirty string from the html special characters
                        $tag = preg_replace("/&#?[a-z0-9]{2,8};/i","",$tag);

                        // there are 12 items per array
                        $numRows = sizeof($tag) / 12;


                        $tag = array_chunk($tag, 12);
                        foreach ($tag as $value)
                        {
                            // echo '<pre>';
                            // print_r($value);
                            // echo '</pre>';
                            // we break down the selling information into chunks of 12 to insert into the database, each bunch of 12 is one set of data
                            $company_id = $developer;
                            $project_id = $project_id;

                            $roomType = strip_tags(str_replace("\n","", str_replace("\r","", $value[0])));
                            $levels = strip_tags(str_replace("\n","", str_replace("\r","", $value[1])));
                            $local = strip_tags(str_replace("\n","", str_replace("\r","", $value[2])));
                            $chinse = strip_tags(str_replace("\n","", str_replace("\r","", $value[3])));
                            $indian = strip_tags(str_replace("\n","", str_replace("\r","", $value[4])));
                            $other = strip_tags(str_replace("\n","", str_replace("\r","", $value[5])));
                            $foreign = strip_tags(str_replace("\n","", str_replace("\r","", $value[6])));
                            $totalSold = strip_tags(str_replace("\n","", str_replace("\r","", $value[7])));
                            $totalUnsold = strip_tags(str_replace("\n","", str_replace("\r","", $value[8])));
                            $totalPerUnit = strip_tags(str_replace("\n","", str_replace("\r","", $value[9])));
                            $approvedUnits = strip_tags(str_replace("\n","", str_replace("\r","", $value[10])));
                            $developedUnits = strip_tags(str_replace("\n","", str_replace("\r","", $value[11])));
                            //echo sizeof($totalPerUnit);
                            $this->Kpkt_model->add_selling_information_temp(
                                $company_id,
                                $project_id,
                                $roomType,
                                $levels,
                                $local,
                                $chinse,
                                $indian,
                                $other,
                                $foreign,
                                $totalSold,
                                $totalUnsold,
                                $totalPerUnit,
                                $approvedUnits,
                                $developedUnits
                                );
                            # code...
                        }
                        $html3->clear(); 
                        unset($html3);
                        unset($tag);

                    }

                }
            }
        }

    }
// clean the memory
$html->clear(); 
unset($html);
unset($tag);        
}

模型

function insert_company_temp($name, $file, $roc, $address, $city, $postcode, $district, $state, $telephone, $fax, $website, $last_update)
{
    $data = array(
        'file'          =>  $file,
        'name'          =>  $name,
        'roc'           =>  $roc,
        'address'       =>  $address,
        'city'          =>  $city,
        'postcode'      =>  $postcode,
        'district'      =>  $district,
        'state'         =>  $state,
        'telephone'     =>  $telephone,
        'fax'           =>  $fax,
        'website'       =>  $website,
        'last_update'   =>  $last_update

    );

    $this->db->insert('kpkt_company_temp', $data);      
    //echo $this->db->last_query();
}   

function last_id()
{
    $query = "SELECT MAX(id) AS id FROM kpkt_company";
    $res = $this->db->query($query);
    return $res->row('id');
    //echo $this->db->last_query();

}

function last_temp_id()
{
    $query = "SELECT MAX(id) AS id FROM kpkt_company_temp";
    $res = $this->db->query($query);
    return $res->row('id');
    //echo $this->db->last_query();

}

function add_project_information_temp(
                $developer,
                $name,
                $file,
                $lot_no,
                $state,
                $housing_no,
                $bank_name,
                $license_no,
                $license_expire,
                $permit_no,
                $permit_expire,
                $land_status
                )
{
    $data = array(
        'developer_id'      =>  $developer,
        'name'              =>  $name,
        'file'              =>  $file,
        'lot_no'            =>  $lot_no,
        'state'             =>  $state,
        'housing_no'        =>  $housing_no,
        'bank_name'         =>  $bank_name,
        'license'           =>  $license_no,
        'license_expire'    =>  $license_expire,
        'permit_no'         =>  $permit_no,
        'permit_expire'     =>  $permit_expire,
        'land_status'       =>  $land_status

    );

    $this->db->insert('kpkt_project_information_temp', $data);          
}


function add_project_development_information_temp(
                $developer,
                $project_id,
                $category,
                $type,
                $storey,
                $floorArea,
                $totalArea,
                $units,
                $tcf,
                $cf,
                $priceMax,
                $priceStandard,
                $priceMin,
                $progressReport 
                )
{
    $data = array(
        'developer_id'      =>  $developer,
        'project_id'        =>  $project_id,
        'house_category'    =>  $category,
        'house_type'        =>  $type,
        'levels'            =>  $storey,
        'floor_area'        =>  $floorArea,
        'total_area'        =>  $totalArea,
        'units'             =>  $units,
        'tcf'               =>  $tcf,
        'cf'                =>  $cf,
        'price_max'         =>  $priceMax,
        'price_standard'    =>  $priceStandard,
        'price_min'         =>  $priceMin,
        'progress_report'   =>  $progressReport         
    );

    $this->db->insert('kpkt_project_development_information_temp', $data);      
}

function add_selling_information_temp(
                $company_id,
                $project_id,
                $roomType,
                $levels,
                $local,
                $chinese,
                $indian,
                $other,
                $foreign,
                $totalSold,
                $totalUnsold,
                $totalPerUnit,
                $approvedUnits,
                $developedUnits
                )
{
    $data = array(
        'developer_id'  =>  $company_id,
        'project_id'    =>  $project_id,
        'house_type'    =>  $roomType,
        'levels'        =>  $levels,
        'bumi'          =>  $local,
        'chinese'       =>  $chinese,
        'indian'        =>  $indian,
        'other'         =>  $other,
        'foreigner'     =>  $foreign,
        'units_sold'    =>  $totalSold,
        'units_unsold'  =>  $totalUnsold,
        'price_per_unit'=>  $totalPerUnit,
        'approved_units'=>  $approvedUnits,
        'developed_units'=> $developedUnits         
    );

    $this->db->insert('kpkt_selling_information_temp', $data);
} 

所以再说一遍,一切都运行得非常顺利,没有任何问题,除了INSERT陈述继续落后所以说。如何限制while中的function start()? 谢谢

1 个答案:

答案 0 :(得分:2)

你正在使用usleep,这是一个微秒的暂停; http://php.net/manual/en/function.usleep.php

你有睡眠(100),暂停几乎不会引起注意。

也许用;睡觉(1)或将usleep移动到100000 = 0.1秒