我正在从网页中抓取数据,我知道这个过程可能很漫长,具体耗时取决于所抓取的日期。但脚本在抓取数据 132 秒后页面就超时了。我已经设置了 set_time_limit(0); 和 ignore_user_abort(true);,不知道还能做些什么来让脚本保持运行并把所有数据都抓取下来。
我把代码贴在下面,看看有没有什么办法可以加快速度?
// Runtime configuration for a long-running scrape.
// Remove PHP's own execution time limit (0 = unlimited).
set_time_limit(0);
// Keep running even if the client that started the request disconnects.
ignore_user_abort(true);
// Report every error level and print errors to the output while scraping.
error_reporting(-1);
ini_set('display_errors', 'On');
// Project includes. NOTE(review): presumably these provide $db, GetBetween()
// and miletofurlong(), which the script below uses but never defines — confirm.
include "../include/class.php";
include "../include/db.php";
//the below will get the list of id's for each race that day
/**
 * Download a URL over HTTP(S) and return the response body.
 *
 * The cURL handle is kept between calls so that consecutive requests to the
 * same host can reuse the open connection instead of reconnecting each time;
 * the script issues many requests to the same two hosts.
 *
 * @param string $url Address to fetch; redirects are followed.
 * @return string|false Response body, or false when the transfer failed
 *                      (a warning carrying the cURL error is raised).
 */
function curl($url){
    // One shared handle for the whole run: libcurl keeps connections alive per handle.
    static $handle = null;
    if (!$handle) {
        $handle = curl_init();
        if (!$handle) {
            $handle = null;
            trigger_error("curl could not be initialised for $url", E_USER_WARNING);
            return false;
        }
    }

    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    // Bound every request so a single stalled server cannot hang the whole script.
    curl_setopt($handle, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($handle, CURLOPT_TIMEOUT, 60);
    // Empty string = advertise every encoding libcurl supports and decode transparently.
    curl_setopt($handle, CURLOPT_ENCODING, '');

    $data = curl_exec($handle);
    if ($data === false) {
        // Surface the reason instead of silently handing false to the caller.
        trigger_error("curl request to $url failed: " . curl_error($handle), E_USER_WARNING);
    }

    return $data;
}
// ---------------------------------------------------------------------------
// Stage 1: read the Timeform day page, follow every "raceresult" link of the
// GB/IE courses and store one Race_Records row per runner.
// ---------------------------------------------------------------------------
$url = "http://form.timeform.betfair.com/daypage?date=20150516"; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
$html = curl($url);
$dom = new DOMDocument();
if (is_string($html) && $html !== '') {
    // '@' hides the parser warnings that real-world HTML always produces.
    @$dom->loadHTML($html);
}
$dom->preserveWhiteSpace = false;
$xpath = new DOMXPath($dom);

// Returns the text found by following a chain of childNodes indexes from
// $node, or '' when any step of the chain does not exist.
$textAt = function ($node, array $path) {
    foreach ($path as $index) {
        if ($node === null || $node->childNodes === null) {
            return '';
        }
        $node = $node->childNodes->item($index);
    }
    return $node === null ? '' : $node->textContent;
};

// One prepared statement for every runner: the server parses the SQL once and
// the values are sent separately, so no manual escaping is required (and the
// race-level values can no longer be escaped again on every runner row).
$insertSql = "
INSERT INTO `Race_Records`
(
`Venue`,
`DateandTime`,
`Going`,
`Distance`,
`Age`,
`PrizeMoney`,
`Runners`,
`RaceType`,
`Position`,
`Draw`,
`LosingDist`,
`Horse`,
`HorseAge`,
`Weight`,
`OR`,
`EQ`,
`Jockey`,
`Trainer`,
`InRunningLow`,
`InRunningHigh`,
`BFSP`,
`ISP`,
`PlaceOdds`,
`RaceName`
)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
";
$insert = mysqli_prepare($db, $insertSql);
if (!$insert) {
    trigger_error(mysqli_error($db), E_USER_ERROR);
    exit(1); // only reached when a custom error handler swallowed the error
}

//pull the individual cards for the day
$courses = $xpath->query('//div[contains(@data-location, "RACING_COUNTRY_GB_IE")]//div[contains(@class, "course")]');

//loop through each individual card
foreach ($courses as $course) {
    //loop through and get all the a tags
    foreach ($course->getElementsByTagName("a") as $link) {
        //only grab the links which point to the results page
        $href = $link->getAttribute('href');
        if (strpos($href, 'raceresult') === false) {
            continue;
        }

        //grab the code (everything after the last "=")
        $hrefParts = explode("=", $href);
        $code = end($hrefParts);

        $url = "http://form.timeform.betfair.com/raceresult?raceId=" . $code; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
        $html = curl($url);
        if (!is_string($html) || $html === '') {
            // Download failed: skip this race instead of parsing an empty document.
            continue;
        }

        // Separate objects for the race page so the day-page document being
        // iterated by the outer loops is not overwritten.
        $raceDom = new DOMDocument();
        @$raceDom->loadHTML($html);
        $raceDom->preserveWhiteSpace = false;
        $raceXpath = new DOMXPath($raceDom);

        // Reset the race-level fields so nothing leaks over from the previous race.
        $venue = $Dateandtime = $going = $distance = $age = '';
        $prizemoney = $runners = $racetype = $racename = '';
        $spanTexts = array();

        //get the place name
        foreach ($raceXpath->query('//span[contains(@class, "locality")]') as $localityNode) {
            $venue = $localityNode->textContent;
            echo "Venue: " . $venue;
        }

        //get the Date and the Time
        // NOTE(review): the title attribute is passed to date() as the FORMAT
        // string and the timestamp is "now + 5 hours"; this looks unintended but
        // the wanted value cannot be determined from this file, so it is kept.
        foreach ($raceXpath->query('//abbr [contains(@class, "dtstart")]') as $startNode) {
            $Dateandtime = date(trim($startNode->getAttribute('title')), strtotime('+5 hours'));
            echo "Date : " . $Dateandtime;
        }

        //pull the data for the race e.g going money ect
        foreach ($raceXpath->query('//div[contains(@class, "content")]/p') as $paragraph) {
            $racename = trim($textAt($paragraph, array(0)));

            // Collect the text of every direct <span> child; spans accumulate
            // across the paragraphs of the same race.
            foreach ($paragraph->childNodes as $node) {
                if ($node->nodeType === XML_ELEMENT_NODE && strtolower($node->tagName) === 'span') {
                    $spanTexts[] = (string) $node->textContent;
                }
            }

            if (count($spanTexts) < 6) {
                continue;
            }

            list($going, $distance, $age, $prizemoney, $runners, $racetype) = $spanTexts;
            $going = str_replace(array(
                'Â',
                'Going:',
                '|'
            ), '', $going);
            $distance = miletofurlong(trim(GetBetween($distance, ':', 'Â')));
            $age = trim(GetBetween($age, ':', 'Â'));
            $prizemoney = trim(GetBetween($prizemoney, '£', 'Â'));
            $runners = trim(GetBetween($runners, ':', 'Â'));
            $racetype = trim(GetBetween($racetype, ':', 'Â'));
        }

        //pull the individual horse data
        foreach ($raceXpath->query('//div[contains(@class, "table-container")]//tbody//tr') as $row) {
            // Cells are read by position up to child 16 with nested lookups;
            // shorter rows cannot be runner rows, so skip them.
            if ($row->childNodes->length < 17) {
                continue;
            }

            $position = $textAt($row, array(0, 1));
            $draw = str_replace(array('(', ')'), '', $textAt($row, array(0, 3)));

            // Turn the fraction glyphs of the beaten distance into decimals.
            $losingdist = str_replace('Â', '', trim($textAt($row, array(2))));
            $losingdist = str_replace(array('¾', '½', '¼'), array('.75', '.5', '.25'), $losingdist);

            // Apostrophes and the bracketed suffix are removed from the name.
            $horse = trim(preg_replace("/\([^\)]+\)/", "", str_replace("'", "", trim($textAt($row, array(4))))));
            $horseage = trim($textAt($row, array(6)));
            $weight = trim($textAt($row, array(8, 1)));
            $officialRating = str_replace(array('(', ')'), '', trim($textAt($row, array(8, 3))));
            // The result of str_replace() was previously thrown away, so "-" was never removed.
            $eq = str_replace('-', '', trim($textAt($row, array(10))));
            $jockey = trim($textAt($row, array(12, 1)));
            $trainer = trim($textAt($row, array(12, 4)));

            // In-running prices arrive as "high/low".
            $highandlow = explode("/", trim($textAt($row, array(14, 1))));
            $highodds = str_replace('-', '', trim($highandlow[0]));
            $lowodds = isset($highandlow[1]) ? str_replace('-', '', trim($highandlow[1])) : '';

            $bfsp = trim($textAt($row, array(16, 1)));
            // NOTE(review): every "/" is removed from the ISP text — confirm this is wanted.
            $isp = trim(str_replace('/', '', $textAt($row, array(16, 3))));
            $placeodds = trim($textAt($row, array(18)));

            // Bind in column order; HorseAge now receives the horse's age rather
            // than the race age band, and RaceName is no longer sent unescaped.
            mysqli_stmt_bind_param(
                $insert,
                str_repeat('s', 24),
                $venue,
                $Dateandtime,
                $going,
                $distance,
                $age,
                $prizemoney,
                $runners,
                $racetype,
                $position,
                $draw,
                $losingdist,
                $horse,
                $horseage,
                $weight,
                $officialRating,
                $eq,
                $jockey,
                $trainer,
                $lowodds,
                $highodds,
                $bfsp,
                $isp,
                $placeodds,
                $racename
            );

            if (!mysqli_stmt_execute($insert)) {
                echo PHP_EOL . "FAIL: $insertSql";
                trigger_error(mysqli_stmt_error($insert), E_USER_ERROR);
            }
        }
    }
}

mysqli_stmt_close($insert);
// ---------------------------------------------------------------------------
// Stage 2: read the Sporting Life results page for the day, open every race
// link whose href mentions one of the known courses and copy the per-horse
// comment into Race_Records.
// ---------------------------------------------------------------------------
// NOTE(review): $id is not assigned anywhere in the visible script; presumably
// it comes from an included file — confirm.
$id = date_create($id);
$theid2 = date_format($id,"d-m-Y");
// Explicit scheme, consistent with the race-page URL built further down.
$url = "http://www.sportinglife.com/racing/results/".$theid2; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
$html = curl($url);
$dom = new DOMDocument();
if (is_string($html) && $html !== '') {
    // '@' hides the parser warnings that real-world HTML always produces.
    @$dom->loadHTML($html);
}
$dom->preserveWhiteSpace = false;
$xpath = new DOMXPath($dom);

// NOTE(review): $listofcorses is not defined in the visible script; presumably
// an included file provides the list of course names — confirm.
$courseNames = isset($listofcorses) ? $listofcorses : array();

// Prepared once and reused for every horse; the values travel separately from
// the SQL text, so no manual escaping is needed.
$updateComment = $db->prepare("UPDATE Race_Records SET comments = ? WHERE Horse = ?");
if (!$updateComment) {
    trigger_error("Could not prepare the comments update: " . $db->error, E_USER_WARNING);
} else {
    $cards = $xpath->query('//li[contains(@class, "rac-cards")]//div[contains(@class, "ix ixv")]');

    //loop through each individual card
    foreach ($cards as $card) {
        //loop through and get all the a tags
        foreach ($card->getElementsByTagName("a") as $link) {
            $getcomments = $link->getAttribute('href');
            $hrefLower = strtolower($getcomments);

            foreach ($courseNames as $bad) {
                // Case-insensitive "href contains course name" test.
                if (strpos($hrefLower, strtolower($bad)) === false) {
                    continue;
                }

                $url = "http://www.sportinglife.com/".$getcomments; //WILL NEED TO PULL TOMORROWS DATE AS DD-MM-YYY
                $html = curl($url);
                if (!is_string($html) || $html === '') {
                    // Download failed; trying the same URL for another course name will not help.
                    break;
                }

                // Separate objects for the race page so the results document
                // being iterated by the outer loops is not overwritten.
                $raceDom = new DOMDocument();
                @$raceDom->loadHTML($html);
                $raceDom->preserveWhiteSpace = false;
                $raceXpath = new DOMXPath($raceDom);

                foreach ($raceXpath->query('//table') as $table) {
                    // The runner rows are expected under the table's child node 11.
                    $rowHolder = $table->childNodes->item(11);
                    if ($rowHolder === null) {
                        continue;
                    }

                    // get how many child nodes are in the loop (text nodes excluded)
                    $count = 0;
                    foreach ($rowHolder->childNodes as $node) {
                        if (!($node instanceof \DomText)) {
                            $count++;
                        }
                    }

                    // NOTE(review): $count ignores text nodes while item($i)
                    // indexes all child nodes; kept as in the original — confirm
                    // against the page layout.
                    $check = "no";
                    $horse = '';

                    //loop through and get the horses name and the comment
                    for ($i = 0; $i < $count; $i++) {
                        $entry = $rowHolder->childNodes->item($i);

                        if ($i % 2 == 0) {
                            // Even entries carry the horse name in child 4.
                            $nameCell = ($entry !== null && $entry->childNodes !== null)
                                ? $entry->childNodes->item(4)
                                : null;
                            if ($nameCell !== null) {
                                // Drop the bracketed suffix, then everything that is not a letter or space.
                                $horse = trim(preg_replace("/[^A-Za-z ]+/", "", preg_replace("/\([^\)]+\)/", "", trim($nameCell->textContent))));
                                $check = "ok";
                            } else {
                                $check = "no";
                            }
                        } elseif ($check == "ok" && $entry !== null) {
                            // Odd entries carry the comment for the preceding horse.
                            $comments = trim($entry->textContent);

                            //update the database
                            $updateComment->bind_param('ss', $comments, $horse);
                            if (!$updateComment->execute()) {
                                trigger_error("Comment update failed for $horse: " . $updateComment->error, E_USER_WARNING);
                            }
                        }
                    }
                }

                // The page has been processed; a second matching course name
                // would only download and apply the very same page again.
                break;
            }
        }
    }

    $updateComment->close();
}
?>
答案 0 :(得分:0)
您可以尝试设置curl超时
curl_setopt($ch,CURLOPT_TIMEOUT,1000);
您可能还需要检查在循环中访问的服务是否有速率限制;如果有,请在循环中加入适当的 sleep,
以确保不会在短时间内连续向该服务发出过多请求;很可能代码本身运行正常,只是在向远程服务发出大量 HTTP 请求后超时了。
答案 1 :(得分:-1)
设置最长执行时间
// Begin your php code with this
ini_set('max_execution_time',300); // 60s*5=300s 5 minutes