我有点卡住了。我的目标是从拥有数百页的网站中提取数据。它是一个体育网站,我必须提取团队名称和其他相关数据。到目前为止,我已经成功地做到了。我跑了6-7页的循环,它的工作非常好。但是当我将循环更改为大约一个月(25)时,它会检索不完整的数据。
例如,如果目的地日期是10月25日,它可能会在10月10日至12日随机停止。
我正在使用phpQuery
,我的互联网连接是1MB。
请指导我,谢谢。
include 'phpQueryOneFile.php';
$fileName = "test.txt";
$fileHandle = fopen($fileName, 'w') or die("can't open file");
$team2Arr[] = "" ; // Array that will store all th team2 names, in complete season
$finalCount2 = 0 ; // counter that is maintained for the $team2Arr, including 'FFF', means this
// will hold the atual size of array including 'FFF'
$totalNumberOfTeam2 = 0; // counter that will contain only the number of teams in total,
// excluding the "FFF", hence the aise of team2Arr - 'FFF's = # of teams
$team1Arr[] = "" ; // Array that will store all th team1 names, in complete season
$finalCount1 = 0; // counter that is maintained for the $team1Arr, including 'FFF', means this
// will hold the atual size of array including 'FFF'
$totalNumberOfTeam1 = 0; // counter that will contain only the number of teams in total,
// excluding the "FFF", hence the aise of team1Arr - 'FFF's = # of teams
$year = '2012' ;
$month = '10' ;
$day = '6';
while($day < 32)
{
if($day < 10)
{
$day = '0'.$day ;
}
//$doc[] = phpQuery::newDocumentFileHTML('http://www.nba.com/gameline/'.$date.'/');
phpQuery::newDocumentFileHTML('http://www.nba.com/gameline/'.$year.$month.$day.'/');
$time1Element[$i] = pq('.nbaFnlStatTxSm');
$time1[$i] = $time1Element[$i]->html();
//echo '<h2>This is The Time</h2>';
//echo '<pre>' . htmlentities($time1[$i]) . '</pre>';
fwrite($fileHandle, "This is The Time"."\n");
fwrite($fileHandle, $time1[$i]."\n"."\n");
$time2Element[$i] = pq('.nbaPreMnStatus .nbaPreStatTx');
$time2[$i] = $time2Element[$i]->html();
//echo '<h2>This is The Second Time that is not visible</h2>';
//echo '<pre>' . htmlentities($time2[$i]) . '</pre>';
fwrite($fileHandle, "This is Only Time"."\n");
fwrite($fileHandle, $time2[$i]."\n"."\n");
$dateElement[$i] = pq('#nbaCalSelectedDate');
$date[$i] = $dateElement[$i]->html();
//echo '<pre>' . htmlentities($dateElement[$i]) . '</pre>';
//echo '<h2>The Date of the match is</h2>';
//echo '<pre>'.$dateElement[$i].'</pre>';
fwrite($fileHandle, "The Date of the match is (Proccessed)"."\n");
fwrite($fileHandle, $date[$i]."\n"."\n");
fwrite($fileHandle, "The Date of the match is (Not Proccessed)"."\n");
fwrite($fileHandle, $dateElement[$i]."\n"."\n");
$team2Element[$i] = pq('.nbaPreMnStatusTeamHm .nbaModTopTeamName');
$team2[$i] = $team2Element[$i]->html();
//echo '<h2>The Home Team is </h2>';
//echo '<pre>' . htmlentities($team2[$i]). '</pre>';
fwrite($fileHandle, "The Home Team is "."\n");
fwrite($fileHandle, $team2[$i]."\n"."\n");
//here we split the data
//get the lenght of the string
echo ("The lenght of arrays is = ".strlen($team2[$i]));
//loop till 3
$out2 = ""; // String of lenght 3 that will pick and store 3 char and put in team2Arr
$threeCount = 0; // counter that is represents the second dimenssion of array,
// i.e character in string and number of strings are stored in array
$teamNumber = 1; // counter that is represents that team number of particular day
$numberOfTeams = strlen($team2[$i]) / 3; // this calutate the numberOfTeams on one day
echo ("<br / >"."team number = ".$numberOfTeams."<br />");
for($j=0 ; $j<$numberOfTeams ; $j++)
{
echo '<br />'."Team".$teamNumber." is : ";
//here we extract pair of 3 character
for($k=0 ; $k<3 ; $k++)
{
$out2 .= $team2[$i][$threeCount];
$threeCount++;
}
echo $out2."\n";
$team2Arr[$finalCount2] .= $out2;
fwrite($fileHandle, $out2."\n");
$teamNumber++;
$finalCount2++;
$totalNumberOfTeam2++;
$out2 = "";
}
$team2Arr[$finalCount2] = "FFF";
$finalCount2++;
$team1Element[$i] = pq('.nbaPreMnStatusTeamAw .nbaModTopTeamName');
$team1[$i] = $team1Element[$i]->html();
//echo '<h2>The Away Team is </h2>';
//echo '<pre>' . htmlentities($team1[$i]). '</pre>';
fwrite($fileHandle, "The Away Team is "."\n");
fwrite($fileHandle, $team1[$i]."\n"."\n");
echo ("\n"."The lenght of arrays is = ".strlen($team1[$i]));
//here we split the data
//get the lenght of the string
//loop till 3
$out1 = ""; // String of lenght 3 that will pick and store 3 char and put in team1Arr
$threeCount = 0; // REINITIALIZED counter that is represents the second dimenssion of array,
// i.e character in string and number of strings are stored in array
$teamNumber = 1; // REINITIALIZED counter that is represents that team number of particular day
$numberOfTeams = strlen($team1[$i]) / 3;
echo "<br />";
echo ("team number = ".$numberOfTeams."<br />");
for($j=0 ; $j<$numberOfTeams ; $j++)
{
echo "<br />"."Team".$teamNumber." is : ";
//here we extract pair of 3 character
for($k=0 ; $k<3 ; $k++)
{
$out1 .= $team1[$i][$threeCount];
$threeCount++;
}
echo $out1;
$team1Arr[$finalCount1] .= $out1;
fwrite($fileHandle, $out1."\n");
$teamNumber++;
$finalCount1++;
$totalNumberOfTeam1++;
$out1 = "";
}
$team1Arr[$finalCount1] = "FFF";
$finalCount1++;
$day = $day + 1 ;
$i++;
}
fwrite($fileHandle, $team2Arr);
fwrite($fileHandle, $team1Arr);
fclose($fileHandle);
echo "<br />"."number is iteratin : ".$i;