你能帮我找一下导致这个过程达到500MB内存使用量的原因。 它基本上是一个html页面下载器。 尽管该过程是稳定的(并且不超过该限制),但它意味着在低性能机器上使用并且我不满意。 mysql表'Sites'的大小是170MB。 遵循脚本代码。 提前谢谢。
function start() {
try {
global $log;
$db = getConnection();
Zend_Db_Table::setDefaultAdapter($db);
$log->logInfo("logger start");
while (1) {
$sitesTable = new Zend_Db_Table('Sites');
$rowset = $sitesTable->fetchAll();
foreach ($rowset as $row) {
if (time() >= (strtotime($row->lastUpdate) + $row->pollingHours * 60 * 60)) {
db_updateHtml($row);
}
}
}
} catch (Exception $e) {
global $log;
$log->logError($e->getMessage());
}
}
function db_updateHtml($siteRecord) {
try {
if ($siteRecord instanceof Zend_Db_Table_Row) {
$rowwithConnection = $siteRecord;
$url = $siteRecord->url;
$idSite = $siteRecord->idSite;
$crawler = new Crawler();
$sitesTable = new Zend_Db_Table('Sites');
//$rowwithConnection = $sitesTable->fetchRow(
// $sitesTable->select()->where('idSite = ?', $idSite));
$newHtml = HtmlDbEncode($crawler->get_web_page($url));
if (strlen($newHtml) < 10) {
global $log;
$log->logError("Download failed for: url: $url \t idsite: $idSite ");
}
if ($rowwithConnection->isChecked != 0) {
$rowwithConnection->oldHtml = $rowwithConnection->newHtml;
$rowwithConnection->isChecked = 0;
}
$rowwithConnection->newHtml = $crawler->get_web_page($url);
$rowwithConnection->lastUpdate = date("Y-m-d H:i:s");
//$rowwithConnection->diffHtml = getDiff($rowwithConnection->oldHtml, $rowwithConnection->newHtml, false, $rowwithConnection->minLengthChange);
$rowwithConnection->diffHtml = getDiffFromRecord($rowwithConnection, false, $rowwithConnection->minLengthChange);
/* if (strlen($rowwithConnection->diffHtml) > 30) {
$rowwithConnection->lastChanged = $rowwithConnection->lastUpdate;
} */
$rowwithConnection->save();
} else {
$log->logCrit("siteRecord is uninitialized");
}
} catch (Exception $e) {
global $log;
$log->logError($e->getMessage());
}
}
function getDiffFromRecord($row, $force = false, $minLengthChange = 100) {
if ($row instanceof Zend_Db_Table_Row) {
require_once '/var/www/diff/library/finediff.php';
include_once '/var/www/diff/library/Text/Diff.php';
$diff = new AndreaDiff();
$differences = $diff->getDiff($row->oldHtml, $row->newHtml);
if ($diff->isChanged($minLengthChange) || $force) {
$row->lastChanged = $row->lastUpdate;
$row->isChecked = false;
return ($differences);
}
}
return null;
}
function getConnection() {
try {
$pdoParams = array(
PDO::MYSQL_ATTR_USE_BUFFERED_QUERY => true
);
$db = new Zend_Db_Adapter_Pdo_Mysql(array(
'host' => '127.0.0.1',
'username' => 'root',
'password' => 'administrator',
'dbname' => 'diff',
'driver_options' => $pdoParams
));
return $db;
} catch (Exception $e) {
global $log;
$log->logError($e->getMessage());
}
}
答案 0 :(得分:0)
1)尝试使用fetch方法,而不是fetchAll:
foreach($sitesTable->fetch() as $row){
//...
}
2)尝试取消设置存储html代码的所有变量(如果将其保存在内存中),在最后一次迭代中我假设变量$rowwithConnection
内部将包含html代码。
当我想要个人资料php应用程序时,我使用xhprof它会为你节省很多时间。祝你好运!