我有一个脚本,可下载大型产品CSV文件,处理其中的信息(下载图像并调整大小并准备其他数据以便插入数据库),然后创建所有已处理项目的另一个txt文件。问题是它似乎在某处扼杀记忆。我收到错误500,但日志显示内存使用量太大。我尽可能地取消了,我正在使用SPL迭代器,它应该是内存密集度较低的,但我仍然可以使用脚本来完成执行并输入所有信息。任何人都可以在脚本中指出有助于防止内存泄漏的内容吗?
<?php
define('IN_PHPBB', true);
define('IN_SHOP', true);
$phpbb_root_path = './../forum/';
$root_path = './../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);
include($phpbb_root_path.'common.'.$phpEx);
// Start session management
$user->session_begin();
$auth->acl($user->data);
$user->setup();
set_time_limit(172800);
define('THUMBNAIL_IMAGE_MAX_WIDTH', 150);
define('THUMBNAIL_IMAGE_MAX_HEIGHT', 150);
function generate_thumb($source_image_path, $thumbnail_image_path)
{
list($source_image_width, $source_image_height, $source_image_type) = getimagesize($source_image_path);
switch ($source_image_type) {
case IMAGETYPE_GIF:
$source_gd_image = imagecreatefromgif($source_image_path);
break;
case IMAGETYPE_JPEG:
$source_gd_image = imagecreatefromjpeg($source_image_path);
break;
case IMAGETYPE_PNG:
$source_gd_image = imagecreatefrompng($source_image_path);
break;
}
if ($source_gd_image === false) {
return false;
}
$source_aspect_ratio = $source_image_width / $source_image_height;
$thumbnail_aspect_ratio = THUMBNAIL_IMAGE_MAX_WIDTH / THUMBNAIL_IMAGE_MAX_HEIGHT;
if ($source_image_width <= THUMBNAIL_IMAGE_MAX_WIDTH && $source_image_height <= THUMBNAIL_IMAGE_MAX_HEIGHT) {
$thumbnail_image_width = $source_image_width;
$thumbnail_image_height = $source_image_height;
} elseif ($thumbnail_aspect_ratio > $source_aspect_ratio) {
$thumbnail_image_width = (int) (THUMBNAIL_IMAGE_MAX_HEIGHT * $source_aspect_ratio);
$thumbnail_image_height = THUMBNAIL_IMAGE_MAX_HEIGHT;
} else {
$thumbnail_image_width = THUMBNAIL_IMAGE_MAX_WIDTH;
$thumbnail_image_height = (int) (THUMBNAIL_IMAGE_MAX_WIDTH / $source_aspect_ratio);
}
$thumbnail_gd_image = imagecreatetruecolor($thumbnail_image_width, $thumbnail_image_height);
imagecopyresampled($thumbnail_gd_image, $source_gd_image, 0, 0, 0, 0, $thumbnail_image_width, $thumbnail_image_height, $source_image_width, $source_image_height);
imagejpeg($thumbnail_gd_image, $thumbnail_image_path, 90);
imagedestroy($source_gd_image);
imagedestroy($thumbnail_gd_image);
unset($source_image_width, $source_image_height, $source_image_type, $source_gd_image, $source_aspect_ratio, $thumbnail_aspect_ratio, $thumbnail_image_width, $thumbnail_image_height, $thumbnail_gd_image);
return true;
}
$regex = <<<'END'
/
(
(?: [\x00-\x7F] # single-byte sequences 0xxxxxxx
| [\xC0-\xDF][\x80-\xBF] # double-byte sequences 110xxxxx 10xxxxxx
| [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences 1110xxxx 10xxxxxx * 2
| [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
)+ # ...one or more times
)
| ( [\x80-\xBF] ) # invalid byte in range 10000000 - 10111111
| ( [\xC0-\xFF] ) # invalid byte in range 11000000 - 11111111
/x
END;
function utf8replacer($captures) {
if ($captures[1] != "") {
// Valid byte sequence. Return unmodified.
return $captures[1];
}
elseif ($captures[2] != "") {
// Invalid byte of the form 10xxxxxx.
// Encode as 11000010 10xxxxxx.
return "\xC2".$captures[2];
}
else {
// Invalid byte of the form 11xxxxxx.
// Encode as 11000011 10xxxxxx.
return "\xC3".chr(ord($captures[3])-64);
}
}
/* download file from source */
function getDataCSV(){
$thefile = 'http://feeds.cnv.com/xxxxxxxxxxxxxx/Bronze/ELD-B01.csv';
$file = 'ELD-B01.csv';
$fh = fopen($file, "w");
$rows = file($thefile);
foreach($rows as $num => $row){
if($num != 0){
fwrite($fh, $row);
}
}
fclose($fh);
include("DataSource.php");
$csv = new File_CSV_DataSource;
if ($csv->load($file)) {
$items = $csv->getHeaders();
$csv->getColumn($items[2]);
if ($csv->isSymmetric()) {
$items = $csv->connect();
} else {
$items = $csv->getAsymmetricRows();
}
$items = $csv->getrawArray();
}
unset($csv);
return $items;
}
$iter = new ArrayIterator(getDataCSV());
$google_list = array();
$google_list[] = array('id', 'title', 'description', 'google_product_category', 'product_type', 'link', 'image_link', 'condition', 'availability', 'price', 'brand', 'mpn');
$sql = "TRUNCATE TABLE ".SHOP_ITEMS;
$db->sql_query($sql);
foreach($iter as $item){
if($item[12] != ""){
$catName = str_replace(" ", "-", str_replace("and ", "", str_replace(",", "", str_replace("&", "and", str_replace("-", "", $item[12])))));
}else{
$catName = str_replace(" ", "-", str_replace("and ", "", str_replace(",", "", str_replace("&", "and", str_replace("-", "", $item[11])))));
}
$sql = 'SELECT cat_id FROM '.SHOP_CATS.' WHERE cat_clean = "'.$catName.'"';
$result = $db->sql_query($sql);
$row = $db->sql_fetchrow($result);
$db->sql_freeresult($result);
$catId = $row['cat_id'];
$img = $item[9];
$ext = substr($img, strrpos($img, '.') + 1);
$image = 'images/products/'.$item[0].'.'.$ext;
file_put_contents($root_path.$image, file_get_contents($img));
$thumb = "images/products/thumbs/".$item[0]."_thumb.".$ext;
generate_thumb($root_path.$image, $thumb);
$itmRow = array(
'item_name' => str_replace("/", "", preg_replace_callback($regex, "utf8replacer", html_entity_decode(html_entity_decode($item[1], ENT_QUOTES)))),
'item_price' => $item[2],
'item_description' => preg_replace_callback($regex, "utf8replacer", html_entity_decode(html_entity_decode($item[4], ENT_QUOTES))),
'item_model' => $item[0],
'item_manufacturer' => ($item[6] == '') ? 'No Info' : $item[6],
'item_image' => $image,
'item_cat' => ($catId) ? $catId : 0,
'item_number' => $item[0],
'item_vendor_code' => "ELD",
'item_stock' => (strtolower($item[5]) == 'in stock') ? 1 : 0,
'item_added' => strtotime($item[8]),
'item_upc' => ($item[13] == '') ? 'No Info' : $item[13],
'item_url' => '',
'item_weight' => ($item[14] == '') ? 'No Info' : $item[14],
);
$sql = 'INSERT INTO '.SHOP_ITEMS.' '.$db->sql_build_array('INSERT', $itmRow);
$db->sql_query($sql);
$itmId = $db->sql_nextid();
if(strstr($itmRow['item_name'], "-") == FALSE){
$seo = urlencode(str_replace(" ", "-", $itmRow['item_name'])).".html";
}else{
$seo = urlencode(str_replace(" ", "_", $itmRow['item_name'])).".html";
}
if($item[5] == "oos"){
$stock = "Out of Stock";
}else{
$stock = "In Stock";
}
$u_product = "https://therealmsofwickedry.com/product/".$seo;
$google_list[] = array($itmId, $itmRow['item_name'], $itmRow['item_description'], 'Mature > Erotic', $catName, $u_product, "https://therealmsofwickedry.com/".$itmRow['item_image'], "new", $stock, $itmRow['item_price'], $itmRow['item_manufacturer'], $itmRow['item_model']);
unset($catName, $catId, $img, $ext, $image, $thumb, $itmRow, $itmId, $seo, $stock, $u_product);
}
$line = '';
foreach($google_list as $list){
$line .= implode("\t", $list);
$line .= "\n";
}
$google = 'google_products.txt';
$h = fopen($google, "w");
fwrite($h, $line);
fclose($h);
?>
答案 0 :(得分:2)
Tanzeel在假设文件被完全读入内存时是正确的。
以下是逐行读取文件的方法。
$file_handle = fopen($file, 'r');
// You can ignore the file header line if you know the format.
$first_line = fgetcsv($fh);
while ($single_line = fgetcsv($file_handle)) {
print_r($single_line);
}
fclose($single_line);
答案 1 :(得分:1)
我不确定是内存泄漏,它必须是“内存不足”异常。我的猜测是,在阅读大文件时,您的脚本必须死亡。在阅读您的代码时,我发现了以下内容:
$rows = file($thefile);
此代码行会将整个“大文件”读入内存中的数组。第一步应该是确保您的脚本不会因此而死亡。您可以尝试在PHP中使用fopen
和fread
函数来读取字节块并写入目标文件。理想情况下,这应该在读取时占用内存资源。
要诊断getDataCSV()
是否是真正的罪魁祸首,请修改以下行:
$iter = new ArrayIterator(getDataCSV());
在您的代码中:
$iter = new ArrayIterator(getDataCSV());
die('I died after getDataCSV. There is another culprit somewhere else causing the script to fail!');
如果您在浏览器上收到die
消息,那么您应该开始查看代码中可能会破坏脚本的其他位置。
我没有彻底查看您的代码,但您还应该确保在本地处理文件时遵循相同的文件块读取过程。对于例如下载文件后,您将对其进行处理以生成一些数据。您可以使用数组和循环来实现它,但由于要处理的数据很大,您仍然应该处理文件的部分块而不是将其全部转储到内存中。
答案 2 :(得分:0)
原来utf8replacer()
函数是导致问题的原因。感谢您的投入,但是:)