我有一个用希腊语写的新闻网站。
在我的主页中,我必须展示每个故事的标题以及故事内容的一些字符。
我希望在符合我设定的字符数限制的最后一个单词处截断故事内容。
我通过组合使用一些多字节安全的函数实现了这一点,但它的性能开销似乎极大:一个在不截断字符串时不到10ms就能加载的页面,加上截断后要多花大约150ms。
所以我很想知道。有没有办法对下面的函数进行性能调优并保持多字节安全,或者我应该按原样接受它们?
/**
 * Multibyte-safe word cutter: returns the leading part of $string that fits
 * within $width characters, preferring to end on a word boundary.
 *
 * Text that already fits is returned unchanged. Otherwise the text is wrapped
 * with mb_wordwrap() and only the first wrapped line is kept. When that line is
 * empty or still longer than $width, the wrap is repeated with forced cutting
 * so that an over-long first word is chopped at $width.
 *
 * @param string $string UTF-8 text to shorten.
 * @param int    $width  Maximum number of characters to keep.
 * @param bool   $cut    Passed as the $cut flag of the first mb_wordwrap() call.
 * @return string The shortened text; empty when the text starts with a newline.
 */
function cutString( $string , $width = 20 , $cut = false ){
    $length = mb_strlen( $string , "UTF-8" );
    if( $length == 0 || $length <= $width ){
        return $string;
    }

    // Only the first wrapped line is ever used. Given how mb_wordwrap() scans,
    // for a positive integer width that line is decided by the first
    // $width + 1 characters, so wrapping just that prefix yields the same
    // excerpt without walking the whole story character by character.
    $input = $string;
    if( is_int( $width ) && $width > 0 ){
        $input = mb_substr( $string , 0 , $width + 1 , "UTF-8" );
    }

    $wrapped = mb_wordwrap( $input , $width , "\n" , $cut );
    // Offset is an explicit 0: passing NULL for an int parameter is deprecated.
    $breakAt = mb_strpos( $wrapped , "\n" , 0 , "UTF-8" );
    // No line break found means "no first line"; make that explicit instead of
    // relying on false being coerced to a zero length.
    $line = ( $breakAt === false ) ? '' : mb_substr( $wrapped , 0 , $breakAt , "UTF-8" );

    $lineLength = mb_strlen( $line , "UTF-8" );
    if( $lineLength > $width ){
        // First line is a single over-long chunk: force-cut that line.
        $retry = $line;
    }
    elseif( $lineLength == 0 ){
        // Nothing usable came out of the soft wrap: force-cut the input itself.
        $retry = $input;
    }
    else {
        return $line;
    }

    $wrapped = mb_wordwrap( $retry , $width , "\n" , true );
    $breakAt = mb_strpos( $wrapped , "\n" , 0 , "UTF-8" );
    return ( $breakAt === false ) ? '' : mb_substr( $wrapped , 0 , $breakAt , "UTF-8" );
}
/**
 * Multibyte-safe word wrap.
 *
 * Walks $string one character at a time and inserts $break so that lines wrap
 * at $width characters, preferring to break on a space. Occurrences of $break
 * that are already present in the input start a new line.
 *
 * @param string $string  Text to wrap.
 * @param int    $width   Line width, in characters.
 * @param string $break   Line-break sequence to insert.
 * @param bool   $cut     When true, a word longer than $width is split.
 * @param string $charset Character encoding used to measure and slice the text.
 * @return string The wrapped text; '' when $string is empty.
 * @throws Zend_Text_Exception If $break is empty, or if $width is 0 while $cut is true.
 */
function mb_wordwrap($string, $width = 75, $break = "\n", $cut = false, $charset = 'utf-8')
{
    $stringWidth = iconv_strlen($string, $charset);
    $breakWidth  = iconv_strlen($break, $charset);

    if (strlen($string) === 0) {
        return '';
    } elseif ((string) $break === '') {
        // iconv_strlen() yields 0 for an empty break, never null, so the break
        // string itself is tested. Without this guard the loop below matches
        // the empty break at every position and never advances.
        throw new Zend_Text_Exception('Break string cannot be empty');
    } elseif ($width === 0 && $cut) {
        throw new Zend_Text_Exception('Can\'t force cut when width is zero');
    }

    $result    = '';
    $lastStart = $lastSpace = 0;

    for ($current = 0; $current < $stringWidth; $current++) {
        $char = mb_substr($string, $current, 1, $charset);

        // A one-character break can be compared against the current character
        // directly; longer breaks need their own slice.
        if ($breakWidth === 1) {
            $possibleBreak = $char;
        } else {
            $possibleBreak = mb_substr($string, $current, $breakWidth, $charset);
        }

        if ($possibleBreak === $break) {
            // The input already breaks here: copy the line including the break.
            $result   .= mb_substr($string, $lastStart, $current - $lastStart + $breakWidth, $charset);
            $current  += $breakWidth - 1;
            $lastStart = $lastSpace = $current + 1;
        } elseif ($char === ' ') {
            // A space at or beyond the width ends the line right here.
            if ($current - $lastStart >= $width) {
                $result   .= mb_substr($string, $lastStart, $current - $lastStart, $charset) . $break;
                $lastStart = $current + 1;
            }
            $lastSpace = $current;
        } elseif ($current - $lastStart >= $width && $cut && $lastStart >= $lastSpace) {
            // Width reached with no space on this line and cutting allowed: split the word.
            $result   .= mb_substr($string, $lastStart, $current - $lastStart, $charset) . $break;
            $lastStart = $lastSpace = $current;
        } elseif ($current - $lastStart >= $width && $lastStart < $lastSpace) {
            // Width reached and a space was seen on this line: break at that space.
            $result   .= mb_substr($string, $lastStart, $lastSpace - $lastStart, $charset) . $break;
            $lastStart = $lastSpace = $lastSpace + 1;
        }
    }

    // Append whatever remains after the last break.
    if ($lastStart !== $current) {
        $result .= mb_substr($string, $lastStart, $current - $lastStart, $charset);
    }

    return $result;
}
编辑:这是我最终使用的版本:
/**
 * Multibyte-safe excerpt cutter: shortens $string to at most $width characters,
 * ending on a sentence or word boundary where one is available.
 *
 * Text of $width characters or fewer is returned unchanged. Longer text is
 * hard-cut to $width characters and then trimmed back:
 *  - to just after the last '.', when the last space lies fewer than 20
 *    characters past that sentence end;
 *  - otherwise to the last space;
 *  - not at all when the hard-cut text has no space after its first character.
 *
 * @param string $string UTF-8 text to shorten.
 * @param int    $width  Maximum number of characters to keep.
 * @param bool   $cut    Unused; kept so existing callers keep working.
 * @return string The shortened text.
 */
function cutString( $string , $width = 70 , $cut = false ){
    // Compare the full text against the limit so that text of exactly $width
    // characters is recognised as fitting and is not trimmed.
    if( mb_strlen( $string , "UTF-8" ) <= $width ){
        return $string;
    }

    $string = mb_substr( $string , 0 , $width , "UTF-8" );

    // '.' and ' ' have no case, so the case-sensitive search is sufficient.
    $space = mb_strrpos( $string , ' ' , 0 , "UTF-8" );
    if( !$space ){
        // No space, or only a leading one: keep the hard cut.
        return $string;
    }

    $dot = mb_strrpos( $string , '.' , 0 , "UTF-8" );
    // A missing '.' must not be treated as a sentence end at position 1;
    // doing so would reduce the excerpt to its first character.
    if( $dot !== false && ( $space - ( $dot + 1 ) ) < 20 ){
        return mb_substr( $string , 0 , $dot + 1 , "UTF-8" );
    }

    return mb_substr( $string , 0 , $space , "UTF-8" );
}
答案 0 :(得分:1)
//测试页
<?php
// Benchmark driver: builds a long text from 100 copies of one sentence joined
// by '.', cuts it once with cutString(), and prints the elapsed wall-clock time.
$sentence = "ये एक हिन्दी वाक्य है, इसमे बहुत सारे शब्द हैं |";
// 99 copies followed by '.', plus a final copy without the trailing separator.
$longText = str_repeat($sentence . ".", 99) . $sentence;

$startedAt = microtime(true);
echo cutString($longText);
$finishedAt = microtime(true);

$elapsed = $finishedAt - $startedAt;
echo "\nTime taken:" . $elapsed . " s";
/**
 * Multibyte-safe word cutter: returns the leading part of $string that fits
 * within $width characters, preferring to end on a word boundary.
 *
 * Text that already fits is returned unchanged. Otherwise the whole text is
 * wrapped with mb_wordwrap() and only the first wrapped line is kept. When that
 * line is empty or still longer than $width, the wrap is repeated with forced
 * cutting so that an over-long first word is chopped at $width.
 *
 * @param string $string UTF-8 text to shorten.
 * @param int    $width  Maximum number of characters to keep.
 * @param bool   $cut    Passed as the $cut flag of the first mb_wordwrap() call.
 * @return string The shortened text; empty when the text starts with a newline.
 */
function cutString( $string , $width = 20 , $cut = false ){
    $length = mb_strlen( $string , "UTF-8" );
    if( $length == 0 || $length <= $width ){
        return $string;
    }

    $input = $string;

    $wrapped = mb_wordwrap( $input , $width , "\n" , $cut );
    // Offset is an explicit 0: passing NULL for an int parameter is deprecated.
    $breakAt = mb_strpos( $wrapped , "\n" , 0 , "UTF-8" );
    // No line break found means "no first line"; make that explicit instead of
    // relying on false being coerced to a zero length.
    $line = ( $breakAt === false ) ? '' : mb_substr( $wrapped , 0 , $breakAt , "UTF-8" );

    $lineLength = mb_strlen( $line , "UTF-8" );
    if( $lineLength > $width ){
        // First line is a single over-long chunk: force-cut that line.
        $retry = $line;
    }
    elseif( $lineLength == 0 ){
        // Nothing usable came out of the soft wrap: force-cut the input itself.
        $retry = $input;
    }
    else {
        return $line;
    }

    $wrapped = mb_wordwrap( $retry , $width , "\n" , true );
    $breakAt = mb_strpos( $wrapped , "\n" , 0 , "UTF-8" );
    return ( $breakAt === false ) ? '' : mb_substr( $wrapped , 0 , $breakAt , "UTF-8" );
}
/**
 * Multibyte-safe word wrap.
 *
 * Walks $string one character at a time and inserts $break so that lines wrap
 * at $width characters, preferring to break on a space. Occurrences of $break
 * that are already present in the input start a new line.
 *
 * @param string $string  Text to wrap.
 * @param int    $width   Line width, in characters.
 * @param string $break   Line-break sequence to insert.
 * @param bool   $cut     When true, a word longer than $width is split.
 * @param string $charset Character encoding used to measure and slice the text.
 * @return string The wrapped text; '' when $string is empty.
 * @throws Zend_Text_Exception If $break is empty, or if $width is 0 while $cut is true.
 */
function mb_wordwrap($string, $width = 75, $break = "\n", $cut = false, $charset = 'utf-8')
{
    $stringWidth = iconv_strlen($string, $charset);
    $breakWidth  = iconv_strlen($break, $charset);

    if (strlen($string) === 0) {
        return '';
    } elseif ((string) $break === '') {
        // iconv_strlen() yields 0 for an empty break, never null, so the break
        // string itself is tested. Without this guard the loop below matches
        // the empty break at every position and never advances.
        throw new Zend_Text_Exception('Break string cannot be empty');
    } elseif ($width === 0 && $cut) {
        throw new Zend_Text_Exception('Can\'t force cut when width is zero');
    }

    $result    = '';
    $lastStart = $lastSpace = 0;

    for ($current = 0; $current < $stringWidth; $current++) {
        $char = mb_substr($string, $current, 1, $charset);

        // A one-character break can be compared against the current character
        // directly; longer breaks need their own slice.
        if ($breakWidth === 1) {
            $possibleBreak = $char;
        } else {
            $possibleBreak = mb_substr($string, $current, $breakWidth, $charset);
        }

        if ($possibleBreak === $break) {
            // The input already breaks here: copy the line including the break.
            // The encoding argument is the $charset variable, not a bare constant.
            $result   .= mb_substr($string, $lastStart, $current - $lastStart + $breakWidth, $charset);
            $current  += $breakWidth - 1;
            $lastStart = $lastSpace = $current + 1;
        } elseif ($char === ' ') {
            // A space at or beyond the width ends the line right here.
            if ($current - $lastStart >= $width) {
                $result   .= mb_substr($string, $lastStart, $current - $lastStart, $charset) . $break;
                $lastStart = $current + 1;
            }
            $lastSpace = $current;
        } elseif ($current - $lastStart >= $width && $cut && $lastStart >= $lastSpace) {
            // Width reached with no space on this line and cutting allowed: split the word.
            $result   .= mb_substr($string, $lastStart, $current - $lastStart, $charset) . $break;
            $lastStart = $lastSpace = $current;
        } elseif ($current - $lastStart >= $width && $lastStart < $lastSpace) {
            // Width reached and a space was seen on this line: break at that space.
            $result   .= mb_substr($string, $lastStart, $lastSpace - $lastStart, $charset) . $break;
            $lastStart = $lastSpace = $lastSpace + 1;
        }
    }

    // Append whatever remains after the last break.
    if ($lastStart !== $current) {
        $result .= mb_substr($string, $lastStart, $current - $lastStart, $charset);
    }

    return $result;
}
?>
花了大约200毫秒。
性能测试 येएकहिन्दीवाक्य 所用时间:0.23847889900208 s
这是xdebug的分析结果,您可以看到iconv_strlen和mb_substr占用了大部分时间。
与其为了写起来方便而进行这么多次函数调用,不如尽量减少函数调用,并自己编写一些循环。
答案 1 :(得分:1)
目前的实施过于复杂。
如果我理解正确,那么更好的策略是:
这应该能显著提高性能。