我目前正在寻找一种动态过滤网站内容的解决方案。这里的“动态”是指:我会计算首页全部单词中坏词(例如 shit)所占的百分比。
// Apply every "pattern~replacement" rule found in filters.txt to $input.
// $input is expected to be defined before this snippet runs.
$filename = "filters.txt";

// file() reads the rules in one call, so there is no handle to leak and an
// empty file simply yields an empty list instead of a zero-length fread().
$lines = is_readable($filename)
    ? file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;

if ($lines !== false) {
    foreach ($lines as $line) {
        // split() was removed in PHP 7; "~" is a literal separator, so
        // explode() is the direct replacement.
        $parts = explode("~", $line);
        if (count($parts) < 2) {
            // Not a "pattern~replacement" rule; nothing to apply.
            continue;
        }
        list($before, $after) = $parts;

        // $before is passed to preg_replace() as-is, so each rule must be a
        // complete regular expression including its delimiters.
        $filtered = preg_replace($before, $after, $input);
        if ($filtered !== null) {
            // preg_replace() returns null on a bad pattern; keep the
            // previous text rather than wiping $input.
            $input = $filtered;
        }
    }
}
* filters.txt 包含坏词列表
Thanx Erisco!
/**
 * Fetch the resource at the given URL with cURL and return its body.
 *
 * @param string $url Address to download.
 * @return string The response body, or an empty string if the transfer fails.
 */
function get_content($url)
{
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // Make curl_exec() return the body instead of echoing it. Without this
    // (and with no output buffer started) the page would be printed straight
    // to the client and nothing could be captured.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $string = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; callers expect a string.
    return $string === false ? '' : $string;
}
/* $toLoad is from Browse.php */
$sourceOfWebpage = (string) get_content($toLoad);
$textOfWebpage = strip_tags($sourceOfWebpage);

/* array: bad words read from filters.txt, one entry per line */
$filename = "filters.txt";
$badWords = array();
$lines = is_readable($filename)
    ? file($filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;
if ($lines !== false) {
    foreach ($lines as $line) {
        // Keep only the text before the first "~" as the word to look for.
        // NOTE(review): assumes entries are plain words (optionally followed
        // by "~replacement"), not regex patterns -- confirm against the real
        // filters.txt.
        $parts = explode("~", $line);
        $word = trim($parts[0]);
        if ($word !== '') {
            $badWords[] = $word;
        }
    }
}

/* float: Some decimal value */
$allowedBadWordsPercent = 0.30;

$numberOfWords = str_word_count($textOfWebpage);
$numberOfBadWords = 0;
// Only the fourth (by-reference) argument matters here: it receives the
// number of replacements, i.e. the number of bad-word occurrences. Count in
// the tag-stripped text so numerator and denominator describe the same text.
// str_ireplace() matches substrings, so a bad word embedded in a longer word
// is counted as well.
str_ireplace($badWords, '', $textOfWebpage, $numberOfBadWords);

// Fraction of bad words = bad words / all words. The divisor is the total
// word count, so that is the value that must be checked against zero.
if ($numberOfWords > 0) {
    $badWordsPercent = $numberOfBadWords / $numberOfWords;
} else {
    $badWordsPercent = 0;
}

if ($badWordsPercent > $allowedBadWordsPercent) {
    echo 'This is a naughty webpage';
}
答案 0 :(得分:1)
/* string: $sourceOfWebpage, obtained by CURL or similar */
$textOfWebpage = strip_tags($sourceOfWebpage);
/* array: $badWords, obtained by your filter.txt file */
/* float: Some decimal value */
$allowedBadWordsPercent = 0.30;

$numberOfWords = str_word_count($textOfWebpage);
$numberOfBadWords = 0;
// The replaced string is discarded; the call is used only for its
// by-reference counter of how many bad-word occurrences were found. Count in
// the tag-stripped text so it matches what str_word_count() measured.
str_ireplace($badWords, '', $textOfWebpage, $numberOfBadWords);

// Fraction of bad words = bad words / all words. Guard the divisor (the
// total word count) so an empty page cannot divide by zero.
if ($numberOfWords > 0) {
    $badWordsPercent = $numberOfBadWords / $numberOfWords;
} else {
    $badWordsPercent = 0;
}

if ($badWordsPercent > $allowedBadWordsPercent) {
    echo 'This is a naughty webpage';
}