我想在我的php项目中实现一个名为NaiveBayesClassifier的github项目,当我在命令行中运行它并解析2个参数时,我会遇到此错误:
下面是index.php和classifier.php的代码。
index.php文件代码:
ini_set('memory_limit','512M');
require_once 'NaiveBayesClassifier.php';
$nbc = new NaiveBayesClassifier(array(
'store' => array(
'mode' => 'redis',
'db' => array(
'db_host' => '127.0.0.1',
'db_port' => '6379',
'namespace' => 'bids' // Added to differentiate multiple trainsets
)
),
'debug' => FALSE
));
echo "Training started.".PHP_EOL;
$_s = microtime(TRUE);
$urb = mysql_connect('127.0.0.1', 'root', '');
mysql_select_db('tender');
$sql = "SELECT * FROM bids";
$q = mysql_query($sql);
while($row = mysql_fetch_object($q)) {
$nbc->train($row->quotation, $row->reg_number);
}
mysql_close($urb);
$_e = microtime(TRUE);
$_t = $_e - $_s;
echo "Training finished. Took {$_t} seconds.".PHP_EOL;
$_start = 0;
if(!empty($argv) && count($argv) > 1) {
$words = "";
for($i=1, $max=count($argv); $i<$max; $i++) {
$words .= $argv[$i] . " ";
}
echo "Classifier started.".PHP_EOL;
$_start = microtime(TRUE);
$offset = 0;
$row = 10;
$result = $nbc->classify($words, $row, $offset);
var_dump($result);
echo PHP_EOL;
}
else {
die('No arguments passed.'.PHP_EOL);
}
$_end = microtime(TRUE);
echo "Memory Usage: ", memory_get_usage(TRUE)/1024, " KB", PHP_EOL,
"TIME Spent: ", ($_end - $_start), " seconds", PHP_EOL, PHP_EOL;
分类器类的代码
classifier.php代码:
require_once 'NaiveBayesClassifierException.php';
class NaiveBayesClassifier {
private $store;
private $debug = TRUE;
public function __construct($conf = array()) {
if(empty($conf))
throw new NaiveBayesClassifierException(1001);
if(empty($conf['store']))
throw new NaiveBayesClassifierException(1002);
if(empty($conf['store']['mode']))
throw new NaiveBayesClassifierException(1003);
if(empty($conf['store']['db']))
throw new NaiveBayesClassifierException(1004);
if(!empty($conf['debug']) && $conf['debug'] === TRUE)
$this->debug = TRUE;
switch($conf['store']['mode']) {
case 'redis':
require_once 'NaiveBayesClassifierStoreRedis.php';
$this->store = new NaiveBayesClassifierStoreRedis($conf['store']['db']);
break;
}
}
public function train($words, $set) {
$words = $this->cleanKeywords(explode(" ", $words));
foreach($words as $w) {
$this->store->trainTo(html_entity_decode($w), $set);
}
}
public function deTrain($words, $set) {
$words = $this->cleanKeywords(explode(" ", $words));
foreach($words as $w) {
$this->store->deTrainFromSet(html_entity_decode($w), $set);
}
}
public function classify($words, $count = 10, $offset = 0) {
$P = array();
$score = array();
// Break keywords
$keywords = $this->cleanKeywords(explode(" ", $words));
// All sets
$sets = $this->store->getAllSets();
$P['sets'] = array();
// Word counts in sets
$setWordCounts = $this->store->getSetWordCount($sets);
$wordCountFromSet = $this->store->getWordCountFromSet($keywords, $sets);
foreach($sets as $set) {
foreach($keywords as $word) {
$key = "{$word}{$this->store->delimiter}{$set}";
if($wordCountFromSet[$key] > 0)
$P['sets'][$set] += $wordCountFromSet[$key] / $setWordCounts[$set];
}
if(!is_infinite($P['sets'][$set]) && $P['sets'][$set] > 0)
$score[$set] = $P['sets'][$set];
}
arsort($score);
return array_slice($score, $offset, $count-1);
}
public function blacklist($words = array()) {
$clean = array();
if(is_string($words)) {
$clean = array($words);
}
else if(is_array($words)) {
$clean = $words;
}
$clean = $this->cleanKeywords($clean);
foreach($clean as $word) {
$this->store->addToBlacklist($word);
}
}
private function cleanKeywords($kw = array()) {
if(!empty($kw)) {
$ret = array();
foreach($kw as $k) {
$k = strtolower($k);
$k = preg_replace("/[^a-z]/i", "", $k);
if(!empty($k) && strlen($k) > 2) {
$k = strtolower($k);
if(!empty($k))
$ret[] = $k;
}
}
return $ret;
}
}
private function isBlacklisted($word) {
return $this->store->isBlacklisted($word);
}
private function _debug($msg) {
if($this->debug)
echo $msg . PHP_EOL;
}
非常感谢您的帮助