在现有的php项目中实现php NaiveBayesClassier

时间:2019-01-24 18:10:51

标签: php

我想在我的php项目中实现一个名为NaiveBayesClassifier的github项目,当我在命令行中运行它并解析2个参数时,我会遇到此错误:

Error from command line

下面是index.php和classifier.php的代码。

index.php文件代码:

ini_set('memory_limit','512M');

require_once 'NaiveBayesClassifier.php';

$nbc = new NaiveBayesClassifier(array(
    'store' => array(
        'mode'  => 'redis',
        'db'    => array(
            'db_host'   => '127.0.0.1',
            'db_port'   => '6379',
            'namespace' => 'bids'   // Added to differentiate multiple trainsets
        )
    ),
    'debug' => FALSE
));

echo "Training started.".PHP_EOL;
$_s = microtime(TRUE);

$urb = mysql_connect('127.0.0.1', 'root', '');
mysql_select_db('tender');

$sql = "SELECT * FROM bids";
$q = mysql_query($sql);
while($row = mysql_fetch_object($q)) {
    $nbc->train($row->quotation, $row->reg_number);
}

mysql_close($urb);

$_e = microtime(TRUE);
$_t = $_e - $_s;
echo "Training finished. Took {$_t} seconds.".PHP_EOL;

$_start = 0;
if(!empty($argv) && count($argv) > 1) {
    $words = "";
    for($i=1, $max=count($argv); $i<$max; $i++) {
        $words .= $argv[$i] . " ";
    }
    echo "Classifier started.".PHP_EOL;
    $_start = microtime(TRUE);

    $offset = 0;
    $row = 10;
    $result = $nbc->classify($words, $row, $offset);

    var_dump($result);
    echo PHP_EOL;
}
else {
    die('No arguments passed.'.PHP_EOL);
}

$_end = microtime(TRUE);
echo    "Memory Usage: ", memory_get_usage(TRUE)/1024, " KB", PHP_EOL,
    "TIME Spent: ", ($_end - $_start), " seconds", PHP_EOL, PHP_EOL;

分类器类的代码

classifier.php代码:

require_once 'NaiveBayesClassifierException.php';

class NaiveBayesClassifier {

    private $store;
    private $debug = TRUE;

    public function __construct($conf = array()) {
        if(empty($conf))
            throw new NaiveBayesClassifierException(1001);
        if(empty($conf['store']))
            throw new NaiveBayesClassifierException(1002);
        if(empty($conf['store']['mode']))
            throw new NaiveBayesClassifierException(1003);
        if(empty($conf['store']['db']))
            throw new NaiveBayesClassifierException(1004);

        if(!empty($conf['debug']) && $conf['debug'] === TRUE)
            $this->debug = TRUE;

        switch($conf['store']['mode']) {
            case 'redis':
                require_once 'NaiveBayesClassifierStoreRedis.php';
                $this->store = new NaiveBayesClassifierStoreRedis($conf['store']['db']);
                break;
        }
    }

    public function train($words, $set) {
        $words = $this->cleanKeywords(explode(" ", $words));
        foreach($words as $w) {
            $this->store->trainTo(html_entity_decode($w), $set);
        }
    }

    public function deTrain($words, $set) {
        $words = $this->cleanKeywords(explode(" ", $words));
        foreach($words as $w) {
            $this->store->deTrainFromSet(html_entity_decode($w), $set);
        }
    }

    public function classify($words, $count = 10, $offset = 0) {
        $P = array();
        $score = array();

        // Break keywords
        $keywords = $this->cleanKeywords(explode(" ", $words));

        // All sets
        $sets = $this->store->getAllSets();
        $P['sets'] = array();

        // Word counts in sets
        $setWordCounts = $this->store->getSetWordCount($sets);
        $wordCountFromSet = $this->store->getWordCountFromSet($keywords, $sets);

        foreach($sets as $set) {
            foreach($keywords as $word) {
                $key = "{$word}{$this->store->delimiter}{$set}";
                if($wordCountFromSet[$key] > 0)
                    $P['sets'][$set] += $wordCountFromSet[$key] / $setWordCounts[$set];
            }

            if(!is_infinite($P['sets'][$set]) && $P['sets'][$set] > 0)
                $score[$set] = $P['sets'][$set];
        }

        arsort($score);

        return array_slice($score, $offset, $count-1);
    }

    public function blacklist($words = array()) {
        $clean = array();
        if(is_string($words)) {
            $clean = array($words);
        }
        else if(is_array($words)) {
            $clean = $words;
        }
        $clean = $this->cleanKeywords($clean);

        foreach($clean as $word) {
            $this->store->addToBlacklist($word);
        }
    }

    private function cleanKeywords($kw = array()) {
        if(!empty($kw)) {
            $ret = array();
            foreach($kw as $k) {
                $k = strtolower($k);
                $k = preg_replace("/[^a-z]/i", "", $k);

                if(!empty($k) && strlen($k) > 2) {
                    $k = strtolower($k);
                    if(!empty($k))
                        $ret[] = $k;
                }
            }
            return $ret;
        }
    }

    private function isBlacklisted($word) {
        return $this->store->isBlacklisted($word);
    }

    private function _debug($msg) {
        if($this->debug)
            echo $msg . PHP_EOL;
    }

非常感谢您的帮助Table bids

0 个答案:

没有答案