使用 Zend Search Lucene 搜索整个产品描述

时间:2017-05-30 09:23:24

标签: php zend-framework lucene zend-lucene

以下是我网站上的搜索引擎代码。目前它只搜索 ProductName 和 ProductNumber。我不知道要让它同时搜索整个 ProductDescription 需要修改什么。这是 Search.php 文件:

 /**
  * Handle of the Lucene index, assigned in the constructor from
  * Zend_Search_Lucene::open() or, when opening fails, Zend_Search_Lucene::create().
  *
  * @var Zend_Search_Lucene_Interface
  */
 protected $_index;

/**
 * URLs that have already been requested by this instance; consulted before
 * every fetch so the same URL is not crawled twice.
 *
 * @var array
 */
protected $_indexed = array();

/**
 * HTTP client used to download the pages that are indexed.
 *
 * @var Zend_Http_Client
 */
protected $_httpClient;

/**
 * Opens the Lucene index stored under "<document root>/../tmp/search",
 * creating it when it cannot be opened, then prepares the HTTP client and
 * installs the case-insensitive UTF-8 analyzer as the default analyzer.
 */
public function __construct()
{
    $indexDir = $_SERVER['DOCUMENT_ROOT'] . '/../tmp/search';

    // realpath() returns false for a directory that does not exist yet.
    // Keep the unresolved path in that case so the create() fallback below
    // still receives a usable directory name instead of false.
    $resolvedDir = realpath($indexDir);
    if ($resolvedDir !== false) {
        $indexDir = $resolvedDir;
    }

    try {
        $this->_index = Zend_Search_Lucene::open($indexDir);
    } catch (Zend_Search_Lucene_Exception $e) {
        // No readable index at that location: start a new one.
        $this->_index = Zend_Search_Lucene::create($indexDir);
    }

    $this->_httpClient = new Zend_Http_Client();
    $this->_httpClient->setConfig(array('timeout' => 10));

    Zend_Search_Lucene_Analysis_Analyzer::setDefault(new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive());
}

/**
 * Indexes a single URL or every URL of a list.
 *
 * @param string|array $url One URL, or an array of URLs; values of any
 *                          other type are ignored.
 * @return void
 */
public function indexUrl($url)
{
    if (is_array($url)) {
        foreach ($url as $singleUrl) {
            $this->_indexUrl($singleUrl);
        }

        return;
    }

    if (!is_string($url)) {
        return;
    }

    $this->_indexUrl($url);
}

/**
 * Starts indexing at the root ("/") of the host serving the current request.
 *
 * @return void
 */
public function indexWholePage()
{
    $this->_indexUrl($this->_getHostName() . '/');
}

/**
 * Downloads a URL, recursively indexes the same-site links found in it and
 * stores the page itself in the Lucene index.
 *
 * A page already present in the index (looked up by its "url" keyword) is
 * left alone when the stored md5 of its body is unchanged; otherwise the
 * stale entry is deleted and the page is added again.
 *
 * @param string $url Absolute URL of the page to index.
 * @return void
 */
protected function _indexUrl($url)
{
    if (in_array($url, $this->_indexed, true)) {
        return;
    }

    $log = Zend_Registry::get('Zend_Log');
    $log->log($url, Zend_Log::NOTICE);

    $this->_httpClient->setUri($url);
    $response = $this->_httpClient->request();

    // Recorded before any link is followed, so pages linking to each other
    // cannot recurse forever.
    $this->_indexed[] = $url;

    if (!$response->isSuccessful()) {
        return;
    }

    $body     = $response->getBody();
    $checksum = md5($body);

    $doc = Zend_Search_Lucene_Document_Html::loadHTML($body, true);

    foreach ($doc->getLinks() as $link) {
        if ($this->_isValidPageLink($link)) {
            // _indexUrl() skips URLs it has already visited, so no extra
            // membership test is needed here.
            $this->_indexUrl($this->_resolveLink($link, $url));
        }
    }

    $term  = new Zend_Search_Lucene_Index_Term($url, 'url');
    $query = new Zend_Search_Lucene_Search_Query_Term($term);
    $hits  = $this->_index->find($query);

    foreach ($hits as $hit) {
        // Strict comparison: with "==" two different digests that both look
        // like "0e<digits>" would compare equal as numbers.
        if ($hit->md5 === $checksum) {
            return;
        }

        $this->_index->delete($hit->id);
    }

    $doc->addField(Zend_Search_Lucene_Field::Keyword('url', $url));
    $doc->addField(Zend_Search_Lucene_Field::UnIndexed('md5', $checksum));

    $this->_index->addDocument($doc);

    $log->log('done', Zend_Log::NOTICE);
}

/**
 * Turns a link taken from a crawled page into an absolute URL on this host.
 *
 * @param string $link    Link as it appears in the page.
 * @param string $pageUrl URL of the page the link was found on.
 * @return string
 */
protected function _resolveLink($link, $pageUrl)
{
    $host       = $this->_getHostName();
    $hostLength = strlen($host);

    // Already absolute on this host: prefixing the host again would produce
    // "http://hosthttp://host/...".
    if (strncmp($link, $host, $hostLength) === 0) {
        $next = (string) substr($link, $hostLength, 1);
        if ($next === '' || $next === '/' || $next === '?' || $next === '#') {
            return $link;
        }
    }

    // A query-only link refers to the current page, not to the site root.
    if (substr($link, 0, 1) === '?') {
        return substr($pageUrl, 0, strcspn($pageUrl, '?#')) . $link;
    }

    return $host . $link;
}

/**
 * Runs a query against the Lucene index.
 *
 * @param mixed $query Query passed unchanged to the index's find() method.
 * @return mixed Whatever find() returns for that query.
 */
public function search($query)
{
    $hits = $this->_index->find($query);

    return $hits;
}

/**
 * Placeholder for removing the search index.
 *
 * NOTE(review): the body is empty, so calling this method currently does
 * nothing -- TODO implement or remove.
 */
public function deleteIndex()
{

}

/**
 * Builds the scheme-and-host prefix ("http://example.com[:port]") of the
 * current request from $_SERVER.
 *
 * @return string Prefix without a trailing slash.
 */
protected function _getHostName()
{
    $host = isset($_SERVER['HTTP_HOST']) ? $_SERVER['HTTP_HOST'] : '';

    // HTTPS is non-empty for secure requests; some servers (IIS/ISAPI) set
    // it to "off" for plain HTTP, and an empty value also means plain HTTP.
    $https = isset($_SERVER['HTTPS']) ? strtolower((string) $_SERVER['HTTPS']) : '';
    $proto = ($https !== '' && $https !== 'off') ? 'https' : 'http';

    $defaultPort = ('https' === $proto) ? 443 : 80;
    $port        = isset($_SERVER['SERVER_PORT']) ? (int) $_SERVER['SERVER_PORT'] : $defaultPort;
    if ($port <= 0) {
        // Missing or unparsable port: assume the scheme's default.
        $port = $defaultPort;
    }

    $uri = $proto . '://' . $host;

    // The Host header already contains a non-default port ("host:8080");
    // appending SERVER_PORT again would yield "host:8080:8080".
    $hostHasPort = (bool) preg_match('/:\d+$/', $host);

    if (!$hostHasPort && $port !== $defaultPort) {
        $uri .= ':' . $port;
    }

    return $uri;
}

/**
 * Decides whether a link found in a page should be crawled.
 *
 * A link qualifies when it points at this host (absolute URL on the host,
 * or a link starting with "/" or "?") and its path does not end in one of
 * the skipped file extensions (jpg, gif, png, pdf, doc, xls).
 *
 * @param string $url Link as it appears in the page.
 * @return bool
 */
protected function _isValidPageLink($url)
{
    $url = (string) $url;

    // Protocol-relative links ("//other.example/x") begin with "/" but
    // point at a different host.
    if (substr($url, 0, 2) === '//') {
        return false;
    }

    $firstChar = substr($url, 0, 1);
    $isLocal   = ($firstChar === '/' || $firstChar === '?');

    if (!$isLocal) {
        $hostName   = $this->_getHostName();
        $hostLength = strlen($hostName);

        if (strncmp($url, $hostName, $hostLength) === 0) {
            // Require a URL boundary after the host so that
            // "http://example.com.evil.tld" is not mistaken for this host.
            $next    = (string) substr($url, $hostLength, 1);
            $isLocal = ($next === '' || $next === '/' || $next === '?' || $next === '#');
        }
    }

    if (!$isLocal) {
        return false;
    }

    // Test the extension on the path only, so "/file.pdf?download=1" is
    // skipped and "/page?file=a.pdf" is not.
    $path = substr($url, 0, strcspn($url, '?#'));

    return !preg_match('#^(.+)\.(jpg|gif|png|pdf|doc|xls)$#i', $path);
}

这是用于生成搜索结果的 PHP 表单页面。我在网上搜索到的 Lucene 实现与这里的代码完全不同。这是我第一次使用 Zend Framework。

 <?php
 /*
  * Search page view: renders the search form and, when a search string is
  * present, the list of matching products.
  *
  * View variables read here: query, searchString, minimumLength, products.
  * Product name, number and link are escaped before output because they
  * are data, not markup.
  */
 ?>
 <form method="get" action="/search.html" class="searchForm" enctype="application/x-www-form-urlencoded" id="searchForm">
  <fieldset>
    <input type="text" id="search_text" name="q" value="<?php echo $this->escape($this->query) ?>"><br>
    <input type="submit" value="search" id="search" name="search">
  </fieldset>
</form>

<h1>Search results</h1>

<?php if (empty($this->searchString)): ?>
  <p><strong>Please write text of minimal length of <?php echo $this->escape($this->minimumLength) ?></strong></p>
<?php else: ?>

<?php if (count($this->products)): ?>

<?php foreach ($this->products as $product): ?>
<?php $link = '/' . $this->permalink($product->product_name) . ',' . $product->product_id . ',' . $product->category_id . ',p.html'; ?>
<div class="productlist clearfix">
  <a href="<?php echo $this->escape($link) ?>" class="clearfix">
    <div class="txt">
      <h2><?php echo $this->escape($product->product_name) ?><?php if (strlen($product->product_number) > 2): ?><small> [ <?php echo $this->escape($product->product_number) ?> ]</small><?php endif; ?></h2>
      <?php /* NOTE(review): the intro is printed unescaped, presumably because it holds trusted HTML markup -- confirm. */ ?>
      <p><?php echo stripslashes($product->product_intro2) ?></p>
    </div>
    <div class="pic">
      <?php if ($product->has_media): ?>
        <?php echo $this->thumb($product->media_src, 110, 110) ?>
      <?php endif; ?>
      <p style="text-align: center;">More</p>
    </div>
  </a>
</div>
<hr/>

<?php endforeach; ?>

<?php else: ?>
<p>0 product was found</p>
<?php endif; ?>

<div style="clear: both;">
<?php /* NOTE(review): echoing the collection itself presumably renders its pagination controls -- confirm. */ ?>
<?php echo $this->products; ?>
</div>



<?php endif ?>

0 个答案:

没有答案