使用Scala在Lucene 6.2中使用Thread进行搜索

时间:2016-09-12 00:59:36

标签: scala lucene

我正在尝试使用Lucene 6.2为来自MySQL的数据建立索引(在Scala中通过Slick读取数据)。代码如下:

package oc.api.services

/**
  * Created by sujit on 9/7/16.
  */
import org.apache.lucene.document._
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.index._
import org.apache.lucene.search.IndexSearcher
import java.io.{File, IOException}
import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.event.{Logging, LoggingAdapter}
import akka.stream.ActorMaterializer
import oc.api.utils.{Config, DatabaseService}
import org.apache.lucene.analysis.core.KeywordAnalyzer
import org.apache.lucene.index.IndexWriterConfig.OpenMode
import org.apache.lucene.queryparser.classic.{MultiFieldQueryParser, QueryParser}
import org.apache.lucene.store.FSDirectory

import scala.concurrent.ExecutionContext

/**
  * Builds and queries a Lucene index of the notes returned by `NotesService`.
  *
  * The index lives in a fixed directory on disk; `setIndex` rewrites it from
  * scratch and `search` prints the matching documents to standard output.
  */
class Indexer extends Config {
  implicit val actorSystem = ActorSystem()
  implicit val executor: ExecutionContext = actorSystem.dispatcher
  implicit val log: LoggingAdapter = Logging(actorSystem, getClass)
  implicit val materializer: ActorMaterializer = ActorMaterializer()

  val databaseService = new DatabaseService(jdbcUrl, dbUser, dbPassword)

  val notesService = new NotesService(databaseService)

  // Single definition of the on-disk index location shared by indexing and search.
  private val luceneIndexPath = Paths.get("/var/www/html/LuceneIndex")

  /**
    * Recreates the index (OpenMode.CREATE) from every note returned by
    * `notesService.getNotes()`.
    *
    * All documents are added first and committed once; the writer and the
    * directory are closed when the returned future completes, whether it
    * succeeded or failed, so the index write lock is always released.
    *
    * @return a future that completes once all notes have been added and committed
    */
  def setIndex = {
    val writerConfig = new IndexWriterConfig(new StandardAnalyzer())
    writerConfig.setOpenMode(OpenMode.CREATE)
    writerConfig.setRAMBufferSizeMB(500)
    val directory = FSDirectory.open(luceneIndexPath)
    val writer = new IndexWriter(directory, writerConfig)

    notesService.getNotes()
      .map { notes =>
        notes.foreach { note =>
          val doc = new Document()
          // NOTE(review): the "id" field is populated from note.title, exactly as
          // before; this looks like a copy-paste slip - confirm which value the
          // id field is supposed to hold.
          doc.add(new TextField("id", note.title, Field.Store.YES))
          doc.add(new TextField("title", note.title, Field.Store.YES))
          doc.add(new TextField("teaser", note.teaser, Field.Store.YES))
          doc.add(new TextField("description", note.description, Field.Store.YES))
          writer.addDocument(doc)
        }
        // One commit for the whole batch instead of one per document.
        writer.commit()
        ()
      }
      .andThen { case _ =>
        // Runs on success and on failure: release the write lock and file handles.
        writer.close()
        directory.close()
      }
  }

  /**
    * Parses `keyword` with a multi-field query parser over the "title",
    * "teaser" and "description" fields and prints up to 500 hits, followed by
    * the total hit count, to standard output.
    *
    * The reader and the directory are closed before returning, even when
    * parsing or searching throws.
    *
    * @param keyword query text in Lucene classic query-parser syntax
    */
  def search(keyword: String) = {
    val directory = FSDirectory.open(luceneIndexPath)
    try {
      val directoryReader = DirectoryReader.open(directory)
      try {
        val searcher = new IndexSearcher(directoryReader)
        val fieldsToSearch = Array("title", "teaser", "description")

        val mqp = new MultiFieldQueryParser(fieldsToSearch, new StandardAnalyzer())
        val query = mqp.parse(keyword)

        val hits = searcher.search(query, 500)
        hits.scoreDocs.foreach { scoreDoc =>
          val doc = searcher.doc(scoreDoc.doc)
          println("***** Document Found: ")
          println("***** Title: ")
          println(doc.get("title"))
          println("***** Teaser: ")
          println(doc.get("teaser"))
          println("***** Description: ")
          println(doc.get("description"))
        }
        println("****** Results Found: " + hits.totalHits)
      } finally {
        directoryReader.close()
      }
    } finally {
      directory.close()
    }
  }

}

/**
  * Command-line entry point: runs a sample search against the existing index.
  *
  * Uses an explicit `main` instead of the `App` trait to avoid its
  * delayed-initialization pitfalls.
  */
object Indexer {
  def main(args: Array[String]): Unit = {
    val index = new Indexer
    try {
      // If indexing is re-enabled, wait for the future returned by setIndex
      // to complete before the actor system is terminated below.
      //index.setIndex
      index.search("Donec")
    } finally {
      // The actor system created by Indexer would otherwise stay running
      // after the search has finished.
      index.actorSystem.terminate()
    }
  }
}

setIndex函数能在指定的路径下按预期创建索引。但是当我按关键字搜索该索引时,返回的结果数为0。search函数有什么问题吗?该如何解决?

另外,如何改写上述代码,使用线程(Thread)来优化索引的创建?

1 个答案:

答案 0 :(得分:1)

经过长时间的研究,我终于找到了答案:

使用线程:

<textarea>

使用Scala Future:

/**
  * Indexes all notes in parallel by splitting them into at most one chunk per
  * available processor and starting one `Index` worker per chunk, all sharing
  * a single `IndexWriter`.
  *
  * Blocks until the notes have been loaded, then returns as soon as the
  * workers have been started. Does nothing when there are no notes.
  */
def setI = {
    val numThreads = Runtime.getRuntime().availableProcessors()
    // Load the notes once and reuse the result.
    val notes = Await.result(notesService.getNotes(), Duration.Inf)

    if (notes.nonEmpty) {
      // Ceiling division: at least 1, and never yields more than numThreads chunks.
      val chunkSize = (notes.length + numThreads - 1) / numThreads
      val chunks = notes.grouped(chunkSize).toVector

      val IndexStoreDir = Paths.get("/var/www/html/LuceneIndex")
      val analyzer = new StandardAnalyzer()
      val writerConfig = new IndexWriterConfig(analyzer)
      writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND)
      writerConfig.setRAMBufferSizeMB(500)
        .setMaxBufferedDocs(10)
        .setMergeScheduler(new ConcurrentMergeScheduler())
      val directory = FSDirectory.open(IndexStoreDir)
      // NOTE(review): the writer is not closed here; presumably the Index
      // workers commit/close it themselves - confirm against the Index class.
      val writer = new IndexWriter(directory, writerConfig)

      // Exactly one worker per chunk, so every entry is initialised before start().
      val threads = chunks.zipWithIndex.map { case (chunk, i) =>
        new Index(chunk, writer, i)
      }

      threads.zipWithIndex.foreach { case (thread, i) =>
        println("Thread :" + thread.getName + " => " + (i + 1) + " Started!")
        thread.start()
      }
    }
  }