我正在学习如何使用Lucene Java API,并为概念验证设置了一个小例子。每当我发送一个查询时,它会给我一个结果:无论查询是什么,字典中的第一项。
我的词典中有89834个术语,这里是前5个。
Field name: "drug"
"cysteine"
"glycine"
"dihydroxyacetone"
"glycerone"
"arginine"
...
搜索“精氨酸”会返回以下内容:
Found 1 results
"cysteine"
我投入的任何东西都会让半胱氨酸成为唯一的结果。
这是代码。
def luceneTest(cxn: RepositoryConnection)
{
//build the index
val analyzer: Analyzer = new StandardAnalyzer()
val indexPath: Path = Paths.get("lucene/model1")
val directory: Directory = FSDirectory.open(indexPath)
val config: IndexWriterConfig = new IndexWriterConfig(analyzer)
val iwriter: IndexWriter = new IndexWriter(directory, config)
val doc = addCSVtoLuceneIndex("lucene_dictionary.csv")
iwriter.addDocument(doc)
iwriter.close()
//query the index
val ireader: DirectoryReader = DirectoryReader.open(directory)
val isearcher: IndexSearcher = new IndexSearcher(ireader)
val parser: QueryParser = new QueryParser("drug", analyzer)
val query: Query = parser.parse("arginine")
val hits: Array[ScoreDoc] = isearcher.search(query, 10).scoreDocs
logger.info("found " + hits.size + " results.")
for (a <- hits)
{
val hitdoc: Document = isearcher.doc(a.doc)
logger.info(hitdoc.get("drug"))
}
ireader.close()
directory.close()
}
def addCSVtoLuceneIndex(dictionary: String): Document =
{
val doc: Document = new Document()
val br: BufferedReader = new BufferedReader(new FileReader(dictionary))
var index = 1
for (b <- 1 to 83984)
{
var line = br.readLine()
var strArray: Array[String] = line.split(",")
var strToAdd = ""
for (a <- 1 to strArray.length - 1) strToAdd += strArray(a)
doc.add(new Field("drug", strToAdd, TextField.TYPE_STORED))
//logger.info("added " + strToAdd)
}
doc
}
答案 0 :(得分:1)
您在addCSVtoLuceneIndex
中迭代文件,但将所有内容放在一个lucene文档中。假设您希望每行都有文档。
你确定要把文件中的每一行都放在没有逗号的地方吗?