Lucene,还是SQL全文搜索?

时间:2009-10-02 18:13:58

标签: sql full-text-search lucene

我想创建一个搜索网站来搜索文档(包括pdf在内的各种格式),图像,视频和音频。我还希望能够根据作者姓名,日期等一些标准来过滤我的搜索结果。

我使用.NET进行开发,那么最简单的上手方式是什么?SQL全文搜索看起来很诱人:一方面我熟悉SQL,另一方面我需要过滤搜索结果,而用它可以很方便地为每个条目存储过滤字段。

2 个答案:

答案 0 :(得分:4)

如果您主要关注的是快速轻松地启动和运行,那么SQL全文搜索绝对是您的选择。

Lucene.NET有其优点,但要把它正确地配置好绝非易事。它的文档有些欠缺,网上的例子也非常有限。

答案 1 :(得分:0)

片段的存储过程:

-- Full-text search with hit highlighting over dbo.Commentary (SQL Server).
--
-- Finds the commentaries that match @SearchTerm via FREETEXT and returns one
-- row per (Commentary_ID, Title) containing an excerpt of the commentary text
-- in which a matched term is wrapped in <span style="@Style">...</span>.
--
-- Parameters:
--   @SearchTerm  Free-text search phrase. It is also concatenated into the
--                sys.dm_fts_parser query string below.
--   @Style       Text placed verbatim inside the style attribute of the
--                highlighting <span>.
--
-- Result set: Commentary_ID, Title, Snippet.
--
-- NOTE(review): three lines of this procedure were cut off in the copy this
-- was restored from (PRIMARY KEY, and both OBJECT_ID(...) calls). The table
-- name N'dbo.Commentary' is reconstructed from the joins further down;
-- confirm against the real schema. The procedure name itself may be truncated
-- as well; it is kept as found so existing callers are unaffected.
CREATE PROCEDURE SimpleCommentar
  @SearchTerm nvarchar(100),
  @Style nvarchar(200)
AS
BEGIN
  -- Ids of every commentary that the full-text engine considers a match.
  CREATE TABLE #match_docs
  (
    doc_id bigint NOT NULL PRIMARY KEY
  );

  INSERT INTO #match_docs
  (
    doc_id
  )
  SELECT DISTINCT
    Commentary_ID
  FROM dbo.Commentary
  WHERE FREETEXT
  (
    Commentary,
    @SearchTerm,
    LANGUAGE N'English'
  );

  -- Identifiers needed to read the full-text index DMV for the
  -- Commentary.Commentary column.
  DECLARE @db_id int = DB_ID(),
    @table_id int = OBJECT_ID(N'dbo.Commentary'),
    @column_id int =
    (
      SELECT
        column_id
      FROM sys.columns
      WHERE object_id = OBJECT_ID(N'dbo.Commentary')
        AND name = N'Commentary'
    );

  SELECT
    s.Commentary_ID,
    t.Title,
    -- Several matched terms can yield several candidate snippets for the same
    -- commentary; MIN keeps exactly one of them per group.
    MIN
    (
      N'...' + SUBSTRING
      (
        REPLACE
          (
            c.Commentary,
            s.Display_Term,
            N'<span style="' + @Style + '">' + s.Display_Term + '</span>'
          ),
        -- NOTE(review): Pos is measured in the original text, while the
        -- window is cut from the text after markup was inserted, so the
        -- excerpt may be offset when earlier occurrences were highlighted.
        s.Pos - 512,
        s.Length + 1024
      ) + N'...'
    ) AS Snippet
  FROM
    (
      -- One row per (commentary, matched index keyword) with the position of
      -- the keyword's first stand-alone occurrence in the text.
      SELECT DISTINCT
        c.Commentary_ID,
        w.Display_Term,
        PATINDEX
          (
            -- Term must be delimited by a non-letter on both sides.
            N'%[^a-z]' + w.Display_Term + N'[^a-z]%',
            c.Commentary
          ) AS Pos,
        LEN(w.Display_Term) AS Length
      FROM sys.dm_fts_index_keywords_by_document
        (
          @db_id,
          @table_id
        ) w
      INNER JOIN dbo.Commentary c
        ON w.document_id = c.Commentary_ID
      WHERE w.column_id = @column_id
        -- Only documents that matched the FREETEXT search.
        AND EXISTS
          (
            SELECT 1
            FROM #match_docs m
            WHERE m.doc_id = w.document_id
          )
        -- Only keywords that the full-text parser derives from the search
        -- term (the term itself plus its generated forms).
        AND EXISTS
          (
            SELECT 1
            FROM sys.dm_fts_parser
              (
                N'FORMSOF(FREETEXT, "' + @SearchTerm + N'")',
                1033,  -- LCID for English (US), matching LANGUAGE N'English'
                0,     -- stoplist id
                1      -- accent sensitivity flag
              ) p
            WHERE p.Display_Term = w.Display_Term
          )
    ) s
  INNER JOIN dbo.Commentary c
    ON s.Commentary_ID = c.Commentary_ID
  INNER JOIN dbo.Book_Commentary bc
    ON c.Commentary_ID = bc.Commentary_ID
  INNER JOIN dbo.Book_Title bt
    ON bc.Book_ID = bt.Book_ID
  INNER JOIN dbo.Title t
    ON bt.Title_ID = t.Title_ID
  WHERE t.Is_Primary_Title = 1
  GROUP BY
    s.Commentary_ID,
    t.Title;

  DROP TABLE #match_docs;
END;