Postgres全文搜索与近运算符LIKE oracle上下文索引

时间:2013-06-19 11:50:02

标签: oracle postgresql full-text-search

在Oracle中,我可以在Clob中搜索“NEAR((a,b,c),5)”之类的查询。这是来自oracle的文档:

  

使用NEAR运算符根据两个或多个查询字词的接近度返回分数。对于较接近的术语,Oracle Text会返回较高的分数,而对于文档中较远的术语,Oracle Text会返回较低的分数。

我怎样才能在Postgres中做到这一点?我只需要一个可以在另一个单词附近搜索单词的索引。

1 个答案:

答案 0 :(得分:1)

这是汉明距离函数http://en.wikipedia.org/wiki/Hamming_distance

标头文件

/**


    @file HammingDistance.h A C header file to compute the Hamming Distance between two strings
            as PostgreSQl C User Defined Function
*/
#ifndef HAMMINGDISTANCE_H_INCLUDED
#define HAMMINGDISTANCE_H_INCLUDED

DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS);
DLLEXPORT Datum Distance(PG_FUNCTION_ARGS);

#endif // HAMMINGDISTANCE_H_INCLUDED

源文件

/**


    @file HammingDistance.c A C source file to compute the Hamming Distance between two strings
            as PostgreSQl C User Defined Function
*/

#include <stdio.h>
#include <stdint.h>
#include <string.h>

#include "postgres.h"
#include "utils/geo_decls.h"
#include "utils/builtins.h"
#include "catalog/pg_type.h"
#include "funcapi.h"

#define VC_EXTRALEAN

#pragma warning (disable : 4996)

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

#ifdef _WIN32
#define DLLEXPORT _declspsec(dllexport)
#else
#define DLLEXPORT
#endif // _WIN32

static  int32_t DistanceString( unsigned char * a,  int32_t la , unsigned char * b, int32_t lb);
static  int32_t DistanceUChar( unsigned char  a,  unsigned char b);


PG_FUNCTION_INFO_V1(Distance);
/**
    DistanceCstring An SQL function to Compute Hamming Distance between two text types
    @param[in]   A a Text type;
    @param[in]   B a text type
    @return     The hamming Distance between two Text types
**/

DLLEXPORT Datum Distance(PG_FUNCTION_ARGS)
{
    text *      a   = PG_GETARG_TEXT_PP(0);
    text *      b   = PG_GETARG_TEXT_PP(1);
    unsigned char *      ac;
    unsigned char *      bc;
    int32_t     distance = 0;

    ac = text_to_cstring( a );
    bc = text_to_cstring( b );

    distance = DistanceString( ac, strlen(ac), bc, strlen(bc) );

    PG_RETURN_INT32(distance);

}

PG_FUNCTION_INFO_V1(DistanceCstring);
/**
    DistanceCstring An SQL function to Compute Hamming Distance between two strings
    @param[in]   A a Cstring type
    @param[in]   B a Cstring type
    @return     The hamming Distance between two Cstring types
**/

DLLEXPORT Datum DistanceCstring(PG_FUNCTION_ARGS)
{
    unsigned char *      ac = (unsigned char *) PG_GETARG_CSTRING(0);
    unsigned char *      bc = (unsigned char *) PG_GETARG_CSTRING(1);
    int32_t     distance = 0;

    distance = DistanceString( ac, strlen(ac), bc, strlen(bc) );

    PG_RETURN_INT32(distance);

}
/**
    DistanceString  Compute Hamming Distance between two unsigned char strings
    @param[in]      a an unsigned char array
    @param[in]      la  length of a in char
    @param[in]      b an unsigned char array
    @param[in]      lb length of b  in char

    @return         Hamming distance
**/

static  int32_t DistanceString( unsigned char * a,  int32_t la , unsigned char * b, int32_t lb)
{
    unsigned char * smaller;
    unsigned char * larger;
    int i = 0;
    int length = 0;
    int32_t distance = 0;
    int delta = 0;

    if ( lb > la )
    {
        delta = lb - la;
        length = la;
        smaller = a;
        larger = b;
    }
    else
    {
        delta = la - lb;
        length = lb;
        smaller = b;
        larger  = a;
    }

    for( i = 0; i < length; i++ )
    {
        distance += DistanceUChar( * smaller++, * larger++);
    }

    for( i = 0; i < delta ; i++ )
    {
        distance += DistanceUChar( 0, * larger++);
    }

     return distance;
}
/**
    DistanceUChar  Compute Hamming Distance between two unsigned chars
    @param[in]      a unsigned char
    @param[in]      b unsigned char

    @return         Hamming Distance between two unsigned chars
**/
static  int32_t DistanceUChar( unsigned char  a,  unsigned char b)
{
    unsigned char x = a ^ b;
    int32_t     distance = 0;

    if ( (x & 0x1 )== 0x1 )
        distance++;
    if ( (x & 0x2) == 0x2 )
        distance++;
    if ( (x & 0x4) == 0x4 )
        distance++;
    if ( (x & 0x8) == 0x8 )
        distance++;
    if ( x & 0x10 == 0x10 )
        distance++;
    if ( (x & 0x20) == 0x20 )
        distance++;
    if ( (x & 0x40) == 0x40 )
        distance++;
    if ( (x & 0x80) == 0x80 )
        distance++;

    return distance;

}

生成文件

OPTS        := -g -fpic -c -I /opt/PostgreSQL/9.1/include/postgresql/server
INSTALLDIR  := /opt/PostgreSQL/9.1/lib/postgresql

all: HammingDistance.so

HammingDistance.so: HammingDistance.c HammingDistance.h
    gcc HammingDistance.c $(OPTS) -o HammingDistance.o
    gcc -shared -o HammingDistance.so HammingDistance.o

clean:
    rm -rf *.o *.so

register:
    psql -f install.sql -p 5433  -U postgres -d postgres ;

install:
    sudo cp  HammingDistance.so $(INSTALLDIR);


test:
    psql -f Test.sql -p 5433  -U postgres -d postgres ;

安装SQL

-- define the schema
set search_path to public;

-- Remove existing function

drop function if exists Distance( text, text ) cascade;
drop function if exists Distance( cstring , cstring ) cascade;

-- Create the new one
create or replace function Distance( text, text )  returns integer
 as '$libdir/HammingDistance',  'Distance'
 language c strict;

 create or replace function Distance( cstring, cstring )  returns integer
 as '$libdir/HammingDistance',  'DistanceCstring'
 language c strict