postgresql C输入/输出函数奇怪的行为

时间:2016-04-06 16:47:24

标签: c database postgresql

最近我一直在使用postgresql及其在C中的自定义用户定义类型。

我在为用户定义类型 EmailAddress 实现输入/输出函数时,遇到了下面这种奇怪的行为:

如果我使用 psql 的 -f 选项从文件插入数据,用户名列中的电子邮件地址都会显示错乱。

enter image description here

但是如果我在 psql [dbname] 中使用 \i data.sql 命令从文件插入数据,就会得到下面的结果:所有电子邮件都正确显示。 enter image description here

--- ------ email.c

/* Module magic marker for this loadable module. */
PG_MODULE_MAGIC;
/*
 * In-memory representation of an e-mail address, split into its two parts.
 *
 * Both members are pointers, so the struct itself holds only two addresses;
 * the string bytes live in separately allocated buffers.
 * NOTE(review): the struct tag is spelled "EmailAdress" while the typedef is
 * "EmailAddress" - presumably a typo; confirm nothing depends on the tag.
 */
typedef struct EmailAdress {
    char* domain;   /* part after the separator */
    char* local;    /* part before the separator */
} EmailAddress;

PG_FUNCTION_INFO_V1(email_in);
Datum email_in(PG_FUNCTION_ARGS) {
    // Get arg c string
    char* pStr = palloc(strlen(PG_GETARG_CSTRING(0)));
    strcpy(pStr, PG_GETARG_CSTRING(0));

    // Convert to connical form
    int i;
    for (i = 0; pStr[i]; i++) {
        pStr[i] = tolower(pStr[i]);
    }

    EmailAddress* pEmail = (EmailAddress*) palloc(sizeof(EmailAddress));

    char* pToken;
    pToken = strtok(pStr, AT);
    pEmail->local = malloc(strlen(pToken) + 1);
    strcpy(pEmail->local, pToken);

    pToken = strtok(NULL, AT);
    pEmail->domain = malloc(strlen(pToken) + 1);
    strcpy(pEmail->domain, pToken);

    pfree(pStr);

    if (!valid(pEmail)) {
        ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid EmailAddress: \"%s@%s\" - pointer: %p", pEmail->local, pEmail->domain, pEmail)));
    }

    PG_RETURN_POINTER(pEmail);
}


PG_FUNCTION_INFO_V1(email_out);
/*
 * email_out - output function for the EmailAddress type.
 *
 * Formats argument 0 as "<local>@<domain>" with psprintf() and returns the
 * resulting C string.
 */
Datum email_out(PG_FUNCTION_ARGS) {
    EmailAddress* addr = (EmailAddress *) PG_GETARG_POINTER(0);

    PG_RETURN_CSTRING(psprintf("%s@%s", addr->local, addr->domain));
}

---- ---- email.source

-- Input function: takes the textual form (cstring) and returns an
-- EmailAddress. Implemented in C in the 'email' object file.
-- NOTE(review): _OBJWD_ looks like a placeholder replaced with the object
-- directory at build time - confirm against the build script.
CREATE FUNCTION email_in(cstring)
    RETURNS EmailAddress
    AS '_OBJWD_/email'
    LANGUAGE C IMMUTABLE STRICT;

-- Output function: takes an EmailAddress and returns its textual form.
CREATE FUNCTION email_out(EmailAddress)
    RETURNS cstring
    AS '_OBJWD_/email'
    LANGUAGE C IMMUTABLE STRICT;

-- The type itself: fixed length of 16 bytes, using the two I/O functions
-- declared above.
-- NOTE(review): 16 presumably equals sizeof(EmailAddress), i.e. two 8-byte
-- pointers - confirm for the target platform.
CREATE TYPE EmailAddress (
    internallength   = 16,
    input            = email_in,
    output           = email_out,
    alignment        = double
);


-- Boolean comparison functions on two EmailAddress values, all implemented
-- in C in the 'email' object file. They back the operators defined below:
-- eq/neq/gt/ge/lt/le for =, <>, >, >=, <, <= and deq/ndeq for ~ and !~.

-- Backs the = operator.
CREATE FUNCTION email_eq(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the <> operator.
CREATE FUNCTION email_neq(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the > operator.
CREATE FUNCTION email_gt(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the >= operator.
CREATE FUNCTION email_ge(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the < operator.
CREATE FUNCTION email_lt(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the <= operator.
CREATE FUNCTION email_le(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the ~ operator.
-- NOTE(review): presumably "domain equal" - confirm in email.c.
CREATE FUNCTION email_deq(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Backs the !~ operator (declared as the negator of ~).
CREATE FUNCTION email_ndeq(EmailAddress, EmailAddress) RETURNS bool
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Operators on (EmailAddress, EmailAddress). Each one names the C-backed
-- function that implements it, its commutator (same test with the operands
-- swapped) and its negator (the logically opposite test).

-- Less than.
CREATE OPERATOR < (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_lt,
    commutator = > , negator = >=
);

-- Less than or equal.
CREATE OPERATOR <= (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_le,
    commutator = >= , negator = >
);

-- Equality; its own commutator.
CREATE OPERATOR = (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_eq,
    commutator = = , negator = <>
);

-- Inequality; its own commutator.
CREATE OPERATOR <> (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_neq,
    commutator = <> , negator = =
);

-- Greater than or equal.
CREATE OPERATOR >= (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_ge,
    commutator = <= , negator = <
);

-- Greater than.
CREATE OPERATOR > (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_gt,
    commutator = < , negator = <=
);

-- Implemented by email_deq; its own commutator, negated by !~.
CREATE OPERATOR ~ (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_deq,
    commutator = ~ , negator = !~
);

-- Implemented by email_ndeq; its own commutator, negated by ~.
CREATE OPERATOR !~ (
    leftarg = EmailAddress, rightarg = EmailAddress, procedure = email_ndeq,
    commutator = !~ , negator = ~
);

-- Support function used by the btree operator class below; returns int4.
CREATE FUNCTION email_cmp(EmailAddress, EmailAddress) RETURNS int4
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- Support function used by the hash operator class below; returns int4.
CREATE FUNCTION email_hash(EmailAddress) RETURNS int4
    AS '_OBJWD_/email' LANGUAGE C IMMUTABLE STRICT;

-- now we can make the operator class
-- Default btree operator class: the five ordering operators in strategy
-- slots 1-5, with email_cmp as support function 1.
CREATE OPERATOR CLASS email_btree_ops
    DEFAULT FOR TYPE EmailAddress USING btree AS
        OPERATOR        1       <   ,
        OPERATOR        2       <=  ,
        OPERATOR        3       =   ,
        OPERATOR        4       >=  ,
        OPERATOR        5       >   ,
        FUNCTION        1       email_cmp(EmailAddress, EmailAddress);

-- Default hash operator class: equality in strategy slot 1, with email_hash
-- as support function 1.
CREATE OPERATOR CLASS email_hash_ops
    DEFAULT FOR TYPE EmailAddress USING hash AS
        OPERATOR        1       =   ,
        FUNCTION        1       email_hash(EmailAddress);

1 个答案:

答案 0 :(得分:0)

好的。在很多人(比如 @Antti 和我的教授)的帮助和支持下,问题解决了。

这是此行为背后的解决方案和原因(总结):

问题在于:

/* Pointer-based layout: the struct holds two addresses, not the string bytes. */
typedef struct EmailAdress {
    char* domain;
    char* local;
} EmailAddress;

我为本地和域分配的缓冲区仅存在于原始sql进程的内存上下文中。

我实际存储在数据库中的是指向这些缓冲区的指针,这些将在原始sql进程运行时正常工作。当我启动一个新的sql进程时,我仍然在元组中有相同的缓冲区地址,但是在新的内存上下文中不存在缓冲区。

有两种可能的解决方案:

  • 一个简单但空间效率较低的方案(在结构体中使用定长字符数组)
  • 一个更复杂但节省空间的方案(使用 varlena 数据类型 - 我没能弄清楚如何实现这种方法,所以放弃了)

简单的(大多数人使用过的)是将结构定义为

/*
 * Fixed-size layout: both strings are stored inline in the struct, so the
 * struct's own bytes contain the complete value.
 * NOTE(review): MAX is defined outside this snippet; presumably 129
 * (128 characters plus the NUL terminator) - confirm.
 */
typedef struct EmailAdress {
    char domain[MAX];
    char local[MAX];
} EmailAddress;

采用这种方案后,我不再用 malloc() 为 local 和 domain 单独分配空间;它们的缓冲区空间在对整个结构体执行 palloc() 时就一并创建了。

这是有效的,因为所有字符串数据都包含在由palloc()创建的内存块中,因此在PostgreSQL插入元组时会写入磁盘。

在相应更改email.source方面,唯一需要改变的是:

-- Revised type definition for the fixed-size struct: 258 bytes in total,
-- int4 alignment, plain storage.
CREATE TYPE EmailAddress (
    internallength   = 258,
    input            = email_in,
    output           = email_out,
    alignment        = int4,
    storage          = plain
);

注意内部长度 = 258(129 * 2),因为在我的情况下,可以假设 local 和 domain 部分各自最多 128 个字符,再加 1 个字节用于空终止符。