在我们的 Web 技术栈中,我正在实现一项功能:允许用户指定若干过滤条件(以下称为"列表"),用于从 Postgres 数据库的某张表中筛选数据。每个用户可以拥有多个列表,每个列表由针对一组列的"包含"或"等于"之类的条件组成。
所涉及的逻辑相对简单,但难点在于:客户希望能够每天查看列表查询结果的变化/更新(也就是说,基本上需要存储每日的增量快照),而且其中一些过滤条件可能很慢,因为它们要在大表的非索引列上执行(所涉及的每张表都有 200-300 万行)。
目前我们使用 Redis 和 Postgres 作为存储后端,但我不太确定表示乃至管理这些每日更新的最佳方式,也不确定应如何为每个用户的每个列表建立索引。
我确信许多处理分析数据的软件都解决过类似的问题,但我并不熟悉如何高效地解决它,也不想重新发明轮子,所以想请教是否有人对如何实现这一点有任何想法/建议(可以在 PG 和 Redis 之外再使用其他软件)?
补充说明:对于所有现有的列表,此操作必须(目前)每 12 小时批量执行一次,很可能由一个调用更新例程的守护进程(或者只是一个 cron 作业)来完成。
(对不起,如果这个问题看起来很模糊;我试着概述了我能想到的每一个方面,但不确定自己做得是否足够好)
答案 0 :(得分:0)
我们所做的是使用触发器将所有更改存储在审计表中,将表的键存储为文本数组,将更改的值(旧的和新的)存储为两个jsonb字段:
-- Safely converts text to int: yields the integer when the input is an
-- optionally negative number of 1 to 9 digits (optionally padded with
-- spaces), and NULL for anything else instead of raising a cast error.
-- STRICT: a NULL input returns NULL without evaluating the body.
CREATE OR REPLACE FUNCTION text2intsafe(text) RETURNS int
LANGUAGE sql IMMUTABLE STRICT
AS $$
    SELECT CASE
               WHEN $1 ~ '^ *-?\d{1,9} *$' THEN CAST($1 AS int)
           END
$$;
-- Audit log table: each row records one change to a row of some audited table.
CREATE TABLE IF NOT EXISTS crm.audit (
    audit_id   bigserial   PRIMARY KEY,                         -- surrogate key (PRIMARY KEY already implies NOT NULL)
    date       timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP,  -- CURRENT_TIMESTAMP is the transaction start time
    username   text        NOT NULL DEFAULT crm.f_user(),       -- presumably the acting user; crm.f_user() is defined elsewhere
    tableclass regclass    NOT NULL,                            -- the table the change was made to
    action     text        NOT NULL CHECK (action IN ('I','U','D')),  -- one-letter operation code
    id         text[]      NOT NULL,                            -- key value(s) of the changed row, as text
    old_data   jsonb,                                           -- nullable: values before the change
    new_data   jsonb                                            -- nullable: values after the change
);
-- Indexes supporting lookups on the audit table.
-- Every index is named explicitly: PostgreSQL auto-names an unnamed index on
-- (tableclass, id) "audit_tableclass_id_idx", which would collide with an
-- explicitly named expression index of the same name and abort the script.
-- The first three names match the auto-generated defaults, so IF NOT EXISTS
-- keeps the script re-runnable on databases where they already exist.
-- The table is schema-qualified to match its CREATE TABLE statement.
CREATE INDEX IF NOT EXISTS audit_date_idx
    ON crm.audit (date);
CREATE INDEX IF NOT EXISTS audit_tableclass_id_idx
    ON crm.audit (tableclass, id);
CREATE INDEX IF NOT EXISTS audit_old_data_idx
    ON crm.audit USING gin (old_data);
-- Expression index on the first key component converted to int (NULL when it
-- is not an integer), for tables whose key is a single integer column.
CREATE INDEX IF NOT EXISTS audit_tableclass_id1_int_idx
    ON crm.audit (tableclass, text2intsafe(id[1]));
-- Reconstructs, from the audit trail, the jsonb value found at JSON path
-- p_object for the row p_id of table p_tableclass as it was at time p_date.
--
-- Lookup order:
--   1. the old_data of the earliest audit row at or after p_date that holds
--      the path (the value that was in place before that change);
--   2. otherwise the new_data of the latest audit row before p_date that
--      holds the path.
-- Returns NULL when neither exists, or (STRICT) when any argument is NULL.
--
-- audit_id is used as a tiebreaker: audit.date defaults to CURRENT_TIMESTAMP,
-- which is constant within a transaction, so several changes to the same row
-- can share one timestamp and ordering by date alone would pick arbitrarily.
--
-- NOTE(review): SECURITY DEFINER with an unqualified reference to "audit" and
-- no pinned search_path — confirm the search_path used at call time.
CREATE OR REPLACE FUNCTION f_audit_get_value(p_tableclass regclass, p_date timestamptz, p_id text[], p_object text[]) RETURNS jsonb AS $$
    SELECT COALESCE(
        (SELECT a.old_data #> p_object
           FROM audit a
          WHERE a.tableclass = p_tableclass
            AND a.date >= p_date
            AND a.id = p_id
            AND (a.old_data #> p_object) IS NOT NULL
          ORDER BY a.date, a.audit_id
          LIMIT 1),
        (SELECT a.new_data #> p_object
           FROM audit a
          WHERE a.tableclass = p_tableclass
            AND a.date < p_date
            AND a.id = p_id
            AND (a.new_data #> p_object) IS NOT NULL
          ORDER BY a.date DESC, a.audit_id DESC
          LIMIT 1))
$$ LANGUAGE SQL SECURITY DEFINER STABLE STRICT;
-- Text variant of the audit lookup: reconstructs, from the audit trail, the
-- value found at JSON path p_object for the row p_id of table p_tableclass as
-- it was at time p_date, and returns it as text (via #>>).
--
-- Lookup order:
--   1. the old_data of the earliest audit row at or after p_date that holds
--      the path (the value that was in place before that change);
--   2. otherwise the new_data of the latest audit row before p_date that
--      holds the path.
-- Returns NULL when neither exists, or (STRICT) when any argument is NULL.
-- A stored JSON null passes the IS NOT NULL filter (#> yields jsonb null) but
-- #>> turns it into SQL NULL, so COALESCE then moves on to the second branch.
--
-- audit_id is used as a tiebreaker: audit.date defaults to CURRENT_TIMESTAMP,
-- which is constant within a transaction, so several changes to the same row
-- can share one timestamp and ordering by date alone would pick arbitrarily.
--
-- NOTE(review): SECURITY DEFINER with an unqualified reference to "audit" and
-- no pinned search_path — confirm the search_path used at call time.
CREATE OR REPLACE FUNCTION f_audit_get_text(p_tableclass regclass, p_date timestamptz, p_id text[], p_object text[]) RETURNS text AS $$
    SELECT COALESCE(
        (SELECT a.old_data #>> p_object
           FROM audit a
          WHERE a.tableclass = p_tableclass
            AND a.date >= p_date
            AND a.id = p_id
            AND (a.old_data #> p_object) IS NOT NULL
          ORDER BY a.date, a.audit_id
          LIMIT 1),
        (SELECT a.new_data #>> p_object
           FROM audit a
          WHERE a.tableclass = p_tableclass
            AND a.date < p_date
            AND a.id = p_id
            AND (a.new_data #> p_object) IS NOT NULL
          ORDER BY a.date DESC, a.audit_id DESC
          LIMIT 1))
$$ LANGUAGE SQL SECURITY DEFINER STABLE STRICT;
-- Returns an object with the attributes of $2 whose values differ from the
-- same-named attributes of $1 (attributes missing from $1 count as different).
-- Attributes present only in $1 are not reported.
-- An argument that is not a JSON object is treated as an empty object.
-- Returns NULL (not '{}') when nothing differs, because the aggregate runs
-- over zero rows.
--
-- Per differing attribute, the kind is taken from $2's value, or from $1's
-- value when $2's is JSON null:
--   object -> nested diff via public.f_json_diff
--   array  -> public.f_json_array_diff
--   other  -> the value from $2 as is
-- NOTE(review): public.f_json_diff and public.f_json_array_diff are not
-- defined in this file — confirm they exist and accept (jsonb, jsonb).
CREATE OR REPLACE FUNCTION f_jsonb_diff(jsonb, jsonb) RETURNS jsonb AS $$
    -- jsonb_object_agg builds the jsonb result directly, avoiding the
    -- json text round-trip of json_object_agg(...)::jsonb
    SELECT jsonb_object_agg(
               v2.key,
               CASE COALESCE(t2, t1)
                   WHEN 'object' THEN public.f_json_diff(
                       CASE WHEN t1 IS NULL THEN '{}'::jsonb ELSE v1.value END,
                       CASE WHEN t2 IS NULL THEN '{}'::jsonb ELSE v2.value END)
                   WHEN 'array' THEN public.f_json_array_diff(
                       CASE WHEN t1 = 'array' THEN v1.value ELSE '[]'::jsonb END,
                       CASE WHEN t2 = 'array' THEN v2.value ELSE '[]'::jsonb END)
                   ELSE v2.value
               END)
      FROM jsonb_each(COALESCE(CASE WHEN jsonb_typeof($2) = 'object' THEN $2 END, '{}'::jsonb)) v2
      LEFT JOIN jsonb_each(COALESCE(CASE WHEN jsonb_typeof($1) = 'object' THEN $1 END, '{}'::jsonb)) v1
             ON (v1.key = v2.key)
      -- t1 / t2: JSON type name of each side, with both "missing" and JSON null mapped to SQL NULL
      LEFT JOIN LATERAL NULLIF(jsonb_typeof(v1.value), 'null') t1 ON (true)
      LEFT JOIN LATERAL NULLIF(jsonb_typeof(v2.value), 'null') t2 ON (true)
     WHERE v1.value IS DISTINCT FROM v2.value
$$ LANGUAGE sql IMMUTABLE;
-- Row-level trigger function that records the change being made into the
-- audit table:
--   INSERT: new_data = the whole new row
--   UPDATE: old_data / new_data = only the attributes that changed
--           (as computed by f_jsonb_diff); nothing is logged when both diffs
--           are NULL or empty
--   DELETE: old_data = the whole old row
-- The row is identified by the values of the columns of the table's primary
-- key (or, failing that, some unique index), rendered as text and ordered by
-- column number.
-- Returns OLD for DELETE and NEW otherwise, so the operation proceeds
-- unchanged when used as a BEFORE trigger.
--
-- NOTE(review): SECURITY DEFINER with unqualified references (audit,
-- f_jsonb_diff) and no pinned search_path — confirm this is intended.
CREATE OR REPLACE FUNCTION tf_audit() RETURNS TRIGGER AS $$
DECLARE
    _key_cols int2vector;  -- column numbers of the identifying index
    _key_src  jsonb;       -- row image the key values are read from
    _old_img  jsonb;
    _new_img  jsonb;
    _odata    jsonb;
    _ndata    jsonb;
    _id       text[];
BEGIN
    -- Prefer the primary key; otherwise fall back to any unique index.
    SELECT i.indkey
      INTO _key_cols
      FROM pg_index i
     WHERE i.indrelid = TG_RELID
       AND i.indisunique
     ORDER BY i.indisprimary DESC
     LIMIT 1;

    CASE TG_OP
        WHEN 'INSERT' THEN
            _ndata   := to_jsonb(NEW);
            _key_src := _ndata;
        WHEN 'UPDATE' THEN
            _new_img := to_jsonb(NEW);
            _old_img := to_jsonb(OLD);
            -- each side keeps only the attributes that differ from the other
            _odata   := f_jsonb_diff(_new_img, _old_img);
            _ndata   := f_jsonb_diff(_old_img, _new_img);
            _key_src := _new_img;
        WHEN 'DELETE' THEN
            _odata   := to_jsonb(OLD);
            _key_src := _odata;
        ELSE
            NULL;  -- any other operation: nothing to record
    END CASE;

    -- Key values of the affected row, as text, ordered by column number.
    _id := ARRAY(
        SELECT _key_src ->> att.attname
          FROM pg_attribute att
         WHERE att.attrelid = TG_RELID
           AND att.attnum = ANY(_key_cols)
         ORDER BY att.attnum
    );

    -- A NULL on both sides makes this condition NULL, which skips the insert.
    IF _odata <> '{}'::jsonb OR _ndata <> '{}'::jsonb THEN
        INSERT INTO audit (username, tableclass, action, id, old_data, new_data)
        VALUES (crm.f_user(), TG_RELID, left(TG_OP, 1), _id, _odata, _ndata);
    END IF;

    IF TG_OP = 'DELETE' THEN
        RETURN OLD;
    END IF;
    RETURN NEW;
END;
$$ LANGUAGE plpgsql SECURITY DEFINER;
然后,对于您要审计的每个表,只需添加:
-- Attach the audit trigger to a table. Replace my_table with the name of the
-- table to audit (the bare word "table" is reserved and cannot be used as is).
-- INSERT is included because crm.tf_audit() handles it; without it, newly
-- inserted rows would never appear in the audit trail.
-- AFTER (rather than BEFORE) so that only changes that actually took place,
-- with their final values, are recorded.
CREATE TRIGGER t_table_audit AFTER INSERT OR UPDATE OR DELETE ON my_table
    FOR EACH ROW EXECUTE PROCEDURE crm.tf_audit();
之后,对于任何被审计的表,您都可以计算出任意两个时间点之间(自启用审计之日起)的准确增量。