我想为树木育种项目创建一个线性祖先列表。父母是男性/女性对,不得相关(没有近亲繁殖),因此追踪和可视化这些血统的重要性......
以下是使用Postgresql 9.1的测试表/数据:
DROP TABLE if exists family CASCADE;
DROP TABLE if exists plant CASCADE;
CREATE TABLE family (
id serial,
family_key VARCHAR(20) UNIQUE,
female_plant_id INTEGER NOT NULL DEFAULT 1,
male_plant_id INTEGER NOT NULL DEFAULT 1,
filial_n INTEGER NOT NULL DEFAULT -1, -- eg 0,1,2... Which would represent None, F1, F2...
CONSTRAINT family_pk PRIMARY KEY (id)
);
CREATE TABLE plant (
id serial,
plant_key VARCHAR(20) UNIQUE,
id_family INTEGER NOT NULL,
CONSTRAINT plant_pk PRIMARY KEY (id),
CONSTRAINT plant_id_family_fk FOREIGN KEY(id_family) REFERENCES family(id) -- temp may need to remove constraint...
);
-- FAMILY Table DATA:
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (1,'NA',1,1,1); -- Default place holder record
-- Root level Alba families
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (2,'family1AA',2,3,1);
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (3,'family2AA',4,5,1);
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (4,'family3AA',6,7,1);
-- F2 Hybrid Families
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (5,'family4AE',8,11,0);
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (6,'family5AG',9,12,0);
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (7,'family6AT',10,13,0);
-- F3 Double Hybrid family:
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (9,'family7AEAG',14,15,0);
-- F3 Tri-hybrid backcross family:
insert into family (id, family_key, female_plant_id, male_plant_id, filial_n) VALUES (10,'family8AEAGAT',17,16,0);
-- PLANT Table DATA:
-- Root level Alba Parents:
insert into plant (id, plant_key, id_family) VALUES (1,'NA',1); -- Default place holder record
insert into plant (id, plant_key, id_family) VALUES (2,'female1A',1);
insert into plant (id, plant_key, id_family) VALUES (3,'male1A',1);
insert into plant (id, plant_key, id_family) VALUES (4,'female2A',1);
insert into plant (id, plant_key, id_family) VALUES (5,'male2A',1);
insert into plant (id, plant_key, id_family) VALUES (6,'female3A',1);
insert into plant (id, plant_key, id_family) VALUES (7,'male3A',1);
-- Female Alba progeny:
insert into plant (id, plant_key, id_family) VALUES (8,'female4A',2);
insert into plant (id, plant_key, id_family) VALUES (9,'female5A',3);
insert into plant (id, plant_key, id_family) VALUES (10,'female6A',4);
-- Male Aspen Root level parents:
insert into plant (id, plant_key, id_family) VALUES (11,'male1E',1);
insert into plant (id, plant_key, id_family) VALUES (12,'male1G',1);
insert into plant (id, plant_key, id_family) VALUES (13,'female1T',1);
-- F1 Hybrid progeny:
insert into plant (id, plant_key, id_family) VALUES (14,'female1AE',5);
insert into plant (id, plant_key, id_family) VALUES (15,'male1AG',6);
insert into plant (id, plant_key, id_family) VALUES (16,'male1AT',7);
-- Hybrid progeny
insert into plant (id, plant_key, id_family) VALUES (17,'female1AEAG',9);
-- Tri-hybrid backcross progeny:
insert into plant (id, plant_key, id_family) VALUES (18,'female1AEAGAT',10);
insert into plant (id, plant_key, id_family) VALUES (19,'female2AEAGAT',10);
以下是我从Postgres WITH Queries文档中派生的递归查询:
WITH RECURSIVE search_tree(
family_key
, female_plant
, male_plant
, depth
, path
, cycle
) AS (
SELECT
f.family_key
, pf.plant_key
, pm.plant_key
, 1
, ARRAY[ROW(pf.plant_key, pm.plant_key)]
, false
FROM
family f
, plant pf
, plant pm
WHERE
f.female_plant_id = pf.id
AND f.male_plant_id = pm.id
AND f.filial_n = 1 -- Include only F1 families (root level)
AND f.id <> 1 -- omit the default first family record
UNION ALL
SELECT
f.family_key
, pf.plant_key
, pm.plant_key
, st.depth + 1
, path || ROW(pf.plant_key, pm.plant_key)
, ROW(pf.plant_key, pm.plant_key) = ANY(path)
FROM
family f
, plant pf
, plant pm
, search_tree st
WHERE
f.female_plant_id = pf.id
AND f.male_plant_id = pm.id
AND f.family_key = st.family_key
AND pf.plant_key = st.female_plant
AND pm.plant_key = st.male_plant
AND f.filial_n <> 1 -- Include only non-F1 families (non-root levels)
AND NOT cycle
)
SELECT * FROM search_tree;
以下是所需的输出:
F1 family1AA=(female1A x male1A) > F2 family4AE=(female4A x male1E) > F3 family7AEAG=(female1AE x male1AG) > F4 family8AEAGAT=(female1AEAG x male1AT)
F1 family2AA=(female2A x male2A) > F2 family5AG=(female5A x male1G) > F3 family7AEAG=(female1AE x male1AG) > F4 family8AEAGAT=(female1AEAG x male1AT)
F1 family3AA=(female3A x male3A) > F2 family6AT=(female6A x female1T) > F3 family8AEAGAT=(female1AEAG x male1AT)
上面的递归查询显示具有相应F1父项的3行,但该路径不显示下游族/父项。我很感激帮助使递归输出类似于上面列出的所需输出。
答案 0 :(得分:4)
我已根据我所理解的内容调整了查询,不一定符合要求: - )
查询从f.id != 1 AND f.filial_n = 1
定义的三个给定系列开始,并递归扩展可用的子系列。
在什么条件下只选择最后三场比赛是我无法理解的。也许对于每个起始家庭来说,最长的一系列安慰者?
WITH RECURSIVE expanded_family AS (
SELECT
f.id,
f.family_key,
pf.id pd_id,
pf.plant_key pf_key,
pf.id_family pf_family,
pm.id pm_id,
pm.plant_key pm_key,
pm.id_family pm_family,
f.filial_n
FROM family f
JOIN plant pf ON f.female_plant_id = pf.id
JOIN plant pm ON f.male_plant_id = pm.id
),
search_tree AS (
SELECT
f.*,
1 depth,
ARRAY[f.family_key::text] path
FROM expanded_family f
WHERE
f.id != 1
AND f.filial_n = 1
UNION ALL
SELECT
f.*,
depth + 1,
path || f.family_key::text
FROM search_tree st
JOIN expanded_family f
ON f.pf_family = st.id
OR f.pm_family = st.id
WHERE
f.id <> 1
)
SELECT
family_key,
depth,
path
FROM search_tree;
结果是:
family_key | depth | path
---------------+-------+-------------------------------------------------
family1AA | 1 | {family1AA}
family2AA | 1 | {family2AA}
family3AA | 1 | {family3AA}
family4AE | 2 | {family1AA,family4AE}
family5AG | 2 | {family2AA,family5AG}
family6AT | 2 | {family3AA,family6AT}
family7AEAG | 3 | {family1AA,family4AE,family7AEAG}
family7AEAG | 3 | {family2AA,family5AG,family7AEAG}
family8AEAGAT | 3 | {family3AA,family6AT,family8AEAGAT}
family8AEAGAT | 4 | {family1AA,family4AE,family7AEAG,family8AEAGAT}
family8AEAGAT | 4 | {family2AA,family5AG,family7AEAG,family8AEAGAT}
技术资料:
我删除了cycle
内容,因为干净的数据不一定需要(恕我直言)。
expanded_family
,但是现在它使递归查询更具可读性。
修改强>
稍微修改一下查询可以过滤这些行,对于每个&#34; root&#34; family(即查询开始的那个),存在最长的路径。
我只在search_tree
中显示已更改的部分,因此您必须复制上一部分的头部:
-- ...
search_tree AS
(
SELECT
f.*,
f.id family_root, -- remember where the row came from.
1 depth,
ARRAY[f.family_key::text] path
FROM expanded_family f
WHERE
f.id != 1
AND f.filial_n = 1
UNION ALL
SELECT
f.*,
st.family_root, -- propagate the anchestor
depth + 1,
path || f.family_key::text
FROM search_tree st
JOIN expanded_family f
ON f.pf_family = st.id
OR f.pm_family = st.id
WHERE
f.id <> 1
)
SELECT
family_key,
path
FROM
(
SELECT
rank() over (partition by family_root order by depth desc),
family_root,
family_key,
depth,
path
FROM search_tree
) AS ranked
WHERE rank = 1;
结果是:
family_key | path
---------------+-------------------------------------------------
family8AEAGAT | {family1AA,family4AE,family7AEAG,family8AEAGAT}
family8AEAGAT | {family2AA,family5AG,family7AEAG,family8AEAGAT}
family8AEAGAT | {family3AA,family6AT,family8AEAGAT}
(3 rows)
<强> EDIT2 强>
根据评论,我添加了pretty_print
版本的路径:
WITH RECURSIVE expanded_family AS (
SELECT
f.id,
pf.id_family pf_family,
pm.id_family pm_family,
f.filial_n,
f.family_key || '=(' || pf.plant_key || ' x ' || pm.plant_key || ')' pretty_print
FROM family f
JOIN plant pf ON f.female_plant_id = pf.id
JOIN plant pm ON f.male_plant_id = pm.id
),
search_tree AS
(
SELECT
f.id,
f.id family_root,
1 depth,
'F1 ' || f.pretty_print path
FROM expanded_family f
WHERE
f.id != 1
AND f.filial_n = 1
UNION ALL
SELECT
f.id,
st.family_root,
st.depth + 1,
st.path || ' -> F' || st.depth+1 || ' ' || f.pretty_print
FROM search_tree st
JOIN expanded_family f
ON f.pf_family = st.id
OR f.pm_family = st.id
WHERE
f.id <> 1
)
SELECT
path
FROM
(
SELECT
rank() over (partition by family_root order by depth desc),
path
FROM search_tree
) AS ranked
WHERE rank = 1;
结果是
path
----------------------------------------------------------------------------------------------------------------------------------------------------------
F1 family1AA=(female1A x male1A) -> F2 family4AE=(female4A x male1E) -> F3 family7AEAG=(female1AE x male1AG) -> F4 family8AEAGAT=(female1AEAG x male1AT)
F1 family2AA=(female2A x male2A) -> F2 family5AG=(female5A x male1G) -> F3 family7AEAG=(female1AE x male1AG) -> F4 family8AEAGAT=(female1AEAG x male1AT)
F1 family3AA=(female3A x male3A) -> F2 family6AT=(female6A x female1T) -> F3 family8AEAGAT=(female1AEAG x male1AT)
(3 rows)