使用SQL的贝叶斯规则

时间:2013-05-18 16:18:46

标签: sql-server probability bayesian-networks

我想确认,对于下面这个贝叶斯网络(http://spark-public.s3.amazonaws.com/bigdata/documents/HW6.pdf ),我用来计算各疾病后验概率的方法是否正确:

-- For each value of Dyspnoea.d, sums the product of the p columns of all
-- seven joined tables, keeping only rows with Asia.a = 1 and Smoking.s = 0.
-- The result is the raw sum of products; no normalisation step is applied.
SELECT
    Dyspnoea.d,
    SUM(
        Dyspnoea.p
        * Either.p
        * Tuberculosis.p
        * Asia.p
        * LungCancer.p  -- was a bare `p`: ambiguous across the joined tables, and LungCancer's factor was otherwise missing
        * Smoking.p
        * Bronchitis.p
    )
FROM Dyspnoea
INNER JOIN Either       ON Dyspnoea.e = Either.e
INNER JOIN Tuberculosis ON Either.t = Tuberculosis.t
INNER JOIN Asia         ON Tuberculosis.a = Asia.a
INNER JOIN LungCancer   ON Either.l = LungCancer.l
INNER JOIN Bronchitis   ON Dyspnoea.b = Bronchitis.b
-- Smoking is shared by two tables, so both must agree on s.
INNER JOIN Smoking      ON LungCancer.s = Smoking.s AND Bronchitis.s = Smoking.s
WHERE Asia.a = 1
  AND Smoking.s = 0
GROUP BY Dyspnoea.d;

还是说,由于其中一些事件是相互独立的,应该像下面这样计算?

-- Alternative formulation from the question: for each value of Dyspnoea.d,
-- sums three partial products added together (one through Tuberculosis/Asia,
-- one through LungCancer/Smoking, one through Bronchitis/Smoking) instead of
-- multiplying all factors. Same joins and filters as the query above it.
SELECT
    Dyspnoea.d,
    SUM(
        (Dyspnoea.p * Either.p * Tuberculosis.p * Asia.p)
        + (Dyspnoea.p * Either.p * LungCancer.p * Smoking.p)
        -- was Bronchitis.b (the join key); every other factor is a .p column
        + (Dyspnoea.p * Bronchitis.p * Smoking.p)
    )
FROM Dyspnoea
INNER JOIN Either       ON Dyspnoea.e = Either.e
INNER JOIN Tuberculosis ON Either.t = Tuberculosis.t
INNER JOIN Asia         ON Tuberculosis.a = Asia.a
INNER JOIN LungCancer   ON Either.l = LungCancer.l
INNER JOIN Bronchitis   ON Dyspnoea.b = Bronchitis.b
INNER JOIN Smoking      ON LungCancer.s = Smoking.s AND Bronchitis.s = Smoking.s
WHERE Asia.a = 1
  AND Smoking.s = 0
GROUP BY Dyspnoea.d;

1 个答案:

答案 0 :(得分:1)

它基于完整查询。

我使用的表是:

-- taba: one probability P per value of A ('Y' / 'N'); the two rows sum to 1.00.
-- Presumably the counterpart of the question's Asia table -- confirm.
CREATE TABLE `taba` (
    `A` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `taba` (`A`, `P`)
VALUES
    ('Y', '0.01'),
    ('N', '0.99');

-- tabbs: probability P for each (B, S) combination. For each value of S the
-- two B rows sum to 1.00, so the table reads as a distribution of B given S.
-- Presumably the counterpart of the question's Bronchitis table -- confirm.
CREATE TABLE `tabbs` (
    `B` CHAR(1)      DEFAULT NULL,
    `S` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabbs` (`B`, `S`, `P`)
VALUES
    ('Y', 'Y', '0.60'),
    ('Y', 'N', '0.30'),
    ('N', 'Y', '0.40'),
    ('N', 'N', '0.70');

-- tabdeb: probability P for each (D, E, B) combination. For each (E, B) pair
-- the two D rows sum to 1.00, so the table reads as a distribution of D given
-- E and B. Presumably the counterpart of the question's Dyspnoea table -- confirm.
CREATE TABLE `tabdeb` (
    `D` CHAR(1)      DEFAULT NULL,
    `E` CHAR(1)      DEFAULT NULL,
    `B` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabdeb` (`D`, `E`, `B`, `P`)
VALUES
    ('Y', 'Y', 'Y', '0.90'),
    ('Y', 'Y', 'N', '0.70'),
    ('Y', 'N', 'Y', '0.80'),
    ('Y', 'N', 'N', '0.10'),
    ('N', 'Y', 'Y', '0.10'),
    ('N', 'Y', 'N', '0.30'),
    ('N', 'N', 'Y', '0.20'),
    ('N', 'N', 'N', '0.90');

-- tabelt: probability P for each (E, L, T) combination. Every P is 0.00 or
-- 1.00: E = 'Y' has P = 1.00 exactly when L = 'Y' or T = 'Y', and E = 'N' has
-- P = 1.00 only when both are 'N'.
-- Presumably the counterpart of the question's Either table -- confirm.
CREATE TABLE `tabelt` (
    `E` CHAR(1)      DEFAULT NULL,
    `L` CHAR(1)      DEFAULT NULL,
    `T` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabelt` (`E`, `L`, `T`, `P`)
VALUES
    ('Y', 'Y', 'Y', '1.00'),
    ('Y', 'Y', 'N', '1.00'),
    ('Y', 'N', 'Y', '1.00'),
    ('Y', 'N', 'N', '0.00'),
    ('N', 'Y', 'Y', '0.00'),
    ('N', 'Y', 'N', '0.00'),
    ('N', 'N', 'Y', '0.00'),
    ('N', 'N', 'N', '1.00');

-- tabls: probability P for each (L, S) combination. For each value of S the
-- two L rows sum to 1.00, so the table reads as a distribution of L given S.
-- Presumably the counterpart of the question's LungCancer table -- confirm.
CREATE TABLE `tabls` (
    `L` CHAR(1)      DEFAULT NULL,
    `S` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabls` (`L`, `S`, `P`)
VALUES
    ('Y', 'Y', '0.10'),
    ('Y', 'N', '0.01'),
    ('N', 'Y', '0.90'),
    ('N', 'N', '0.99');

-- tabs: one probability P per value of S ('Y' / 'N'); the two rows sum to 1.00.
-- Presumably the counterpart of the question's Smoking table -- confirm.
CREATE TABLE `tabs` (
    `S` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabs` (`S`, `P`)
VALUES
    ('Y', '0.50'),
    ('N', '0.50');


-- tabta: probability P for each (T, A) combination. For each value of A the
-- two T rows sum to 1.00, so the table reads as a distribution of T given A.
-- Presumably the counterpart of the question's Tuberculosis table -- confirm.
CREATE TABLE `tabta` (
    `T` CHAR(1)      DEFAULT NULL,
    `A` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabta` (`T`, `A`, `P`)
VALUES
    ('Y', 'Y', '0.05'),
    ('Y', 'N', '0.01'),
    ('N', 'Y', '0.95'),
    ('N', 'N', '0.99');

-- tabxe: probability P for each (X, E) combination. For each value of E the
-- two X rows sum to 1.00, so the table reads as a distribution of X given E.
-- The question's queries have no table with an x column; X is presumably an
-- extra observed node added by the answer -- confirm.
CREATE TABLE `tabxe` (
    `X` CHAR(1)      DEFAULT NULL,
    `E` CHAR(1)      DEFAULT NULL,
    `P` DECIMAL(4,2) DEFAULT NULL
) ENGINE = MyISAM DEFAULT CHARSET = latin1;

INSERT INTO `tabxe` (`X`, `E`, `P`)
VALUES
    ('Y', 'Y', '0.98'),
    ('Y', 'N', '0.05'),
    ('N', 'Y', '0.02'),
    ('N', 'N', '0.95');

查询如下:

-- For each value of tabdeb.e, sums the product of the P columns of all eight
-- tables over the joined rows, keeping only rows with tabta.a = 'N' and
-- tabxe.x = 'Y'. The sums are not normalised.
-- Table qualifiers are spelled exactly as in the FROM / JOIN clauses: MySQL
-- can treat table names and aliases as case-sensitive (it depends on
-- lower_case_table_names), so `tabeLT` / `TaBTA` would not resolve there.
SELECT
    tabdeb.e,
    SUM(
        tabdeb.p
        * tabelt.p
        * tabta.p
        * taba.p
        * tabls.p
        * tabbs.p   -- a stray `|` in front of this factor was a syntax error
        * tabs.p
        * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta  ON tabelt.t = tabta.t
INNER JOIN taba   ON tabta.a = taba.a
INNER JOIN tabls  ON tabelt.l = tabls.l
INNER JOIN tabbs  ON tabdeb.b = tabbs.b
-- tabs is shared by tabls and tabbs, so both must agree on s.
INNER JOIN tabs   ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe  ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
  AND tabxe.x = 'Y'
  -- optional extra filter left disabled by the author: AND tabdeb.d = 'Y'
GROUP BY tabdeb.e;

-- For each value of tabelt.t, sums the product of the P columns of all eight
-- tables over the joined rows, keeping only rows with tabta.a = 'N' and
-- tabxe.x = 'Y'. The sums are not normalised.
-- Table qualifiers now match the FROM / JOIN spelling (`tabelt`, `tabta`);
-- the mixed-case `tabeLT` / `TaBTA` fail where MySQL table names are
-- case-sensitive (depends on lower_case_table_names).
SELECT
    tabelt.t,
    SUM(
        tabdeb.p
        * tabelt.p
        * tabta.p
        * taba.p
        * tabls.p
        * tabbs.p
        * tabs.p
        * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta  ON tabelt.t = tabta.t
INNER JOIN taba   ON tabta.a = taba.a
INNER JOIN tabls  ON tabelt.l = tabls.l
INNER JOIN tabbs  ON tabdeb.b = tabbs.b
INNER JOIN tabs   ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe  ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
  AND tabxe.x = 'Y'
  -- optional extra filter left disabled by the author: AND tabdeb.d = 'Y'
GROUP BY tabelt.t;

=====================================================
=====================================================
-- Lung cancer: for each value of tabelt.l, sums the product of the P columns
-- of all eight tables over the joined rows, keeping only rows with
-- tabta.a = 'N' and tabxe.x = 'Y'. The sums are not normalised.
-- Table qualifiers now match the FROM / JOIN spelling (`tabelt`, `tabta`);
-- the mixed-case `tabeLT` / `TaBTA` fail where MySQL table names are
-- case-sensitive (depends on lower_case_table_names).
SELECT
    tabelt.l,
    SUM(
        tabdeb.p
        * tabelt.p
        * tabta.p
        * taba.p
        * tabls.p
        * tabbs.p
        * tabs.p
        * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta  ON tabelt.t = tabta.t
INNER JOIN taba   ON tabta.a = taba.a
INNER JOIN tabls  ON tabelt.l = tabls.l
INNER JOIN tabbs  ON tabdeb.b = tabbs.b
INNER JOIN tabs   ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe  ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
  AND tabxe.x = 'Y'
  -- optional extra filter left disabled by the author: AND tabdeb.d = 'Y'
GROUP BY tabelt.l;
=====================================================
=====================================================
-- Bronchitis: for each value of tabdeb.b, sums the product of the P columns
-- of all eight tables over the joined rows, keeping only rows with
-- tabta.a = 'N' and tabxe.x = 'Y'. The sums are not normalised.
-- Table qualifiers now match the FROM / JOIN spelling (`tabelt`, `tabta`);
-- the mixed-case `tabeLT` / `TaBTA` fail where MySQL table names are
-- case-sensitive (depends on lower_case_table_names).
SELECT
    tabdeb.b,
    SUM(
        tabdeb.p
        * tabelt.p
        * tabta.p
        * taba.p
        * tabls.p
        * tabbs.p
        * tabs.p
        * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta  ON tabelt.t = tabta.t
INNER JOIN taba   ON tabta.a = taba.a
INNER JOIN tabls  ON tabelt.l = tabls.l
INNER JOIN tabbs  ON tabdeb.b = tabbs.b
INNER JOIN tabs   ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe  ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
  AND tabxe.x = 'Y'
  -- optional extra filter left disabled by the author: AND tabdeb.d = 'Y'
GROUP BY tabdeb.b;

=======================================================
=======================================================
-- Dyspnoea (the original label read "dysomnia"; column d pairs with the
-- question's Dyspnoea.d): for each value of tabdeb.d, sums the product of the
-- P columns of all eight tables over the joined rows, keeping only rows with
-- tabta.a = 'N' and tabxe.x = 'Y'. The sums are not normalised.
-- Table qualifiers now match the FROM / JOIN spelling (`tabelt`, `tabta`);
-- the mixed-case `tabeLT` / `TaBTA` fail where MySQL table names are
-- case-sensitive (depends on lower_case_table_names).
SELECT
    tabdeb.d,
    SUM(
        tabdeb.p
        * tabelt.p
        * tabta.p
        * taba.p
        * tabls.p
        * tabbs.p
        * tabs.p
        * tabxe.p
    ) AS prob
FROM tabdeb
INNER JOIN tabelt ON tabdeb.e = tabelt.e
INNER JOIN tabta  ON tabelt.t = tabta.t
INNER JOIN taba   ON tabta.a = taba.a
INNER JOIN tabls  ON tabelt.l = tabls.l
INNER JOIN tabbs  ON tabdeb.b = tabbs.b
INNER JOIN tabs   ON tabls.s = tabs.s AND tabbs.s = tabs.s
INNER JOIN tabxe  ON tabxe.e = tabelt.e
WHERE tabta.a = 'N'
  AND tabxe.x = 'Y'
  -- Grouping is already by d, so enabling the author's disabled filter
  -- (AND tabdeb.d = 'Y') would simply drop the d = 'N' group.
GROUP BY tabdeb.d;