将PyMC2 ERGM示例转换为PyMC3:结果不同

时间:2017-12-29 00:57:59

标签: python numpy theano pymc3

我试图将此PyMC2 example of an ERGM转换为PyMC3。

使用文档和其他示例,我提出了这段代码。它运行时不会抛出错误,但会给出错误的答案(估计值为~0)。第一部分只是设置:它几乎与教程完全相同,并且工作正常。

import pymc3 as pm
import networkx as nx
import csv
import theano.tensor as tt
from theano.compile.ops import as_op

with open("grey_adjacency.tsv") as f:
    first_line = f.readline()

adj = np.loadtxt("grey_adjacency.tsv", delimiter="\t",skiprows=1, usecols=list(range(1,45)))
G = nx.from_numpy_matrix(adj)
names = [name.strip() for name in first_line.split("\t")[1:]]
G = nx.relabel_nodes(G, {i: names[i] for i in range(44)})

node_attributes = []
with open("grey_nodes.tsv") as f:
    reader = csv.DictReader(f, dialect=csv.excel_tab)
    for row in reader:
        node_attributes.append(row)

for node in node_attributes:
    name = node["name"]
    for key, val in node.items():
        if key == "name":
            continue
        G.node[name][key] = val

matrix = nx.to_numpy_matrix(G)
matrix[np.triu_indices_from(matrix)] = 0

def edge_count(G):
    size = len(G)
    ones = np.ones((size, size))
    # Zero out the upper triangle:
    if not G.is_directed():
        ones[np.triu_indices_from(ones)] = 0
    return ones

def node_match(G, attrib):
    size = len(G)
    attribs = [node[1][attrib] for node in G.nodes(data=True)]
    match = np.zeros(shape=(size, size))
    for i in range(size):
        for j in range(size):
            if i != j and attribs[i] == attribs[j]:
                match[i,j] = 1
    if not G.is_directed():
        match[np.triu_indices_from(match)] = 0
    return match

# Create the gender-match matrix
gender_match_mat = node_match(G, "sex")

下一部分是我在将旧的PyMC2代码转换为使用PyMC3时遇到问题的地方:

@as_op(itypes=[tt.dmatrix, tt.dmatrix], otypes=[tt.dmatrix])
def probs(t1, t2):
    probs = 1/(1+np.exp(-1*sum([t1, t2]))) 
    probs[np.diag_indices_from(probs)] = 0
    probs[np.triu_indices_from(probs)] = 0
    return probs

with pm.Model():
    density_coef = pm.Normal("density", mu=0, sd=0.001)
    gender_match_coef = pm.Normal("gender_match", mu=0, sd=0.001)
    density_term = density_coef * edge_count(G)
    gender_match_term = gender_match_coef * gender_match_mat
    ps = probs(density_term, gender_match_term)
    outcome = pm.Bernoulli("outcome", p=ps, observed=matrix)
    trace = pm.sample(5000, step=pm.Metropolis(), tune=500, njobs=1)

density_trace = trace["density"]
gender_match_trace = trace["gender_match"]
print("Density: {0:.3f}, {1:.3f}".format(np.mean(density_trace), np.std(density_trace)))
print("Gender: {0:.3f}, {1:.3f}".format(np.mean(gender_match_trace), np.std(gender_match_trace)))

哪个输出不正确:

Density: -0.000, 0.001
Gender: -0.000, 0.001

This answer建议我可以使用张量而不是原始示例numpy函数来使其工作。我试过这个:

@as_op(itypes=[tt.dmatrix, tt.dmatrix], otypes=[tt.dmatrix])
def probs(t1, t2):
    probs = 1/(1+tt.exp(-1*tt.sum([t1, t2])))
    probs = tt.fill_diagonal(probs, 0)  
    probs = tt.tril(probs)
    return probs

但是这给出了错误:

TypeError: FillDiagonal: first parameter must have at least two dimensions

我也想知道我是否能以正确的方式解决这个问题? documentation for logit使用了GLM,但我不知道如何使用该方法解决此问题。

0 个答案:

没有答案