我试图将此PyMC2 example of an ERGM转换为PyMC3。
使用文档和其他示例,我提出了这段代码。它运行时不会抛出错误,但会给出错误的答案(估计值为~0)。第一部分只是设置:它几乎与教程完全相同,并且工作正常。
import pymc3 as pm
import networkx as nx
import csv
import theano.tensor as tt
from theano.compile.ops import as_op
with open("grey_adjacency.tsv") as f:
first_line = f.readline()
adj = np.loadtxt("grey_adjacency.tsv", delimiter="\t",skiprows=1, usecols=list(range(1,45)))
G = nx.from_numpy_matrix(adj)
names = [name.strip() for name in first_line.split("\t")[1:]]
G = nx.relabel_nodes(G, {i: names[i] for i in range(44)})
node_attributes = []
with open("grey_nodes.tsv") as f:
reader = csv.DictReader(f, dialect=csv.excel_tab)
for row in reader:
node_attributes.append(row)
for node in node_attributes:
name = node["name"]
for key, val in node.items():
if key == "name":
continue
G.node[name][key] = val
matrix = nx.to_numpy_matrix(G)
matrix[np.triu_indices_from(matrix)] = 0
def edge_count(G):
size = len(G)
ones = np.ones((size, size))
# Zero out the upper triangle:
if not G.is_directed():
ones[np.triu_indices_from(ones)] = 0
return ones
def node_match(G, attrib):
size = len(G)
attribs = [node[1][attrib] for node in G.nodes(data=True)]
match = np.zeros(shape=(size, size))
for i in range(size):
for j in range(size):
if i != j and attribs[i] == attribs[j]:
match[i,j] = 1
if not G.is_directed():
match[np.triu_indices_from(match)] = 0
return match
# Create the gender-match matrix
gender_match_mat = node_match(G, "sex")
下一部分是我在将旧的PyMC2代码转换为使用PyMC3时遇到问题的地方:
@as_op(itypes=[tt.dmatrix, tt.dmatrix], otypes=[tt.dmatrix])
def probs(t1, t2):
probs = 1/(1+np.exp(-1*sum([t1, t2])))
probs[np.diag_indices_from(probs)] = 0
probs[np.triu_indices_from(probs)] = 0
return probs
with pm.Model():
density_coef = pm.Normal("density", mu=0, sd=0.001)
gender_match_coef = pm.Normal("gender_match", mu=0, sd=0.001)
density_term = density_coef * edge_count(G)
gender_match_term = gender_match_coef * gender_match_mat
ps = probs(density_term, gender_match_term)
outcome = pm.Bernoulli("outcome", p=ps, observed=matrix)
trace = pm.sample(5000, step=pm.Metropolis(), tune=500, njobs=1)
density_trace = trace["density"]
gender_match_trace = trace["gender_match"]
print("Density: {0:.3f}, {1:.3f}".format(np.mean(density_trace), np.std(density_trace)))
print("Gender: {0:.3f}, {1:.3f}".format(np.mean(gender_match_trace), np.std(gender_match_trace)))
哪个输出不正确:
Density: -0.000, 0.001
Gender: -0.000, 0.001
This answer建议我可以使用张量而不是原始示例numpy
函数来使其工作。我试过这个:
@as_op(itypes=[tt.dmatrix, tt.dmatrix], otypes=[tt.dmatrix])
def probs(t1, t2):
probs = 1/(1+tt.exp(-1*tt.sum([t1, t2])))
probs = tt.fill_diagonal(probs, 0)
probs = tt.tril(probs)
return probs
但是这给出了错误:
TypeError: FillDiagonal: first parameter must have at least two dimensions
我也想知道我是否能以正确的方式解决这个问题? documentation for logit使用了GLM,但我不知道如何使用该方法解决此问题。