我有一个包含事务(transaction)数据的大型数据框。每条事务记录了某个人对某个工件所做的一次贡献(例如,开发人员修改了某个文件)。
我正在尝试将这些数据转换为二分(bipartite)的 networkDynamic 图,其中人和工件是节点,事务表示为边,且每条边仅在事务发生的时间点处于活动状态。显然,两个节点之间可以存在多条事务,这意味着同一条边会被多次激活。
到目前为止一切顺利。最终,我需要计算有关此网络演变的统计数据,例如:在多个时间点测量网络的连通性(connectedness)等指标。
出于某种原因,我经常遇到问题。例如,在下面的可重现代码示例中,对函数 tSnaStats 的倒数第二次调用(使用 connectedness)会发出关于多个属性值的警告。这本身通常应该没有问题,因为在查询时段(query spell)内确实存在一条被多次激活的边。然而奇怪的是,尽管我将规则指定为 "latest"(取最新值),它却使用了最早的值。最后一次调用(使用 gtrans)甚至直接失败并报错。
所以我的第一个问题是:我的网络构造代码是否正确(也许我误解了什么)?如果代码是正确的,那么第二个问题是:这是否是 tsna 包中的错误?
# Bootstrap pacman, then let it load the packages used by this script.
# requireNamespace() only checks whether pacman is installed; attaching is
# done explicitly by library() below, so a failed install surfaces there as
# an error instead of being swallowed as a FALSE return value.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
library("pacman")
pacman::p_load(network, networkDynamic, tsna)
# Sample transaction log: one row per contribution of a contributor
# (contributorId) to an artifact (artifactId) at time instantId. The three
# *weight columns are the values attached to the edge and to its two
# endpoint vertices for that transaction.
dfTransactions <- data.frame(
  weight = c(1, 2, 2),
  contributorweight = c(1, 2, 2),
  artifactweight = c(1, 2, 2),
  contributorId = c("u1", "u1", "u2"),
  instantId = c(1000, 2000, 3000),
  artifactId = c("a1", "a1", "a2"),
  stringsAsFactors = FALSE
)
# Distinct contributor/artifact pairs found in the transaction log.
dfEdges <- unique(dfTransactions[, c("contributorId", "artifactId")])

# Vertex name vectors. The combined vector lists all contributors first and
# all artifacts second; positions in it are used as vertex ids further down.
veUniqueContributors <- unique(dfEdges[["contributorId"]])
veUniqueArtifacts <- unique(dfEdges[["artifactId"]])
veUniqueVertices <- c(veUniqueContributors, veUniqueArtifacts)

# Vertex counts per mode and in total.
nuNrUniqueContributors <- length(veUniqueContributors)
nuNrUniqueArtifacts <- length(veUniqueArtifacts)
nuNrUniqueVertices <- nuNrUniqueContributors + nuNrUniqueArtifacts
# Build the edge and vertex spell tables that are later passed to
# networkDynamic(). Each transaction is a point event, so instantId is
# selected twice to fill both the onset and the terminus column.

# Edge spells: instant, instant, contributor, artifact, weight.
# NOTE(review): networkDynamic() is assumed to read these columns by
# position (onset, terminus, tail, head, extra attribute) -- confirm.
dfEdgeSpells <- dfTransactions[c(
  "instantId", "instantId", "contributorId", "artifactId", "weight"
)]

# Per-mode vertex spells with a common column layout.
dfContributorSpells <- dfTransactions[c(
  "instantId", "instantId", "contributorId", "contributorweight"
)]
dfArtifactSpells <- dfTransactions[c(
  "instantId", "instantId", "artifactId", "artifactweight"
)]
names(dfContributorSpells) <- c("onset", "terminus", "vertex.id", "weight")
names(dfArtifactSpells) <- c("onset", "terminus", "vertex.id", "weight")

# Contributor rows first, artifact rows second (same order as the id vector
# assembled below).
dfVertexSpells <- rbind(dfContributorSpells, dfArtifactSpells)

# Convert vertex names to vertex ids.
# Each mode is looked up in its own name vector, and artifact ids are shifted
# past the contributors (veUniqueVertices lists contributors first). Matching
# both modes against the combined vector would map an artifact whose label
# equals a contributor label onto the contributor vertex.
dfEdgeSpells[["contributorId"]] <-
  match(dfEdgeSpells[["contributorId"]], veUniqueContributors)
dfEdgeSpells[["artifactId"]] <-
  match(dfEdgeSpells[["artifactId"]], veUniqueArtifacts) +
  nuNrUniqueContributors
dfVertexSpells$vertex.id <- c(
  match(dfContributorSpells$vertex.id, veUniqueContributors),
  match(dfArtifactSpells$vertex.id, veUniqueArtifacts) +
    nuNrUniqueContributors
)
# Static skeleton: a directed network without hyperedges, loops or
# multi-edges. `bipartite` receives the contributor count; vertices are
# ordered contributors first, artifacts second.
net <- network.initialize(
  nuNrUniqueVertices,
  bipartite = nuNrUniqueContributors,
  directed = TRUE,
  hyper = FALSE,
  loops = FALSE,
  multiple = FALSE
)

# Label every vertex with its original name and its mode.
net %v% "vertex.names" <- veUniqueVertices
net %v% "vertex.type" <- rep(
  c("contributor", "artifact"),
  times = c(nuNrUniqueContributors, nuNrUniqueArtifacts)
)

# Attach the temporal information. The extra "weight" column of each spell
# table is requested as a dynamic (TEA) attribute on edges and vertices.
net <- networkDynamic(
  net,
  edge.spells = dfEdgeSpells,
  vertex.spells = dfVertexSpells,
  create.TEAs = TRUE,
  edge.TEA.names = "weight",
  vertex.TEA.names = "weight"
)

# Align vertex activity with edge activity.
# NOTE(review): the return value is not assigned; this relies on
# networkDynamic updating `net` in place -- confirm against the package docs.
reconcile.vertex.activity(
  net = net,
  mode = "encompass.edges",
  edge.active.default = TRUE
)
# Connectedness per window of 1001 time units.
# Returns Warning (reported: multiple attribute values matched the query
# spell, and the earliest value is used although rule = "latest").
tSnaStats(net, "connectedness",
          time.interval = 1001, aggregate.dur = 1001, rule = "latest")

# Transitivity (gtrans) per window of 100 time units.
# Returns Error
tSnaStats(net, "gtrans",
          time.interval = 100, aggregate.dur = 100, rule = "latest")