Question

我正在生成一系列无标度网络，我可以根据从均匀分布中采样的值来添加和删除边缘。以下代码有效，但偶尔会抛出一次警告（每10次运行一次）。警告是：

Warning message:
In data.table::data.table(...) :
  Item 1 is of size 64 but maximum size is 66 (recycled leaving remainder of 2 items)

我见过this question，但我真的不明白答案，是否适用于我的情况。

代码是：

library(igraph)

create_graph_sequence = function(num_nodes, num_timesteps) {

    keep_graphs <- vector(mode="list", length=num_timesteps)
    proportions = runif(2)
    cat('proportions are: ', proportions)
    prop_add = proportions[1] #Let both follow a uniform distribution
    prop_del= proportions[2] 

    min_num_edges = ceiling(num_nodes/2)

    g <- barabasi.game(num_nodes, power=1.2, directed=TRUE, algorithm="psumtree")
    keep_graphs[[1]] = g

    for(i in 2:num_timesteps) {
        print(i)
        edgelist = get.edgelist(keep_graphs[[i-1]]) #(g)

        #Add and remove edges per time step.

        add_edge_to_graph = function() {
            #Do not allow creation of loops! If farm a ships to farm b, then farm b cannot ship to farm a.
            #Do not allow self-loops! If farm a is in the network, it cannot ship to farm a.
            reverse_edgelist = cbind(edgelist[,2], edgelist[,1])
            self_edgelist = cbind(seq(1:num_nodes), seq(1:num_nodes))
            total_edges_not_to_repeat = rbind(edgelist, reverse_edgelist, self_edgelist)

            #Find two nodes that are not in the current edgelist.
            #1: get a (num_nodes)*2 matrix of possible edges
            possible_edges_1 = rep(seq(1:num_nodes), each=num_nodes)
            possible_edges_2 = rep(seq(1:num_nodes), num_nodes)
            possible_edges = cbind(possible_edges_1, possible_edges_2)
            possible_edges = data.matrix(possible_edges)

            DT1 <- data.table(possible_edges)
            DT2 <- data.table(cbind(total_edges_not_to_repeat, 0), key=paste0("V", seq(len=ncol(total_edges_not_to_repeat))))
            setnames(DT2, c(head(names(DT2), -1L), 'found'))
            da <- DT2[DT1, list(found=ifelse(is.na(found), 0, 1))]

            #Append found to the possible_edges
            dt1 <- cbind(DT1, da)

            #randomly select *prop_add* rows that have '0' in the found column and add the edges
            dt1 = data.matrix(dt1)
            select_0 = dt1[dt1[, "found"]==0,]
            new_edge_row = sample(nrow(select_0), ceiling(nrow(edgelist)*prop_add))
            new_edges = select_0[new_edge_row, 1:2] #possible_edges[new_edge_row,]

            #While not all new_edges fit the bill: are self-loops, create loops with other farms, etc.
            #take a new sample.

            new_edges_df = as.data.frame(new_edges, by_row=False)
            tentr_df = as.data.frame(total_edges_not_to_repeat, by_row=True)

            while(any(do.call(paste0,new_edges_df) %in% do.call(paste0, tentr_df))) {
                new_edge_row = sample(nrow(select_0), ceiling(nrow(edgelist)*prop_add))
                new_edges = select_0[new_edge_row, 1:2]
                new_edges_df = as.data.frame(new_edges)
                tentr_df = as.data.frame(total_edges_not_to_repeat)
            }

            new_edges = t(as.matrix(new_edges_df)) #for some reason matrix -> df -> matrix transposes. Transposing back.
            #g2 = g
            print('i-1 is')
            print(i-1)
            #print(keep_graphs[[i-1]])
            g2 = keep_graphs[[i-1]]

            for(i in 1:nrow(new_edges)) {
                addthisedge = c(new_edges[i,][[1]], new_edges[i,][[2]])
                g2 = add_edges(g2, edges = addthisedge)
            }
        return(g2)
        }

        delete_edge_from_graph = function() {
            #Randomly select a second proportion *prop_del* rows to delete 
            #(include the edges from the new graph - this means the number of edges DOES NOT remain constant; 
            #if prop_add = prop_del then yes, stays constant; 
            #if prop_del > prop_add then going to have a graph that gets smaller over time.)
            #BUT if the number to remove results in a graph with unacceptably few edges, then remove no edges.

            g2_edgelist = get.edgelist(g2)
            edges_to_remove = sample(nrow(g2_edgelist), floor(nrow(g2_edgelist)*prop_del)) 
            edgefunctiong2 = E(g2)
            if(nrow(g2_edgelist) - length(edgefunctiong2[edges_to_remove]) < min_num_edges) {
                print('g2')
                print(g2)
                return(g2)
            } else {
            g2 = g2 - edgefunctiong2[edges_to_remove]
            print('g2')
            print(g2)
            return(g2)
            }
        }

        g2 = add_edge_to_graph()
        g2 = delete_edge_from_graph()
        keep_graphs[[i]] = g2
    }
    return(keep_graphs)
}

您可以通过以下方式调用此MWE：

kept_graphs = create_graph_sequence(8, 3)

创建包含8个节点的3个图表的序列。在基本的笔记本电脑上运行它不应该花费几秒钟。

如何摆脱此警告消息？

Answer 1

问题出在

行

dt1 <- cbind(DT1, da)

有时会发生DT1有64行，而da有66行。这需要警告，因为它看起来不像典型的回收（例如，当一个向量是（1,2）时，3,4,5,6），我们给另一个（2,3）期待回收利用（2,3,2,3,2,3）。

由于该功能的作用并不明显，我相信如果您自己解决问题会更好。要复制它，请在调用函数前执行set.seed(123)，然后在if(i == 3) browser()之前添加dt1 <- cbind(DT1, da)。

R：生成一系列图表（无标度网络）

1 个答案: