我有关于大学俱乐部每周社交时间可能发生和确实发生过的每次互动的数据
我的数据样本如下
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
<category android:name="android.intent.category.DEFAULT" />
<category android:name="android.intent.category.HOME" />
</intent-filter>
我正在尝试计算网络统计信息,例如structure(list(from = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"B", "C"), class = "factor"), to = structure(c(2L, 3L, 2L, 3L,
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A",
"B", "C"), class = "factor"), timestalked = c(0L, 1L, 0L, 4L,
1L, 2L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 0L, 0L), week = structure(c(1L,
1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L,
2L), .Label = c("1/1/2010", "1/15/2010", "1/8/2010"), class = "factor")), .Names = c("from",
"to", "timestalked", "week"), class = "data.frame", row.names = c(NA,
-18L))
,A
,B
的中心性,用于每个星期,过去两周和年初至今。我实现这一目标的唯一方法是手动分解我想要分析的时间单位中的文件,但我希望这种方法不那么繁琐。
当C
为0时,应将其视为无边
输出将生成timestalked
,其中包含以下内容:
.csv
actor cent_week1 cent_week2 cent_week3 cent_last2weeks cent_yeartodate
A
B
C
是1/1/2010的中心地位; cent_week1
正在考虑2010年1月8日和2010年1月15日;并cent_last2weeks
同时考虑所有数据。这被应用于数百万观测的更大数据集。
答案 0 :(得分:1)
无法评论,所以我正在写一个“答案”。如果您想在[x]
上执行某些数学运算并通过timestalked
获取值(在您的示例中找不到任何名为from
的变量),这里是actor
方法这可能会有所帮助:
data.table
这给出了以下输出:
dat [,。(cent = mean(timestalked)),by = list(from,weeknum = week(week))]
dat <- as.data.table(dat) # or add 'data.table' to the class parameter
dat$week <- as.Date(dat$week, format = "%m/%d/%Y")
dat[, .(cent = mean(timestalked)), by = list(from, weeknum = week(week))]
将此内容分配给 from weeknum cent
1: A 1 0.5
2: A 2 2.0
3: A 3 1.5
4: B 1 0.5
5: B 2 1.0
6: B 3 0.5
7: C 1 1.5
8: C 2 0.5
9: C 3 0.0
。您可以使用new_dat
或您想要的任何其他变体或new_dat[weeknum %in% 2:3]
一年中按周分组。此外,您还可以根据需要进行排序/订购。
希望这有帮助!
答案 1 :(得分:1)
怎么样:
Option Explicit
Sub SaveWorksheetsAsCsv()
Dim xWs As Worksheet
Dim xDir As String
Dim folder As FileDialog
Dim r As Range
Dim cell As Range
Dim k As Long
Set folder = Application.FileDialog(msoFileDialogFolderPicker)
If folder.Show <> -1 Then Exit Sub
xDir = folder.SelectedItems(1)
For Each xWs In ThisWorkbook.Worksheets ' it's safer to use ThisWorkbook is you reffer to the worksheets inside the workbook which thid code resides
With xWs
' getting the last row needs to be inside the loop
k = .Cells(.rows.Count, "A").End(xlUp).Row
Set r = Nothing ' reset Range Object
Select Case .Name
Case "Worksheet"
Set r = .Range("FA2:FA" & k)
'xWs.Columns(41).EntireColumn.Delete
Case "Worksheet 1"
Set r = .Range("AG2:AG" & k)
'xWs.Columns(126).EntireColumn.Delete
Case "Worksheet 5"
Set r = .Range("FR2:FR" & k)
End Select
' check if r is not nothing (it passed one of the 3 Cases in the above select case)
If Not r Is Nothing Then
For Each cell In r
If IsEmpty(cell.Value) = False And IsNumeric(cell.Value) Then
cell.Value = cell.Value & " мм"
End If
Next cell
End If
.SaveAs xDir & "\" & .Name, xlCSV, Local:=True
End With
Next xWs
End Sub
如果你有很多演员,你可以使用library(dplyr)
centralities <- tmp %>%
group_by(week) %>%
filter(timestalked > 0) %>%
do(
week_graph=igraph::graph_from_edgelist(as.matrix(cbind(.$from, .$to)))
) %>%
do(
ecs = igraph::eigen_centrality(.$week_graph)$vector
) %>%
summarise(ecs_A = ecs[[1]], ecs_B = ecs[[2]], ecs_C = ecs[[3]])
。将它放在长格式中是一种练习。
答案 2 :(得分:1)
可以通过在另一个表中设置窗口,然后在每个窗口上进行组操作来完成此操作:
数据准备:
# Load Data
DT <- structure(list(from = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"B", "C"), class = "factor"), to = structure(c(2L, 3L, 2L, 3L,
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A",
"B", "C"), class = "factor"), timestalked = c(0L, 1L, 0L, 4L,
1L, 2L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 0L, 0L), week = structure(c(1L,
1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L,
2L), .Label = c("1/1/2010", "1/15/2010", "1/8/2010"), class = "factor")), .Names = c("from",
"to", "timestalked", "week"), class = "data.frame", row.names = c(NA,
-18L))
# Code
library(igraph)
library(data.table)
setDT(DT)
# setup events
DT <- DT[timestalked > 0]
DT[, week := as.Date(week, format = "%m/%d/%Y")]
# setup windows, edit as needed
date_ranges <- data.table(label = c("cent_week_1","cent_week_2","cent_last2weeks","cent_yeartodate"),
week_from = as.Date(c("2010-01-01","2010-01-08","2010-01-08","2010-01-01")),
week_to = as.Date(c("2010-01-01","2010-01-08","2010-01-15","2010-01-15"))
)
# find all events within windows
DT[, JA := 1]
date_ranges[, JA := 1]
graph_base <- merge(DT, date_ranges, by = "JA", allow.cartesian = TRUE)[week >= week_from & week <= week_to]
现在是按群组代码,第二行有点粗略,有关如何避免双重调用的想法
graph_base <- graph_base[, .(graphs = list(graph_from_data_frame(.SD))), by = label, .SDcols = c("from", "to", "timestalked")] # create graphs
graph_base <- graph_base[, .(vertex = names(eigen_centrality(graphs[[1]])$vector), ec = eigen_centrality(graphs[[1]])$vector), by = label] # calculate centrality
dcast进行最终格式化:
dcast(graph_base, vertex ~ label, value.var = "ec")
vertex cent_last2weeks cent_week_1 cent_week_2 cent_yeartodate
1: A 1.0000000 0.7071068 0.8944272 0.9397362
2: B 0.7052723 0.7071068 0.4472136 0.7134685
3: C 0.9008487 1.0000000 1.0000000 1.0000000
答案 3 :(得分:0)
此分析遵循一般的拆分 - 应用 - 组合方法,其中数据按周拆分,应用图形函数,然后将结果组合在一起。有几种工具,但下面使用基数R和data.table
。
基础R
首先为您的数据设置数据类,以便术语持续两周具有意义。
# Set date class and order
d$week <- as.Date(d$week, format="%m/%d/%Y")
d <- d[order(d$week), ]
d <- d[d$timestalked > 0, ] # remove edges // dont need to do this is using weights
然后拆分并应用图形函数
# split data and form graph for eack week
g1 <- lapply(split(seq(nrow(d)), d$week), function(i)
graph_from_data_frame(d[i,]))
# you can then run graph functions to extract specific measures
(grps <- sapply(g1, function(x) eigen_centrality(x,
weights = E(x)$timestalked)$vector))
# 2010-01-01 2010-01-08 2010-01-15
# A 0.5547002 0.9284767 1.0000000
# B 0.8320503 0.3713907 0.7071068
# C 1.0000000 1.0000000 0.7071068
# Aside: If you only have one function to run on the graphs,
# you could do this in one step
#
# sapply(split(seq(nrow(d)), d$week), function(i) {
# x = graph_from_data_frame(d[i,])
# eigen_centrality(x, weights = E(x)$timestalked)$vector
# })
然后,您需要将所有数据的分析结合起来 - 因为您只需要构建另外两个图表,这不是一个耗时的部分。
fun1 <- function(i, name) {
x = graph_from_data_frame(i)
d = data.frame(eigen_centrality(x, weights = E(x)$timestalked)$vector)
setNames(d, name)
}
a = fun1(d, "alldata")
lt = fun1(d[d$week %in% tail(unique(d$week), 2), ], "lasttwo")
# Combine: could use `cbind` in this example, but perhaps `merge` is
# safer if there are different levels between dates
data.frame(grps, lt, a) # or
Reduce(merge, lapply(list(grps, a, lt), function(x) data.frame(x, nms = row.names(x))))
# nms X2010.01.01 X2010.01.08 X2010.01.15 alldata lasttwo
# 1 A 0.5547002 0.9284767 1.0000000 0.909899 1.0
# 2 B 0.8320503 0.3713907 0.7071068 0.607475 0.5
# 3 C 1.0000000 1.0000000 0.7071068 1.000000 1.0
<强> data.table 强>
这个耗时的步骤很可能会明确拆分 - 在数据上应用函数。 data.table
应该在这里提供一些好处,特别是当数据变大时,和/或有更多的组。
# function to apply to graph
fun <- function(d) {
x = graph_from_data_frame(d)
e = eigen_centrality(x, weights = E(x)$timestalked)$vector
list(e, names(e))
}
library(data.table)
dcast(
setDT(d)[, fun(.SD), by=week], # apply function - returns data in long format
V2 ~ week, value.var = "V1") # convert to wide format
# V2 2010-01-01 2010-01-08 2010-01-15
# 1: A 0.5547002 0.9284767 1.0000000
# 2: B 0.8320503 0.3713907 0.7071068
# 3: C 1.0000000 1.0000000 0.7071068
然后就像以前一样,在完整数据/最近两周内运行该功能。
答案之间存在差异,这取决于我们在计算中心点时如何使用weights
参数,而其他答案则不使用权重。
d=structure(list(from = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("A",
"B", "C"), class = "factor"), to = structure(c(2L, 3L, 2L, 3L,
2L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("A",
"B", "C"), class = "factor"), timestalked = c(0L, 1L, 0L, 4L,
1L, 2L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 2L, 1L, 0L, 0L, 0L), week = structure(c(1L,
1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 3L, 2L,
2L), .Label = c("1/1/2010", "1/15/2010", "1/8/2010"), class = "factor")), .Names = c("from",
"to", "timestalked", "week"), class = "data.frame", row.names = c(NA,
-18L))