在R中可视化社交网络以显示用户被提及的频率

时间:2016-01-24 07:37:36

标签: r ggplot2 data-visualization social-networking

给出如下数据框:

v1    v2  v3     v4   v5
tom   A    pinky  A   3
ben   B    hugo   C   2
lily  A    tom    A   1
...

表示v2组中的v1提及了v4组中的v3共v5次。例如,来自A组的tom提到了来自A组的pinky 3次。现在我想绘制一个社交网络,每个用户用一个点表示,点的大小与该用户被提及的总次数成正比。如果两个用户相互提及或单方面提及对方,则这两个点之间有一条连线。

当我查看ggplot文档时,我找不到任何函数来执行它。

有什么建议吗?提前致谢!

编辑:

这是我到目前为止的图表: enter image description here

1 个答案:

答案 0 :(得分:2)

library(igraph)
library(dplyr)

# Build a small example dataset. Each row records that user `v1` (member of
# group `v2`) mentioned user `v3` (member of group `v4`) `v5` times.
dt <- data.frame(
  v1 = c("tom", "ben", "lilly", "mark"),
  v2 = c("A", "B", "A", "C"),
  v3 = c("pinky", "hugo", "tom", "pinky"),
  v4 = c("A", "D", "A", "A"),
  v5 = c(20, 10, 15, 15),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  stringsAsFactors = FALSE
)

dt

#      v1 v2    v3 v4 v5
# 1   tom  A pinky  A 20
# 2   ben  B  hugo  D 10
# 3 lilly  A   tom  A 15
# 4  mark  C pinky  A 15


# Keep only the two name columns; they form the edge list of the graph
# (`v1` is the user who mentions, `v3` the user being mentioned).
dt_graph <- dt %>% select(v1, v3)

# Build the graph from the edge list. `graph_from_data_frame()` is the current
# name of the deprecated `graph.data.frame()`; the first two columns are used
# as the two endpoints of each edge.
g <- graph_from_data_frame(dt_graph)

# Total number of mentions received by each mentioned user:
# group the rows by `v3` and add up `v5` within each group.
dt_times_mentioned <- summarise(group_by(dt, v3), times = sum(v5))

dt_times_mentioned

#      v3 times
#   (chr) (dbl)
# 1  hugo    10
# 2 pinky    35
# 3   tom    15


# One row per vertex of `g`, in vertex order, carrying the number of times
# that user was mentioned. The left join keeps users who never appear in
# `dt_times_mentioned`; their missing count is then replaced by 0.
dt_weights <-
  data.frame(names = V(g)$name, stringsAsFactors = FALSE) %>%
  left_join(dt_times_mentioned, by = c("names" = "v3")) %>%
  mutate(times = ifelse(is.na(times), 0, times))

dt_weights

#   names times
# 1   tom    15
# 2   ben     0
# 3 lilly     0
# 4  mark     0
# 5 pinky    35
# 6  hugo    10


# Two (names, group) tables: one built from the first pair of columns
# (`v1`, `v2`) and one from the second pair (`v3`, `v4`).
dt1 <- select(dt, names = v1, group = v2)
dt2 <- select(dt, names = v3, group = v4)


# Stack both tables, drop repeated rows and attach a colour to every row.
# The group is converted to a numeric code that indexes into `colors()`.
# The `+ 5` offset is an arbitrary value for this example, used to get colours
# that are easy to distinguish; it is not really needed. With only a few
# groups the colours can be set manually instead.
dt_group <- mutate(
  distinct(rbind(dt1, dt2)),
  color = colors()[as.numeric(factor(group)) + 5]
)

dt_group

#   names group         color
# 1   tom     A antiquewhite3
# 2   ben     B antiquewhite4
# 3 lilly     A antiquewhite3
# 4  mark     C    aquamarine
# 5 pinky     A antiquewhite3
# 6  hugo     D   aquamarine1


# Plot the graph. Vertex size is the number of times the user was mentioned
# (`dt_weights` has one row per vertex, in vertex order).
# Vertex colour is looked up by vertex name rather than taken positionally, so
# the colours stay correct even if the rows of `dt_group` are not in the same
# order as the vertices, or a name occurs more than once (first match wins).
# NOTE(review): users who were never mentioned get size 0, so their circle is
# presumably not visible -- confirm whether a minimum size is wanted.
plot(
  g,
  vertex.size = dt_weights$times,
  vertex.color = dt_group$color[match(V(g)$name, dt_group$names)]
)

# Add a legend mapping each group to its colour. Colour is derived from the
# group, so the two `unique()` vectors line up element by element.
legend(
  1.5, 1.5,
  legend = unique(dt_group$group),
  pch = 19,
  col = unique(dt_group$color),
  title = "Colors - Groups"
)

enter image description here