在R中将余弦相似度矩阵转换为JSON

时间:2018-07-31 20:59:35

标签: r json sparse-matrix cosine-similarity tibble

我有一个余弦相似度矩阵(csm),如下所示:

 # 5 x 5 cosine similarity matrix, filled column by column; the same labels
 # are used for rows and columns.
 csm <- matrix(
   c(
     1,   0,   0.4, 0.6, 0,
     0,   1,   0.1, 0.4, 0.7,
     0.4, 0.1, 1,   0.9, 0.5,
     0.6, 0.4, 0.9, 1,   0.6,
     0,   0.7, 0.5, 0.6, 1
   ),
   nrow = 5,
   ncol = 5,
   dimnames = rep(list(c("AAB", "AAC", "AAD", "AAE", "AAF")), 2)
 )

csm
    AAB   AAC   AAD   AAE    AAF
AAB 1     0     0.4   0.6    0
AAC 0     1     0.1   0.4    0.7
AAD 0.4   0.1   1     0.9    0.5
AAE 0.6   0.4   0.9   1      0.6
AAF 0     0.7   0.5   0.6    1

我的目标是生成一个如下所示的JSON文件:

    {"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],
     "AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],
     "AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],
     "AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],
     "AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}

我所做的是这样:

# Attempt from the question: reshape `csm` to long form and serialise it.
# NOTE(review): nothing in this pipeline carries the matrix row labels
# forward, so the result is a flat array of ID/value records rather than
# one array per row label -- as_tibble() presumably drops the matrix row
# names here; confirm against the tibble documentation.
csm %>% as_tibble() %>% 
    # No columns are excluded, so every column is gathered into ID/value.
    gather( ID, value) %>% 
    # NOTE(review): complete() appears to expand to every ID x value
    # combination, which would add pairs that are not in the matrix.
    complete(ID, value)%>%
    jsonlite::toJSON()

我得到的结果是:

[{"ID":"AAB","value":0},{"ID":"AAB","value":0},{"ID":"AAB","value":0.1},{"ID":"AAB","value":0.3},{"ID":"AAB","value":0.4},...

有人可以帮忙吗? 谢谢!

1 个答案:

答案 0 :(得分:1)

这是使用 stack 的基础 R 解决方案:

library(jsonlite)
# Base R: reshape `df` to long form with stack(), then emit one JSON array
# per column label.
# stack(df) yields a `values` column and an `ind` column holding the source
# column name; rownames(df) is recycled down the stacked rows to give `ID`.
df2 <- data.frame(ID = rownames(df), stack(df))
# Rename `values` -> `value` so the JSON keys match the requested output.
names(df2)[names(df2) == "values"] <- "value"
# Split on the source column; `ind` is left out of each piece because it
# already serves as the top-level JSON key. Grouping is by column, which
# equals grouping by row for the symmetric sample matrix.
toJSON(split(df2[, c("ID", "value")], df2$ind))
#{"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],"AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],"AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],"AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],"AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}

或者采用tidyverse方式

# Tidyverse: keep the row label as a key column, reshape to long form,
# split into one data frame per row label, drop the key, and serialise.
df %>%
  rownames_to_column(var = "row") %>%
  gather(key = ID, value = value, -row) %>%
  split(f = .$row) %>%
  # The first column is the row label, already used as the list name.
  map(function(piece) piece[, -1]) %>%
  toJSON()
#{"AAB":[{"ID":"AAB","value":1},{"ID":"AAC","value":0},{"ID":"AAD","value":0.4},{"ID":"AAE","value":0.6},{"ID":"AAF","value":0}],"AAC":[{"ID":"AAB","value":0},{"ID":"AAC","value":1},{"ID":"AAD","value":0.1},{"ID":"AAE","value":0.4},{"ID":"AAF","value":0.7}],"AAD":[{"ID":"AAB","value":0.4},{"ID":"AAC","value":0.1},{"ID":"AAD","value":1},{"ID":"AAE","value":0.9},{"ID":"AAF","value":0.5}],"AAE":[{"ID":"AAB","value":0.6},{"ID":"AAC","value":0.4},{"ID":"AAD","value":0.9},{"ID":"AAE","value":1},{"ID":"AAF","value":0.6}],"AAF":[{"ID":"AAB","value":0},{"ID":"AAC","value":0.7},{"ID":"AAD","value":0.5},{"ID":"AAE","value":0.6},{"ID":"AAF","value":1}]}

样本数据

# Sample data: the similarity matrix as a data frame. The header line has one
# field fewer than the data lines, so the first field of each data line
# becomes the row name.
df <- read.table(
  header = TRUE,
  text =
    "    AAB   AAC   AAD   AAE    AAF
AAB 1     0     0.4   0.6    0
AAC 0     1     0.1   0.4    0.7
AAD 0.4   0.1   1     0.9    0.5
AAE 0.6   0.4   0.9   1      0.6
AAF 0     0.7   0.5   0.6    1"
)