如何计算两个状态之间的转换频率?

时间:2018-09-12 06:46:51

标签: r frequency

我的数据集如下:

# Example data: a single column `itemset` holding one observed state per row,
# in sequence order.
# NOTE: read.table() splits fields on whitespace by default, so the trailing
# commas below are kept as part of each value (e.g. "aac,"). The last row has
# no comma, which makes "cca" a different value from "cca,".
Data <- read.table(header=TRUE, text="
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca")

假设每一行对应一个状态。我需要计算两个相邻状态之间的转换频率。

问题:是否有现成的标准函数可以完成这种计算?

我在这里找到了部分答案:

# Quick look: each state's count next to its share of all rows (printed only).
cbind(table(Data), table(Data) / nrow(Data))

# Same summary kept as a matrix: one row per state, with the observed
# frequency and the proportion as named columns, most frequent state first.
Tab <- table(Data)
Tab <- cbind(freq = Tab, prop = Tab / nrow(Data))
Tab <- Tab[order(Tab[, "prop"], decreasing = TRUE), ]

dim(Tab)[1] 为 22,即共有 22 个不同的状态,因此期望的结果是一个 22x22 的转换矩阵。

3 个答案:

答案 0 :(得分:2)

使用reshape2的另一种方法,产生21x21的概率转换矩阵

library(reshape2)

# Work on a plain data frame and strip the trailing commas left over from the
# input text, so that "cca," and "cca" are counted as the same state.
Data <- data.frame(Data, stringsAsFactors = FALSE)
Data$itemset <- sub(",$", "", as.character(Data$itemset))

# Pair every state with its successor; the last state has none, hence NA.
Data$nextitem <- c(Data$itemset[-1], NA)
Data$value <- 1

# Cross-tabulate current state (rows) against next state (columns).
# value.var and fun.aggregate are given explicitly so that repeated
# transitions are summed into counts instead of relying on dcast's guesses.
df <- dcast(Data, itemset ~ nextitem, value.var = "value",
            fun.aggregate = sum, fill = 0)

# Drop the trailing NA column produced by the final state.
df <- df[-ncol(df)]

# Turn the counts into row-wise transition probabilities.
df[-1] <- df[-1] / rowSums(df[-1]) # assuming no rows have all zeros
df
#   itemset aac aba abb       abc aca acb acc       baa bab bac       bba bbc bca bcc caa       cab cac       cba       cbb cca ccb
#1      aac   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.5 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5   0
#2      aba   0 0.0   1 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#3      abb   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.3333333   0   0   0 0.0 0.3333333 0.0 0.0000000 0.3333333 0.0   0
#4      abc   0 0.0   1 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#5      aca   0 0.0   0 0.0000000   0   0   0 0.0000000 1.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#6      acb   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   1
#7      acc   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 1.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#8      baa   0 0.0   0 0.3333333   0   0   0 0.3333333 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.3333333 0.0000000 0.0   0
#9      bab   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.5 0.0000000 0.0 0.0000000 0.0000000 0.5   0
#10     bac   0 0.0   1 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#11     bba   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   1   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#12     bbc   1 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#13     bca   0 0.0   0 0.0000000   0   0   1 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#14     bcc   0 0.0   0 0.0000000   1   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#15     caa   0 0.5   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5   0
#16     cab   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   0   0   1 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#17     cac   0 0.0   0 0.0000000   0   0   0 1.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#18     cba   0 0.0   0 0.0000000   0   1   0 0.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#19     cbb   0 0.0   0 0.0000000   0   0   0 1.0000000 0.0 0.0 0.0000000   0   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0
#20     cca   0 0.0   0 0.0000000   0   0   0 0.0000000 0.5 0.0 0.0000000   0   0   0 0.0 0.0000000 0.5 0.0000000 0.0000000 0.0   0
#21     ccb   0 0.0   0 0.0000000   0   0   0 0.0000000 0.0 0.0 0.0000000   1   0   0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0   0

答案 1 :(得分:1)

您是否正在寻找类似的东西?

library(tidyverse)

df <- data.frame(Data, stringsAsFactors = FALSE) %>%
  # strip the stray commas so that "cca," and "cca" are the same state
  mutate(itemset = gsub(",", "", itemset, fixed = TRUE)) %>%
  # add a column holding the state that follows each row (NA for the last row)
  mutate(next_itemset = lead(itemset, n = 1))

# Count each (current state, next state) pair. xtabs() drops the last row,
# whose next state is NA, so the table holds nrow(df) - 1 transitions.
transition_counts <- xtabs(~ itemset + next_itemset, df)

# Divide by the number of transitions actually counted rather than by
# nrow(df), so that the relative frequencies sum to 1.
prop.table(transition_counts)

答案 2 :(得分:0)

此代码可创建您所需的状态转换计数矩阵(行为当前状态,列为下一个状态)。

# The pipeline below needs dplyr and tidyr; load them so that this snippet
# runs on its own.
library(tidyverse)

# Example data: one observed state per row, in sequence order.
Data <- read.table(header=TRUE, text="
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca")

# read.table() splits on whitespace, so the trailing commas end up inside the
# values; strip them so that "cca," and "cca" are one and the same state.
Data$itemset <- sub(",$", "", as.character(Data$itemset))

# State that follows each row; the final row has no successor, hence NA.
Data$second_state <- c(Data$itemset[-1], NA)

# Count every observed (state, next state) pair and spread the counts into a
# wide table: one row per current state, one column per next state, with 0
# for transitions that never occur. count() returns an ungrouped result.
transitions <- Data %>%
  filter(!is.na(itemset), !is.na(second_state)) %>%
  count(itemset, second_state) %>%
  pivot_wider(
    names_from = second_state,
    values_from = n,
    values_fill = 0L,
    names_sort = TRUE
  )
transitions