我的数据集如下:
# Sample sequence of states: one observation per row, in a single column named
# `itemset`. read.table() splits fields on whitespace, so the trailing commas
# stay part of the values ("aac,"); only the last row ("cca") has no comma.
Data <- read.table(
  text = "
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca",
  header = TRUE
)
假设每一行对应一个状态。我需要计算相邻两个状态之间的转移频率。
问题:是否有现成的标准函数可以完成这一计算?
我在这里找到了部分答案:
# Quick look: count of each distinct state next to its share of all rows.
cbind(table(Data), table(Data) / nrow(Data))

# Same table again, this time labelled and ordered by share (largest first).
Tab <- table(Data)
Tab <- cbind(freq = Tab, prop = Tab / nrow(Data))
Tab <- Tab[order(Tab[, "prop"], decreasing = TRUE), ]

# Number of distinct states.
nrow(Tab)
`dim(Tab)[1]` 的值为 22,因此结果应为一个 22x22 的矩阵。
答案 0 :(得分:2)
另一种方法是使用 reshape2,可得到 21x21 的转移概率矩阵:
library(reshape2)

# Work on plain character states. The sample data carries a trailing comma on
# every row but the last, so "cca," and "cca" would otherwise count as two
# different states (22 instead of 21) and the "cca" row would be all zeros.
Data <- data.frame(Data, stringsAsFactors = FALSE)
Data$itemset <- sub(",$", "", as.character(Data$itemset))

# Pair each state with its successor; the last state has none (NA).
Data$nextitem <- c(Data$itemset[-1], NA)
Data$value <- 1

# Count transitions: one row per current state, one column per next state.
# value.var and fun.aggregate are spelled out so dcast() does not have to
# guess them; pairs that never occur are filled with 0.
df <- dcast(
  Data,
  itemset ~ nextitem,
  value.var = "value",
  fun.aggregate = length,
  fill = 0
)

# The last row's missing successor ends up as the final column; drop it.
df <- df[-ncol(df)]

# Turn counts into row-wise transition probabilities.
df[-1] <- df[-1] / rowSums(df[-1]) # assuming no rows have all zeros
df
# itemset aac aba abb abc aca acb acc baa bab bac bba bbc bca bcc caa cab cac cba cbb cca ccb
#1 aac 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.5 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#2 aba 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#3 abb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.3333333 0 0 0 0.0 0.3333333 0.0 0.0000000 0.3333333 0.0 0
#4 abc 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#5 aca 0 0.0 0 0.0000000 0 0 0 0.0000000 1.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#6 acb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 1
#7 acc 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 1.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#8 baa 0 0.0 0 0.3333333 0 0 0 0.3333333 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.3333333 0.0000000 0.0 0
#9 bab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.5 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#10 bac 0 0.0 1 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#11 bba 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 1 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#12 bbc 1 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#13 bca 0 0.0 0 0.0000000 0 0 1 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#14 bcc 0 0.0 0 0.0000000 1 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#15 caa 0 0.5 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.5 0
#16 cab 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 0 0 1 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#17 cac 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#18 cba 0 0.0 0 0.0000000 0 1 0 0.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#19 cbb 0 0.0 0 0.0000000 0 0 0 1.0000000 0.0 0.0 0.0000000 0 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
#20 cca 0 0.0 0 0.0000000 0 0 0 0.0000000 0.5 0.0 0.0000000 0 0 0 0.0 0.0000000 0.5 0.0000000 0.0000000 0.0 0
#21 ccb 0 0.0 0 0.0000000 0 0 0 0.0000000 0.0 0.0 0.0000000 1 0 0 0.0 0.0000000 0.0 0.0000000 0.0000000 0.0 0
答案 1 :(得分:1)
您要找的是类似下面这样的结果吗?
library(tidyverse)

df <- data.frame(Data, stringsAsFactors = FALSE) %>%
  mutate(
    # Remove the commas so that "cca," and "cca" are the same state.
    itemset = gsub(",", "", itemset),
    # State observed on the following row; NA for the last row.
    next_itemset = lead(itemset, n = 1)
  )

# Cross-tabulate (current state, next state) pairs and express each cell as a
# share of all counted transitions. The last row has no successor and is not
# counted, so dividing by nrow(df) would use a denominator that is one too
# large; prop.table() divides by the table total instead.
# Use prop.table(..., margin = 1) for row-wise transition probabilities.
prop.table(xtabs(~ itemset + next_itemset, df))
答案 2 :(得分:0)
下面的代码可生成您想要的 22x22 矩阵。
# Rebuild the sample data: a one-column data frame (`itemset`) with one state
# per row. Fields are split on whitespace, so each trailing comma is kept as
# part of its value; the final row ("cca") is the only one without a comma.
Data <- read.table(
  text = "
itemset
aac,
cca,
bab,
caa,
aba,
abb,
cab,
bcc,
aca,
bab,
cca,
cac,
baa,
baa,
abc,
abb,
cbb,
baa,
cba,
acb,
ccb,
bbc,
aac,
bac,
abb,
bba,
bca,
acc,
caa,
cca",
  header = TRUE
)
# The pipeline below needs dplyr and tidyr (both attached by tidyverse).
library(tidyverse)

# Successor of each state; the last observation has none, hence the NA.
Data$second_state <- c(as.character(Data$itemset[-1]), NA)

# Count every observed (state, next state) pair and lay the counts out wide:
# one row per current state, one column per next state (sorted by name).
# Pairs that never occur are reported as 0 rather than NA, and the result is
# an ordinary ungrouped table.
Data %>%
  filter(!is.na(second_state), !is.na(itemset)) %>%
  count(itemset, second_state) %>%
  pivot_wider(
    names_from = second_state,
    values_from = n,
    values_fill = 0L,
    names_sort = TRUE
  )