我正在尝试将马尔可夫链算法应用于简单的文本生成。我在网上找到了一段代码,并根据自己的数据做了修改,如下所示:
library(markovchain)
library(tidyverse)
library(tidytext)
library(stringr)
# Load the raw text file; readLines() yields one string per line of the file
text <- readLines(con = "example.txt")
# Peek at the first few lines to confirm the file was read
head(text)
> head(text)
[1] "PRIDE AND PREJUDICE" "" "By Jane Austen" ""
[5] "" ""
# Strip punctuation first, then drop blank lines. Filtering before the
# punctuation pass lets lines made up only of punctuation or whitespace
# through as empty / whitespace-only strings.
text <- str_replace_all(text, "[[:punct:]]", "")
head(text)
# Keep only the lines that still contain non-whitespace characters
text <- text[nzchar(trimws(text))]
head(text)
> head(text)
[1] "PRIDE AND PREJUDICE"
[2] "By Jane Austen"
[3] "Chapter 1"
[4] "It is a truth universally acknowledged that a single man in possession"
[5] "of a good fortune must be in want of a wife"
[6] "However little known the feelings or views of such a man may be on his"
# Split each line into terms on runs of whitespace. Splitting on a single
# space yields "" tokens wherever two spaces are adjacent (for example where
# punctuation between two spaces was removed).
terms <- unlist(strsplit(text, "\\s+"))
# Discard the empty token that leading whitespace still produces, so that ""
# is never treated as a term
terms <- terms[nzchar(terms)]
head(terms)
> head(terms)
[1] "PRIDE" "AND" "PREJUDICE" "By" "Jane" "Austen"
# Fit a Markov chain to the sequence of terms: each term is a state, and the
# fit relates the current state to the next state
fit <- markovchainFit(data = terms)
# Optional experiments, left disabled:
#plot(fit$estimate)
#paste(markovchainSequence(n=50, markovchain=fit$estimate), collapse=' ')
#s <- createSequenceMatrix(terms, sanitize=FALSE)
#fit2 <- fitHigherOrder(s)
# Number of lines to generate and number of terms per line; adjust as desired
n_lines <- 1000
n_terms <- 6
# Build all lines with vapply() so the result vector is allocated once;
# growing it with c() inside a for loop re-copies the vector every iteration.
# To start each line from a chosen term, pass t0 = "<term>" to
# markovchainSequence().
# NOTE(review): the package documentation describes t0 as defaulting to a
# randomly sampled state, and whether t0 itself appears in the returned
# sequence depends on the include.t0 argument -- confirm for your version.
new1 <- vapply(
  seq_len(n_lines),
  function(i) {
    paste(
      markovchainSequence(n = n_terms, markovchain = fit$estimate),
      collapse = " "
    )
  },
  character(1)
)
# Check out the first few generated lines
head(new1)
# Save the generated lines to a .txt file, one per line
write(new1, "new_example2.txt")
上面的代码会生成一系列包含指定词数的句子。有人能帮我理解 markovchainSequence 是如何随机选择每个句子的第一个词的吗?有没有办法让用户指定第一个词,然后在其后生成序列?
答案 0 :(得分:0)
我不确定我是否理解了您的问题,但下面是一个使用 markovchain
包的示例,其中设置了初始值。
library(markovchain)

# State space of the chain: one state per word
words <- c("hello", "how", "are", "you")

# Transition matrix: the same probability row (summing to 1) is used for
# every state, so it is repeated once per state and filled row by row
transitions <- matrix(
  rep(c(0.1, 0.2, 0.3, 0.4), times = 4),
  nrow = 4,
  byrow = TRUE,
  dimnames = list(words, words)
)

# Build the markovchain object from the states and the transition matrix
markovChain <- new(
  "markovchain",
  states = words,
  transitionMatrix = transitions
)

# Draw a sequence of length 10 from the chain; the initial value is given
# through t0
markovchainSequence(n = 10, markovchain = markovChain, t0 = "how")
# example output: "how" "hello" "are" "are" "are" "hello" "you" "are" "you" "you"