Question

我有一个在Jupyter笔记本中可以正常运行的wordcloud生成器。我想为它构建一个前端，这样就可以把文本粘贴到文本框中，单击提交，然后显示wordcloud。基本上就是这个人在这里（here）所做的那样。

我正在寻找一些帮助扩展我的代码的方法，以使它不会在Jupyter笔记本中显示wordcloud，而是将wordcloud的图像呈现在HTML页面上。我正在使用Django构建前端。

这是我在Jupyter笔记本中生成Wordcloud图像的代码。

from wordcloud import WordCloud
from PIL import Image
import matplotlib.pyplot as plt
import nltk
# run only once -> nltk.download('punkt')
# run only once -> nltk.download('wordnet')
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
ps = PorterStemmer()
wnl = WordNetLemmatizer()

def stem(string):
    """Normalise every token of ``string`` and return them space-separated.

    The text is tokenised with ``nltk.word_tokenize``. Each token is then
    mapped, in this order of precedence, to:

    1. itself, if it is listed in ``dontstem``;
    2. a hand-picked replacement (e.g. ``'e-mails'`` -> ``'email'``);
    3. its WordNet lemma, if that lemma ends in ``e``, ``y``, ``er`` or
       ``ing`` (the Porter stemmer would cut those endings off);
    4. its Porter stem otherwise.

    NOTE(review): ``dontstem`` is not defined in this snippet; it is
    presumably a module-level collection of words to leave untouched.

    Args:
        string: The raw text to normalise.

    Returns:
        The normalised tokens, each followed by a single space (so a
        non-empty result always ends with a trailing space).
    """
    # Words whose stem/lemma is not the form we want in the cloud.
    overrides = {
        'printing': 'print',
        'e-mailing': 'email',
        'e-mails': 'email',
        'e-mail': 'email',
        'installation': 'install',
    }
    # Lemma endings for which the lemma is kept instead of the stem.
    keep_lemma_suffixes = ('e', 'y', 'er', 'ing')

    def normalise(word):
        if word in dontstem:
            return word
        if word in overrides:
            return overrides[word]
        # Lemmatise once and reuse the result for every suffix test.
        lemma = wnl.lemmatize(word)
        if lemma.endswith(keep_lemma_suffixes):
            return lemma
        return ps.stem(word)

    return ''.join(
        normalise(word) + ' ' for word in nltk.word_tokenize(string)
    )

# str.split() is used so that only whole words are counted; a plain substring
# search would also match words that merely contain the target.
def count_substring(string, sub_string):
    """Return how many whitespace-separated words of ``string`` equal ``sub_string``."""
    return sum(1 for token in string.split() if token == sub_string)

# A phrase may span several words, so it is matched as a raw substring; a
# multi-word phrase is unlikely to be embedded inside some other word.
def count_substring_phrases(string, sub_string):
    """Return the number of (possibly overlapping) occurrences of ``sub_string``.

    Every start offset at which a full-width window still fits inside
    ``string`` is compared against ``sub_string``.
    """
    width = len(sub_string)
    start_offsets = range(len(string) - width + 1)
    return sum(
        1 for start in start_offsets
        if string[start:start + width] == sub_string
    )

# Build the frequency table: phrases first, then the remaining single words.
def countWords(string, phrases, stopWords, dostem):
    """Count phrase and word occurrences in ``string``.

    Each phrase found in the text is counted (overlapping matches included)
    and then removed from the text, so its words are not counted again
    individually. The remaining text is optionally passed through ``stem``
    and split on whitespace; every word that is not in ``stopWords`` and is
    at least two characters long is counted.

    Args:
        string: The text to analyse.
        phrases: Iterable of phrases to count as single units.
        stopWords: Container of words to ignore (tested with ``in``).
        dostem: When truthy, run ``stem`` on the text before counting words.

    Returns:
        A dict mapping each phrase/word to its number of occurrences, with
        phrases first and words in order of first appearance.
    """
    from collections import Counter

    counts = {}
    for phrase in phrases:
        occurrences = count_substring_phrases(string, phrase)
        if occurrences > 0:
            counts[phrase] = occurrences
            # Drop the phrase so its words are not counted a second time.
            string = string.replace(phrase, '')

    if dostem:
        string = stem(string)

    # One pass over the words instead of rescanning the text per occurrence.
    for word, occurrences in Counter(string.split()).items():
        # Skip stop words and single-character tokens.
        if word in stopWords or len(word) < 2:
            continue
        counts[word] = occurrences

    return counts

# Build the {word or phrase: count} table. `text`, `phrases` and `stopWords`
# must already be defined earlier in the notebook; True enables stemming.
MyData= dict(countWords(text, phrases, stopWords, True))
# Lay out the cloud from the precomputed frequencies instead of raw text.
wc = WordCloud(scale=10, max_words=100).generate_from_frequencies(MyData)

# Draw the cloud on a 32x18 inch figure and display it.
plt.figure(figsize=(32,18))
plt.imshow(wc, interpolation="bilinear", aspect='auto')
plt.show()

这是我的views.py文件。如您所见，我可以从表单字段中获取值并将其发送回页面。我现在需要做的是从form字段中获取值，通过wordcloud函数运行它，生成wordcloud的图像，然后将其发送回页面，以便我可以显示它。

from django.shortcuts import render
from wordcloudgen.forms import CharForm
from wordcloudgen.wordcloud import *

def cloud_gen(request):
    """Show the text form and echo the submitted text back to the page.

    On a valid POST the cleaned ``post`` field is passed to the template as
    ``text`` together with the bound form. On GET an empty form is shown,
    and on an invalid POST the bound form is re-rendered so that its
    validation errors can be displayed.

    Args:
        request: The incoming Django request.

    Returns:
        The rendered ``wordcloudgen/cloud_gen.html`` response.
    """
    if request.method == 'POST':
        form = CharForm(request.POST)
        if form.is_valid():
            text = form.cleaned_data['post']
            # Placeholders for the future countWords() arguments; unused so far.
            phrases = ''
            stopWords = ''

            args = {'form': form, 'text': text}
            return render(request, 'wordcloudgen/cloud_gen.html', args)
    else:
        form = CharForm()

    # Reached on GET and on an invalid POST: a view must always return a
    # response, so render the form (with its errors, if any).
    return render(request, 'wordcloudgen/cloud_gen.html', {'form': form})

我认为我需要在此处的wordcloud代码中进行一些更改：

# Build the {word or phrase: count} table from the text; True enables stemming.
MyData= dict(countWords(text, phrases, stopWords, True))
# Lay out the cloud from the precomputed frequencies instead of raw text.
wc = WordCloud(scale=10, max_words=100).generate_from_frequencies(MyData)

# Draw the cloud on a 32x18 inch figure; plt.show() displays it, which is
# the step that has to be replaced when rendering for a web page.
plt.figure(figsize=(32,18))
plt.imshow(wc, interpolation="bilinear", aspect='auto')
plt.show()

然后在视图中添加一些代码来调用wordcloud函数，以某种方式保存它输出的图像，再把图像传给我的args变量，这样我就可以在HTML模板上用类似{% image %}的写法来引用它。

注意：目前，countWords函数的部分参数被硬编码为空字符串。现阶段表单中只有一个用于输入文本的字段；等全部功能跑通之后，我会再为其他参数和选项（例如输出图形的大小等）添加输入项。

谢谢

Answer 1

我在这里（here）找到了我想要的做法。

我将Views.py文件更改为以下内容：

import base64
import io
import urllib.parse

from django.shortcuts import render

from wordcloudgen.forms import CharForm
from wordcloudgen.wordcloud import *

def cloud_gen(request):
    """Show the text form and, on a valid POST, the generated word cloud.

    The submitted text is turned into a frequency table with ``countWords``,
    drawn with matplotlib, saved as PNG into memory and handed to the
    template as a base64 ``data:`` URI under the ``image`` key, so it can be
    shown with ``<img src="{{ image }}">``.

    On GET an empty form is shown. On an invalid POST, or when no countable
    word is left in the text, the bound form is re-rendered with its errors.

    Args:
        request: The incoming Django request.

    Returns:
        The rendered ``wordcloudgen/cloud_gen.html`` response.
    """
    if request.method == 'POST':
        form = CharForm(request.POST)
        if form.is_valid():
            text = form.cleaned_data['post']
            # Phrase and stop-word lists are not configurable from the form yet.
            phrases = ''
            stopWords = ''

            MyData = countWords(text, phrases, stopWords, True)
            if not MyData:
                # WordCloud cannot lay out an empty frequency table; report
                # it on the field instead of failing the request.
                form.add_error('post', 'No words left to plot in the submitted text.')
            else:
                wc = WordCloud(scale=10, max_words=100).generate_from_frequencies(MyData)

                fig = plt.figure(figsize=(32, 18))
                try:
                    plt.imshow(wc, interpolation="bilinear", aspect='auto')
                    buf = io.BytesIO()
                    fig.savefig(buf, format='png')
                finally:
                    # pyplot keeps every figure alive until it is closed;
                    # without this each request would leak one figure.
                    plt.close(fig)

                encoded = base64.b64encode(buf.getvalue())
                uri = 'data:image/png;base64,' + urllib.parse.quote(encoded)

                args = {'form': form, 'text': text, 'image': uri}
                return render(request, 'wordcloudgen/cloud_gen.html', args)
    else:
        form = CharForm()

    # Reached on GET, on an invalid POST and when there was nothing to plot:
    # a view must always return a response.
    return render(request, 'wordcloudgen/cloud_gen.html', {'form': form})

然后，我可以在模板页面上使用以下内容显示图像： <img src="{{ image }}">

使用Django在HTML页面上显示matplotlib图像

1 个答案: