How do I use Redis to cache a list of related articles as the value, keyed by article ID, in Django?

Time: 2018-07-01 19:25:20

Tags: python django caching redis

models.py



from django.db import models
from django_pandas.managers import DataFrameManager

# Create your models here.
class BcContent(models.Model):
    asset_id = models.PositiveIntegerField()
    title = models.CharField(max_length=255)
    alias = models.CharField(max_length=255)
    title_alias = models.CharField(max_length=255)
    introtext = models.TextField()
    state = models.IntegerField()
    sponsored = models.IntegerField()
    sectionid = models.PositiveIntegerField()
    mask = models.PositiveIntegerField()
    catid = models.PositiveIntegerField()
    created = models.DateTimeField()
    created_by = models.PositiveIntegerField()
    created_by_alias = models.CharField(max_length=255)
    modified = models.DateTimeField()
    modified_by = models.PositiveIntegerField()
    checked_out = models.PositiveIntegerField()
    checked_out_time = models.DateTimeField()
    publish_up = models.DateTimeField()
    publish_down = models.DateTimeField()
    images = models.TextField()
    urls = models.TextField()
    attribs = models.CharField(max_length=5120)
    version = models.PositiveIntegerField()
    parentid = models.PositiveIntegerField()
    ordering = models.IntegerField()
    metakey = models.TextField()
    metadesc = models.TextField()
    access = models.PositiveIntegerField()
    hits = models.PositiveIntegerField()
    metadata = models.TextField()
    featured = models.PositiveIntegerField()
    language = models.CharField(max_length=7)
    xreference = models.CharField(max_length=50)
    admin_push = models.IntegerField()
    author_id = models.PositiveIntegerField(blank=True, null=True)
    meta_title = models.CharField(max_length=255)
    og_title = models.CharField(max_length=255)
    og_description = models.TextField()
    lifestage_period = models.CharField(max_length=255)
    fb_post_id = models.CharField(max_length=255)
    is_instant_article = models.IntegerField()
    instant_article_text = models.TextField()

    objects = DataFrameManager()



    class Meta:
        managed = False
        db_table = 'bc_content'

I need help with this file. All I need is to store, for every article ID, the list of articles I recommend as the value in Redis, and then read that list back from there in production. Please help me set the values in Redis. Once the article ID is passed in from the URL, how do I fetch its list of related articles?
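Before django.core.cache can talk to Redis at all, a Redis-backed cache has to be configured in settings.py. Below is a minimal sketch assuming the django-redis package and a local Redis instance; the package choice, database number, and connection URL are assumptions to adapt to your setup:

# settings.py -- minimal sketch, assuming django-redis is installed
# (pip install django-redis) and Redis is listening locally on 6379.
CACHES = {
    'default': {
        'BACKEND': 'django_redis.cache.RedisCache',
        'LOCATION': 'redis://127.0.0.1:6379/1',  # assumption: local Redis, db 1
        'OPTIONS': {
            'CLIENT_CLASS': 'django_redis.client.DefaultClient',
        },
    },
}

django-redis pickles values by default, so a plain Python list of article IDs round-trips unchanged: cache.set(article_id, id_list, timeout=None) stores it, cache.get(article_id) reads it back.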

views.py

from django.shortcuts import HttpResponse
from .models import BcContent
import os
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))
import functools
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords=True)
from django.conf import settings
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from django.core.cache import cache
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from string import punctuation
from sklearn.metrics.pairwise import linear_kernel
from django.http import Http404
import numpy as np
from tqdm import tqdm
tqdm.pandas()  # registers .progress_map on pandas objects, used in preprocess()


stop_words = []
with open(os.path.join(settings.PROJECT_ROOT, 'stopwords.txt')) as f:
    for line in f:
        stop_words.append(line.replace('\n', ''))

additional_stop_words = ['t','aah','aap','span','nbsp','don','doesn','isn','ve','ll','add', 'ndash','will','nan','q','article','lsquo','rsquo','ldquo','rdquo','personalised','please','read','download','app','here','more','experience','based','explore','bull','fact','myth','ndash','middot','lifestage','entire','collection','articles','reading','website','android','phone','a','zero','value',]
stop_words += additional_stop_words
stop_words = list(filter(None, stop_words))

def _removeNonAscii(s): 
    return "".join(i for i in s if ord(i)<128)

def clean_text(text):
    text = text.lower()
    cleanr = re.compile('<.*?>')
    text = re.sub(cleanr, '', text)
    text = re.sub(r"what's", "what is ", text)
    text = text.replace('(ap)', '')
    text = re.sub(r"\'s", " is ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r'\W+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"\\", "", text)
    text = re.sub(r"\'", "", text)    
    text = re.sub(r"\"", "", text)
    text = re.sub('[^a-zA-Z ?!]+', '', text)
    text = _removeNonAscii(text)
    text = text.strip()
    return text

def tokenizer(text):
    text = clean_text(text)    
    tokens = [word_tokenize(sent) for sent in sent_tokenize(text)]
    tokens = list(functools.reduce(lambda x,y: x+y, tokens))
    tokens = list(filter(lambda token: token not in (stop_words + list(punctuation)) , tokens))
    return tokens
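
Since tokenizer() chains clean_text, NLTK tokenization, and stop-word filtering, a quick sanity check is useful. The output below is only illustrative, because the final token list depends on whatever stopwords.txt contains:

# Illustrative only: the token list depends on the contents of stopwords.txt.
print(clean_text("<p>What's up, Django?</p>"))  # "what is up django"
print(tokenizer("<p>What's up, Django?</p>"))   # e.g. ['django']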

def preprocess(df):
    # Build one text field per article, then tokenize and stem it.
    df['combined'] = df[['title', 'metakey', 'metadesc', 'introtext']].apply(lambda x: ' '.join(x), axis=1)
    df.drop(['title', 'metakey', 'metadesc', 'introtext'], axis=1, inplace=True)
    df = df.dropna(subset=['combined'])
    df = df[df.combined.map(len) > 600]  # keep only articles with enough text
    df['tokens'] = df['combined'].progress_map(tokenizer)
    df['text_stemmed'] = df['tokens'].apply(lambda x: [stemmer.stem(y) for y in x])
    df.drop(['combined', 'tokens'], axis=1, inplace=True)
    df.set_index('id', inplace=True)
    return df


#@api_view(['GET'])

def detail(request, index):
    # Try Redis first; recompute every recommendation list on a cache miss.
    related = cache.get(index)
    if related is None:
        qs = BcContent.objects.all()
        df = qs.to_dataframe(fieldnames=['id', 'title', 'metakey', 'metadesc', 'introtext'])
        df1 = preprocess(df)
        vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
        vz = vectorizer.fit_transform(list(df1['text_stemmed'].map(lambda tokens: ' '.join(tokens))))
        cosine_similarities = linear_kernel(vz, vz)
        articlesRecommend = pd.DataFrame(cosine_similarities, columns=df1.index, index=df1.index)
        # For each article ID, cache the IDs of its 100 most similar articles;
        # position 0 of nlargest(101) is the article itself, so skip it.
        for c in articlesRecommend:
            similar_ids = articlesRecommend[c].nlargest(101).index.values[1:].tolist()
            cache.set(c, similar_ids, timeout=None)
        related = cache.get(index)
    if related is None:
        raise Http404("No recommendations found for this article ID")
    return HttpResponse(str(related))
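
The view above expects the article ID as its index argument. A hypothetical urls.py route that passes the ID in from the URL (the pattern, module names, and Django >= 2.0 path() routing are all assumptions):

# urls.py -- a sketch; the route pattern and module names are assumptions.
from django.urls import path

from . import views

urlpatterns = [
    path('articles/<int:index>/related/', views.detail, name='article-related'),
]

With that in place, a request to /articles/5019/related/ calls detail(request, index=5019), which answers from Redis on a hit and recomputes the similarity matrix on a miss.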

0 Answers:

No answers yet