models.py
<!-- html2canvas 0.4.1 is loaded from the cdnjs CDN; nothing in this fragment calls it directly. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/html2canvas/0.4.1/html2canvas.min.js"></script>
<body>
  <div>Screenshot tester</div>

  <!-- report() is not defined in this fragment; it must be supplied by a script elsewhere. -->
  <button onclick="report()">Report bug</button>

  <!-- Sample page content. -->
  <div class="example">Lorem ipsum</div>

  <!-- Bug-report panel. NOTE(review): the "hide" class presumably keeps it hidden until report() runs; confirm in the CSS/JS. -->
  <div class="bug-container hide">
    <div>Select bug region</div>

    <!-- Image element plus an (initially empty) overlay element for the selected region. -->
    <div class="bug">
      <img class="screen" width="75%">
      <div class="region"></div>
    </div>

    <div>
      <textarea class="bug-desc">Describe bug here...</textarea>
    </div>

    <div class="send">SEND BUG</div>
  </div>
</body>
models.py
from django.db import models
from django_pandas.managers import DataFrameManager
# Create your models here.
class BcContent(models.Model):
    """Unmanaged model mapped onto the existing ``bc_content`` table.

    Django does not create or migrate the table (``managed = False``).
    The default manager is a ``DataFrameManager``, so querysets can be
    exported with ``to_dataframe()``.
    """

    # Identity and headline text.
    asset_id = models.PositiveIntegerField()
    title = models.CharField(max_length=255)
    alias = models.CharField(max_length=255)
    title_alias = models.CharField(max_length=255)
    introtext = models.TextField()

    # State flags and categorisation.
    state = models.IntegerField()
    sponsored = models.IntegerField()
    sectionid = models.PositiveIntegerField()
    mask = models.PositiveIntegerField()
    catid = models.PositiveIntegerField()

    # Authoring, check-out and publication timestamps.
    created = models.DateTimeField()
    created_by = models.PositiveIntegerField()
    created_by_alias = models.CharField(max_length=255)
    modified = models.DateTimeField()
    modified_by = models.PositiveIntegerField()
    checked_out = models.PositiveIntegerField()
    checked_out_time = models.DateTimeField()
    publish_up = models.DateTimeField()
    publish_down = models.DateTimeField()

    # Attached media, links and free-form attributes.
    images = models.TextField()
    urls = models.TextField()
    attribs = models.CharField(max_length=5120)

    # Versioning, hierarchy and ordering.
    version = models.PositiveIntegerField()
    parentid = models.PositiveIntegerField()
    ordering = models.IntegerField()

    # Metadata, access and counters.
    metakey = models.TextField()
    metadesc = models.TextField()
    access = models.PositiveIntegerField()
    hits = models.PositiveIntegerField()
    metadata = models.TextField()
    featured = models.PositiveIntegerField()
    language = models.CharField(max_length=7)
    xreference = models.CharField(max_length=50)
    admin_push = models.IntegerField()

    # The only nullable column in this model.
    author_id = models.PositiveIntegerField(blank=True, null=True)

    # Page-title / Open Graph / social fields.
    meta_title = models.CharField(max_length=255)
    og_title = models.CharField(max_length=255)
    og_description = models.TextField()
    lifestage_period = models.CharField(max_length=255)
    fb_post_id = models.CharField(max_length=255)
    is_instant_article = models.IntegerField()
    instant_article_text = models.TextField()

    objects = DataFrameManager()

    class Meta:
        managed = False
        db_table = 'bc_content'
我需要这个文件方面的帮助。我想把每篇文章的推荐文章列表以文章 ID 为键保存到 Redis 中,然后在正式(生产)环境中从 Redis 读取并解析这个列表。请帮我设置 Redis 中的值。另外,通过 URL 传入文章 ID 之后,应该如何获取对应的相关文章列表?
views.py
from django.shortcuts import HttpResponse
from .models import BcContent
import os
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
# NLTK's English stop-word list, held as a set. No code visible in this
# file reads `stop`; token filtering uses the `stop_words` list instead.
stop = set(stopwords.words('english'))
import functools
from nltk.stem.snowball import SnowballStemmer
# Module-level English Snowball stemmer used by preprocess().
# ignore_stopwords=True makes it return NLTK stop words unstemmed.
stemmer = SnowballStemmer("english", ignore_stopwords =True)
from django.conf import settings
from rest_framework.decorators import api_view
from rest_framework.response import Response
from rest_framework import status
from django.core.cache import cache
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from string import punctuation
from sklearn.metrics.pairwise import linear_kernel
from django.http import Http404
import numpy as np
# Build the module-wide stop-word list used by tokenizer(): one word per
# line from <PROJECT_ROOT>/stopwords.txt, extended with hand-picked
# markup remnants and site boilerplate terms.
# The `with` block closes the file; the previous bare open() never did.
with open(os.path.join(settings.PROJECT_ROOT, 'stopwords.txt')) as f:
    stop_words = [line.replace('\n', '') for line in f]

additional_stop_words = [
    't', 'aah', 'aap', 'span', 'nbsp', 'don', 'doesn', 'isn', 've', 'll',
    'add', 'ndash', 'will', 'nan', 'q', 'article', 'lsquo', 'rsquo',
    'ldquo', 'rdquo', 'personalised', 'please', 'read', 'download', 'app',
    'here', 'more', 'experience', 'based', 'explore', 'bull', 'fact',
    'myth', 'ndash', 'middot', 'lifestage', 'entire', 'collection',
    'articles', 'reading', 'website', 'android', 'phone', 'a', 'zero',
    'value',
]
stop_words += additional_stop_words

# Blank lines in the file become empty strings; drop them.
# stop_words stays a list because tokenizer() treats it as a sequence.
stop_words = [word for word in stop_words if word]
def _removeNonAscii(s):
return "".join(i for i in s if ord(i)<128)
# Contraction rewrites, applied in this exact order: "can't" must be
# handled before the generic "n't" rule.
_CONTRACTION_RULES = (
    (r"\'s", " is "),
    (r"\'ve", " have "),
    (r"can't", "cannot "),
    (r"n't", " not "),
    (r"i'm", "i am "),
    (r"\'re", " are "),
    (r"\'d", " would "),
    (r"\'ll", " will "),
)


def clean_text(text):
    """Normalise raw article text to lowercase ASCII letters and single spaces.

    Steps: lowercase, strip ``<...>`` tags (single-line only), expand common
    English contractions, turn every run of non-word characters into one
    space, drop everything that is not an ASCII letter, then collapse
    spaces and trim.

    Args:
        text: The raw string (may contain HTML markup).

    Returns:
        The cleaned string. Words are separated by exactly one space and
        there is no leading or trailing whitespace.
    """
    text = text.lower()
    text = re.sub(r'<.*?>', '', text)
    # "what's" is expanded before the generic "'s" rule can touch it.
    text = re.sub(r"what's", "what is ", text)
    text = text.replace('(ap)', '')
    for pattern, expansion in _CONTRACTION_RULES:
        text = re.sub(pattern, expansion, text)

    # Every run of punctuation/whitespace becomes a single space. After this
    # no backslash or quote is left, so the old explicit removals of those
    # characters were no-ops and are gone.
    text = re.sub(r'\W+', ' ', text)
    # Digits, underscores and non-ASCII letters are deleted here; only ASCII
    # survives, which made the old _removeNonAscii() call redundant.
    text = re.sub('[^a-zA-Z ?!]+', '', text)
    # Collapse spaces *after* the filter: deleting a whole token such as
    # "101" used to leave a double space behind.
    text = re.sub(r' +', ' ', text)
    return text.strip()
def tokenizer(text):
    """Clean *text* and split it into word tokens with stop words removed.

    Args:
        text: Raw article text; it is normalised with ``clean_text`` first.

    Returns:
        A flat list of tokens, in document order, excluding anything in the
        module-level ``stop_words`` list and any single punctuation
        character. Returns ``[]`` when nothing is left after cleaning; the
        previous ``functools.reduce`` call raised ``TypeError`` in that case.
    """
    # Build the exclusion set once per call instead of concatenating two
    # lists for every token.
    excluded = set(stop_words) | set(punctuation)
    cleaned = clean_text(text)
    return [
        token
        for sentence in sent_tokenize(cleaned)
        for token in word_tokenize(sentence)
        if token not in excluded
    ]
def preprocess(df):
    """Turn raw article rows into stemmed token lists indexed by article id.

    Args:
        df: DataFrame with the columns ``id``, ``title``, ``metakey``,
            ``metadesc`` and ``introtext``. It is not modified.

    Returns:
        A new DataFrame indexed by ``id`` with a ``text_stemmed`` column
        (list of stemmed tokens per article), plus any extra columns the
        input carried. Articles whose combined text is 600 characters or
        shorter are dropped.
    """
    text_columns = ['title', 'metakey', 'metadesc', 'introtext']

    # Work on a copy: the old in-place drops altered the caller's frame.
    df = df.copy()

    # Null fields become empty strings so concatenation cannot fail on
    # None/NaN. The old ' '.join raised TypeError on such rows, so its
    # later dropna never had anything to remove.
    text = df[text_columns].fillna('').astype(str)
    df['combined'] = text[text_columns[0]].str.cat(
        [text[column] for column in text_columns[1:]], sep=' '
    )
    df = df.drop(columns=text_columns)

    # Keep only articles with enough text; .copy() avoids assigning into a
    # filtered slice of the frame.
    df = df[df['combined'].map(len) > 600].copy()

    # Plain .map: progress_map only exists after tqdm.pandas() has been
    # called, and the visible module never imports tqdm.
    df['tokens'] = df['combined'].map(tokenizer)
    df['text_stemmed'] = df['tokens'].apply(
        lambda tokens: [stemmer.stem(token) for token in tokens]
    )
    df = df.drop(columns=['combined', 'tokens'])
    return df.set_index('id')
# Maximum number of related articles stored for each article.
RECOMMENDATIONS_PER_ARTICLE = 100


def _build_recommendations(top_n=RECOMMENDATIONS_PER_ARTICLE):
    """Compute the related-article list for every article kept by preprocess().

    Returns:
        dict mapping ``str(article id)`` to a list of up to *top_n* article
        ids (plain ints), most similar first, never containing the article
        itself. Empty when the corpus is too small to vectorise.
    """
    qs = BcContent.objects.all()
    df = qs.to_dataframe(fieldnames=['id', 'title', 'metakey', 'metadesc', 'introtext'])
    articles = preprocess(df)
    if len(articles) < 2:
        return {}

    vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
    documents = [' '.join(tokens) for tokens in articles['text_stemmed']]
    try:
        tfidf = vectorizer.fit_transform(documents)
    except ValueError:
        # scikit-learn raises ValueError when min_df leaves no vocabulary
        # (tiny corpus); there is nothing to recommend in that case.
        return {}

    similarities = linear_kernel(tfidf, tfidf)
    # Exclude each article from its own list explicitly rather than assuming
    # it is always ranked first.
    np.fill_diagonal(similarities, -np.inf)

    article_ids = articles.index.values
    keep = min(top_n, len(article_ids) - 1)
    recommendations = {}
    for article_id, scores in zip(article_ids, similarities):
        best = np.argsort(-scores, kind='stable')[:keep]
        # .tolist() yields plain Python ints, which any cache backend can
        # serialise.
        recommendations[str(article_id)] = article_ids[best].tolist()
    return recommendations


#@api_view(['GET'])
def detail(request, index):
    """Return the related articles for the article whose id is *index*.

    The list is read from Django's cache under the key ``str(index)``.
    Whether that cache is Redis depends on the ``CACHES`` setting, which is
    not visible in this file. On a miss, recommendations for all articles
    are recomputed, each stored under its own article id with no expiry,
    and the requested one is served.

    Args:
        request: The incoming HttpRequest (unused).
        index: Article id captured from the URL (int or str).

    Returns:
        HttpResponse containing an HTML table with one ``id`` column listing
        the related article ids, most similar first.

    Raises:
        Http404: If no recommendations exist for *index*, for example an
            unknown id or an article too short to pass preprocess().
    """
    # Django stringifies cache keys, so 5 and "5" address the same entry.
    cache_key = str(index)
    related = cache.get(cache_key)
    if related is None:
        # NOTE(review): this rebuild covers the whole table and runs inside
        # the request, and repeats on every request for an id that has no
        # entry. Consider moving it to a scheduled job or management command.
        recommendations = _build_recommendations()
        if recommendations:
            cache.set_many(recommendations, timeout=None)
        related = recommendations.get(cache_key)
        if related is None:
            raise Http404("No recommendations available for article %s" % index)

    table = pd.DataFrame({'id': related})
    return HttpResponse(table.to_html(index=False))