我正在开发一个为程序员收集工作的Web应用程序。它使用django 2.2和beautifulsoup4。单击窗体中的按钮后,我尝试在屏幕上显示解析结果,该页面将重定向到解析结果页面(无)。解析器或views.py中可能存在错误,我无法弄清楚。 逻辑: 1. Django在主页上显示表单 2.用户按下表单中的按钮 3.解析器收集数据 4.解析结果显示在屏幕上
workua.py-刮板
import requests
from bs4 import BeautifulSoup
def clean_description(s):
return s.split('\n')[1]
def get_html(url):
r = requests.get(url)
return r.text
def get_data(html):
bs = BeautifulSoup(html, 'lxml')
job_list = bs.find('div', id='pjax-job-list').find_all('div', class_='card card-hover card-visited wordwrap job-link')
for item in job_list:
title = item.find('h2', class_='add-bottom-sm').text
company = item.find('b').text
d = item.find('p', class_='overflow').text
descr = clean_description(d)
url = 'https://www.work.ua' + item.find('h2', class_='add-bottom-sm').find('a').get('href')
data = {'title':title, 'company':company, 'descr':descr, 'url':url}
# print(data)
def main():
pattern = 'https://www.work.ua/ru/jobs-kyiv-python/?page={}'
for i in range(0, 3):
url = pattern.format(str(i))
get_data(get_html(url))
views.py
from django.shortcuts import render
from .workua import *
from .forms import PageForm
def index_page(request):
form = PageForm(request.GET)
return render(request, 'page/index_page_form.html', context={'form':form})
def workua_result(request):
result = main()
return render(request, 'page/workua_result.html', context={'result':result})
index_page_form.html
{% extends 'base.html' %}
{% block title %}
Main page
{% endblock %}
{% block content %}
<form action="{% url 'workua_result_url' %}" method="GET" class="mt-5 mb-
5 text-center">
{% csrf_token %}
{{ form }}
<button class="btn btn-primary mt-3" type="submit">Go</button>
</form>
{% endblock %}
urls.py
from django.urls import path
from .views import *
urlpatterns = [
path('', index_page, name='index_page_url'),
path('workua/', workua_result, name='workua_result_url'),
]
workua_result.html
{% extends 'base.html' %}
{% block title %}
Work.ua result
{% endblock %}
{% block content %}
{{ result }}
{% endblock %}
答案 0 :(得分:0)
在views.py中编写一个解析器,问题就解决了。 创建了一个空数据列表,并添加了我从beautifulsoup收集的数据。在渲染之前调用了main()函数,并在上下文中传递了数据列表。
views.py
def workua_result(request):
data = []
def get_html(url):
r = requests.get(url)
return r.text
def get_data(html):
bs = BeautifulSoup(html, 'lxml')
job_list = bs.find('div', id='pjax-job-list').find_all('div', class_='card card-hover card-visited wordwrap job-link')
for item in job_list:
try:
title = item.find('h2', class_='add-bottom-sm').text
company = item.find('b').text
descr = item.find('p', class_='overflow').text
url = 'https://www.work.ua' + item.find('h2', class_='add-bottom-sm').find('a').get('href')
data.append({'title': title, 'company': company, 'descr': descr, 'url': url})
except:
pass
def main():
pattern = 'https://www.work.ua/ru/jobs-kyiv-python/?page={}'
for i in range(0, 4):
url = pattern.format(str(i))
get_data(get_html(url))
main()
return render(request, 'page/workua_result.html', context={'data': data})