BeautifulSoup Python 无法从网站抓取数据

时间:2021-06-06 15:03:35

标签: python web-scraping beautifulsoup

我需要从一个网站抓取表格数据。我使用 Python 和 BeautifulSoup 运行了下面的代码,但无法从中加载数据。

  [1]: https://i.stack.imgur.com/QKhGg.png
----------------------------------------------------------------------------------------
views.py

from django.shortcuts import render,get_object_or_404,redirect
from django.utils import timezone
from django.contrib.auth.decorators import login_required #function based
from django.contrib.auth.mixins import LoginRequiredMixin #Class based
from blog.models import Post,Comment
from blog.forms import PostForm,CommentForm
from django.urls import reverse_lazy
from django.views.generic import (TemplateView,ListView,DetailView,
                                  CreateView,UpdateView,DeleteView)

# Create your views here.
class AboutView(TemplateView):
    """Serve the static "about" page by rendering ``about.html``."""

    template_name = "about.html"

class PostListView(ListView):
    """List the posts that have already been published, newest first."""

    model = Post

    def get_queryset(self):
        """Return posts whose ``published_date`` is not later than now.

        The ``__lte`` lookup means "less than or equal to"; the leading
        ``-`` in the ordering key requests descending order.
        """
        now = timezone.now()
        published = Post.objects.filter(published_date__lte=now)
        return published.order_by("-published_date")

class PostDetailView(DetailView):
    """Show a single ``Post``, using the generic detail view defaults."""

    model = Post

class CreatePostView(LoginRequiredMixin,CreateView):
    """Create a new ``Post`` through ``PostForm``; requires a logged-in user."""

    # Resolve the login page by route name. The project URLconf registers the
    # login view as 'login' under 'accounts/login/'; the previously hard-coded
    # '/login/' did not match any route.
    login_url = reverse_lazy('login')
    # redirect_field_name is intentionally left at the mixin default: it is
    # the name of the query-string parameter carrying the return URL (which
    # the login view reads back), not a template path.
    form_class = PostForm
    model = Post

class UpdatePostView(LoginRequiredMixin,UpdateView):
    """Edit an existing ``Post`` through ``PostForm``; requires a logged-in user."""

    # Resolve the login page by route name. The project URLconf registers the
    # login view as 'login' under 'accounts/login/'; the previously hard-coded
    # '/login/' did not match any route.
    login_url = reverse_lazy('login')
    # redirect_field_name is intentionally left at the mixin default: it is
    # the name of the query-string parameter carrying the return URL (which
    # the login view reads back), not a template path.
    form_class = PostForm
    model = Post

class DeletePostView(LoginRequiredMixin,DeleteView):
    """Delete a ``Post`` and return to the post list; requires a logged-in user."""

    # Resolve the login page by route name. The project URLconf registers the
    # login view as 'login' under 'accounts/login/'; the previously hard-coded
    # '/login/' did not match any route.
    login_url = reverse_lazy('login')
    # A generic DeleteView needs a model (or queryset) to look the object up;
    # without it the view cannot be configured.
    model = Post
    # reverse_lazy defers URL resolution until the URLconf has been loaded.
    success_url = reverse_lazy('post_list')

class DraftListView(LoginRequiredMixin,ListView):
    """List unpublished (draft) posts, oldest first; requires a logged-in user."""

    # Resolve the login page by route name. The project URLconf registers the
    # login view as 'login' under 'accounts/login/'; the previously hard-coded
    # '/login/' did not match any route.
    login_url = reverse_lazy('login')
    # redirect_field_name is intentionally left at the mixin default: it is
    # the name of the query-string parameter carrying the return URL (which
    # the login view reads back), not a template path.
    model = Post

    def get_queryset(self):
        """Return posts with no ``published_date``, ordered by ``created_date``."""
        return Post.objects.filter(published_date__isnull=True).order_by('created_date')

@login_required
def post_publish(request,pk):
    """Publish the post identified by ``pk`` and redirect to its detail page.

    Responds with 404 when no such post exists.
    """
    post = get_object_or_404(Post, pk=pk)
    # publish is a method; it must be called (a bare attribute access is a
    # no-op and would leave the post unpublished).
    post.publish()
    return redirect('post_detail', pk=pk)

@login_required
def add_comment_to_post(request,pk):
    """Show the comment form for a post and save the comment on a valid POST.

    Responds with 404 when the post does not exist. On a valid submission the
    comment is attached to the post, saved, and the user is redirected to the
    post detail page. Otherwise the form is rendered: empty for non-POST
    requests, or bound (with its validation errors) for an invalid POST.
    """
    post = get_object_or_404(Post, pk=pk)
    if request.method == 'POST':
        form = CommentForm(request.POST)
        if form.is_valid():
            # Delay the DB write until the comment has been linked to the post.
            comment = form.save(commit=False)
            comment.post = post
            comment.save()
            return redirect('post_detail', pk=post.pk)
    else:
        form = CommentForm()
    # Reached for GET requests and for invalid POSTs alike, so the view
    # always returns a response instead of None on validation failure.
    return render(request, 'blog/comment_form.html', {'form':form})

@login_required
def comment_approve(request, pk):
    """Approve the comment identified by ``pk``, then show its parent post.

    Responds with 404 when no such comment exists.
    """
    target = get_object_or_404(Comment, pk=pk)
    target.approve()
    parent_pk = target.post.pk
    return redirect('post_detail', pk=parent_pk)

@login_required
def comment_remove(request, pk):
    """Delete the comment identified by ``pk``, then show its parent post.

    Responds with 404 when no such comment exists.
    """
    doomed = get_object_or_404(Comment, pk=pk)
    # Capture the parent post's key before the comment is deleted.
    parent_pk = doomed.post.pk
    doomed.delete()
    return redirect('post_detail', pk=parent_pk)
----------------------------------------------------------------------------------------
models.py
from django.db import models
from django.utils import timezone
from django.urls import reverse

# Create your models here.
class Post(models.Model):
    """A blog post; it stays a draft until ``publish()`` sets ``published_date``."""

    author = models.ForeignKey('auth.User',on_delete=models.CASCADE)
    title = models.CharField(max_length=200)
    text = models.TextField()
    # Pass the callable itself so the timestamp is computed for each new row;
    # calling timezone.now() here would freeze one value at import time.
    created_date = models.DateTimeField(default=timezone.now)
    # Empty until the post is published.
    published_date = models.DateTimeField(blank=True,null=True)

    def publish(self):
        """Stamp the post with the current time as its publish date and save it."""
        self.published_date = timezone.now()
        self.save()

    def approve_comments(self):
        """Return the related comments whose ``approved_comment`` flag is set."""
        # The Comment field is named approved_comment (singular).
        return self.comments.filter(approved_comment=True)

    def get_absolute_url(self):
        """Return the URL of this post's detail page."""
        return reverse("post_detail",kwargs={'pk':self.pk})

    def __str__(self):
        return self.title

class Comment(models.Model):
    """A comment attached to a post; hidden from approved lists until approved."""

    # NOTE(review): this foreign key to Post is named `author`, while the
    # views in this project read and assign `comment.post` — confirm the
    # intended field name (renaming requires a migration and a form check).
    author = models.ForeignKey('blog.post',related_name='comments',on_delete=models.CASCADE)
    title = models.CharField(max_length=200)
    text = models.TextField()
    # Pass the callable itself so the timestamp is computed for each new row;
    # calling timezone.now() here would freeze one value at import time.
    created_date = models.DateTimeField(default=timezone.now)
    approved_comment = models.BooleanField(default=False)

    def approve(self):
        """Mark the comment as approved and save it."""
        self.approved_comment = True
        self.save()

    def get_absolute_url(self):
        """Return the URL of the post list page."""
        return reverse("post_list")

    def __str__(self):
        return self.text
----------------------------------------------------------------------------------------
urls.py 
from django.contrib import admin
from django.urls import path,include
from django.contrib.auth import views


# Project-level routes: admin site, the blog app at the site root, and the
# built-in authentication views for logging in and out.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('',include('blog.urls')),
    path('accounts/login/',views.LoginView.as_view(),name = 'login'),
    # next_page is a LogoutView attribute, so it is configured through
    # as_view(); extra kwargs given to path() only reach the view call as
    # URL keyword arguments and do not set the redirect target.
    path('accounts/logout/', views.LogoutView.as_view(next_page='/'), name = 'logout'),
]
----------------------------------------------------------------------------------------
urls.py(blog 应用)
from django.urls import path
from blog import views


# Routes of the blog app, grouped by what they act on.
urlpatterns = [
    # Listing pages
    path('', views.PostListView.as_view(), name='post_list'),
    path('about/', views.AboutView.as_view(), name='about'),
    path('draft/', views.DraftListView.as_view(), name='post_draft_list'),

    # Single-post pages and actions
    path('post/new/', views.CreatePostView.as_view(), name='post_new'),
    path('post/<int:pk>', views.PostDetailView.as_view(), name='post_detail'),
    path('post/<int:pk>/edit/', views.UpdatePostView.as_view(), name='post_edit'),
    path('post/<int:pk>/remove/', views.DeletePostView.as_view(), name='post_remove'),
    path('post/<int:pk>/publish', views.post_publish, name='post_publish'),
    path('post/<int:pk>/comment', views.add_comment_to_post, name='add_comment_to_post'),

    # Comment moderation
    path('comment/<int:pk>/approve', views.comment_approve, name='comment_approve'),
    path('comment/<int:pk>/remove', views.comment_remove, name='comment_remove'),
]

1 个答案:

答案 0 :(得分:2)

您可以参考下面的示例,将数据加载到 pandas DataFrame 中:

import json
import requests
import pandas as pd


# Fetch the floorsheet page by page from the site's JSON API (instead of
# parsing HTML) and print each page as a pandas DataFrame.
url = "https://newweb.nepalstock.com/api/nots/nepse-data/floorsheet"
params = {"page": "0", "size": "10", "sort": "contractId,desc"}

headers = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
}

# NOTE(review): the meaning of this id is not documented here — presumably a
# value the endpoint expects in the request body; verify before changing it.
payload = {"id": 323}

for page in range(0, 4):  # <-- increase number of pages here
    params["page"] = page

    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() surfaces HTTP errors directly instead
    # of failing later while decoding or indexing the response body.
    response = requests.post(
        url, params=params, json=payload, headers=headers, timeout=30
    )
    response.raise_for_status()
    data = response.json()
    # Each page's rows are read from data["floorsheets"]["content"].
    df = pd.json_normalize(data["floorsheets"]["content"])
    print(df)

打印:

     id        contractId contractType stockSymbol buyerMemberId sellerMemberId  contractQuantity  contractRate  contractAmount businessDate  tradeBookId  stockId                          buyerBrokerName                       sellerBrokerName                   tradeTime                      securityName
0  None  2021060604039986         None         SGI            37             40               100         789.0         78900.0   2021-06-06     31492010     2908        Swarna Laxmi Securities Pvt. Ltd.            Creative Securities Pvt Ltd   2021-06-06T14:59:59.86245  Sanima General Insurance Limited
1  None  2021060604039985         None         EIC            37             52               115         758.0         87170.0   2021-06-06     31492008      181        Swarna Laxmi Securities Pvt. Ltd.            Sundhara Securities Limited  2021-06-06T14:59:59.862008        Everest Insurance Co. Ltd.
2  None  2021060604039984         None        UPCL             4             37               500         278.0        139000.0   2021-06-06     31492006     2810    Opal Securities Investment (PVT) Ltd.      Swarna Laxmi Securities Pvt. Ltd.  2021-06-06T14:59:59.861311       UNIVERSAL POWER COMPANY LTD
3  None  2021060604039983         None        UPCL            43             37               200         278.0         55600.0   2021-06-06     31492001     2810              South Asian Bulls Pvt. Ltd.      Swarna Laxmi Securities Pvt. Ltd.  2021-06-06T14:59:59.861051       UNIVERSAL POWER COMPANY LTD
4  None  2021060604039982         None        UPCL            52             37               100         278.0         27800.0   2021-06-06     31491998     2810              Sundhara Securities Limited      Swarna Laxmi Securities Pvt. Ltd.  2021-06-06T14:59:59.860753       UNIVERSAL POWER COMPANY LTD
5  None  2021060604039981         None        UPCL            26             37               164         278.0         45592.0   2021-06-06     31491994     2810            Asian Securities Private Ltd.      Swarna Laxmi Securities Pvt. Ltd.  2021-06-06T14:59:59.860336       UNIVERSAL POWER COMPANY LTD
6  None  2021060601066427         None         KBL            36             49                35         375.0         13125.0   2021-06-06     31491990      142                  Secured Securities Ltd.            Online Securities Pvt. Ltd.  2021-06-06T14:59:59.813562               Kumari Bank Limited
7  None  2021060601066426         None         KBL            36             17                65         375.0         24375.0   2021-06-06     31491983      142                  Secured Securities Ltd.         ABC Securities Private Limited  2021-06-06T14:59:59.810103               Kumari Bank Limited
8  None  2021060604039980         None         LEC             5             53               140         355.0         49700.0   2021-06-06     31491979     2903  Market Securities & Exchange (PVT) Ltd.  Investment Management Nepal Pvt. Ltd.  2021-06-06T14:59:59.793939    Liberty Energy Company Limited
9  None  2021060604039979         None         LEC            45             53               860         355.0        305300.0   2021-06-06     31491978     2903    Imperial Securities Company Pvt. Ltd.  Investment Management Nepal Pvt. Ltd.  2021-06-06T14:59:59.793507    Liberty Energy Company Limited
     id        contractId contractType stockSymbol buyerMemberId sellerMemberId  contractQuantity  contractRate  contractAmount businessDate  tradeBookId  stockId                            buyerBrokerName                                   sellerBrokerName                   tradeTime                                  securityName
0  None  2021060604039978         None         NLG            38              4               500        1289.0        644500.0   2021-06-06     31491972      559  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.              Opal Securities Investment (PVT) Ltd.  2021-06-06T14:59:59.270336                    NLG Insurance Company Ltd.
1  None  2021060604039977         None         HGI             4             53                10         802.0          8020.0   2021-06-06     31491969      179      Opal Securities Investment (PVT) Ltd.              Investment Management Nepal Pvt. Ltd.  2021-06-06T14:59:59.266544           Himalayan General Insurance Co. Ltd
2  None  2021060604039976         None         HGI             4             58                50         802.0         40100.0   2021-06-06     31491964      179      Opal Securities Investment (PVT) Ltd.                          Naasa Securities Co. Ltd.  2021-06-06T14:59:59.266101           Himalayan General Insurance Co. Ltd
3  None  2021060603064119         None       GMFIL            38             19                59         412.0         24308.0   2021-06-06     31491961      263  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.  Nepal Investment And Securities Trading Privat...  2021-06-06T14:59:59.052083  Guheshowori Merchant Bank & Finance Co. Ltd.
4  None  2021060603064118         None       GMFIL            38              4               250         412.0        103000.0   2021-06-06     31491956      263  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.              Opal Securities Investment (PVT) Ltd.   2021-06-06T14:59:59.05199  Guheshowori Merchant Bank & Finance Co. Ltd.
5  None  2021060603064117         None       GMFIL            38             45               150         412.0         61800.0   2021-06-06     31491951      263  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.              Imperial Securities Company Pvt. Ltd.  2021-06-06T14:59:59.051891  Guheshowori Merchant Bank & Finance Co. Ltd.
6  None  2021060603064116         None       GMFIL            38             38               411         411.0        168921.0   2021-06-06     31491947      263  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.          Dipshikha Dhitopatra Karobar Co. Pvt Ltd.  2021-06-06T14:59:59.051779  Guheshowori Merchant Bank & Finance Co. Ltd.
7  None  2021060603064115         None       GMFIL            38             45                78         411.0         32058.0   2021-06-06     31491943      263  Dipshikha Dhitopatra Karobar Co. Pvt Ltd.              Imperial Securities Company Pvt. Ltd.  2021-06-06T14:59:59.051665  Guheshowori Merchant Bank & Finance Co. Ltd.

...and so on.