如何在Scrapy中使用XPath选择器选择整个内容

时间:2018-10-28 09:52:39

标签: python xpath scrapy

你好,我正在抓取一个网站,但由于网站的结构,我遇到了麻烦。

这是网站extends layout block content div.container for customer in customer_list li = customer.address_list 的一页

我想获得除“歌曲信息”部分之外的内容主体,如您所见,有很多span标签,我不明白如何一次获取整个页面。

这是我尝试过的,

/**
 * Fetch the appointments for one day and display them in the #calendar widget.
 *
 * POSTs { date: forappdate } to the /searchappointment endpoint, converts each
 * returned record into a fullCalendar event object and initialises the
 * calendar with those events.
 *
 * @param {string} forappdate - Date sent to the server as the `date` field.
 */
function searchappointmentssss(forappdate) {
    $.ajax({
        url: 'http://localhost:3000/searchappointment',
        type: 'post',
        data: { date: forappdate },
        dataType: 'json',
        success: function(data) {
            // Map every server record onto the event shape fullCalendar reads.
            var events = data.map(function(record) {
                // Keep only the part of the timestamp before the "T" separator.
                var day = record.date.split("T")[0];
                return {
                    title: record.patientname + "\n" + record.purposeofvisit,
                    description: record.purposeofvisit,
                    start: day + " " + record.time,
                    end: day + " " + record.endtime,
                    id: record.appointmentid,
                    visstatus: record.status,
                    gicid: record.gic
                };
            });
            console.log(events);

            $('#calendar').fullCalendar({
                defaultView: "agendaDay",
                defaultDate: "2018-10-26",
                events: events,
                eventTextColor: '#ffffff',
                header: {
                    left: 'prev,next',
                    center: 'title',
                    right: ''
                },
                viewRender: function(view, element) {
                    // Re-query the server for whichever day is now displayed
                    // and mirror that day into the date input.
                    var shownDay = moment($('#calendar').fullCalendar('getDate')).format("YYYY-MM-DD");
                    searchappointmentssss(shownDay);
                    $("#dateselectedforapp").val(shownDay + "T00:00");
                },
                eventClick: function(calEvent, jsEvent, view) {
                    // Open the modal and copy the clicked event into its fields.
                    $("#exampleModal2").modal('toggle');
                    $("#message-textm4").val(calEvent.id);
                    $("#message-textm444").val(calEvent.gicid);
                    $("#message-textm").val(calEvent.description);
                    $("#message-textm6").val(calEvent.visstatus);
                    $("#message-textm99").val(moment(calEvent.start).format("YYYY-MM-DD"));
                    $("#message-textm2").val(moment(calEvent.start).format("hh:mm"));
                    $("#message-textm3").val(moment(calEvent.end).format("hh:mm"));
                    // change the border color just for fun
                    $(this).css('border-color', 'red');
                }
            });
        },
        error: function(jqXHR, textStatus, errorThrown) {
            // The original handler alerted `data`, which does not exist in
            // this scope and raised a ReferenceError instead of reporting.
            alert('Could not load appointments: ' + textStatus +
                (errorThrown ? ' (' + errorThrown + ')' : ''));
        }
    });
}

这只让我得到了全部内容的一部分(缺少下半部分),我该如何获得全部内容?

1 个答案:

答案 0 :(得分:1)

通过歌曲信息,您是说这部分吗?

span

由于它位于第一个 span 中,因此您可以使用列表切片([1:])将其排除:

entire_body = " ".join(response.xpath('//*[@class="post-body entry-content"]/div[1]/span//text()').extract()[1:]) entire_body = "".join(entire_body)

您可以这样获得它

import sys
import os
import csv
import datetime
from PySide import QtGui, QtCore

class ViewWidget(QtGui.QWidget):
    """Cell widget holding two read-only line edits: today's date and dirPath.

    The two line edits live in a child container widget that is moved to the
    horizontal offset ``x`` inside this widget.
    """

    # Raw string on purpose: in a plain literal the "\r" of "\raghava" is
    # interpreted as a carriage return and silently corrupts the path.
    dirPath = r"C:\raghava\main project"

    def __init__(self, x, index, parent=None):
        """Build the widget.

        x      -- horizontal position (passed to ``move``) of the container.
        index  -- model index this widget belongs to; kept as a
                  QPersistentModelIndex in ``self.p_index``.
        parent -- optional parent widget.
        """
        super(ViewWidget, self).__init__(parent)
        self.p_index = QtCore.QPersistentModelIndex(index)

        # Container that carries the horizontal layout with both fields.
        self.content_button = QtGui.QWidget(self)
        lay = QtGui.QHBoxLayout(self.content_button)
        lay.setContentsMargins(0, 0, 0, 0)

        # Read-only field showing the current date as dd-mm-YYYY.
        self.label = QtGui.QLineEdit()
        self.label.setText(datetime.datetime.today().strftime('%d-%m-%Y'))
        self.label.setReadOnly(True)
        self.label.setAlignment(QtCore.Qt.AlignCenter)

        # Read-only field showing the base directory path.
        self.label1 = QtGui.QLineEdit()
        self.label1.setText(self.dirPath)
        self.label1.setReadOnly(True)
        self.label1.resize(250, 100)
        self.label1.setAlignment(QtCore.Qt.AlignRight)

        lay.addWidget(self.label)
        lay.addWidget(self.label1)
        self.content_button.move(x, 0)

class Example(QtGui.QMainWindow):
    """Main window listing the files found directly in ``ViewWidget.dirPath``.

    Every file becomes one row of a table view; clicking a row reads that file
    as CSV into a second model/table pair.
    """

    def __init__(self, parent=None):
        super(Example, self).__init__(parent)
        self.dirPath = ViewWidget.dirPath
        # Path of the most recently clicked file; set before the UI is built
        # so the attribute exists by the time any slot can run.
        self.file_data = ""
        self.open_file()

    def open_file(self):
        """Create the central table view and fill it with the directory's files."""
        self.setCentralWidget(QtGui.QWidget())
        layout = QtGui.QGridLayout(self.centralWidget())
        self.model = QtGui.QStandardItemModel(self)
        self.tableView = QtGui.QTableView()
        self.tableView.setModel(self.model)
        self.tableView.horizontalHeader().setStretchLastSection(True)
        self.tableView.clicked.connect(self.onClick)

        self.tableView.verticalHeader().hide()
        self.tableView.horizontalHeader().hide()
        self.appendRowItems(self.dirPath)
        layout.addWidget(self.tableView)
        self.resize(1300, 500)

    def appendRowItems(self, dir_path):
        """Append one row per file located directly inside ``dir_path``.

        Each item shows the file name, stores the full path as item data and
        gets a ViewWidget installed as its index widget.
        """
        # os.walk yields the top directory first and only that entry is
        # wanted, so stop after it instead of traversing the whole tree.
        for root, _dirs, files in os.walk(dir_path):
            for name in files:
                item = QtGui.QStandardItem(name)
                item.setData(os.path.join(root, name))
                self.model.appendRow(item)
                ix = self.model.indexFromItem(item)
                self.tableView.setIndexWidget(ix, ViewWidget(1270, ix))
            break

    @QtCore.Slot(QtCore.QModelIndex)
    def onClick(self, ix):
        """Load the clicked file as CSV into a new model and table view."""
        self.file = self.model.itemFromIndex(ix)
        # QStandardItem.setData() without a role stores under UserRole + 1,
        # which is where appendRowItems put the full path.
        self.file_data = ix.data(QtCore.Qt.UserRole+1)
        self.scrollArea = QtGui.QScrollArea()
        self.scrollArea.setBackgroundRole(QtGui.QPalette.Light)
        self.scrollArea.setWidgetResizable(True)
        self.w = QtGui.QWidget()
        self.lay = QtGui.QVBoxLayout()
        self.model1 = QtGui.QStandardItemModel(self)
        # NOTE(review): this rebinds self.tableView to the CSV table, and the
        # scroll area is never shown or added to a layout here - confirm
        # whether that is intended.
        self.tableView = QtGui.QTableView(self)
        self.tableView.setModel(self.model1)
        self.tableView.horizontalHeader().setStretchLastSection(True)
        with open(str(self.file_data), "r") as fileInput:
            for row in csv.reader(fileInput):
                items = [
                    QtGui.QStandardItem(field)
                    for field in row
                ]
                # Skip blank CSV lines, which produce an empty row.
                if items:
                    self.model1.appendRow(items)
        self.lay.addWidget(self.tableView)
        self.w.setLayout(self.lay)
        self.scrollArea.setWidget(self.w)

    def mousePressEvent(self, evnt):
        """Print the mouse event and its global (screen) position."""
        # print(x) with a single argument behaves the same on Python 2 and 3.
        print(evnt)
        # The original referenced the undefined name `event`; Qt4's
        # QMouseEvent exposes the screen position as globalPos().
        print(evnt.globalPos())

if __name__ == '__main__':
    # Start the Qt application, show the main window and hand control to the
    # event loop; its return code becomes the process exit status.
    # (The original also built a ViewWidget from an invalid index that was
    # never shown or used; it has been dropped.)
    app = QtGui.QApplication(sys.argv)
    eg = Example()
    eg.show()
    sys.exit(app.exec_())

您始终可以这样做来检查选择器/XPath:

enter image description here