我在Python中遇到问题。无法解析Bing搜索结果。我不知道代码有什么问题。什么都没出来。该程序只是崩溃( 我还尝试与其他库一起使用-但也没有与它们一起使用。
我将非常感谢您为解决该问题提供的帮助!
以下是界面本身发生的情况: imgur
代码:
import re, sys
import urllib.request
from urllib import request
from urllib.parse import quote
import html2text
from poisk import *
from PyQt5 import QtCore, QtGui, QtWidgets
class MyWin(QtWidgets.QMainWindow):
def __init__(self, parent=None):
QtWidgets.QWidget.__init__(self, parent)
self.ui = Ui_MainWindow()
self.ui.setupUi(self)
self.ui.pushButton.clicked.connect(self.mySearch)
self.ui.pushButton_2.clicked.connect(self.nextSearch)
self.ui.pushButton_3.clicked.connect(self.previosSearch)
def previosSearch(self):
if((self.flagok-1) >= 0):
self.flagok=self.flagok - 1
self.ui.textEdit.setText(self.texts[self.flagok])
def nextSearch(self):
if((self.flagok+1) < self.maxflagok):
self.flagok=self.flagok + 1
self.ui.textEdit.setText(self.texts[self.flagok])
def mySearch(self):
self.ui.textEdit.setText("")
z = self.ui.lineEdit.text()
s = 'https://www.bing.com/search?q='+quote(z)
doc = urllib.request.urlopen(s).read().decode('cp1251',errors='ignore')
o=re.compile('"url":"(.*?)"')
l=o.findall(doc)
sp=[]
for x in l:
if((x.rfind('youtube')==-1) and(x.rfind('yandex')==-1) and(x.rfind('mail.ru')==-1) and(x.rfind('.jpg')==-1) and(x.rfind('.png')==-1) and(x.rfind('.gif')==-1)):
sp.append(x)
sp = dict(zip(sp, sp)).values()
sp1=[]
for s in sp:
sp1.append(s)
kolotvetov=int(self.ui.lineEdit_2.text())
if(kolotvetov<2):
kolotvetov=2
sp1=sp1[:kolotvetov]
sp=sp1
self.progresscount=int(100/len(sp))
self.progresscount2=int(100/len(sp))
self.texts=[]
self.ui.progressBar.setValue(0)
for s in sp:
try:
self.ui.progressBar.setValue(self.progresscount2)
self.progresscount2 = self.progresscount2 + self.progresscount
# Now we will take turns receiving the texts of each page from the search results in the doc variable
doc = urllib.request.urlopen(s).read().decode('utf-8',errors='ignore')
h = html2text.HTML2Text()
h.ignore_links = True
h.body_width = False
h.ignore_images = True
doc = h.handle(doc)
summa=""
# Divide the text of the page into paragraphs
ss=doc.split("\n")
for xx in ss:
xx=xx.strip()
# We filter paragraphs so that they do not start with incorrect characters and end with the correct ones - a point or !?;
if((len(xx)>50) and (xx.startswith('&')==False) and (xx.startswith('>')==False) and (xx.startswith('*')==False) and (xx.startswith('\\')==False) and (xx.startswith('<')==False) and (xx.startswith('(')==False) and (xx.startswith('#')==False) and (xx.endswith('.') or xx.endswith('?') or xx.endswith('!') or xx.endswith(';'))):
summa = summa + xx + "\n \n"
if(len(summa)>500):
self.texts.append(summa)
except Exception:
print('Something went wrong')
self.flagok=0
self.maxflagok=len(self.texts)
self.ui.textEdit.setText(self.texts[0])
self.ui.progressBar.setValue(100)
if __name__=="__main__":
app = QtWidgets.QApplication(sys.argv)
myapp = MyWin()
myapp.show()
sys.exit(app.exec_())
没有Qt:
import re, sys
import urllib.request
from urllib import request
from urllib.parse import quote
import html2text
# We will enter a search query and get a page with search results from Bing in the doc variable
print("\n---------\n")
z=input("Enter your question: ")
print("\n---------\n")
s = 'https://www.bing.com/search?q='+quote(z)
doc = urllib.request.urlopen(s).read().decode('cp1251',errors='ignore')
# The sp list will receive all links to search results from this page.
o=re.compile('"url":"(.*?)"')
l=o.findall(doc)
sp=[]
for x in l:
if((x.rfind('youtube')==-1) and(x.rfind('yandex')==-1) and(x.rfind('mail.ru')==-1) and(x.rfind('.jpg')==-1) and(x.rfind('.png')==-1) and(x.rfind('.gif')==-1)):
sp.append(x)
sp = dict(zip(sp, sp)).values()
for s in sp:
try:
# Now we will take turns receiving the texts of each page from the search results in the doc variable
doc = urllib.request.urlopen(s).read().decode('utf-8',errors='ignore')
h = html2text.HTML2Text()
h.ignore_links = True
h.body_width = False
h.ignore_images = True
doc = h.handle(doc)
summa=""
# Divide the text of the page into paragraphs
ss=doc.split("\n")
for xx in ss:
xx=xx.strip()
# We filter paragraphs so that they do not start with incorrect characters and end with the correct ones - a point or !?;
if((len(xx)>50) and (xx.startswith('&')==False) and (xx.startswith('>')==False) and (xx.startswith('*')==False) and (xx.startswith('\\')==False) and (xx.startswith('<')==False) and (xx.startswith('(')==False) and (xx.startswith('#')==False) and (xx.endswith('.') or xx.endswith('?') or xx.endswith('!') or xx.endswith(';'))):
summa = summa + xx + "\n \n"
if(len(summa)&rt > 500):
print(summa+"\n----------------------------------------\n")
except Exception:
print('Something went wrong')