I have a Python server that parses an HTML page, and the script works fine on its own. I also have an Android app that will call this server, passing a URL as a parameter.
I want the server to take the URL posted by the Android app and parse the data from that HTML page.
My question is: which method should I use, GET or POST?
I have gone through the tutorials, and I think it should be the POST method.
Below is the script/server I wrote. Please suggest what edits I should make.
import cherrypy
import ConfigParser
import json
import mimetypes
import os
from jinja2 import Environment, FileSystemLoader
from bs4 import BeautifulSoup
import requests
import urlparse
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
import SocketServer
########################################################################
details_array = []
small_details_array = []
price_cell_array = []
lst = []
URL_path = ""  # module-level placeholder; each handler below assigns its own URL_path
class S(BaseHTTPRequestHandler):
    def _set_headers(self):
        # basic 200 response headers used by both handlers below
        self.send_response(200)
        self.send_header('Content-type', 'text/html')
        self.end_headers()

    def do_GET(self):
        self._set_headers()
        URL_path = urlparse.urlparse(self.path)
        request_id = URL_path.path

    def do_POST(self):
        self._set_headers()
        URL_path = urlparse.urlparse(self.path)
        request_id = URL_path.path
        r = requests.get(URL_path.geturl())  # the URL_path holds the URL to fetch
        data = r.text
        soup = BeautifulSoup(data, "html.parser")
        table = soup.find('table', {'class': 'table'})
        s = ""
        targetFile = open("plist", "w")

        # collect the detail text, the small-print blocks and the price cells
        detailtext = table.findAll('div', {'class': 'detailtext'})
        for det in detailtext:
            details_array.append(det.text)
        smalldetails = table.findAll('div', {'style': 'padding-top:5px'})
        for smallDet in smalldetails:
            small_details_array.append(smallDet.text)
        price_cells = table.findAll('td', {'class': 'pricecell'})
        for price_cell in price_cells:
            price_cell_array.append(price_cell.text)

        # combine the three lists into one dictionary per entry
        for i in range(len(details_array)):
            d_arr = {}
            d_arr['detail'] = details_array[i]
            temp = small_details_array[i].split('\n')
            d_arr['talktime'] = temp[1]
            d_arr['keyword'] = temp[3]
            tempnew = price_cell_array[i].split('\n')
            d_arr['price'] = tempnew[1]
            d_arr['validity'] = tempnew[3]
            # global list
            lst.append(d_arr)
        t_arr = {}
        t_arr['events'] = lst
        print json.dumps(t_arr)

        # write the result in the INI format that the CherryPy part reads back
        targetFile.write("[TopUpList]" + "\n" + "events=")
        targetFile.write(json.dumps(t_arr))
        targetFile.write('\n[culturalEvents]\nevents={"events": [{"venue": "bangalore", "name": "Culttest"}]}')
        targetFile.close()
#########################################################################
class Server():
    @cherrypy.expose
    def index(self):
        return "Seems Like You're Lost :D"

    @cherrypy.expose
    def eventsList(self, choice):
        message = "Success, Event List Obtained"
        status_code = 0
        events = []
        try:
            if choice.title() == "Cultural":
                events = cultural_event_list['events']
            elif choice.title() == "Prodlisting":
                events = lists['events']
            else:
                status_code = -1
                message = "Failed, No Such Event Type Enlisted"
        except:
            status_code = -1
            message = "Failed, Server Error! Error Occurred while retrieving Event List"
        return json.dumps({'status_code': status_code, 'message': message, 'events': events})

    @cherrypy.expose
    def eventsStatus(self, choice):
        message = "Success, Event List Obtained"
        status_code = 0
        events = []
        try:
            if choice.title() == "Cultural":
                events = cultural_event_list['events']
            elif choice.title() == "Prodlisting":
                events = lists['events']
            else:
                status_code = -1
                message = "Failed, No Such Event Type Enlisted"
        except:
            status_code = -1
            message = "Failed, Server Error! Error Occurred while retrieving Event List"
        return json.dumps({'status_code': status_code, 'message': message, 'hash': json.dumps(events).__hash__()})

if __name__ == '__main__':
    ''' Setting up the Server with Specified Configuration'''
    '''
    config = ConfigParser.RawConfigParser()
    config.read('server.conf')
    cherrypy.server.socket_host = config.get('server','host')
    cherrypy.server.socket_port = int(config.get('server','port'))
    cherrypy.server.socket_host = '127.0.0.1'
    cherrypy.server.socket_port = 5000
    '''
    list = ConfigParser.RawConfigParser()
    cherrypy.config.update({'server.socket_host': '0.0.0.0'})
    cherrypy.config.update({'server.socket_port': int(os.environ.get('PORT', '5000'))})
    list.read('plist')  # the file from where it reads
    lists = json.loads(list.get('TopUpList', 'events'))
    cultural_event_list = json.loads(list.get('culturalEvents', 'events'))
    cherrypy.quickstart(Server())
P.S. I think the Android side needs to send a POST request to this Python server; please correct me if I am wrong.
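If POST is indeed the way to go, here is a rough sketch of how I imagine do_POST reading the URL out of the request body instead of out of the request path; the url form-field name and the content-length handling are my own assumptions, not something taken from a tutorial:

import urlparse
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer

class URLReceiver(BaseHTTPRequestHandler):
    def do_POST(self):
        # the body arrives as form data, e.g. "url=http%3A%2F%2Fexample.com%2Fplans"
        length = int(self.headers.getheader('content-length', 0))
        fields = urlparse.parse_qs(self.rfile.read(length))
        page_url = fields.get('url', [''])[0]  # 'url' is an assumed field name
        self.send_response(200)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write('received ' + page_url)

if __name__ == '__main__':
    HTTPServer(('0.0.0.0', 5000), URLReceiver).serve_forever()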
Answer 0 (score: 0)
GET requests data from a specified resource; POST submits data to be processed to a specified resource.
So if you want to submit data, use POST.
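For example, here is a minimal sketch of a CherryPy handler that accepts the URL as POST data and parses the page; the method name parse and the parameter name url are only placeholders, not anything your app already uses:

import json
import cherrypy
import requests
from bs4 import BeautifulSoup

class Parser(object):
    @cherrypy.expose
    def parse(self, url=None):
        # CherryPy maps POST form fields to keyword arguments,
        # so the app can POST url=<page to crawl> to /parse
        if cherrypy.request.method != 'POST' or not url:
            return json.dumps({'status_code': -1, 'message': 'POST a url field'})
        soup = BeautifulSoup(requests.get(url).text, 'html.parser')
        title = soup.title.string if soup.title else ''
        return json.dumps({'status_code': 0, 'title': title})

if __name__ == '__main__':
    cherrypy.quickstart(Parser())

The Android side then only has to send an ordinary HTTP POST with url as a form field.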
Answer 1 (score: 0)
Given that you only want to fetch some information and are not updating anything on the server side, a GET request is enough and, in theory, more idiomatic.
So you would make the request with a URL like www.yourdomain.com/?q=domaintocrawl.com and get the parsed data back.
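A minimal sketch of that idea with CherryPy; the q parameter name comes from the URL above, while the scheme handling and the shape of the response are assumptions:

import json
import cherrypy
import requests
from bs4 import BeautifulSoup

class Crawler(object):
    @cherrypy.expose
    def index(self, q=None):
        # called as http://www.yourdomain.com/?q=domaintocrawl.com
        if not q:
            return json.dumps({'status_code': -1, 'message': 'missing q parameter'})
        if not q.startswith('http'):
            q = 'http://' + q  # the answer passes a bare domain, so add a scheme
        soup = BeautifulSoup(requests.get(q).text, 'html.parser')
        cells = [td.get_text(strip=True) for td in soup.find_all('td')]
        return json.dumps({'status_code': 0, 'cells': cells})

if __name__ == '__main__':
    cherrypy.quickstart(Crawler())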