这是代码:
import requests
from bs4 import BeautifulSoup
import re
url = 'https://olps.cgtransport.org/OLTP/Tax/VehicleStatus.aspx'
reg_number = ['CG04DS7961']

# For each registration number: submit the search form, locate the result
# table by its id, and print the table's text content.
for i in reg_number:
    # The form takes the number in two fields: everything before the last
    # four characters, and the last four characters.
    reg1 = i[:-4]
    reg2 = i[-4:]
    # NOTE(review): the __VIEWSTATE*/__EVENTVALIDATION values are hard-coded;
    # presumably they were copied from an earlier load of the form page --
    # confirm the server still accepts them.
    payload = {
        '__VIEWSTATEFIELDCOUNT': '3',
        '__VIEWSTATE': '5Rx7Jezv02wRDXtT58JN6uHfoZf2BCTkLyrML9D/7VLW1gz5HhU8sjA2R/7tOPruA/C5yDKTBJBtetPEAxUAPV6iDKZ9TrCt+JTtG9yZisuK5rgWRPQQ9iCqmEFBIGT9K/pVMPJVr2BE+S/S/wtmyTiZRL5zAnbBXZ+Z6xTQcmMj1VSq8vlwmx+0jsZpOHSu46nUZhurNclrV469rApFvORQTcnI2iyS4moLgwH6muz/umtBfTw31jzVsP/3R0u',
        '__VIEWSTATE1': 'pFQlf7Tpik2lCjknuojNbZw9FEYHiUYYGzxOYiwOGcSqt8nHzrZpJW8fGseyQWsG2+r12CzsbOEsxEyBh73/YHGDyK52IHBN1JLYgV45SkLp2jJqaDSbeSE6/3Xfibfd8PXX0SzoyztUTYb30K0Y9X1zTBKl6yP08Ui4I9Wuks7+4qRBDhOLedsrjBCrlWZLgUTIUgiye9UeIfQ/Q8sTR9NOM1N91b38x4+C7kaXhqn/ayrrVxJJm1uXE1ua48z',
        '__VIEWSTATE2': 'SYo3Su3gkp4339oFMeN+Q+/7XFFqlTTs4RAHi08VV252mno3weI5t9jg6ns4mhcrRQLa0bOM2Q/y/qEgkGPXoRxh1QBC/DyfGlLyVc/umb8WOdA1DDypkEt+oRRmI48fX1L6/scDrVZKUQWtF2Pm87WPQcYLP19h5vHXqGIvTHOIdoLzjC',
        '__VIEWSTATEGENERATOR': '34956357',
        '__EVENTVALIDATION': 'ygss/i7NxWFitcgCI9h84GSJJl8UM4sb1apUvzZIv1T1PL/JHswnbZ01G31EtP5I3zrr3rZRL0Hb6aAnrgkmqg7B70FsbNrF9hZ9eFjIGJKw7YBq+G+6hHXE1hYZu3i23uu0Lhdkm+S2An6ptxA+dW5P7+o=',
        'ctl00$ContentPlaceHolder1$txtregPart1': reg1,
        'ctl00$ContentPlaceHolder1$txtregPart2': reg2,
        'ctl00$ContentPlaceHolder1$btnshow': 'Search',
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.post(url, data=payload, timeout=30)
    soup = BeautifulSoup(r.content, 'html5lib')
    table = soup.find('table', attrs={'id': 'ctl00_ContentPlaceHolder1_tbPermit'})
    if table is None:
        # find() returns None when the page has no such table; report it
        # instead of failing with AttributeError on table.text.
        print('No result table found for ' + i)
        continue
    data = table.text
    # Every "<label> :" line of the table text; collected here but not used
    # any further in this snippet.
    headers = re.findall(r'.+:', data)
    print(data)
Registration Number :
CG04DS7961
Registration Date :
20/09/2010
RTO Name :
RAIPUR
Tax Type :
LIFE TIME
Owner's Name :
PRATIK DEWANGAN
Father's Name :
.
Vehicle Class :
NON-TRANSPORT VEHICLE
Vehicle Sub Class :
MCYCLE MOTOR CYCLE
Vehicle Manufacturer :
TVS MOTORS LTD
Vehicle Model :
SCOOTYPEP+
Manufacturer Date :
9/2010
Seating Capacity :
2
UnLaden Weight :
95
Laden Weight :
0
Engine Number :
OG3FA2172150
Chassis Number :
MD626BG39A2F97895
Tax Paid Upto :
Tax Clearance Upto :
Insurance Upto :
Fitness Upto :
19/09/2025
所需的输出仅为CSV格式的值。我不需要标题。
我在Stack Overflow上搜到过一个解决方案,但它对我不适用,因为我无法使用Pandas模块。我想在不支持Pandas的Pythonista上运行这段代码。另外,其他帖子中的表格格式与这个网站的表格格式也不相同。
我只想得到如下形式的值:
"CG04DS7961","20/09/2010","RAIPUR","LIFE TIME","PRATIK DEWANGAN",".......
答案 0 :(得分:0)
在您的代码基础上,可以用BeautifulSoup找到特定元素并获取其文本,把这些文本追加到一个字符串列表中,然后将该列表写入CSV文件。下面是修改后的代码:
import requests
from bs4 import BeautifulSoup
import re
import csv #import csv
url = 'https://olps.cgtransport.org/OLTP/Tax/VehicleStatus.aspx'
reg_number = ['CG04DS7961']

# One list of field values per registration number; written to output.csv
# after all lookups so that every number ends up as its own CSV row.
rows = []

for reg in reg_number:
    # The form takes the number in two fields: everything before the last
    # four characters, and the last four characters.
    reg1 = reg[:-4]
    reg2 = reg[-4:]
    # NOTE(review): the __VIEWSTATE*/__EVENTVALIDATION values are hard-coded;
    # presumably they were copied from an earlier load of the form page --
    # confirm the server still accepts them.
    payload = {
        '__VIEWSTATEFIELDCOUNT': '3',
        '__VIEWSTATE': '5Rx7Jezv02wRDXtT58JN6uHfoZf2BCTkLyrML9D/7VLW1gz5HhU8sjA2R/7tOPruA/C5yDKTBJBtetPEAxUAPV6iDKZ9TrCt+JTtG9yZisuK5rgWRPQQ9iCqmEFBIGT9K/pVMPJVr2BE+S/S/wtmyTiZRL5zAnbBXZ+Z6xTQcmMj1VSq8vlwmx+0jsZpOHSu46nUZhurNclrV469rApFvORQTcnI2iyS4moLgwH6muz/umtBfTw31jzVsP/3R0u',
        '__VIEWSTATE1': 'pFQlf7Tpik2lCjknuojNbZw9FEYHiUYYGzxOYiwOGcSqt8nHzrZpJW8fGseyQWsG2+r12CzsbOEsxEyBh73/YHGDyK52IHBN1JLYgV45SkLp2jJqaDSbeSE6/3Xfibfd8PXX0SzoyztUTYb30K0Y9X1zTBKl6yP08Ui4I9Wuks7+4qRBDhOLedsrjBCrlWZLgUTIUgiye9UeIfQ/Q8sTR9NOM1N91b38x4+C7kaXhqn/ayrrVxJJm1uXE1ua48z',
        '__VIEWSTATE2': 'SYo3Su3gkp4339oFMeN+Q+/7XFFqlTTs4RAHi08VV252mno3weI5t9jg6ns4mhcrRQLa0bOM2Q/y/qEgkGPXoRxh1QBC/DyfGlLyVc/umb8WOdA1DDypkEt+oRRmI48fX1L6/scDrVZKUQWtF2Pm87WPQcYLP19h5vHXqGIvTHOIdoLzjC',
        '__VIEWSTATEGENERATOR': '34956357',
        '__EVENTVALIDATION': 'ygss/i7NxWFitcgCI9h84GSJJl8UM4sb1apUvzZIv1T1PL/JHswnbZ01G31EtP5I3zrr3rZRL0Hb6aAnrgkmqg7B70FsbNrF9hZ9eFjIGJKw7YBq+G+6hHXE1hYZu3i23uu0Lhdkm+S2An6ptxA+dW5P7+o=',
        'ctl00$ContentPlaceHolder1$txtregPart1': reg1,
        'ctl00$ContentPlaceHolder1$txtregPart2': reg2,
        'ctl00$ContentPlaceHolder1$btnshow': 'Search',
    }
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    r = requests.post(url, data=payload, timeout=30)
    soup = BeautifulSoup(r.content, 'lxml')
    table = soup.find('table', attrs={'id': 'ctl00_ContentPlaceHolder1_tbPermit'})
    if table is None:
        # find() returns None when the page has no such table; report it
        # instead of failing with AttributeError further down.
        print('No result table found for ' + reg)
        continue

    # Keep the <font> text of every element carrying align="left", skipping
    # elements without a <font> child and those whose text is empty.
    resultList = [
        cell.font.text
        for cell in table.find_all(attrs={"align": "left"})
        if cell.font is not None and cell.font.text != ""
    ]
    rows.append(resultList)
    print(resultList)  # print the resultList

# Write the results to output.csv. On Python 3 the csv module expects a
# text-mode file opened with newline='' (the Python 2 idiom was mode 'wb').
with open("output.csv", 'w', newline='') as resultFile:
    wr = csv.writer(resultFile, dialect='excel')
    wr.writerows(rows)
resultList的输出是:
['CG04DS7961', '20/09/2010', 'RAIPUR', 'LIFE TIME', 'PRATIK DEWANGAN', '.', 'NON-TRANSPORT VEHICLE', 'MCYCLE MOTOR CYCLE', 'TVS MOTORS LTD', 'SCOOTYPEP+', '9/2010', '2', '95', '0', 'OG3FA2172150', 'MD626BG39A2F97895', '19/09/2025']