我是Python Scripting的新手。我一直试图想出一些可以帮助我的代码,但我没有成功。问题是当我选择日期和时间然后点击下载按钮时,我必须从此链接Link 1下载文件,网址变为http://www.pmd.gov.pk/cp/displaydata.asp并显示所需的文件。我想编写一个python scrpit,它可以迭代日期和时间,并可以将结果网页中的数据下载到文本文件中。如果有人能帮助我,我会非常感激。
这是我到目前为止所做的,但我无法继续前进。任何帮助? 此刻它给了我这个错误 “IOError:[Errno 22]无效模式('w')或文件名:'* 5/9 / 2016 * 0000.txt'”
from mechanize import Browser
br = Browser()
br.set_handle_robots(False)
br.open("http://www.pmd.gov.pk/cp/display.asp")
br.select_form(nr=0)
controlDate = br.form.find_control("dat")
controlTime = br.form.find_control("Tim")
for Date in controlDate.items:
for Time in controlTime.items:
#print "Saving " str(Date)+str(Time)+".txt"
synoptic = (br.submit()).read()
textFile = open(str(Date)+str(Time)+".txt", 'w')
textFile.write(synoptic)
textFile.close()
print "***FINISHED***"
我已经对代码进行了一些改进,但仍然无法正常工作。它一次又一次地下载相同的文件。有人可以帮忙吗?
import datetime
#adil ="5-9-2016"
#dt = datetime.datetime.strptime(adil, '%m-%d-%Y')
#print '{0}{1}{2:02}'.format(dt.year, dt.month, dt.day % 100)
from mechanize import Browser
br = Browser()
br.set_handle_robots(False)
br.open("http://www.pmd.gov.pk/cp/display.asp")
br.select_form(nr=0)
form = br.form #
controlDate = br.form.find_control("dat")
controlTime = br.form.find_control("Tim")
for Date in controlDate.items:
dtt = str(Date)
try:
dt = datetime.datetime.strptime(dtt, '%m/%d/%Y')
except:
pass
try:
dt = datetime.datetime.strptime(dtt, '*%m/%d/%Y')
except:
pass
#if Date.name == str(dt.strftime('%m'))+'/'+str(dt.strftime('%d'))+'/'+str(dt.strftime('%Y')):
if Date.name == str(Date):
Date.selected = True
for Time in controlTime.items:
tt = str(Time)
try:
ttt = datetime.datetime.strptime(tt, '%H%M')
except:
pass
try:
ttt = datetime.datetime.strptime(tt, '*%H%M')
except:
pass
#if Time.name == str(Time):
#Time.selected = Time.name
#form['Tim'] = str(Time)
if Time.name == str(ttt.strftime('%H'))+str(ttt.strftime('%M')):
Time.selected = True
synoptic = (br.submit()).read()
#timeName= "%s%s" %(ttt.hour, ttt.minute)
textFile = open(str(dt.strftime('%Y'))+str(dt.strftime('%m'))+str(dt.strftime('%d'))+str(ttt.strftime('%H'))+str(ttt.strftime('%M'))+"syn.txt", 'w')
textFile.write(synoptic)
textFile.close()
else:
break
print "***FINISHED***"
这是我自己问题的解决方案。我想从表单中选择日期和时间后下载所需的文件。这是我的代码:
# -*- coding: utf-8 -*-
"""
Created on Tue May 10 14:15:41 2016
@author: MuhammadAdilJaved
"""
import re
import time
from mechanize import Browser
br = Browser()
br.set_handle_robots(False)
br.open("http://www.pmd.gov.pk/cp/display.asp")
br.select_form(nr=0)
form = br.form #
controlDate = br.form.find_control("dat")
controlTime = br.form.find_control("Tim")
backdays = 1 #for how many days in back you want to download data, max. 15
start_time = time.time()
dateList = []
dateListPMD = []
timeList = ['0000', '0300', '0600', '0900', '1200', '1500', '1800', '2100']
#Visit "http://www.pmd.gov.pk/cp/display.asp" and put the available dates in List below
#dateList = ['5/10/2016','5/9/2016',\
# '5/8/2016','5/7/2016',\
# '5/6/2016','5/5/2016',\
# '5/4/2016','5/3/2016',\
# '5/2/2016','5/1/2016',\
# '4/30/2016','4/29/2016',\
# '4/28/2016','4/27/2016',\
# '4/26/2016']
for item in controlDate.items:
dateListPMD.append(item.name)
for date in dateListPMD[0:int(backdays)]:
dateList.append(date)
print "**** Downloading data for following dates: ****"
print dateList
#short dateList for debugging
#dateList = ['5/10/2016','5/9/2016']
#dateList = ['5/10/2016']
i = 0 #for DATE Loop
ii = 0 #for TIME Loop
count = 0 #to calculate no of files Downloaded
for dt in dateList:
br.open("http://www.pmd.gov.pk/cp/display.asp")
br.select_form(nr=0)
form = br.form #
controlDate = br.form.find_control("dat")
controlTime = br.form.find_control("Tim")
for item in controlDate.items:
if item.name == dt:
item.selected = True
dtFileName = dt.translate(None, '!@#$/')
print "Required DATE found i.e "
print item.name, dt
print "DATE loop # ", i
i = i+1
for dti in timeList:
br.open("http://www.pmd.gov.pk/cp/display.asp")
br.select_form(nr=0)
form = br.form #
controlDate = br.form.find_control("dat")
controlTime = br.form.find_control("Tim")
for item in controlDate.items: #
if item.name == dt: #
for item2 in controlTime.items:
if item2.name == dti:
print "Require TIME found & Downloading File i.e "
print item2.name, dti
print "TIME loop # ", ii
ii = ii + 1
item.selected = True
item2.selected = True
synoptic = (br.submit()).read()
soup = re.sub('<[^>]*>', '', synoptic)
textFile = open(str(dtFileName)+str(dti)+'.txt', 'wb')
textFile.write(soup)
textFile.close()
count = count + 1
else:
#print "Required TIME NOT found"
print item2.name, dti
else:
#print "Required DATE NOT found"
print item.name, dt
elapsed_time = (time.time() - start_time)/60
print "*****************************"
print "Total Elapsed Time: ", round(elapsed_time,2), " Mins."
print "Total Files Downloaded: ", str(count)
print "***** F I N I S H E D *****"