从url字符串中删除某些字符(Python3)

时间:2018-05-24 11:47:26

标签: python string url urllib

我终于想出了如何把带有数据的字典转换成字符串,从而得到我需要的输出。我想知道如何让这段代码更简洁。有什么建议吗?

import urllib.parse
import urllib.request
import re

# User input.
start = '19851123'
end = '19851124'
stns = [('235', '240')]
var = ['TEMP']

# Collect the input into the query parameters.
req = {
    'start': start,
    'end': end,
    'vars': var,
    'stns': stns,
}

# Encode the dict as a query string. The tuple in `stns` is rendered through
# str(), so its punctuation is declared safe here to keep it from being
# percent-encoded before it is stripped below.
q = urllib.parse.urlencode(req, doseq=True, safe="()',")

# One pass over the string: drop the tuple punctuation and turn the '+'
# (the encoded space between the two tuple items) into ':'.
q = q.translate(str.maketrans("+", ":", "()',"))

# Combine the URL and the query.
url = 'http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi?%s' % q

# Open the URL; the context manager closes the response when the loop ends.
with urllib.request.urlopen(url) as fhand:
    # Print the requested data without the '#' header lines.
    for line in fhand:
        text = line.decode()  # decode once per line
        if not text.startswith('#'):
            print(text.strip())

1 个答案:

答案 0 :(得分:0)

通过预先处理 `req` 输入,您可以避免使用 `re` 模块:

# Request parameters with the station range already joined by ':'.
req = dict(
    start=19851123,
    end=19851124,
    vars=['TEMP'],
    stns=['235:240'],
)

# ':' is declared safe so that it is not percent-encoded.
urllib.parse.urlencode(req, safe=':', doseq=True)
# 'start=19851123&end=19851124&vars=TEMP&stns=235:240'

因此,您可以将代码精简为:

from urllib.parse import urlencode
from urllib.request import urlopen


def main(parameters, url='http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi'):
    """Yield the decoded, stripped response lines that do not start with '#'.

    ``parameters`` is URL-encoded (sequence values expanded into repeated
    keys, ':' left unescaped) and appended to ``url`` after a '?'.
    """
    encoded = urlencode(parameters, doseq=True, safe=':')
    target = '{}?{}'.format(url, encoded)
    with urlopen(target) as response:
        for raw in response:
            if raw.startswith(b'#'):
                continue  # skip '#'-prefixed lines
            yield raw.decode().strip()


if __name__ == '__main__':
    # Example request; the station range is already joined with ':'.
    req = dict(
        start=19851123,
        end=19851124,
        vars=['TEMP'],
        stns=['235:240'],
    )
    for row in main(req):
        print(row)

但是每次都去修改代码中的用户输入会很麻烦,因此使用 `argparse` 可能会有所帮助。看起来 `vars` 和 `stns` 的长度可能不同,因此使用 'append' action 并对默认情况做专门处理可能会对您有帮助:

import argparse
from urllib.parse import urlencode
from urllib.request import urlopen


def command_line_parser():
    """Build the argument parser for the script.

    Positional ``start`` and ``end`` are integers. ``-v/--vars`` may be
    repeated and collects strings; ``-s/--stns`` may be repeated and
    collects pairs of integers.
    """
    cli = argparse.ArgumentParser()
    # Both positionals share the same integer conversion.
    for positional in ('start', 'end'):
        cli.add_argument(positional, type=int)
    cli.add_argument('-v', '--vars', action='append')
    cli.add_argument('-s', '--stns', action='append', nargs=2, type=int)
    return cli


def parse_command_line(parser, argv=None):
    """Parse the command line and normalise the result for the query.

    Args:
        parser: Parser whose ``parse_args`` result has ``vars`` and ``stns``
            attributes (each either ``None`` or a list).
        argv: Optional list of argument strings. When ``None`` (the
            default) ``parse_args`` falls back to ``sys.argv[1:]``, which
            is the original behaviour.

    Returns:
        The parsed namespace, with ``vars`` defaulting to ``['TEMP']`` and
        every ``stns`` pair formatted as a ``'first:second'`` string
        (default ``['235:240']``).
    """
    args = parser.parse_args(argv)
    if args.vars is None:
        args.vars = ['TEMP']
    if args.stns is None:
        args.stns = [(235, 240)]
    # Join each station pair with ':' so it can be URL-encoded as one value.
    args.stns = ['{}:{}'.format(*stn) for stn in args.stns]
    return args


def main(parameters, url='http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi'):
    """Request ``url`` with ``parameters`` as its query and yield the data lines.

    Lines of the response that start with '#' are skipped; every other line
    is decoded and stripped of surrounding whitespace before being yielded.
    """
    request_url = '{}?{}'.format(url, urlencode(parameters, doseq=True, safe=':'))
    with urlopen(request_url) as reply:
        yield from (
            chunk.decode().strip()
            for chunk in reply
            if not chunk.startswith(b'#')
        )


if __name__ == '__main__':
    # Parse the command line, pass the namespace on as a dict, and print
    # each yielded line.
    cli_args = parse_command_line(command_line_parser())
    for row in main(vars(cli_args)):
        print(row)

用法类似于

$ python script.py -v TEMP -v TEST -s 235 240 19851123 19851124

将查询以下网址:

http://projects.knmi.nl/klimatologie/daggegeven/getdata_dag.cgi?start=19851123&end=19851124&vars=TEMP&vars=TEST&stns=235:240