我已经编写了一些代码来尝试执行以下操作
但是,这不起作用,并且完全清空了文件(因此它是0字节)
我认为我必须非常接近完成这项工作,但尚未对其进行管理。任何指导将不胜感激
pagenumber=1
directory_in_str='/home/somewhere/somedir/'
pathlist = Path(directory_in_str).glob('**/*.svg')
for path in pathlist:
#because path is object not string
path_in_str = str(path)
print(path_in_str)
with open(path_in_str, 'r+') as f:
for line in f:
myregex = r"(?:xlink:href\=\")(.*)(?:\?q=80\"\/\>)"
result = myregex.search(line)
if result:
#If a match is found, replace the text in the line
origfullimgfilename = result
formattedpagenumber = '{:0>3}'.format(pagenumber)
replfullimgfilename='page-'+str(formattedpagenumber)+'-img1.jpg'
line = re.sub(origfullimgfilename, replfullimgfilename, line.rstrip())
#Then retrieve the file! (origfullimgfilename)
try:
urllib.request.urlretrieve(origfullimgfilename+"?q=100", replfullimgfilename)
except urllib.error.HTTPError as e:
print("HTTP Error: "+str(e.code)+" - SVG URL: "+str(origfullimgfilename)+" - aborting\n")
break
pagenumber += 1
答案 0 :(得分:0)
lines = """<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<!-- Page 1 -->
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0" width="1198" height="1576" viewBox="0 0 1198 1576" version="1.1" style="display: block;margin-left: auto;margin-right: auto;">
<path d="M0,0 L0,1576 L1198,1576 L1198,0 Z " fill="#FFFFFF" stroke="none"/>
<image preserveAspectRatio="none" x="0" y="0" width="1199" height="1577" xlink:href="https://cdn-assets.somewhere.com/f929e7b4404d3e48918cdc0ecd4efbc9fa91dab5_2734/9c237c7e35efe88878f9e5f7a3555f7a379ed9ee9d95b491b6d0003fd80afc6b/9c68a28d6b5161f7e975a163ff093a81172916e23c778068c6bfefcfab195154.jpg?q=80"/>
<g fill="#112449">
<use xlink:href="#f0_2" transform="matrix(19.1,0,0,19.1,885.3,204.3)"/>
<use xlink:href="#f0_o" transform="matrix(19.1,0,0,19.1,910,204.3)"/>
<use xlink:href="#f0_t" transform="matrix(19.1,0,0,19.1,930.3,204.3)"/>
<use xlink:href="#f0_f" transform="matrix(19.1,0,0,19.1,949.6,204.3)"/>"""
import re
newlines = []
for line in lines.split('\n'):
myregex = re.compile(r"(?:xlink:href\=\")(.*)(?:\?q=80\"\/\>)")
result = myregex.search(line)
if result:
print(result)
#If a match is found, replace the text in the line
origfullimgfilename = result.group(1)
replfullimgfilename='page-##-img1.jpg'
line = re.sub(origfullimgfilename, replfullimgfilename, line)
newlines.append(line)
print( '\n'.join(newlines) )