我正在尝试获取一个元素的属性,但得到的只是 None 或空列表,具体取决于我用哪种方式去获取。此外,如果有人知道获取元素中特定标签的更好方法,我将不胜感激。下面是代码,其中用空行隔开的那部分应该返回 url,但实际上没有。
import xml.etree.ElementTree as ET
import webbrowser,time,urllib.request
import tkinter as tk
import urllib
# Disabled example: open a subreddit page in the Windows default browser.
# webbrowser.get('windows-default').open_new('http://www.reddit.com/'+'r/blender')
# Root Tk window; it is passed to Application as its master further down.
main = tk.Tk()
class Application(tk.Frame):
    """Tkinter frame that lists the thread titles of a reddit RSS feed.

    A feed is downloaded to ``rss.xml`` in the current working directory,
    parsed with ElementTree, and its item titles are shown in a scrollable
    listbox. An entry plus a button let the user load another subreddit.
    """

    def __init__(self, master=None):
        """Build the widgets inside *master* and load the front-page feed."""
        tk.Frame.__init__(self, master)
        self.pack()
        self.createWidgets()
        self.initial()

    def createWidgets(self):
        """Create the entry, the two buttons, the listbox and its scrollbar."""
        self.send_entry = tk.Entry(self)
        self.send_entry.grid(row=0, column=0)

        # grid() returns None, so each button is created first and placed
        # afterwards; chaining the calls would store None in the attribute.
        self.change_sub = tk.Button(
            self,
            text='Change Subreddit',
            command=lambda: self.getXML(self.send_entry.get()),
        )
        self.change_sub.grid(row=0, column=2)

        self.lb_scrollY = tk.Scrollbar(self, orient=tk.VERTICAL)
        self.lb_scrollY.grid(row=1, column=1, sticky=tk.NS)
        self.thread_lb = tk.Listbox(self, yscrollcommand=self.lb_scrollY.set)
        self.lb_scrollY['command'] = self.thread_lb.yview
        self.thread_lb.grid(row=1, column=0)

        # Destroy the window that contains this frame rather than relying on
        # a module-level global being present.
        self.QUIT = tk.Button(
            self,
            text="QUIT",
            fg="red",
            command=self.winfo_toplevel().destroy,
        )
        self.QUIT.grid(row=2)

    def descStripper(self, desc):
        """Return the text of the first ``alt="..."`` attribute in *desc*.

        Args:
            desc: Text of an item's ``<description>`` element, or None when
                the element has no text.

        Returns:
            The characters between ``alt="`` and the next double quote. When
            the closing quote is missing, everything after ``alt="`` is
            returned. When *desc* is None/empty or contains no ``alt="``,
            a fixed placeholder sentence is returned.
        """
        marker = 'alt="'
        start = desc.find(marker) if desc else -1
        if start == -1:
            return "There is no description. Maybe it's a link"
        start += len(marker)
        end = desc.find('"', start)
        if end == -1:
            # No closing quote: slicing with -1 would silently drop the last
            # character, so keep the remainder of the string instead.
            return desc[start:]
        return desc[start:end]

    def lbPopulator(self, title, pub, link):
        """Replace the listbox rows with the given titles.

        Args:
            title: Iterable of titles; one listbox row is inserted per entry.
            pub: Publication dates; accepted but not displayed.
            link: Thread links; accepted but not displayed.
        """
        self.thread_lb.delete(0, tk.END)
        for item in title:
            self.thread_lb.insert(tk.END, item)

    def getXmlData(self):
        """Parse ``rss.xml`` and refresh the per-thread lists and the listbox.

        Only ``item`` elements two levels below the document root are read.
        The title, pubDate, link and description of each item are appended to
        the matching ``self.thread*List`` attribute. ``self.threadThumbNail``
        is reset to an empty list but is not filled here.

        Raises:
            OSError: if ``rss.xml`` cannot be opened.
            xml.etree.ElementTree.ParseError: if the file is not valid XML.
        """
        self.threadPubDateList = []
        self.threadTitleList = []
        self.threadLinkList = []
        self.threadDescList = []
        self.threadThumbNail = []

        root = ET.parse('rss.xml').getroot()
        for channel in root:
            for SubChannel in channel:
                if SubChannel.tag != 'item':
                    continue
                for threadInfo in SubChannel:
                    if threadInfo.tag == 'title':
                        self.threadTitleList.append(threadInfo.text)
                    if threadInfo.tag == 'pubDate':
                        # Drop the last six characters (presumably the
                        # ' +0000' style UTC offset - confirm against the
                        # feed). An empty element has text None, so fall
                        # back to '' instead of raising TypeError.
                        self.threadPubDateList.append((threadInfo.text or '')[:-6])
                    if threadInfo.tag == 'link':
                        self.threadLinkList.append(threadInfo.text)
                    if threadInfo.tag == 'description':
                        self.threadDescList.append(self.descStripper(threadInfo.text))

                    # Debug output for the media-namespace element: its tag,
                    # its attribute dict, and its 'url' attribute (None when
                    # the element has no such attribute).
                    if threadInfo.tag == '{http://search.yahoo.com/mrss/}title':
                        print(threadInfo.tag)
                        print(threadInfo.attrib)
                        print(threadInfo.get('url'))

        self.lbPopulator(self.threadTitleList, self.threadPubDateList, self.threadLinkList)

    @staticmethod
    def _feed_url(subreddit):
        """Return the RSS URL for a subreddit path such as ``/r/blender``.

        Surrounding whitespace and slashes are ignored, so ``r/blender`` and
        ``/r/blender/`` give the same URL. An empty or None value maps to the
        front-page feed.
        """
        path = (subreddit or '').strip().strip('/')
        if not path:
            return 'http://www.reddit.com/.rss'
        return 'http://www.reddit.com/' + path + '.rss'

    def getXML(self, subreddit):
        """Download the feed for *subreddit* to ``rss.xml`` and reload the list.

        Args:
            subreddit: Path typed by the user, e.g. ``/r/blender``.

        An HTTP error from the download is reported on stdout and the
        currently displayed list is left untouched.
        """
        try:
            urllib.request.urlretrieve(self._feed_url(subreddit), 'rss.xml')
        except urllib.error.HTTPError:
            print('Too many requests-Try again')
        else:
            self.getXmlData()

    def initial(self):
        """Load the front-page feed, retrying three seconds after an HTTP error."""
        try:
            urllib.request.urlretrieve('http://www.reddit.com/.rss', 'rss.xml')
        except urllib.error.HTTPError:
            print('Too many requests-Trying again 3')
            # Schedule the retry on the Tk event loop. Sleeping would freeze
            # the UI, and re-running __init__ would rebuild every widget and
            # recurse without bound.
            self.after(3000, self.initial)
        else:
            self.getXmlData()
# Disabled: fixed window size for the root window.
# main.geometry("250x150")
# Build the application frame inside the root window, then hand control to
# the Tk event loop until the window is closed.
app = Application(master=main)
app.mainloop()
下面是在解析 XML 文件时应该返回缩略图 url 的代码。问题全部出在最后一个 'if' 语句上,其余部分都能正常工作。
def getXmlData(self):
    """Parse ``rss.xml`` and collect per-thread data on ``self``.

    Resets the title, pubDate, link, description and thumbnail lists, then
    walks every ``item`` element found two levels below the document root
    and appends the text of its title, pubDate (minus the last six
    characters), link and stripped description. For the media-namespace
    ``title`` element the tag, attributes and ``url`` attribute are printed.
    """
    self.threadPubDateList = []
    self.threadTitleList = []
    self.threadLinkList = []
    self.threadDescList = []
    self.threadThumbNail = []

    root = ET.parse('rss.xml').getroot()
    for top_level in root:
        for entry in top_level:
            # Only <item> elements describe threads; skip everything else.
            if entry.tag != 'item':
                continue
            for threadInfo in entry:
                tag = threadInfo.tag
                if tag == 'title':
                    self.threadTitleList.append(threadInfo.text)
                if tag == 'pubDate':
                    self.threadPubDateList.append(threadInfo.text[:-6])
                if tag == 'link':
                    self.threadLinkList.append(threadInfo.text)
                if tag == 'description':
                    self.threadDescList.append(self.descStripper(threadInfo.text))
                if tag == '{http://search.yahoo.com/mrss/}title':
                    print(threadInfo.tag)
                    print(threadInfo.attrib)
                    print(threadInfo.get('url'))
答案 0 :(得分:1)
唯一带有 url 属性的标签是 media:thumbnail 标签。正如您所指出的,media 前缀
在文件顶部通过 xmlns:media="http://search.yahoo.com/mrss/" 声明。
因此,您的最后一个 if 语句应该是:
if threadInfo.tag == '{http://search.yahoo.com/mrss/}thumbnail':
    # Show the qualified tag, the full attribute dict, and the 'url' value.
    print(threadInfo.tag)
    print(threadInfo.attrib)
    print(threadInfo.get('url'))
这应该产生如下输出:
{http://search.yahoo.com/mrss/}thumbnail
{'url': 'http://a.thumbs.redditmedia.com/cozEqqG9muj-tT3Z.jpg'}
http://a.thumbs.redditmedia.com/cozEqqG9muj-tT3Z.jpg