我打算编写一个脚本,扫描一组网站并返回它们的 WHOIS 数据。WHOIS 查询会返回多个属性,例如域名、创建日期、到期日期等。
我的问题是:存储这些数据的最佳方式是什么?我正在考虑创建一个名为“Site”的对象,用来保存所有这些属性。这算是 Python 面向对象编程(OOP)的正确用法吗?如果是,能否给出一个简短的示例?
非常感谢您的帮助!
编辑:以下是我目前的代码:
#Server scan test
#Not sure if using Python yet, but it should be so simple it won't matter
import whois
class Scanner(object):
    """Holds the WHOIS attributes recorded for a single site.

    Args:
        arg: Arbitrary value stored unchanged on the instance as ``arg``.
    """

    def __init__(self, arg):
        # The original passed the template placeholder ``ClassName`` to
        # super(), which is undefined and raised NameError on construction.
        super(Scanner, self).__init__()
        self.arg = arg

    def site(self, creationDate, domain_name, emails, expiration_date,
             name_servers=None, referral_url=None, registrar=None,
             status=None, updated_date=None, whois_server=None):
        """Store the WHOIS fields of a site on this instance.

        The first four arguments are required; the remaining fields are
        optional and default to ``None``. (The original read them from
        names that were never defined, and the method lacked ``self``.)

        Args:
            creationDate: Stored as ``self.creation_Date``.
            domain_name: Stored as ``self.domain_name``.
            emails: Stored as ``self.emails``.
            expiration_date: Stored as ``self.expiration_date``.
            name_servers: Stored as ``self.name_servers``.
            referral_url: Stored as ``self.referral_url``.
            registrar: Stored as ``self.registrar``.
            status: Stored as ``self.status``.
            updated_date: Stored as ``self.updated_date``.
            whois_server: Stored as ``self.whois_server``.
        """
        self.creation_Date = creationDate
        self.domain_name = domain_name
        self.emails = emails
        self.expiration_date = expiration_date
        self.name_servers = name_servers
        self.referral_url = referral_url
        self.registrar = registrar
        self.status = status
        self.updated_date = updated_date
        self.whois_server = whois_server
# Domains to look up.
dummies = ['ttt.com', 'uuu.com', 'aaa.com']

# Map each domain to the ``text`` attribute of its WHOIS lookup result.
infoArray = {site: whois.whois(site).text for site in dummies}
答案 0 :(得分:1)
我会使用字典来存储数据
答案 1 :(得分:0)
如果你需要持久化 Python 对象,可以试试 shelve 模块。
以下是文档的示例:
import shelve
# NOTE: ``filename``, ``key`` and ``data`` are placeholders to be supplied
# by the caller; this snippet only illustrates the shelf operations.
d = shelve.open(filename)  # open -- file may get suffix added by low-level
                           # library

d[key] = data              # store data at key (overwrites old data if
                           # using an existing key)
data = d[key]              # retrieve a COPY of data at key (raise KeyError
                           # if no such key)
del d[key]                 # delete data stored at key (raises KeyError
                           # if no such key)

flag = key in d            # true if the key exists (``has_key()`` no longer
                           # exists in Python 3; ``in`` works everywhere)
klist = list(d.keys())     # a list of all existing keys (slow!)

d.close()                  # close the shelf so pending writes are flushed
答案 2 :(得分:0)
这听起来像是pywhois。
其中的基础条目类(WhoisEntry)就是一个很好的例子,大致如下:
class _UnknownWhoisAttribute(AttributeError, KeyError):
    """Raised by ``WhoisEntry.__getattr__`` for names that have no regex.

    Deriving from both bases keeps existing ``except KeyError`` callers
    working, while ``hasattr()`` and ``getattr(obj, name, default)`` (which
    only swallow ``AttributeError``) behave normally.
    """


class WhoisEntry(object):
    """Base class for parsing whois entries.

    Each key of ``_regex`` is exposed as a lazily computed attribute whose
    value is the list of all matches of that pattern in the whois text.
    """

    # Regular expressions to extract domain data from the whois profile,
    # keyed by attribute name. Child classes will override this.
    # Raw strings keep ``\s``, ``\w`` and ``\.`` from being read as
    # (invalid) string escape sequences.
    _regex = {
        'domain_name': r'Domain Name:\s?(.+)',
        'registrar': r'Registrar:\s?(.+)',
        'whois_server': r'Whois Server:\s?(.+)',
        'referral_url': r'Referral URL:\s?(.+)',  # http url of whois_server
        'updated_date': r'Updated Date:\s?(.+)',
        'creation_date': r'Creation Date:\s?(.+)',
        'expiration_date': r'Expiration Date:\s?(.+)',
        'name_servers': r'Name Server:\s?(.+)',  # list of name servers
        'status': r'Status:\s?(.+)',  # list of statuses
        'emails': r'[\w.-]+@[\w.-]+\.[\w]{2,4}',  # list of email addresses
    }

    def __init__(self, domain, text, regex=None):
        """Store the domain and its whois text.

        Args:
            domain: Domain name this entry describes.
            text: Whois output to extract attributes from.
            regex: Optional mapping of attribute name to pattern that
                replaces the class-level ``_regex`` for this instance.
        """
        self.domain = domain
        self.text = text
        if regex is not None:
            self._regex = regex

    def __getattr__(self, attr):
        """Compute an attribute the first time it is accessed.

        Python only calls this when normal lookup fails. The result is
        stored on the instance, so later accesses bypass this method.

        Returns:
            The list produced by ``re.findall`` for the attribute's pattern.

        Raises:
            KeyError: If ``attr`` has no pattern in ``_regex``. The raised
                exception is also an ``AttributeError``.
        """
        whois_regex = self._regex.get(attr)
        if not whois_regex:
            raise _UnknownWhoisAttribute('Unknown attribute: %s' % attr)
        matches = re.findall(whois_regex, self.text)
        setattr(self, attr, matches)
        return matches

    def __str__(self):
        """Return all whois properties of the domain, one per line."""
        return '\n'.join('%s: %s' % (attr, str(getattr(self, attr))) for attr in self.attrs())

    def attrs(self):
        """Return the sorted list of attributes that can be extracted for this domain."""
        return sorted(self._regex.keys())

    @staticmethod
    def load(domain, text):
        """Given whois output in ``text``, return an instance of ``WhoisEntry`` that represents its parsed contents.

        The parser class is chosen by the suffix of ``domain``; domains with
        no matching suffix get a plain ``WhoisEntry``.

        Raises:
            PywhoisError: If ``text`` is the "No whois server is known"
                message.
        """
        if text.strip() == 'No whois server is known for this kind of object.':
            raise PywhoisError(text)

        # Match on the suffix, not on a substring: ``'.com' in domain`` is
        # also true for e.g. ``example.computer`` and picked the wrong parser.
        if domain.endswith('.com'):
            return WhoisCom(domain, text)
        elif domain.endswith('.net'):
            return WhoisNet(domain, text)
        elif domain.endswith('.org'):
            return WhoisOrg(domain, text)
        elif domain.endswith('.ru'):
            return WhoisRu(domain, text)
        elif domain.endswith('.name'):
            return WhoisName(domain, text)
        elif domain.endswith('.us'):
            return WhoisUs(domain, text)
        elif domain.endswith('.me'):
            return WhoisMe(domain, text)
        elif domain.endswith('.uk'):
            return WhoisUk(domain, text)
        else:
            return WhoisEntry(domain, text)
编辑:由于我无法在 Svend 的答案下发表评论,就在这里补充:你可以很容易地把这些数据存进字典:
# Python has no ``new`` keyword: calling the class creates the instance.
# The Scanner class shown in the question requires one constructor argument.
scanner = Scanner('example.com')
# Attributes are set directly on the instance; there is no ``self``
# attribute to go through from outside the class.
scanner.emails = 'test@example.com'
scanner.expiration_date = 'Tomorrow'
# ``__dict__`` exposes the instance attributes set so far as a plain dict.
scan_data_dict = scanner.__dict__