我是编程和 Python 的新手,因此有一个问题。我的同学们已经写了几个 Python 脚本,但现在我们遇到了困难,没有更多的思路。我们需要把这几个 Python 脚本合并成一个可以运行的脚本。请问有人可以帮助我们吗?脚本如下:
# Script: webpage_get.py
# Desc: Fetches data from a webpage, and parses out hyperlinks.
# Author: Wojciech Kociszewski
# Created: Nov, 2013
#
import sys, urllib
def wget(url):
    """Retrieve a web page by URL and return its contents.

    Prints a ``[*] wget()`` progress banner, opens the URL, reads the whole
    response and always closes the connection afterwards.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page contents as ``str``.  On Python 2 this is the byte string
        returned by ``read()``; on Python 3 the bytes are decoded using the
        charset declared in the response headers (UTF-8 if none is given).
    """
    # Local import so the function runs on both Python 2 (urllib.urlopen)
    # and Python 3 (urllib.request.urlopen).
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    print('[*] wget()')
    # Open a file-like object for the URL.
    url_file = urlopen(url)
    try:
        page = url_file.read()
        if not isinstance(page, str):
            # Only reached where read() yields bytes (Python 3): turn them
            # into text so callers can apply str regexes to the page.
            charset = url_file.headers.get_content_charset() or 'utf-8'
            page = page.decode(charset, 'replace')
    finally:
        # The original never closed the handle, leaking the connection.
        url_file.close()
    return page
def main():
    """Fetch one web page and print its contents.

    Usage: ``webpage_get [URL]``.  When no URL is given on the command line
    the built-in test page is fetched, as the script did originally.  More
    than one argument prints the usage message and returns.
    """
    # Test page used when no URL is supplied.  The original appended this to
    # sys.argv unconditionally, which made every real URL argument fail the
    # argument-count check and mutated sys.argv on each call.
    default_url = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'

    # Check args
    args = sys.argv[1:]
    if len(args) > 1:
        print('[-] Usage: webpage_get URL')
        return
    url = args[0] if args else default_url

    # Get and print the web page
    print(wget(url))


if __name__ == '__main__':
    main()
# Script: webpage_getlinks.py
# Desc: Basic web site info gathering and analysis script. From a URL gets
# page content, parsing links out.
# Author: Wojciech Kociszewski
# Created: Nov, 2013
#
import sys, re
import webpage_get
def print_links(page):
    """Find every absolute http/https hyperlink in *page* and print them.

    Prints a ``[*] print_links()`` banner, the number of links found, and
    then each link URL on its own line in sorted order.  Duplicates are
    kept, so the count covers every matching ``<a ... href=...>`` tag.

    Args:
        page: HTML text of the web page to scan.
    """
    print('[*] print_links()')
    # Match inside a single <a ...> tag ([^>]*? cannot run past the tag's
    # closing '>') and capture only the URL, which ends at the first quote,
    # space or '>'.  The original greedy pattern swallowed trailing markup
    # into the link, found at most one link per line and skipped https.
    links = sorted(re.findall(
        r'<a\s[^>]*?href\s*=\s*["\']?(https?:[^\'" >]+)', page, re.IGNORECASE))
    print('[+] %d HyperLinks Found:' % len(links))
    for link in links:
        print(link)
def main():
    """Fetch one web page and print the hyperlinks found on it.

    Usage: ``webpage_getlinks [URL]``.  When no URL is given on the command
    line the built-in test page is used, as the script did originally.  More
    than one argument prints the usage message and returns.
    """
    # Test page used when no URL is supplied.  The original appended this to
    # sys.argv unconditionally, which made every real URL argument fail the
    # argument-count check and mutated sys.argv on each call.
    default_url = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'

    # Check args
    args = sys.argv[1:]
    if len(args) > 1:
        print('[-] Usage: webpage_getlinks URL')
        return
    url = args[0] if args else default_url

    # Get the web page
    page = webpage_get.wget(url)
    # Get the links
    print_links(page)


if __name__ == '__main__':
    main()
# Script: webpage_getemails.py
# Desc: Basic web site info gathering and analysis script. From a URL gets
# page content, parsing emails out.
# Author: Wojciech Kociszewski
# Created: Nov, 2013
#
import sys, re
import webpage_get
def print_emails(page):
    """Find every e-mail address in *page* and print them.

    Prints a ``[*] print_emails()`` banner, the number of addresses found,
    and then each address on its own line in sorted order.  Duplicates are
    kept.

    Args:
        page: Text of the web page to scan.
    """
    print('[*] print_emails()')
    # The hyphen must come last in the character class.  The original
    # '\.-_' was read as a range from '.' to '_', which let in characters
    # such as ':', '<', '>', '=' and '@' and excluded '-' itself, so
    # addresses picked up surrounding markup and hyphenated names were cut.
    emails = sorted(re.findall(r'([\w.-]+@[\w.-]+\.\w+)', page))
    print('[+] %d Emails Found:' % len(emails))
    for email in emails:
        print(email)
def main():
    """Fetch one web page and print the e-mail addresses found on it.

    Usage: ``webpage_getemails [URL]``.  When no URL is given on the command
    line the built-in test page is used, as the script did originally.  More
    than one argument prints the usage message and returns.
    """
    # Test page used when no URL is supplied.  The original appended this to
    # sys.argv unconditionally, which made every real URL argument fail the
    # argument-count check and mutated sys.argv on each call.
    default_url = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'

    # Check args
    args = sys.argv[1:]
    if len(args) > 1:
        # The usage text now names the URL argument, matching the sibling
        # webpage_get and webpage_getlinks scripts.
        print('[-] Usage: webpage_getemails URL')
        return
    url = args[0] if args else default_url

    # Get the web page
    page = webpage_get.wget(url)
    # Get the emails
    print_emails(page)


if __name__ == '__main__':
    main()
答案 0 :(得分:0)
1. 分析您的各个脚本,找出其中的公共代码。
2. 将公共代码提取成一个模块。
3. 使用这个公共模块重写各个单独的程序。
完成以上步骤之后,如果您仍希望把各个程序合并成一个大程序,就会容易得多。