#This is my webpage_getinfo.py script used to parse emails, phone numbers and other details from a webpage. It will be needed for the file_download.py script#
import sys, re
import webpage_get
def print_md5s(page):
print '[*] print_md5s()'
md5s = re.findall(r'([a-fA-F\d]{32})', page)
md5s.sort()
print '[+]', str(len(md5s)), 'MD5 Hashes Found:'
for md5 in md5s:
print md5
def print_emails(page):
print '[*] print_emails()'
emails = re.findall(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}', page)
emails.sort()
print '[+]', str(len(emails)), 'Email Addresses Found:'
for email in emails:
print email
def print_phones(page):
print '[*] print_phones numbers()'
phones = re.findall(r'\+\d................', page)
phones.sort()
print '[+]', str(len(phones)), 'Phone Numbers Found:'
for phone in phones:
print phone
def print_images(page):
print '[*] print_images()'
images = re.findall(r'\w+.\jpg|\w+.\gif|\w+.\w+\.\wmp', page)
images.sort()
print '[+]', str(len(images)), 'Images Found:'
for image in images:
print image
def print_documents(page):
print '[*] print_documents()'
documents = re.findall(r'\w+\.\wocx', page)
documents.sort()
print '[+]', str(len(documents)), 'Documents Found:'
for document in documents:
print document
def main():
page = webpage_get.webget(sys.argv) # getting the webpage from webpage_get.py as 'page'
print_md5s(page)
print_emails(page)
print_phones(page)
print_images(page)
print_documents(page)
if __name__ == '__main__':
main()
## This is the code for file_download.py below. It download the details from webpage_getinfo and stores it in an uncreated file directory, creating the directory as soon as the script is run##
import errno
import sys, os, urllib2, urllib
import webpage_getinfo
page = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'
def path_to_download():
file_name = 'file.txt' # creates the file name
script_dir = os.path.dirname(os.path.abspath(file_name))
dest_dir = os.path.join(script_dir, 'C:\\temp', 'coursework')
try:
os.makedirs(dest_dir)
except IOError:
print 'Warning: file already exists'
path = os.path.join(dest_dir, file_name)
# writing the details of each information from webpage_getinfo
file_dest = open(dest_dir, 'w') # opening the destination directory
file_dest.write('Here is the site documents\n')
file_dest.write(md5details)
file_dest.write(emails)
file_dest.write(phones)
file_dest.write(images)
file_dest.write(documents)
def main():
path_to_download()
md5_info = webpage_getinfo.print_md5s() # getting md5 from webpage_getinfo.py
email_info = webpage_getinfo.print_emails() # getting emails from webpage_getinfo.py
phones_info = webpage_getinfo.print_phones() # getting phone numbers from webpage_getinfo.py
images_info = webpage_getinfo.print_images() # getting images from webpage_getinfo.py
documents_info = webpage_getinfo.print_documents() # getting documents from webpage_getinfo.py
if __name__ == '__main__':
main()
#任何时候我运行脚本,它告诉我' IOError:Permission denied'如果文件不存在。请通过建议任何代码更正和协助将不胜感激。我是python中的业余爱好者,请原谅我凌乱的代码。谢谢。#。
答案 0 :(得分:0)
如果路径不存在,则只制作路径,如下所示:
if not os.path.exists(dest_dir):
os.makedirs(dest_dir)