如何在Python中将数据下载到未创建的文件目录中?

时间:2014-12-01 07:50:59

标签: python python-2.7

#This is my webpage_getinfo.py script used to parse emails, phone numbers and other details from a webpage. It will be needed for the file_download.py script#

import sys, re
import webpage_get

def print_md5s(page):
    print '[*] print_md5s()'
    md5s = re.findall(r'([a-fA-F\d]{32})', page)
    md5s.sort()
    print '[+]', str(len(md5s)), 'MD5 Hashes Found:'
    for md5 in md5s:
        print md5

def print_emails(page):
    print '[*] print_emails()'
    emails = re.findall(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}', page)
    emails.sort()
    print '[+]', str(len(emails)), 'Email Addresses Found:'
    for email in emails:
        print email

def print_phones(page):
    print '[*] print_phones numbers()'
    phones = re.findall(r'\+\d................', page)
    phones.sort()
    print '[+]', str(len(phones)), 'Phone Numbers Found:'
    for phone in phones:
        print phone

def print_images(page):
    print '[*] print_images()'
    images = re.findall(r'\w+.\jpg|\w+.\gif|\w+.\w+\.\wmp', page)
    images.sort()
    print '[+]', str(len(images)), 'Images Found:'
    for image in images:
        print image

def print_documents(page):
    print '[*] print_documents()'
    documents = re.findall(r'\w+\.\wocx', page)
    documents.sort()
    print '[+]', str(len(documents)), 'Documents Found:'
    for document in documents:
        print document

def main():
    page = webpage_get.webget(sys.argv) # getting the webpage from webpage_get.py as 'page'
    print_md5s(page)
    print_emails(page)
    print_phones(page)
    print_images(page)
    print_documents(page) 

if __name__ == '__main__':
    main()



 ## This is the code for file_download.py below. It download the details from webpage_getinfo and stores it in an uncreated file directory, creating the directory as soon as the script is run##  

    import errno
    import sys, os, urllib2, urllib
    import webpage_getinfo

    page = 'http://www.soc.napier.ac.uk/~cs342/CSN08115/cw_webpage/index.html'

    def path_to_download():
        file_name = 'file.txt' # creates the file name
        script_dir = os.path.dirname(os.path.abspath(file_name)) 
        dest_dir = os.path.join(script_dir, 'C:\\temp', 'coursework')

        try:
            os.makedirs(dest_dir)

        except IOError:
            print 'Warning: file already exists'    
        path = os.path.join(dest_dir, file_name)
        # writing the details of each information from webpage_getinfo
        file_dest = open(dest_dir, 'w') # opening the destination directory
        file_dest.write('Here is the site documents\n')
        file_dest.write(md5details)
        file_dest.write(emails)
        file_dest.write(phones)
        file_dest.write(images)
        file_dest.write(documents)

    def main():
        path_to_download()
        md5_info = webpage_getinfo.print_md5s() # getting md5 from webpage_getinfo.py
        email_info = webpage_getinfo.print_emails() # getting emails from webpage_getinfo.py
        phones_info = webpage_getinfo.print_phones() # getting phone numbers from webpage_getinfo.py
        images_info = webpage_getinfo.print_images() # getting images from webpage_getinfo.py
        documents_info = webpage_getinfo.print_documents() # getting documents from webpage_getinfo.py  

    if __name__ == '__main__':
        main()

#任何时候我运行脚本,它告诉我' IOError:Permission denied'如果文件不存在。请通过建议任何代码更正和协助将不胜感激。我是python中的业余爱好者,请原谅我凌乱的代码。谢谢。#。

1 个答案:

答案 0 :(得分:0)

如果路径不存在,则只制作路径,如下所示:

if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)