我正在尝试创建递归文件结构,但是当我尝试创建文件时,它会创建第一个文件夹,但没有其他文件夹。
import urllib2
from bs4 import BeautifulSoup
import shutil
import os
ext = [".html", ".jpeg", ".png", ".gif", ".jpg"]
def findLinks( url, newPath ):
resp=urllib2.urlopen(url) #open first link
if resp.getcode() == 200:
if "text/html" in resp.headers["content-type"]:
s = BeautifulSoup(resp.read(), "html.parser")
links = s.find_all( 'a' ) #put all a links into links list
for link in links:
f = link['href']
print f
newDir = newPath+f
if not os.path.isdir(newDir): #if doesn't already exist
if not newDir.endswith(tuple(ext)): #if nota file
os.makedirs(newDir) # create all directories
if newDir.endswith(".html"):
newFile = open(newDir, 'w+')
newFile.write("sample text")
newFile.close()
return links
findLinks('http://localhost/onlinecontent/Test', '/Test' )