Question

我正在打开一个Excel电子表格，读取b列中的数据并使用该数据在页面上执行URL查找和正则表达式，然后将找到的数据保存到同一电子表格中的第3列和第4列。一切正常，但它只写了最后一行。我想因为我的for循环是如何构建的，我已经在这里搜索了很多主题但是还没有能够理解它。

import os
from openpyxl import load_workbook
import urllib
import re
from urllib2 import urlopen
directoryPath=r'C:\Python27' #The main folder
os.chdir(directoryPath)
folder_list=os.listdir(directoryPath)
for folders, sub_folders, file in os.walk(directoryPath): #Traversing the sub folders
    for name in file:
        if name.endswith(".xlsx"):
            filename = os.path.join(folders, name)
            wb=load_workbook(filename, data_only=True)
            ws=wb.active
            cell_range = ws['B2':'B4']
            f = urllib.urlopen(cell.value)
            s = f.read()
            for row in cell_range: # This is iterating through rows 1-7
                for cell in row: # This iterates through the columns(cells) in that row
                    ws.cell(row=ws._current_row, column=3).value= str(re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,4}",s))
                    ws.cell(row=ws._current_row, column=4).value= str(re.findall(r"\(?(?:(?:0(?:0|11)\)?[\s-]?\(?|\+)44\)?[\s-]?\(?(?:0\)?[\s-]?\(?)?|0)(?:\d{2}\)?[\s-]?\d{4}[\s-]?\d{4}|\d{3}\)?[\s-]?\d{3}[\s-]?\d{3,4}|\d{4}\)?[\s-]?(?:\d{5}|\d{3}[\s-]?\d{3})|\d{5}\)?[\s-]?\d{4,5}|8(?:00[\s-]?11[\s-]?11|45[\s-]?46[\s-]?4\d))(?:(?:[\s-]?(?:x|ext\.?\s?|\#)\d+)?)",s))
                    wb.save('scraper.xlsx')

使用

进行测试的示例网址

http://www.knaptoninsurance.co.uk/contact-us
http://www.sterlingsafetywear.co.uk/contact
http://www.ilu.org.uk/contact.html

Python循环只写入最后一行

0 个答案: