我到目前为止所拥有的代码:
import os
import xlrd
import re
import sys
from collections import OrderedDict
import json
reload(sys)
sys.setdefaultencoding('utf-8')
#####the keyword should be "ab_"
rootdir = 'C:\Users\'
variable_count = 1
name = re.compile('ab_')
excel_files = []
for subdir, dirs, files in os.walk(rootdir):
for file in files:
if ".xlsx" in file:
excel_files.append(os.path.join(subdir, file)) #list of excel files
variables_list = []
s = []
dict_files = {}
dict_variables = {}
for items in excel_files:
for sh in xlrd.open_workbook(items).sheets():
for row in range(sh.nrows):
for col in range(sh.ncols):
myCell = sh.cell(row, col)
if name.search(str(myCell.value)):
excel_keyword = []
excel_keyword.append(items)
dict_files[items] = None
for keys in dict_files:
for sh in xlrd.open_workbook(keys).sheets():
for row in range(sh.nrows):
for col in range(sh.ncols):
myCell = sh.cell(row, col)
if name.search(str(myCell.value)):
dict_variables[variable_count] = str(myCell.value)
variable_count += 1
dict_files[keys] = dict_variables
#print dict_variables.values()
#print dict_files
f = open(r"Results_variables.json", "w")
json.dump(dict_files, f, indent=3, sort_keys=True) ###indent and sort_keys for pretty printing###
f.close()`
但是,此代码查找所有Excel地址和包含“ ab_”的单元格值,并将找到的所有单元格值附加到每个键上。我需要在每个密钥后面附加的只是在该密钥地址中包含“ ab_”的单元格。
当前输出:
{
"C:\\Users\\house_1.xlsx":
1: "ab_blue"
2: "ab_red"
3: "ab_white"
4: "ab_yellow"
5: "ab_purple"
"C:\\Users\\house_1.xlsx":
1: "ab_blue"
2: "ab_red"
3: "ab_white"
4: "ab_yellow"
5: "ab_purple"
"C:\\Users\\house_1.xlsx":
1: "ab_blue"
2: "ab_red"
3: "ab_white"
4: "ab_yellow"
5: "ab_purple"
"C:\\Users\\house_1.xlsx":
1: "ab_blue"
2: "ab_red"
3: "ab_white"
4: "ab_yellow"
5: "ab_purple"
}
预期输出:
{
"C:\\Users\\house_1.xlsx":
1: "ab_blue"
"C:\\Users\\house_1.xlsx":
1: "ab_red"
"C:\\Users\\house_1.xlsx":
1: "ab_white"
"C:\\Users\\house_1.xlsx":
1: "ab_yellow"
2: "ab_purple"
}