我正在尝试使用Google CSE API创建一个程序,迭代公司的人员列表,并从结构化数据中提取LinkedIn个人资料信息。然后它运行一个正则表达式从这些信息中提取信息并将它们附加到文本文件中。测试它可以达到一定程度,但是一旦我在列表中找到某个名称,我就会收到此错误:
Traceback (most recent call last):
File "C:\Users\svillamil\Desktop\CSE2.py", line 27, in <module>
results = google_search("Gene Grochala Capital Health", my_api_key, my_cse_id, num=1)
File "C:\Users\svillamil\Desktop\CSE2.py", line 17, in google_search
return res['items']
KeyError: 'items'
在CSE上进行调查显示该名称和公司没有结果。所以,我把KeyError作为例外。
except KeyError:
pass
这不起作用,所以我尝试了:
except Exception as e:
pass
甚至:
except:
pass
没有运气。
我的代码是否有问题导致此错误无法通过?或者它可能是初始输入的问题?
以下是我的参考计划:
from googleapiclient.discovery import build
import pprint
import csv
import re
import time
import os
os.chdir('C:\\users\\name\\Desktop')
my_api_key = "xxxxx"
my_cse_id = "xxxxx"
def google_search(search_term, api_key, cse_id, **kwargs):
service = build("customsearch","v1",developerKey=api_key)#initializes an instance of the custom search service with the build module.
res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()#executes cse().list() on service to return metadata on the search performed, the specific CSE it is calling, and any other variable that might be added when calling the function as a whole
return res['items']
a = 0
with open('list.csv', 'r') as f:
reader = csv.reader(f)
for row in reader:
a+=1
name = row[1] + ' ' + row[2] + ' at ' +row[4]
print("This trial will search for", name)
results = google_search(name, my_api_key, my_cse_id, num=1)
try:
for result in results:
fn = r"fn':\s'(.+?)'"
pt = r"pagemap':.+'title.+?\s'(.*?)'"
role = r"role':\W+(.+?)'"
org = r"org\W+(.+?)'"
with open("cse_result.txt", "a+") as nameLookup:
if re.search(str(fn),str(result)) is not None:
name2 = re.search(str(fn),str(result)).group(1)
nameLookup.write("Trial "+str(a)+'\n')
nameLookup.write("The name being searched for in this trial is "+name+'.\n')
nameLookup.write("The name found is "+str(name2)+"\n")
nameLookup.write('\n')
else:
nameLookup.write("Trial "+str(a)+'\n')
nameLookup.write("We could not find a name on this trial."+'\n')
nameLookup.write('\n')
if re.search(str(pt),str(result)) is not None:
position_title = re.search(str(pt),str(result)).group(1)
nameLookup.write("The position found at this trial is " + position_title + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not find a position title at this trial.')
nameLookup.write('\n')
if re.search(str(role),str(result)) is not None:
role_title = re.search(str(role),str(result)).group(1)
nameLookup.write("The position found at this trial is " + role_title + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not return a position at this trial.')
nameLookup.write('\n')
if re.search(str(org),str(result)) is not None:
orginization = re.search(str(org),str(result)).group(1)
nameLookup.write("The orginization found at this trial is " + orginization + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not return an orginization at this trial.')
nameLookup.write('\n')
nameLookup.write('\n')
nameLookup.write('==========================')
nameLookup.write('\n')
except KeyError:
pass
#time.sleep(1)
这仍然产生了同样的错误
=============================================== ======
这是一个经过编辑的代码,根据评论进行了一些更改
def google_search(search_term, api_key, cse_id, **kwargs):
service = build("customsearch","v1",developerKey=api_key)#initializes an instance of the custom search service with the build module.
res = service.cse().list(q=search_term, cx=cse_id, **kwargs).execute()#executes cse().list() on service to return metadata on the search performed, the specific CSE it is calling, and any other variable that might be added when calling the function as a whole
return res.get('items', [])
a = 0
def is_empty(any_structure):
if any_structure:
return False
else:
return True
with open('list.csv', 'r') as f:
reader = csv.reader(f)
for row in reader:
a+=1
name = row[1] + ' ' + row[2] + ' at ' +row[4]
print("This trial will search for", name)
results = google_search(name, my_api_key, my_cse_id, num=1)
for result in results:
fn = r"fn':\s'(.+?)'"
pt = r"pagemap':.+'title.+?\s'(.*?)'"
role = r"role':\W+(.+?)'"
org = r"org\W+(.+?)'"
with open("cse_result.txt", "a+") as nameLookup:
if is_empty(result)==True:
nameLookup.write('We could not return any data at this trial. Please see linkedin. This is trial '+a)
nameLookup.write('\n')
if re.search(str(fn),str(result)) is not None:
name2 = re.search(str(fn),str(result)).group(1)
nameLookup.write("Trial "+str(a)+'\n')
nameLookup.write("The name being searched for in this trial is "+name+'.\n')
nameLookup.write("The name found is "+str(name2)+"\n")
nameLookup.write('\n')
else:
nameLookup.write("Trial "+str(a)+'\n')
nameLookup.write("We could not find a name on this trial."+'\n')
nameLookup.write('\n')
if re.search(str(pt),str(result)) is not None:
position_title = re.search(str(pt),str(result)).group(1)
nameLookup.write("The position found at this trial is " + position_title + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not find a position title at this trial.')
nameLookup.write('\n')
if re.search(str(role),str(result)) is not None:
role_title = re.search(str(role),str(result)).group(1)
nameLookup.write("The position found at this trial is " + role_title + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not return a position at this trial.')
nameLookup.write('\n')
if re.search(str(org),str(result)) is not None:
orginization = re.search(str(org),str(result)).group(1)
nameLookup.write("The orginization found at this trial is " + orginization + '.\n')
nameLookup.write('\n')
else:
nameLookup.write('We could not return an orginization at this trial.')
nameLookup.write('\n')
nameLookup.write('\n')
nameLookup.write('==========================')
nameLookup.write('\n')
现在的问题是,如果字典对象为空,则不会附加没有找到数据的通知。