I'm trying to build a simple Python script to count the number of notes each user has entered into our Highrise CRM system over the last 365 days. I've written a script that works against a tiny data set (a Highrise instance with only 10 notes), but it times out on larger data sets (I assume because my Python skills are lacking and the script is very inefficient).
I'm working in Python 3.3 on a Nitrous.io box and using the Highton wrapper for the Highrise API calls. (I haven't figured out how to read the API key from a file successfully, but I can make it work by entering the API key and username directly. Tips on that would be welcome, though my main focus is getting the script to run against a production-scale Highrise environment.)
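For reference, the file-reading approach I've been attempting looks roughly like the sketch below. It assumes highrisekeys.txt has the API key on the first line and the username on the second; I suspect the trailing newline that readline() keeps was part of my problem, hence the strip() calls.

with open('highrisekeys.txt', 'r') as keyfile:
    highrise_key = keyfile.readline().strip()   # strip() drops the trailing newline
    highrise_user = keyfile.readline().strip()

high = Highton(api_key = highrise_key, user = highrise_user)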
Can anyone offer advice on how to do this more elegantly and/or correctly?
My Python script is:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days

from highton import Highton
from datetime import date, datetime, timedelta

# Initialize Highrise instance
#keyfile = open('highrisekeys.txt', 'r')
#highrise_key = keyfile.readline()
#highrise_user = keyfile.readline()
#print('api key = ', api_key, 'user = ', api_user)
high = Highton(
    api_key = 'THIS_IS_A_SECRET',
    user = 'SECRET'
)

users = high.get_users()
#print('users is type: ', type(users))
#for user in users:
#    print('Users: ', user.name)

people = high.get_people()
#print('people is type: ', type(people))

notes = []
tmp_notes = []
for person in people:
    #print('Person: ', person.first_name, person.last_name)
    #person_highrise_id = person.highrise_id
    #print(person.last_name)
    tmp_notes = high.get_person_notes(person.highrise_id)
    if (type(tmp_notes) is list):
        notes.extend(high.get_person_notes(person.highrise_id))  # No quotes for person_highrise_id in ()'s
    #print('Notes is type ', type(notes), ' for ', person.first_name, ' ', person.last_name)

#print('total number of notes is ', len(notes))

for user in users:
    #print(user.name, ' has ', notes.author_id.count(user.highrise_id), ' activities')
    counter = 0
    for note in notes:
        if (note.author_id == user.highrise_id) and (note.created_at > datetime.utcnow() + timedelta(days = -365)):
            counter += 1
    print(user.name, ' has performed ', counter, ' activities')
The error message I get is:
Traceback (most recent call last):
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
    body=body, headers=headers)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
    self._validate_conn(conn)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
    conn.connect()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
    conn = self._new_conn()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
socket.gaierror: [Errno -2] Name or service not known

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 370, in send
    timeout=timeout
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 597, in urlopen
    _stacktrace=sys.exc_info()[2])
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/retry.py", line 245, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/packages/six.py", line 309, in reraise
    raise value.with_traceback(tb)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 544, in urlopen
    body=body, headers=headers)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 341, in _make_request
    self._validate_conn(conn)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connectionpool.py", line 761, in _validate_conn
    conn.connect()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 204, in connect
    conn = self._new_conn()
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/connection.py", line 134, in _new_conn
    (self.host, self.port), self.timeout, **extra_kw)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/packages/urllib3/util/connection.py", line 64, in create_connection
    for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
requests.packages.urllib3.exceptions.ProtocolError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "highrise-analysis.py", line 35, in <module>
    tmp_notes = high.get_person_notes(person.highrise_id)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 436, in get_person_notes
    return self._get_notes(subject_id, 'people')
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 433, in _get_notes
    highrise_type, subject_id)), Note)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 115, in _get_data
    content = self._get_request(endpoint, params).content
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/highton/highton.py", line 44, in _get_request
    params=params,
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 69, in get
    return request('get', url, params=params, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/api.py", line 50, in request
    response = session.request(method=method, url=url, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 465, in request
    resp = self.send(prep, **send_kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/sessions.py", line 573, in send
    r = adapter.send(request, **kwargs)
  File "/home/action/workspace/highrise-analysis/py3env/lib/python3.3/site-packages/requests/adapters.py", line 415, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', gaierror(-2, 'Name or service not known'))
Answer 0 (score: 1)
Problem solved: the Highrise API is rate-limited to 500 requests per 10 seconds from the same IP address for the same account, and I was exceeding that limit while pulling the data. To fix it, I added a time.sleep(.5) call to pause between each person's note pull so the rate-limit threshold is never crossed.
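For anyone adapting this, the pause (plus a retry for dropped connections) can be factored into a small helper. This is only a sketch — the fetch_notes_throttled name is mine, not part of Highton — but it shows the pattern:

import time
from requests.exceptions import ConnectionError

def fetch_notes_throttled(high, person_id, pause = 0.5, retries = 3):
    # Pull one person's notes, pausing to stay under the 500 requests / 10 seconds
    # limit and retrying with a short backoff if the connection drops.
    for attempt in range(retries):
        try:
            notes = high.get_person_notes(person_id)
            time.sleep(pause)
            return notes
        except ConnectionError:
            time.sleep(2 ** attempt)
    return []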
In addition, I split the code into two separate functions:
1. Pull the user, people, and notes data from Highrise and store it in local files with pickle, so I don't have to re-pull the data every time I want to run some analysis.
2. Perform the analysis on the pickled files.
I also needed to add a try/except KeyError condition, because some notes were created by Highrise users who are no longer active (people who have left the company).
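As an aside, the same thing can be done without try/except by checking membership first; this is just a sketch (the function name is mine, not from the code below):

def count_notes_by_active_users(notes, user_ids):
    # Tally notes per author, silently skipping authors who are no longer Highrise users.
    counts = {user_id: 0 for user_id in user_ids}
    for note in notes:
        if note.author_id in counts:
            counts[note.author_id] += 1
    return counts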
The modified code is below:
# Using https://github.com/seibert-media/Highton to integrate with Highrise CRM
# Change to Python 3.3 with this command: source py3env/bin/activate
# Purpose: Count activity by Highrise CRM user in the last 365 days

from highton import Highton
from datetime import date, datetime, timedelta
import time
import pickle

# ===================================================================

def Create_Notes_Backup(highrise_key, highrise_user, notesfile, userfile, peoplefile, trailing_days = 365):
    # Function to create new Notes backup file of Highrise instance (this can take a while)
    print('Entered Create_Notes_Backup function')
    high = Highton(api_key = highrise_key, user = highrise_user)  # Connect to API
    print('Connected to Highrise')
    users = high.get_users()
    print('Pulled ', len(users), ' users')
    people = high.get_people()
    print('Pulled ', len(people), ' people')

    notes = []
    tmp_notes = []
    print('Started creating notes array')
    for person in people:
        tmp_notes = high.get_person_notes(person.highrise_id)
        time.sleep(.5)  # Pause per API limits https://github.com/basecamp/highrise-api
        if (type(tmp_notes) is list):
            print('Pulled ', len(tmp_notes), ' notes for ', person.first_name, ' ', person.last_name)
            if tmp_notes[0].created_at > datetime.utcnow() + timedelta(days = -trailing_days):
                notes.extend(high.get_person_notes(person.highrise_id))  # No quotes for person_highrise_id in ()'s
    print('Finished creating notes array')

    # Final Step: Export lists into pickle files
    with open(notesfile, 'wb') as f:
        pickle.dump(notes, f)
    with open(userfile, 'wb') as g:
        pickle.dump(users, g)
    with open(peoplefile, 'wb') as h:
        pickle.dump(people, h)
    print('Exported lists to *.bak files')

# ===================================================================

def Analyze_Notes_Backup(notesfile, userfile, peoplefile, trailing_days = 365):
    # Function to analyze notes backup:
    # 1. Count number of activities in last trailing_days days
    # 2. Identify date of last note update
    print('Entered Analyze_Notes_Backup function')
    notes = []
    users = []
    people = []

    # Load the lists
    with open(notesfile, 'rb') as a:
        notes = pickle.load(a)
    with open(userfile, 'rb') as b:
        users = pickle.load(b)
    with open(peoplefile, 'rb') as c:
        people = pickle.load(c)

    # Start counting
    user_activity_count = {}
    last_user_update = {}
    for user in users:
        user_activity_count[user.highrise_id] = 0
        last_user_update[user.highrise_id] = date(1901, 1, 1)

    print('Started counting user activity by note')
    for note in notes:
        if note.created_at > datetime.utcnow() + timedelta(days = -trailing_days):
            #print('Note created ', note.created_at, ' by ', note.author_id, ' regarding ', note.body)
            try:
                user_activity_count[note.author_id] += 1
            except KeyError:
                print('User no longer exists')
            try:
                if (note.created_at.date() > last_user_update[note.author_id]):
                    last_user_update[note.author_id] = note.created_at.date()
            except KeyError:
                print('...')
    print('Finished counting user activity by note')
    print('=======================================')

    f = open('highrise-analysis-output.txt', 'w')
    f.write('Report run on ')
    f.write(str(date.today()))
    f.write('\n Highrise People Count: ')
    f.write(str(len(people)))
    f.write('\n ============================ \n')
    for user in users:
        print(user.name, ' has performed ', user_activity_count[user.highrise_id], ' activities')
        f.write(str.join(' ', (user.name, ', ', str(user_activity_count[user.highrise_id]))))
        if last_user_update[user.highrise_id] == date(1901, 1, 1):
            print(user.name, ' has not updated Highrise in the last 365 days')
            f.write(str.join(' ', (', NO_UPDATES\n')))
        else:
            print(user.name, ' last updated Highrise ', last_user_update[user.highrise_id])
            f.write(str.join(' ', (', ', str(last_user_update[user.highrise_id]), '\n')))
    all_done = time.time()
    f.close()

# ===================================================================

if __name__ == "__main__":
    trailing_days = 365  # Number of days back to monitor
    # Production Environment Analysis
    Create_Notes_Backup(MY_API_KEY, MY_HIGHRISE_USERID, 'highrise-production-notes.bak', 'highrise-production-users.bak', 'highrise-production-people.bak', trailing_days = 365)  # Production Environment
    Analyze_Notes_Backup('highrise-production-notes.bak', 'highrise-production-users.bak', 'highrise-production-people.bak', trailing_days = 365)
Answer 1 (score: 0)
Mike,
What you are doing is walking through all the users and then, for each user, walking through all the notes. Once you have a user, there should be a way to query only the notes that belong to that user. You may also be able to include a date range in that query and simply take a .count of the matching records.
If you can't search notes by user, then iterate over the notes once and keep a dictionary keyed by userId with a running total of the notes that meet your criteria. You can then match those user IDs back to the users table, as in the sketch below.
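Something along these lines — a rough sketch, assuming the Highton note objects expose author_id and created_at the way they do in your script:

from collections import Counter
from datetime import datetime, timedelta

def notes_per_user(notes, trailing_days = 365):
    # One pass over the notes: count the qualifying notes for each author_id.
    cutoff = datetime.utcnow() - timedelta(days = trailing_days)
    return Counter(note.author_id for note in notes if note.created_at > cutoff)

The resulting per-author counts can then be matched back to your users list by highrise_id.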
Good luck