我希望优化下面的小代码:
def update_users_genre_lang_score(cursor):
    """Count, per user, the fetched plays in each genre and each language.

    Runs the query below on ``cursor`` and builds two nested dicts in local
    variables, ``user_genre_score[user_id][genre]`` and
    ``user_lang_score[user_id][lang]``. The code shown stops once the counts
    are built; it neither returns nor stores them.

    Args:
        cursor: Database cursor providing ``execute``, ``rowcount`` and
            ``fetchall``. Rows are indexed by column name
            (``track['user_id']``), so this is presumably a dict-style
            cursor -- confirm against the caller.
    """
    cursor.execute("select user_id,playDuration,lang,genre from sd_archive_track_clicks where playDuration > 15 and user_id!=0 and genre!=0 and lang!=0 and lang <21 and genre <24 and playDate > '2016-10-01'order by playDate desc")
    # NOTE(review): `db` is not defined in this snippet -- presumably a
    # module-level connection object.
    db.commit()
    numrows = int(cursor.rowcount)  # not used in the code shown
    tracks_played = cursor.fetchall()
    genre_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
    lang_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
    #initialization part
    user_genre_score = {}
    user_lang_score = {}
    # This loop runs once per fetched row, so a user with several rows has
    # both zero-filled dicts rebuilt each time. Counting happens in the
    # separate loop below, so the final counts are unaffected.
    for track in tracks_played:
        user_genre_score[track['user_id']] = {}
        user_lang_score[track['user_id']] = {}
        for genre in genre_list:
            user_genre_score[track['user_id']][genre] = 0
        for lang in lang_list:
            user_lang_score[track['user_id']][lang] = 0
    #initialization part end
    # Second pass: add one to the matching genre and language counters for
    # every fetched row.
    for track in tracks_played:
        user_genre_score[track['user_id']][track['genre']] = int(user_genre_score[track['user_id']][track['genre']]) + 1
        user_lang_score[track['user_id']][track['lang']] = int(user_lang_score[track['user_id']][track['lang']]) + 1
有什么办法可以优化初始化步骤吗?
答案 0 :(得分:1)
您可以先创建一份默认值字典,再把它复制给每条记录,从而获得一些加速。以下是带有注释的示例代码:
def update_users_genre_lang_score(cursor):
    """Count, per user, the fetched plays in each genre and each language.

    Each user's counters start as a copy of a prebuilt all-zero dict rather
    than being filled key by key. The counts live in the local dicts
    ``user_genre_score[user_id][genre]`` and ``user_lang_score[user_id][lang]``;
    the code shown neither returns nor stores them.

    Args:
        cursor: Database cursor providing ``execute``, ``rowcount`` and
            ``fetchall``. Rows are indexed by column name
            (``track['user_id']``), so this is presumably a dict-style
            cursor -- confirm against the caller.
    """
    # you are asking for a lot of stuff but only using a little. Is this
    # stuff consumed in this function?
    cursor.execute("select user_id,playDuration,lang,genre from sd_archive_track_clicks where playDuration > 15 and user_id!=0 and genre!=0 and lang!=0 and lang <21 and genre <24 and playDate > '2016-10-01'order by playDate desc")
    # what is the commit for?
    # db.commit()
    numrows = int(cursor.rowcount)  # not used in the code shown
    tracks_played = cursor.fetchall()
    genre_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]
    # All-zero template, built once and copied for each user below.
    genre_default = {genre: 0 for genre in genre_list}
    lang_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
    lang_default = {lang: 0 for lang in lang_list}
    #initialization part
    user_genre_score = {}
    user_lang_score = {}
    # Still runs once per fetched row: a user with several rows gets a fresh
    # copy each time. Counting happens in the loop below, so the final
    # counts are unaffected.
    for track in tracks_played:
        user_id = track['user_id']
        user_genre_score[user_id] = genre_default.copy()
        user_lang_score[user_id] = lang_default.copy()
    #initialization part end
    # this seems like an expensive way to initialize to 1 instead of 0...
    # am i missing something?!
    for track in tracks_played:
        user_genre_score[track['user_id']][track['genre']] += 1
        user_lang_score[track['user_id']][track['lang']] += 1
<strong>更新</strong>
您可以使用 collections.defaultdict 进行初始化,这样条目会在首次被访问时动态创建,从而避免每当某个 user_id 在结果行中出现时都重新初始化它的条目。
import collections
def update_users_genre_lang_score(cursor):
    """Count, per user, the fetched plays in each genre and each language.

    Uses nested ``collections.defaultdict`` objects, so no up-front
    initialisation pass is needed: a user's dict and each genre/lang counter
    are created the first time they are touched. Only keys that are actually
    accessed exist in the result. The counts live in local variables; the
    code shown neither returns nor stores them.

    Args:
        cursor: Database cursor providing ``execute``, ``rowcount`` and
            ``fetchall``. Rows are indexed by column name
            (``track['user_id']``), so this is presumably a dict-style
            cursor -- confirm against the caller.
    """
    cursor.execute("select user_id,playDuration,lang,genre from sd_archive_track_clicks where playDuration > 15 and user_id!=0 and genre!=0 and lang!=0 and lang <21 and genre <24 and playDate > '2016-10-01'order by playDate desc")
    # what is the commit for?
    # db.commit()
    numrows = int(cursor.rowcount)  # not used in the code shown
    tracks_played = cursor.fetchall()
    #initialization part
    # this creates a two level nested dict ending in an integer count
    # that generates items dynamically
    user_genre_score = collections.defaultdict(lambda: collections.defaultdict(int))
    user_lang_score = collections.defaultdict(lambda: collections.defaultdict(int))
    #initialization part end
    # `+= 1` on a missing key first creates it as int() == 0, then adds one.
    for track in tracks_played:
        user_genre_score[track['user_id']][track['genre']] += 1
        user_lang_score[track['user_id']][track['lang']] += 1
工作原理
defaultdict 可能会让人有点烧脑——先提个醒。对普通的 dict,访问不存在的键会引发 KeyError;而 defaultdict 会调用您提供的工厂函数,并为您创建这个键。调用 int() 时,得到的是 0:
>>> int()
0
因此,如果把 int 用作工厂函数,首次访问新键时就会得到 0:
>>> d1 = collections.defaultdict(int)
>>> d1
defaultdict(<class 'int'>, {})
>>> d1['user1']
0
>>> d1
defaultdict(<class 'int'>, {'user1': 0})
如果对一个新键做自增,Python 会先取出经过初始化的项(值为 0),然后再加 1:
>>> d1['user2'] += 1
>>> d1
defaultdict(<class 'int'>, {'user1': 0, 'user2': 1})
但是您需要两层字典,所以让外层的 defaultdict 负责创建内层的 defaultdict:
>>> d2 = collections.defaultdict(lambda:collections.defaultdict(int))
>>> d2['user1']
defaultdict(<class 'int'>, {})
>>> d2['user1']['genre1']
0
>>> d2
defaultdict(<function <lambda> at 0x7efedf493bf8>, {'user1': defaultdict(<class 'int'>, {'genre1': 0})})
>>> d2['user1']['genre2'] += 1
>>> d2
defaultdict(<function <lambda> at 0x7efedf493bf8>, {'user1': defaultdict(<class 'int'>, {'genre1': 0, 'genre2': 1})})
答案 1 :(得分:0)
您可以像这样优化它
# Genre ids run 1..23 and language ids run 1..20, matching the literal lists
# in the question and the `genre <24` / `lang <21` filters in its query.
# range() excludes its stop value, so the stops are 24 and 21.
genre_list = list(range(1, 24))
lang_list = list(range(1, 21))