在我的代码中,我有以下一行:
(item, item) for ...
我只是不明白为什么会出现 item for ... 之类的写法。
from joblib import Parallel, delayed
from tokenizer import Predicates
class test():
    """Demo of applying column tokenisation strategies through joblib.

    Each record is a ``(username, email)`` tuple. A *strategy* is a pair of
    callables ``(func_username, func_email)``; the first is applied to the
    username column and the second to the email column.
    """

    def __init__(self):
        # Small sample only; the original note says the real dataset holds
        # millions of records.
        self.data = [
            ("user name", "abc@gmail.com"),
            ("user1 abc", "abc@gmail.com"),
            ("abc user1 ", "abcd@gmail.com")
        ]

    def proc(self, data, strategy):
        """Tokenise every record in *data* with one strategy.

        Args:
            data: iterable of ``(username, email)`` pairs.
            strategy: pair ``(func_username, func_email)`` of one-argument
                callables, one per column.

        Returns:
            A list of ``(username_tokens, email_tokens)`` tuples in the same
            order as *data*.
        """
        # unwrap different strategy for different column
        func_username, func_email = strategy
        # Each column goes through its own tokeniser; the email tokeniser
        # must receive the email value, not the username.
        return [
            (func_username(uname), func_email(email))
            for uname, email in data
        ]

    def run(self):
        """Run ``proc`` over ``self.data`` once per tokenisation strategy.

        Returns:
            A list with one entry per strategy. Each entry is the list
            returned by ``Parallel`` for that strategy, i.e. one ``proc``
            result per submitted task.
        """
        # define different tokenisation strategy
        strategy = [(Predicates().tokenFingerprint, Predicates().tokenFingerprint),
                    (Predicates().otherMethod, Predicates().otherMethod)]
        # Assign tokenisation jobs to joblib workers.
        # NOTE: to keep the example simple, self.data is not split here, so
        # only a single task (covering the whole dataset) is submitted per
        # strategy. To use both workers, add one delayed call per data chunk
        # to the task list below.
        lsres_strategy = []
        for func_username, func_email in strategy:
            # Parallel expects an *iterable* of delayed tasks; passing a bare
            # delayed(...) call makes it iterate over that task's
            # (func, args, kwargs) tuple and fail.
            tasks = [delayed(self.proc)(self.data, (func_username, func_email))]
            lsres = Parallel(n_jobs=2)(tasks)
            lsres_strategy.append(lsres)
        return lsres_strategy
# Run the demo only when this file is executed as a script. joblib's
# documentation asks for this guard when workers are spawned processes
# (e.g. on Windows), because each worker re-imports the main module and
# unguarded top-level code would start the job again.
if __name__ == "__main__":
    t = test()
    t.run()
此代码段来自http://www.django-rest-framework.org/tutorial/1-serialization/#creating-a-model-to-work-with