如何使用并行执行器(parallel executor)改写本代码末尾的 for 循环,使循环的每次迭代并行执行而不是顺序执行,即 foldlist 中的每个元素都在与当前 CPU 核心不同的核心上处理?
def ROEM(predictions, userCol="USER_ID", itemCol="SERVICE_ROUTE_ID",
         ratingCol="ENGAGEMENT_SCORE", predictionCol="prediction"):
    """Compute the ROEM ranking metric for a DataFrame of model predictions.

    Within each user's rows, predictions are ranked from highest to lowest
    with PERCENT_RANK (0.0 for the top-scored row). The metric is the
    rating-weighted average of that rank:

        sum(rating * rank) / sum(rating)

    so it is smaller when the rows carrying the largest ratings are the ones
    the model scored highest.

    Only DataFrame operations on ``predictions`` are used: no temp views are
    registered and no module-level Spark session is read, so separate calls
    do not share any intermediate tables.

    Args:
        predictions: Spark DataFrame containing the ``userCol``,
            ``ratingCol`` and ``predictionCol`` columns.
        userCol: Column that identifies the user; rankings are computed
            separately for each distinct value.
        itemCol: Not used by the computation; kept so existing callers that
            pass it keep working.
        ratingCol: Column holding the observed rating/weight.
        predictionCol: Column holding the model's predicted score.

    Returns:
        The metric value, ``sum(rating * rank) / sum(rating)``.

    Raises:
        ValueError: If the ratings sum to zero or there are no non-null
            ratings, in which case the metric is undefined.
    """
    # Imported locally so the function carries its own dependencies.
    from pyspark.sql import Window
    from pyspark.sql import functions as F

    # Total weight: the sum of all observed ratings.
    denominator = predictions.agg(F.sum(ratingCol)).collect()[0][0]
    if not denominator:
        # None (no rows / all-null ratings) or 0 would make the division fail.
        raise ValueError(
            "ROEM is undefined: column %r has no non-zero rating total" % ratingCol
        )

    # Per-user percentile rank of each row, best prediction first.
    by_user_best_first = Window.partitionBy(userCol).orderBy(
        F.col(predictionCol).desc()
    )
    ranked = predictions.withColumn(
        "rank", F.percent_rank().over(by_user_best_first)
    )

    # Weight every rank by its rating and add the products together.
    numerator = ranked.agg(
        F.sum(F.col(ratingCol) * F.col("rank"))
    ).collect()[0][0]

    return numerator / denominator
# Hold out 20% of the data as the final test set; the other 80% is used for
# cross-validation. No seed is passed here, unlike the fold split below.
training, test = df.randomSplit([0.8, 0.2])

# Cut the training set into five equally weighted parts (seeded split).
train1, train2, train3, train4, train5 = training.randomSplit([0.2] * 5, seed=1)

# Each fold is the union of four parts; the remaining part is its hold-out.
fold1 = train2.union(train3).union(train4).union(train5)  # hold-out: train1
fold2 = train3.union(train4).union(train5).union(train1)  # hold-out: train2
fold3 = train4.union(train5).union(train1).union(train2)  # hold-out: train3
fold4 = train5.union(train1).union(train2).union(train3)  # hold-out: train4
fold5 = train1.union(train2).union(train3).union(train4)  # hold-out: train5

# (fit data, hold-out data) pairs, one per fold.
foldlist = list(zip(
    (fold1, fold2, fold3, fold4, fold5),
    (train1, train2, train3, train4, train5),
))

# One validation ROEM per model is collected here.
ROEMS = []

# NOTE(review): folds are evaluated one after another. The iterations share
# only `model` and whatever ROEM does internally, so confirm both are safe for
# concurrent use before running folds in parallel.
for model in model_list:
    # Cross-validation: fit on each fold, score on that fold's hold-out part.
    for cv_train, cv_holdout in foldlist:
        cv_predictions = model.fit(cv_train).transform(cv_holdout)
        print("ROEM: ", ROEM(cv_predictions))

    # Validation: fit on the whole training set, score on the test set.
    v_fitted_model = model.fit(training)
    v_predictions = v_fitted_model.transform(test)
    v_ROEM = ROEM(v_predictions)

    # Record and report this model's validation score.
    ROEMS.append(v_ROEM)
    print("Validation ROEM: ", v_ROEM)