from lib.threadrunner import ThreadRunner
import signal
import time
RUN = True
def sigint_handler(signum, frame):
    """Handle SIGINT by asking the main loop to stop.

    Prints a notice and sets the module-level ``RUN`` flag to False; the
    ``while RUN`` loop at the bottom of this script polls that flag.

    Args:
        signum: Signal number supplied by the signal machinery (unused).
        frame: Interrupted stack frame supplied by the signal machinery
            (unused).
    """
    global RUN
    # The parenthesised single-argument form prints the same text under both
    # Python 2 and Python 3, unlike the bare ``print 'text'`` statement.
    print('Stop pressing the CTRL+C!')
    RUN = False
# Install the handler so CTRL+C clears RUN instead of raising
# KeyboardInterrupt in the main thread.
signal.signal(signal.SIGINT, sigint_handler)

# NOTE(review): ClassToRun and cfg are not defined or imported in the visible
# part of this file -- confirm where they come from.
objectToRun = ClassToRun(cfg)
time.sleep(3)

# The below code uses threading module to create 3 threads
# (per the original author; ThreadRunner's implementation is not visible here).
threadRunner = ThreadRunner()
threadRunner.load('Task Name', objectToRun)
threadRunner.start(3)
# threadRunner.joinAll()

# Keep the main thread alive, checking once per second, until the SIGINT
# handler sets RUN to False.
while RUN:
    time.sleep(1)
我有这样的 RDD。我想将前三列(如 'a','b','c')组合为键,以计算一些统计值:例如,对于键 ('a','b','c'),我想聚合值 (1,4) 和 (9,8),得到第 4 列和第 5 列的总和 (10,12),再求出它们的平均值 (5,6)。我该如何实现?
答案 0 :(得分:1)
使用rdd:
# Group rows by their first three columns and average the remaining numeric
# columns. Each row becomes (key, values + (1,)); the trailing 1 sums up to
# the number of rows per key during the reduce.
# Assumes every element of `data` is a tuple -- TODO confirm against the RDD.
(data.map(lambda row: (row[:3], row[3:] + (1,)))
     # Element-wise sum of the value tuples (column sums followed by count).
     .reduceByKey(lambda a, b: tuple(x + y for x, y in zip(a, b)))
     # Divide every column sum by the count held in the last slot. float()
     # forces true division so integer sums are not truncated under Python 2,
     # and slicing handles any number of value columns instead of exactly two.
     .map(lambda kv: (kv[0],
                      tuple(float(total) / kv[1][-1] for total in kv[1][:-1])))
).collect()
# Output recorded with the original snippet (presumably under Python 2
# integer division):
# [(('a', 'b', 'c'), (5, 6)), (('l', 'g', 'z'), (14, 11)), (('s', 'q', 'a'), (9, 8)), (('o', 'u', 'w'), (43, 40))]
# With true division the means come back as floats, e.g. (('a', 'b', 'c'), (5.0, 6.0)).