dplyr值汇总返回NA

时间:2015-11-07 07:13:53

标签: r

我正在尝试对下面的数据框进行汇总(summarise),但得到的始终是NA,而不是我期望的平均值。有人能解释一下为什么会这样吗?

class Node:
    """Worker node that fetches tasks from a master and sends results back.

    A background thread (``main``) talks to the master through an
    ``Objsocket``. Fetched tasks are placed on ``tasks``; whatever appears
    on ``results`` is sent back. ``process_count`` ``_NodeProcess`` workers
    are created to consume tasks via ``give_task`` and report through
    ``grab_result``.
    """

    def __init__(self, function, master_ip, master_port, process_count=None):
        """Set up queues, the communication thread and the worker objects.

        Args:
            function: Callable applied to each task by the workers.
            master_ip: Address passed to ``Objsocket.connect``.
            master_port: Port passed to ``Objsocket.connect``.
            process_count: Number of workers to create; ``None`` means
                ``multiprocessing.cpu_count()``.
        """
        self.is_alive = True
        self.function = function
        self.master_ip = master_ip
        self.master_port = master_port
        self.tasks = multiprocessing.Queue()
        self.results = multiprocessing.Queue()
        self.thread = threading.Thread(target=self.main)
        self.lock = multiprocessing.RLock()
        # Durations reported through grab_result().
        # NOTE(review): if grab_result() runs inside a child process this
        # list is presumably the child's copy -- confirm before relying on it.
        self.times = []

        # Identity test: only a real None selects the CPU-count default.
        if process_count is None:
            process_count = multiprocessing.cpu_count()
        self.process_count = process_count
        self.processes = [_NodeProcess(self) for _ in range(self.process_count)]

    def main(self):
        """Communication loop: (re)connect to the master while alive.

        A failed connection attempt is retried after one second. Once
        connected, tasks and results are exchanged until the node is
        stopped or the connection is lost; the connection is then closed
        and, if the node is still alive, a new one is attempted.
        """
        while self.is_alive:
            # Named ``connection`` so the stdlib ``socket`` module name is
            # not shadowed.
            connection = Objsocket()
            if not connection.connect(self.master_ip, self.master_port):
                time.sleep(1)
                continue
            print('Connected to master at {}:{}'.format(self.master_ip, self.master_port))

            try:
                while self.is_alive:
                    self._exchange(connection)
            except ConnectionLostError:
                # Lost link: fall through to cleanup, then reconnect.
                pass
            finally:
                # Close even when an unexpected exception escapes, so the
                # connection is never leaked.
                print('Disconnected')
                connection.close()

    def _exchange(self, connection):
        """Run one request/response round with the master.

        Requests a task when the local task queue is empty and forwards at
        most one pending result.
        """
        if self.tasks.empty():
            connection.send({'header': 'task_request'})
            answer = connection.receive()
            if answer['header'] == 'task':
                task = answer['task']
                self.tasks.put(task)
                print('Got task "{}"'.format(task))
            # Any other header (e.g. 'no_task') leaves the queue untouched.

        if not self.results.empty():
            connection.send(self.results.get())
            print('Sent result')

    def start(self):
        """Start the communication thread and every worker."""
        self.thread.start()
        for process in self.processes:
            process.start()
        print('Started node with {} processes'.format(len(self.processes)))

    def stop(self):
        """Terminate the workers, end the communication loop and join it."""
        for process in self.processes:
            process.terminate()
        self.is_alive = False
        self.thread.join()
        print('Stopped node')

    def give_task(self):
        """Return the next queued task, blocking until one is available."""
        return self.tasks.get()

    def grab_result(self, result):
        """Queue ``result`` for sending and record its ``'time'`` entry."""
        self.results.put(result)
        with self.lock:
            self.times.append(result['time'])


class _NodeProcess:
    """Worker wrapper: one ``multiprocessing.Process`` that runs ``main``.

    ``master`` supplies ``give_task``, ``function``, ``lock`` and
    ``grab_result``.
    """

    def __init__(self, master):
        """Remember ``master`` and build (without starting) the process."""
        self.master = master
        self.process = multiprocessing.Process(target=self.main)

    def start(self):
        """Start the underlying process."""
        self.process.start()

    def terminate(self):
        """Terminate the underlying process."""
        self.process.terminate()

    def main(self):
        """Forever: take a task, run it, time it, and report the outcome."""
        while True:
            owner = self.master
            job = owner.give_task()

            # Time only the call to the user function.
            started = time.perf_counter()
            outcome = owner.function(job)
            elapsed = time.perf_counter() - started

            report = {
                'header': 'result',
                'result': outcome,
                'task': job,
                'time': elapsed,
            }
            # Report while holding the master's lock.
            with owner.lock:
                owner.grab_result(report)

1 个答案:

答案 0 :(得分:2)

我们不需要给No_Photos加引号——在summarise中应直接写列名;如果写成字符串"No_Photos",mean()处理的就是这个字符串本身而不是该列的数值,因此会返回NA:

 # Group by Participant and average No_Photos, written as a bare (unquoted) column name; na.rm=TRUE makes mean() skip NA values.
 numeric_results %>% 
        group_by(Participant) %>% 
        summarise( Mean = mean(No_Photos, na.rm=TRUE))
 #  Participant  Mean
 #        (chr) (dbl)
 #1          a3    10
 #2          c2    26

注意:我使用了na.rm=TRUE,以便在计算平均值时忽略可能存在的NA值。