我用下面这个 MapReduceJob.hpp 文件来初始化 map/reduce 作业(我正在开发一个并行的 MapReduce 骨架项目):
#ifndef MAPREDUCEJOB_HPP_
#define MAPREDUCEJOB_HPP_
#include <functional>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include <ff/farm.hpp>

#include "TaskScheduler.hpp"
#include "MapReduceWorker.hpp"
#include "MapResult.hpp"
#include "TextInputFormat.hpp"
#include "LineRecordReader.hpp"
#include "MapReduceJob.hpp"
#include "TaskScheduler.hpp"
using namespace ff;
/*
* MIK: map input key
* MIV: map input value
* MOK: map output key
* MOV: map output value
* RK: reduce output key
* RV: reduce output value
*/
template<typename MIK, typename MIV, typename MOK, typename MOV, typename RK, typename RV>
class MapReduceJob {
public:
MapReduceJob( string file_name,
function<void(MIK key, MIV value, MapResult<MIK,MIV,MOK,MOV>*)> map_func ,
function<pair<RK,RV> (MOK key, vector<MOV> list_value)> red_func,
int nWorkers){
farm = new ff_Farm<> ( [nWorkers]() {
std::vector<std::unique_ptr<ff_node> > Workers;
for(int i=0;i<nWorkers;++i)
Workers.push_back(std::unique_ptr<ff_node_t<Task<int,string,string,int>,Result<int,string,string,int>> >(new MapReduceWorker<int,string,string,int>()));
return Workers;
}() );
MapScheduler<int,string,string,int> e(farm->getlb(),file_name,nWorkers,map_func);
farm->remove_collector(); // removes the default collector
farm->add_emitter(e);
farm->wrap_around();
if (farm->run_and_wait_end()<0) error("running myFarm");
}
MapReduceJob( string file_name,
function<void(MIK key, MIV value, MapResult<MIK,MIV,MOK,MOV>*)> map_func ,
function<pair<RK,RV> (MOK key, vector<MOV> list_value)> red_func)
: MapReduceJob (file_name, map_func, red_func, ff_realNumCores()){}
void waitForCompletion(){
if (farm->run_and_wait_end()<0) error("running myFarm");
}
private:
ff_Farm<> *farm;
};
#endif /* MAPREDUCEJOB_HPP_ */
我知道您可能不了解 FastFlow,但 farm->run_and_wait_end() 的第一次调用(在构造函数中)行为正确,而第二次调用(在 waitForCompletion() 中)却什么也不做,这是怎么回事?即使我只保留两个调用中的一个,情况也一样:只保留第一个时行为仍然正确,只保留第二个时则不执行任何操作。
我使用下面的 main.cpp 来运行/测试上面的代码:
#include <iostream>
#include <sstream>
#include <iterator>
#include <ff/pipeline.hpp> // defines ff_pipeline and ff_Pipe
#include <ff/farm.hpp>
#include "MapReduceJob.hpp"
using namespace ff;
int main() {
std::function<void(int key, string value,MapResult<int,string,string,int> *result)> map_func = [](int key,string value,MapResult<int,string,string,int> *result) {
istringstream iss(value);
vector<string> tokens{istream_iterator<string>{iss},istream_iterator<string>{}};
for(string key : tokens)
result->emit(key,1);
};
std::function<pair<string,int> (string key, vector<int> list_value)> red_func = [](string key, vector<int> list_value){
pair<string,int> result(key,list_value.size());
return result;
};
MapReduceJob<int,string,string,int,string,int> job ("file",map_func,red_func,4);
job.waitForCompletion();
return 0;
}
关键语句是 job 的声明,以及 job.waitForCompletion(); 的调用(后者不产生任何效果)。