I am trying to run the WordCount example in C++, as in this link, which describes a way of running the WordCount program in C++.
So I have this code in the file wordcount.cpp:
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
#include "stdint.h" // <--- to prevent uint64_t errors!
#include "Pipes.hh"
#include "TemplateFactory.hh"
#include "StringUtils.hh"
using namespace std;
class WordCountMapper : public HadoopPipes::Mapper {
public:
    // constructor: does nothing
    WordCountMapper( HadoopPipes::TaskContext& context ) {
    }

    // map function: receives a line, outputs (word, "1")
    // tuples to the reducer
    void map( HadoopPipes::MapContext& context ) {
        //--- get line of text ---
        string line = context.getInputValue();

        //--- split it into words ---
        vector< string > words = HadoopUtils::splitString( line, " " );

        //--- emit each tuple ( word, "1" ) ---
        for ( unsigned int i = 0; i < words.size(); i++ ) {
            context.emit( words[i], HadoopUtils::toString( 1 ) );
        }
    }
};
class WordCountReducer : public HadoopPipes::Reducer {
public:
    // constructor: does nothing
    WordCountReducer( HadoopPipes::TaskContext& context ) {
    }

    // reduce function
    void reduce( HadoopPipes::ReduceContext& context ) {
        int count = 0;

        //--- get all tuples with the same key, and count their occurrences ---
        while ( context.nextValue() ) {
            count += HadoopUtils::toInt( context.getInputValue() );
        }

        //--- emit (word, count) ---
        context.emit( context.getInputKey(), HadoopUtils::toString( count ) );
    }
};
int main( int argc, char *argv[] ) {
    return HadoopPipes::runTask( HadoopPipes::TemplateFactory< WordCountMapper, WordCountReducer >() );
}
And I have this Makefile:
CC = g++
HADOOP_INSTALL = /home/hduser/Scrivania/hadoop-1.2.1
PLATFORM = Linux-amd64-64
CPPFLAGS = -m64 -I$(HADOOP_INSTALL)/c++/$(PLATFORM)/include/hadoop/
wordcount: wordcount.cpp
	$(CC) $(CPPFLAGS) $< -Wall -lssl -lcrypto -L$(HADOOP_INSTALL)/c++/$(PLATFORM)/lib -lhadooppipes -lhadooputils -lpthread -g -O2 -o $@
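For completeness, this is the build-and-stage sequence as I understand it from the Hadoop 1.x Pipes documentation; the HDFS upload step and the target path bin/wordcount are illustrative, not something I am sure my setup requires:

# build the executable with the Makefile above
make wordcount

# upload the binary to HDFS so that task processes can fetch it;
# bin/wordcount is an example path, and -program would then point
# at this HDFS path instead of the local file
hadoop-1.2.1/bin/hadoop fs -mkdir bin
hadoop-1.2.1/bin/hadoop fs -put wordcount bin/wordcount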
Compilation works fine, but when I try to run my program as follows:
$ hadoop-1.2.1/bin/hadoop pipes -D hadoop.pipes.java.recordreader=true \
-D hadoop.pipes.java.recordwriter=true -input input -output output -program wordcount
I get this output:
INFO util.NativeCodeLoader: Loaded the native-hadoop library
WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
WARN snappy.LoadSnappy: Snappy native library not loaded
INFO mapred.FileInputFormat: Total input paths to process : 4
INFO filecache.TrackerDistributedCacheManager: Creating filewordcount in /tmp/hadoop-hduser/mapred/local/archive/8648114132384070327_893673541_1470671038-work--6818354830621303575 with rwxr-xr-x
INFO filecache.TrackerDistributedCacheManager: Cached wordcount as /tmp/hadoop-hduser/mapred/local/archive/8648114132384070327_893673541_1470671038/filewordcount
INFO filecache.TrackerDistributedCacheManager: Cached wordcount as /tmp/hadoop-hduser/mapred/local/archive/8648114132384070327_893673541_1470671038/filewordcount
INFO mapred.JobClient: Running job: job_local2050700100_0001
INFO mapred.LocalJobRunner: Waiting for map tasks
INFO mapred.LocalJobRunner: Starting task: attempt_local2050700100_0001_m_000000_0
INFO util.ProcessTree: setsid exited with exit code 0
INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@15b734b
INFO mapred.MapTask: Processing split: file:/home/hduser/Scrivania/input/sample.txt:0+530
INFO mapred.MapTask: numReduceTasks: 1
INFO mapred.MapTask: io.sort.mb = 100
INFO mapred.MapTask: data buffer = 79691776/99614720
INFO mapred.MapTask: record buffer = 262144/327680
INFO mapred.LocalJobRunner: Starting task: attempt_local2050700100_0001_m_000001_0
INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@62d1f025
INFO mapred.MapTask: Processing split: file:/home/hduser/Scrivania/input/matrix.txt:0+255
INFO mapred.MapTask: numReduceTasks: 1
INFO mapred.MapTask: io.sort.mb = 100
INFO mapred.MapTask: data buffer = 79691776/99614720
INFO mapred.MapTask: record buffer = 262144/327680
INFO mapred.LocalJobRunner: Starting task: attempt_local2050700100_0001_m_000002_0
INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@3d04562f
INFO mapred.MapTask: Processing split: file:/home/hduser/Scrivania/input/matrix.txt~:0+235
INFO mapred.MapTask: numReduceTasks: 1
INFO mapred.MapTask: io.sort.mb = 100
INFO mapred.MapTask: data buffer = 79691776/99614720
INFO mapred.MapTask: record buffer = 262144/327680
INFO mapred.LocalJobRunner: Starting task: attempt_local2050700100_0001_m_000003_0
INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@133d9211
INFO mapred.MapTask: Processing split: file:/home/hduser/Scrivania/input/sample.txt~:0+0
INFO mapred.MapTask: numReduceTasks: 1
INFO mapred.MapTask: io.sort.mb = 100
INFO mapred.MapTask: data buffer = 79691776/99614720
INFO mapred.MapTask: record buffer = 262144/327680
INFO mapred.LocalJobRunner: Map task executor complete.
WARN mapred.LocalJobRunner: job_local2050700100_0001
java.lang.Exception: java.lang.NullPointerException
at org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:354)
Caused by: java.lang.NullPointerException
at org.apache.hadoop.mapred.pipes.Application.<init>(Application.java:103)
at org.apache.hadoop.mapred.pipes.PipesMapRunner.run(PipesMapRunner.java:68)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:430)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:366)
at org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:223)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
at java.util.concurrent.FutureTask$Sync.innerRun(FutureTask.java:334)
at java.util.concurrent.FutureTask.run(FutureTask.java:166)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
at java.lang.Thread.run(Thread.java:724)
INFO mapred.JobClient: map 0% reduce 0%
INFO mapred.JobClient: Job complete: job_local2050700100_0001
INFO mapred.JobClient: Counters: 0
INFO mapred.JobClient: Job Failed: NA
Exception in thread "main" java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1357)
at org.apache.hadoop.mapred.pipes.Submitter.runJob(Submitter.java:248)
at org.apache.hadoop.mapred.pipes.Submitter.run(Submitter.java:479)
at org.apache.hadoop.mapred.pipes.Submitter.main(Submitter.java:494)
I have tried using Hadoop versions: