I have configured Hadoop in pseudo-distributed mode (a single-node cluster) on Ubuntu 10.04.
I am having trouble running my Hadoop Pipes code. My code is as follows:
#include "/home/hadoop/project/hadoop-0.20.2/c++/Linux-amd64-64/include/hadoop/Pipes.hh"
#include "/home/hadoop/project/hadoop-0.20.2/c++/Linux-amd64-64/include/hadoop/TemplateFactory.hh"
#include "/home/hadoop/project/hadoop-0.20.2/c++/Linux-amd64-64/include/hadoop/StringUtils.hh"
#include "/home/hadoop/project/hadoop-0.20.2/src/c++/libhdfs/hdfs.h"
const std::string WORDCOUNT = "WORDCOUNT";
const std::string INPUT_WORDS = "INPUT_WORDS";
const std::string OUTPUT_WORDS = "OUTPUT_WORDS";
//hdfs fs;
//hdfs writefile;
hdfsFS fs;
hdfsFile writefile;
const char* writepath="/temp/mest";
class WordCountMap: public HadoopPipes::Mapper {
public:
HadoopPipes::TaskContext::Counter* inputWords;
WordCountMap(HadoopPipes::TaskContext& context) {
fs = hdfsConnect("192.168.0.133", 54310);
inputWords = context.getCounter(WORDCOUNT, INPUT_WORDS);
}
~WordCountMap()
{
hdfsCloseFile(fs, writefile);
}
void map(HadoopPipes::MapContext& context)
{
hdfsFile writefile = hdfsOpenFile(fs, writepath, O_WRONLY|O_CREAT, 0, 0, 0);
std::vector<std::string> words =
HadoopUtils::splitString(context.getInputValue(), " ");
for(unsigned int i=0; i < words.size(); ++i) {
context.emit(words[i], "1");
}
context.incrementCounter(inputWords, words.size());
}
};
class WordCountReduce: public HadoopPipes::Reducer {
public:
HadoopPipes::TaskContext::Counter* outputWords;
WordCountReduce(HadoopPipes::TaskContext& context) {
outputWords = context.getCounter(WORDCOUNT, OUTPUT_WORDS);
}
void reduce(HadoopPipes::ReduceContext& context) {
int sum = 0;
while (context.nextValue()) {
sum += HadoopUtils::toInt(context.getInputValue());
}
context.emit(context.getInputKey(), HadoopUtils::toString(sum));
context.incrementCounter(outputWords, 1);
}
};
int main(int argc, char *argv[]) {
return HadoopPipes::runTask(HadoopPipes::TemplateFactory<WordCountMap,
WordCountReduce>());
}
It compiled successfully, and I ran it with the following command:
bin/hadoop pipes -D java.pipes.recordreader=true -D java.pipes.recordwriter=true -input gutenberg -output manish_gut2 -program bin/cat
But when I run it, I get the following errors:
11/05/04 16:13:12 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
11/05/04 16:13:12 INFO mapred.FileInputFormat: Total input paths to process : 3
11/05/04 16:13:13 INFO mapred.JobClient: Running job: job_201105041611_0001
11/05/04 16:13:14 INFO mapred.JobClient: map 0% reduce 0%
11/05/04 16:13:24 INFO mapred.JobClient: Task Id : attempt_201105041611_0001_m_000000_0, Status : FAILED
java.io.IOException: pipe child exception
at org.apache.hadoop.mapred.pipes.Application.abort(Application.java:151)
at org.apache.hadoop.mapred.pipes.PipesMapRunner.run(PipesMapRunner.java:101)
at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:358)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:307)
at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: java.io.EOFException
at java.io.DataInputStream.readByte(DataInputStream.java:250)
at org.apache.hadoop.io.WritableUtils.readVLong(WritableUtils.java:298)
at org.apache.hadoop.io.WritableUtils.readVInt(WritableUtils.java:319)
at org.apache.hadoop.mapred.pipes.BinaryProtocol$UplinkReaderThread.run(BinaryProtocol.java:114)
attempt_201105041611_0001_m_000000_0: Hadoop Pipes Exception: RecordReader not defined at /export/crawlspace/chris/work/branch-0.20/src/c++/pipes/impl/HadoopPipes.cc:692 in virtual void HadoopPipes::TaskContextImpl::runMap(std::string, int, bool)
[The same pipe child exception, the same java.io.EOFException cause, and the same "Hadoop Pipes Exception: RecordReader not defined" message then repeat for attempts attempt_201105041611_0001_m_000001_0, attempt_201105041611_0001_m_000001_1, attempt_201105041611_0001_m_000000_1, attempt_201105041611_0001_m_000000_2, and attempt_201105041611_0001_m_000001_2.]
11/05/04 16:13:44 INFO mapred.JobClient: Job complete: job_201105041611_0001
11/05/04 16:13:44 INFO mapred.JobClient: Counters: 3
11/05/04 16:13:44 INFO mapred.JobClient: Job Counters
11/05/04 16:13:44 INFO mapred.JobClient: Launched map tasks=8
11/05/04 16:13:44 INFO mapred.JobClient: Data-local map tasks=8
11/05/04 16:13:44 INFO mapred.JobClient: Failed map tasks=1
Exception in thread "main" java.io.IOException: Job failed!
at org.apache.hadoop.mapred.JobClient.runJob(JobClient.java:1252)
at org.apache.hadoop.mapred.pipes.Submitter.runJob(Submitter.java:248)
at org.apache.hadoop.mapred.pipes.Submitter.run(Submitter.java:479)
at org.apache.hadoop.mapred.pipes.Submitter.main(Submitter.java:494)
I don't know what I am doing wrong. How can I get this program to run? How do I fix these errors?
Answer 0 (score: 2)
I would start with what they do here (http://wiki.apache.org/hadoop/C%2B%2BWordCount), get that working, and then extend it toward your own implementation.
You can also use that page to compare your implementation against theirs and try to track down the problem that way. One difference I noticed is in your recordreader and recordwriter properties: you pass java.pipes.recordreader and java.pipes.recordwriter, while the example at that link uses hadoop.pipes.java.recordreader and hadoop.pipes.java.recordwriter.
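If those property names are the culprit, the corrected invocation would look roughly like the sketch below. This is untested; I have kept the -input, -output, and -program arguments exactly as in your command. Note that -program is expected to point at your compiled Pipes binary uploaded to HDFS, so it is worth double-checking that bin/cat really is that binary:

    bin/hadoop pipes \
      -D hadoop.pipes.java.recordreader=true \
      -D hadoop.pipes.java.recordwriter=true \
      -input gutenberg \
      -output manish_gut2 \
      -program bin/cat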
I haven't used Hadoop this way before, so this is just me finding something that looks similar and noticing the differences. :)
Hope this points you in the right direction.
Answer 1 (score: 1)
The problem here is what @Nija described: hadoop.pipes.java.recordreader is never specified, and it defaults to false. That means Pipes expects your C++ code to supply a RecordReader, and since you don't define one, it can't be found.
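The straightforward fix is to pass -D hadoop.pipes.java.recordreader=true (and the matching recordwriter property), as the other answer suggests. Alternatively, if you really did want to read records in C++, Pipes lets you plug your own reader into the factory. The sketch below is illustrative only and untested: DummyReader, its three synthetic records, and its progress math are all made up for demonstration, and it reuses the WordCountMap and WordCountReduce classes from your question.

    // A made-up reader that ignores the real input split and produces three
    // synthetic records, just to show the shape of the interface.
    class DummyReader : public HadoopPipes::RecordReader {
      int emitted;
    public:
      DummyReader(HadoopPipes::MapContext& context) : emitted(0) {
        // context.getInputSplit() would hand you the serialized split to parse
      }
      bool next(std::string& key, std::string& value) {
        if (emitted >= 3) {
          return false;                        // no more records in this split
        }
        key = HadoopUtils::toString(emitted);  // synthetic key
        value = "hello world";                 // synthetic value
        ++emitted;
        return true;
      }
      float getProgress() {
        return emitted / 3.0f;                 // fraction of the split consumed
      }
    };

    // The reader occupies the fifth slot of the extended TemplateFactory;
    // the partitioner and combiner slots stay void.
    int main(int argc, char* argv[]) {
      return HadoopPipes::runTask(
          HadoopPipes::TemplateFactory<WordCountMap, WordCountReduce,
                                       void, void, DummyReader>());
    }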