我的 Jena Hadoop MapReduce 示例抛出 java.lang.NoClassDefFoundError。这是一个 Maven 项目。我读到这可能与缺少依赖项有关,但我无法确定缺少的是哪一个!问题可能出在哪里?
控制台日志
java.lang.NoClassDefFoundError: org/apache/jena/hadoop/rdf/types/NodeWritable
at org.apache.jena.hadoop.rdf.stats.RdfMapReduceExample.main(RdfMapReduceExample.java:29)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassNotFoundException: org.apache.jena.hadoop.rdf.types.NodeWritable
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 7 more
Mapper 代码第1部分
package org.apache.jena.hadoop.rdf.mapreduce.count;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.jena.hadoop.rdf.types.AbstractNodeTupleWritable;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
/**
 * Mapper that writes one output record per node obtained from the incoming tuple.
 * <p>
 * Each node returned by {@link #getNodes(AbstractNodeTupleWritable)} is written as the
 * output key, paired with a {@link LongWritable} holding the value {@code 1}.
 *
 * @param <TKey>   type of the input key (not inspected by this mapper)
 * @param <TValue> type of the tuple wrapped by {@code T}
 * @param <T>      writable tuple type received as the input value
 */
public abstract class AbstractNodeTupleNodeCountMapper<TKey, TValue, T extends AbstractNodeTupleWritable<TValue>>
        extends Mapper<TKey, T, NodeWritable, LongWritable> {

    /** Count written with every node; the same instance is passed to each write call. */
    private LongWritable initialCount = new LongWritable(1);

    /**
     * Writes every node of {@code value} to the context together with the initial count.
     *
     * @param key     input key, unused
     * @param value   tuple whose nodes are emitted
     * @param context context the (node, count) pairs are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(TKey key, T value, Context context) throws IOException, InterruptedException {
        final NodeWritable[] nodes = this.getNodes(value);
        for (int i = 0; i < nodes.length; i++) {
            context.write(nodes[i], this.initialCount);
        }
    }

    /**
     * Extracts the nodes to be emitted for the given tuple.
     *
     * @param tuple tuple received by {@link #map}
     * @return the nodes to write, in the order they should be written
     */
    protected abstract NodeWritable[] getNodes(T tuple);
}
Mapper 代码第2部分
package org.apache.jena.hadoop.rdf.mapreduce.count;
import org.apache.jena.graph.Triple;
import org.apache.jena.hadoop.rdf.mapreduce.count.AbstractNodeTupleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
import org.apache.jena.hadoop.rdf.types.TripleWritable;
/**
 * Node-count mapper for triples: emits the subject, predicate and object of each triple.
 *
 * @param <TKey> type of the input key
 */
public class TripleNodeCountMapper<TKey> extends AbstractNodeTupleNodeCountMapper<TKey, Triple, TripleWritable> {

    /**
     * Wraps the three positions of the triple held by {@code tuple} as writables.
     *
     * @param tuple writable holding the triple
     * @return a three-element array: subject, predicate, object (in that order)
     */
    @Override
    protected NodeWritable[] getNodes(TripleWritable tuple) {
        final Triple triple = tuple.get();
        final NodeWritable subject = new NodeWritable(triple.getSubject());
        final NodeWritable predicate = new NodeWritable(triple.getPredicate());
        final NodeWritable object = new NodeWritable(triple.getObject());
        return new NodeWritable[] { subject, predicate, object };
    }
}
Reducer 代码
package org.apache.jena.hadoop.rdf.mapreduce.count;
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
/**
 * Reducer that sums the counts received for a node and writes the node with its total.
 */
public class NodeCountReducer extends Reducer<NodeWritable, LongWritable, NodeWritable, LongWritable> {

    /**
     * Adds up all {@code values} for {@code key} and writes a single (key, total) pair.
     *
     * @param key     node whose counts are being combined
     * @param values  partial counts for the node
     * @param context context the total is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(NodeWritable key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long total = 0;
        for (LongWritable partial : values) {
            total += partial.get();
        }
        context.write(key, new LongWritable(total));
    }
}
作业(Job)处理程序
package org.apache.jena.hadoop.rdf.stats;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.jena.hadoop.rdf.io.input.TriplesInputFormat;
import org.apache.jena.hadoop.rdf.io.output.ntriples.NTriplesNodeOutputFormat;
import org.apache.jena.hadoop.rdf.mapreduce.count.NodeCountReducer;
import org.apache.jena.hadoop.rdf.mapreduce.count.TripleNodeCountMapper;
import org.apache.jena.hadoop.rdf.types.NodeWritable;
/**
 * Command-line driver that configures and runs the "RDF Triples Node Usage Count" job.
 * <p>
 * The job uses {@link TripleNodeCountMapper} and {@link NodeCountReducer}, reads its input
 * through {@link TriplesInputFormat} and writes its output through
 * {@link NTriplesNodeOutputFormat}.
 */
public class RdfMapReduceExample {

    /** Number of arguments needed so that both {@code args[1]} and {@code args[2]} exist. */
    private static final int REQUIRED_ARGS = 3;

    /** Exit status used when the arguments are missing. */
    private static final int EXIT_USAGE = 2;

    /** Exit status used when the job fails or an error is thrown. */
    private static final int EXIT_FAILURE = 1;

    /**
     * Configures the job, submits it and waits for it to finish.
     * <p>
     * The process exits with a non-zero status when the arguments are missing, when the job
     * reports failure, or when an error is thrown, so that calling scripts can detect it.
     *
     * @param args {@code args[1]} is the input path and {@code args[2]} the output path;
     *             {@code args[0]} is not read here.
     *             NOTE(review): confirm how the job is launched - if only the two paths are
     *             passed, these indices need to be 0 and 1 instead.
     */
    public static void main(String[] args) {
        // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException trace.
        if (args == null || args.length < REQUIRED_ARGS) {
            System.err.println("Usage: RdfMapReduceExample <unused> <input path> <output path>");
            System.exit(EXIT_USAGE);
            return;
        }

        int exitCode = EXIT_FAILURE;
        try {
            // Get Hadoop configuration
            Configuration config = new Configuration(true);

            // Create job
            Job job = Job.getInstance(config);
            job.setJarByClass(RdfMapReduceExample.class);
            job.setJobName("RDF Triples Node Usage Count");

            // Map/Reduce classes
            job.setMapperClass(TripleNodeCountMapper.class);
            job.setMapOutputKeyClass(NodeWritable.class);
            job.setMapOutputValueClass(LongWritable.class);
            job.setReducerClass(NodeCountReducer.class);

            // Input and Output
            job.setInputFormatClass(TriplesInputFormat.class);
            job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
            FileInputFormat.setInputPaths(job, new Path(args[1]));
            FileOutputFormat.setOutputPath(job, new Path(args[2]));

            // Launch the job and await completion
            job.submit();
            if (job.monitorAndPrintJob()) {
                // OK
                System.out.println("Completed");
                exitCode = 0;
            } else {
                // Failed
                System.err.println("Failed");
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (Throwable e) {
            // Top-level boundary: also reports Errors such as NoClassDefFoundError.
            e.printStackTrace();
        }

        // Return normally on success; signal failure to the caller otherwise.
        if (exitCode != 0) {
            System.exit(exitCode);
        }
    }
}
pom.xml 依赖项
<dependencies>
  <!-- Default (compile) scope: these must be available on the job's classpath at run time. -->
  <!-- https://mvnrepository.com/artifact/org.apache.jena/jena-elephas-common -->
  <dependency>
    <groupId>org.apache.jena</groupId>
    <artifactId>jena-elephas-common</artifactId>
    <version>3.1.1</version>
  </dependency>
  <dependency>
    <groupId>org.apache.jena</groupId>
    <artifactId>jena-elephas-io</artifactId>
    <version>3.1.1</version>
  </dependency>

  <!-- Provided scope: needed to compile, expected to be supplied by the runtime environment. -->
  <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-mapreduce-client-common</artifactId>
    <version>2.7.1</version>
    <scope>provided</scope>
  </dependency>
  <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
  <dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.7.1</version>
    <scope>provided</scope>
  </dependency>
</dependencies>
答案 0 :(得分:2)
您的依赖声明是正确的,否则您的代码根本无法编译。
您的问题是您的JAR可能只包含您的代码,并且不包含任何必要的依赖项。因此,当Map Reduce尝试运行代码时,不存在任何依赖项。
通常在构建 MapReduce 作业时,最好创建一个同时包含您的代码和所有依赖项的胖 JAR。可以使用 Maven Assembly 插件来完成此操作(如果愿意,也可以使用 Maven Shade 插件)。
将以下内容添加到您的 pom.xml:
<!-- Builds an additional assembly from the hadoop-job.xml descriptor. -->
<plugin>
  <artifactId>maven-assembly-plugin</artifactId>
  <configuration>
    <descriptors>
      <!-- Assembly descriptor file describing the contents of the extra JAR. -->
      <descriptor>hadoop-job.xml</descriptor>
    </descriptors>
  </configuration>
  <executions>
    <!-- Run the "single" goal during the package phase. -->
    <execution>
      <id>make-assembly</id>
      <phase>package</phase>
      <goals>
        <goal>single</goal>
      </goals>
    </execution>
  </executions>
</plugin>
并添加如下的 hadoop-job.xml 描述符文件:
<!-- Assembly descriptor producing a JAR with the "hadoop-job" classifier. -->
<assembly>
  <id>hadoop-job</id>
  <formats>
    <format>jar</format>
  </formats>
  <includeBaseDirectory>false</includeBaseDirectory>
  <dependencySets>
    <!--
      Dependencies other than the project itself: copied as JAR files into lib/.
      The scope is "runtime" so that compile/runtime dependencies are bundled while
      provided-scope ones are left out; "provided" here would select only the
      provided-scope artifacts and omit the libraries the job actually needs.
    -->
    <dependencySet>
      <unpack>false</unpack>
      <scope>runtime</scope>
      <outputDirectory>lib</outputDirectory>
      <excludes>
        <exclude>${project.groupId}:${project.artifactId}</exclude>
      </excludes>
    </dependencySet>
    <!-- The project's own artifact: unpacked so its classes sit at the root of the JAR. -->
    <dependencySet>
      <unpack>true</unpack>
      <includes>
        <include>${project.groupId}:${project.artifactId}</include>
      </includes>
    </dependencySet>
  </dependencySets>
</assembly>
本质上,这要求 Maven 为您构建一个包含所有非 provided 作用域依赖项的胖 JAR。这将额外生成一个名为 your-artifact-VERSION-hadoop-job.jar 的构件(artifact),您应该运行它,而不是普通的 JAR。