My task is to create a Java program that uses Hadoop to "MapReduce" over a specific MongoDB collection, and I want to know the correct way to feed a MongoDB collection to a Hadoop job.
First of all, I know this is not what Hadoop/MongoDB are primarily designed for, but I was given this as a homework assignment: provide a Java program that reads a MongoDB collection inside a Hadoop MapReduce job. So my idea was to bundle all the Hadoop libraries the job needs, together with the MongoDB driver, into my project's jar, and run the job from there.
For my assignment I chose to build a small library database of books (livres) and rentals (empreints), and at the end I want to store, for each book, how many times it has been rented.
So I wrote a DataSeeder class that fills MongoDB with data, then a Mapper and a Reducer that are the "standard" simple ones adapted to my case, but when I try to run the driver class I get this error:
Exception in thread "main" java.lang.IncompatibleClassChangeError: Found interface org.apache.hadoop.mapreduce.JobContext, but class was expected
Any help would be appreciated.
Here is the code.
The DataSeeder class:
package bibliomapreduce;

import com.mongodb.BasicDBObject;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.MongoClient;
import java.util.Date;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * @author panda
 */
public class DataSeeder {

    public static MongoClient mongoClient;
    public static DB database;
    public static DBCollection Livres;
    public static DBCollection Empreints;
    public String database_name = "biblio";

    // Lazily opens one shared connection to the local MongoDB instance.
    private void DataSeederSingleton() {
        try {
            mongoClient = new MongoClient();
            database = mongoClient.getDB(database_name);
            Livres = database.getCollection("livres");
            Empreints = database.getCollection("empreints");
        } catch (Exception ex) {
            Logger.getLogger(DataSeeder.class.getName()).log(Level.SEVERE, null, ex);
            System.out.println("Missing Driver");
        }
    }

    public DataSeeder() {
        if (mongoClient == null
                || database == null
                || Livres == null
                || Empreints == null) {
            DataSeederSingleton();
        }
    }

    public BasicDBObject generate_bdbo_livre(String isbn, String auteur, String titre) {
        return new BasicDBObject("isbn", isbn)
                .append("auteur", auteur)
                .append("titre", titre);
    }

    public BasicDBObject generate_bdbo_empreint(String isbn, String nom_emp, String date_livraision) {
        return new BasicDBObject("isbn", isbn)
                .append("nom_emp", nom_emp)
                .append("date_livraision", date_livraision);
    }

    public void SeedMyData() {
        String[][] init_livres = {{"123-r", "Samir", "Les Nuits"}, {"321-a", "Lahnin", "Les Jours"}};
        String[][] init_empreints = {{"123-r", "Samir", new Date().toString()}, {"321-a", "Lahnin", new Date().toString()}};
        for (String[] item : init_livres) {
            Livres.insert(generate_bdbo_livre(item[0], item[1], item[2]));
        }
        for (String[] item : init_empreints) {
            Empreints.insert(generate_bdbo_empreint(item[0], item[1], item[2]));
        }
    }

    // Uses $set so that only the given field is written, instead of the
    // matched document being replaced wholesale.
    public void UpdateMyCollection(String updates_identifier, Object updates, String target_identifier, String target_id, DBCollection target_collection) {
        target_collection.update(new BasicDBObject(target_identifier, target_id),
                new BasicDBObject("$set", new BasicDBObject(updates_identifier, updates))
        );
    }
}
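To check that the seeding works on its own, independently of Hadoop, I use a quick throwaway class like the one below (SeedCheck is just a scratch name, not part of the job). It prints both empreint documents, so the driver and the connection seem fine; the problem only appears once Hadoop enters the picture.
package bibliomapreduce;

import com.mongodb.DBCursor;
import com.mongodb.DBObject;

// Standalone sanity check: seed the database, then print what landed in it.
public class SeedCheck {

    public static void main(String[] args) {
        DataSeeder ds = new DataSeeder();
        ds.SeedMyData();
        // Print every seeded rental document.
        DBCursor cursor = DataSeeder.Empreints.find();
        try {
            while (cursor.hasNext()) {
                DBObject doc = cursor.next();
                System.out.println(doc);
            }
        } finally {
            cursor.close();
        }
    }
}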
The Mapper:
package bibliomapreduce;

import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.bson.BSONObject;

/**
 * @author panda
 */
// MongoInputFormat hands each document to the mapper as a BSONObject value,
// keyed by the document's _id.
public class BiblioMapper extends Mapper<Object, BSONObject, Text, Text> {

    @Override
    public void map(Object key, BSONObject value, Context context)
            throws IOException, InterruptedException {
        // Emit one (isbn, borrower) pair per rental document.
        String isbn = value.get("isbn").toString();
        String nom_emp = value.get("nom_emp").toString();
        context.write(new Text(isbn), new Text(nom_emp));
    }
}
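With the seed data above, this mapper emits the pairs ("123-r", "Samir") and ("321-a", "Lahnin"), one per empreint document.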
The Reducer:
package bibliomapreduce;

import java.io.IOException;
import java.util.ArrayList;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * @author panda
 */
public class BiblioReducer
        extends Reducer<Text, Text, Text, Text> {

    @Override
    public void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        // Hadoop reuses the Text instance while iterating, so copy each
        // value out as a String rather than keeping the reference.
        ArrayList<String> result = new ArrayList<String>();
        for (Text val : values) {
            result.add(val.toString());
        }
        context.write(key, new Text(result.toString()));

        // Store the resulting values back into MongoDB.
        DataSeeder ds = new DataSeeder();
        ds.UpdateMyCollection("nombre_d_empreints", result.size(), "isbn", key.toString(), ds.Livres);
        ds.UpdateMyCollection("liste_d_empreinteurs", result, "isbn", key.toString(), ds.Livres);
    }
}
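If the updates go through, each livre document should end up looking like this (with the seed data every book is rented exactly once):
{ "isbn" : "123-r", "auteur" : "Samir", "titre" : "Les Nuits", "nombre_d_empreints" : 1, "liste_d_empreinteurs" : [ "Samir" ] }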
The main class:
package bibliomapreduce;

import com.mongodb.hadoop.util.MongoConfigUtil;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * @author panda
 */
public class BiblioMapReduce {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws Exception {
        // Seed our MongoDB collections with data.
        DataSeeder ds = new DataSeeder();
        ds.SeedMyData();

        // Global Hadoop configuration.
        String out_path = "/home/panda/Desktop/NoSQL Biblio MapReduce/BiblioMapReduce/src/out";
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "BiblioMR");

        // Point the job's input at the rentals collection, since that is
        // what the mapper reads (isbn + nom_emp).
        MongoConfigUtil.setInputURI(job.getConfiguration(), "mongodb://localhost/biblio.empreints");

        // Mapper/Reducer configuration for the Hadoop job. No combiner: the
        // reducer writes to MongoDB as a side effect, so it cannot double as
        // one, and the number of reduce tasks is left at its default so the
        // reducer actually runs.
        job.setJarByClass(BiblioMapReduce.class);
        job.setMapperClass(BiblioMapper.class);
        job.setReducerClass(BiblioReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setInputFormatClass(com.mongodb.hadoop.MongoInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Log the result into the given path.
        FileOutputFormat.setOutputPath(job, new Path(out_path));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
N.B.: hadoop-3.0.3, MongoDB v3.6.3, Java "1.8.0_201".
What I expect is Hadoop's output in the given directory path, plus an update to each book driven by the documents in the rentals (empreints) collection.
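Concretely, with the seed data, the part file under the output path should contain something like (tab between key and value):
123-r	[Samir]
321-a	[Lahnin]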