我想用BulkLoad把数据推送到Hbase,我用一个带地图的文件来做。但是当我想在shell中运行程序时,错误来了!
我重写了 RecordReader 以每次读取整个文件。但我不知道为什么我的程序出错了!
21/05/25 10:46:01 INFO mapreduce.Job: Job job_1621909750051_0002 running in uber mode : false
21/05/25 10:46:01 INFO mapreduce.Job: map 0% reduce 0%
21/05/25 10:46:01 INFO mapreduce.Job: Job job_1621909750051_0002 failed with state FAILED due to: Application application_1621909750051_0002 failed 2 times due to AM Container for appattempt_1621909750051_0002_000002 exited with exitCode: 1
For more detailed output, check application tracking page:http://master:8088/cluster/app/application_1621909750051_0002Then, click on links to logs of each attempt.
Diagnostics: Exception from container-launch.
Container id: container_1621909750051_0002_02_000001
Exit code: 1
Stack trace: ExitCodeException exitCode=1:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:585)
at org.apache.hadoop.util.Shell.run(Shell.java:482)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:776)
at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:212)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:302)
at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:82)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Container exited with a non-zero exit code 1
Failing this attempt. Failing the application.
21/05/25 10:46:01 INFO mapreduce.Job: Counters: 0
这是我的java程序:
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;
/**
* a map deal with a file
*/
public class WaveDataInput {
private static final Log LOG = LogFactory.getLog(WaveDataInput.class);
public static class WholeReader extends RecordReader<Text, Text> {
private Text key = new Text();
private Text value = new Text();
private boolean isProcessed = false;
private FSDataInputStream fsDataInputStream;
private byte[] buffer;
private FileSplit fileSplit;
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
this.fileSplit = (FileSplit) split;
Path path = fileSplit.getPath();
FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
fsDataInputStream = fileSystem.open(path); // 获取文件的输入流
}
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
if (!isProcessed) {
String keyName = fileSplit.getPath().toString();
key.set(keyName);
buffer = new byte[(int) fileSplit.getLength()];
fsDataInputStream.read(buffer);
value.set(buffer, 0, buffer.length);
isProcessed = true;
return true;
} else {
return false;
}
}
@Override
public Text getCurrentKey() throws IOException, InterruptedException {
return key;
}
@Override
public Text getCurrentValue() throws IOException, InterruptedException {
return value;
}
@Override
public float getProgress() throws IOException, InterruptedException {
return isProcessed ? 0 : 1;
}
@Override
public void close() throws IOException {
if (fsDataInputStream != null) {
IOUtils.closeStream(fsDataInputStream);
}
}
}
public static class customFileInputFormat extends FileInputFormat<Text, Text> {
@Override
protected boolean isSplitable(JobContext context, Path filename) {
return false;
}
@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
return new WholeReader();
}
}
public static class WaveMapper extends Mapper<Text, Text, ImmutableBytesWritable, Put> {
@Override
protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
String[] text = value.toString().split("\n");
/**
* flag level lat lon timestamp depth time_gap name
* 2 2 335024N 0782848W 0000 9.8 3600 NDBC
*
* data:
* flag time depth wave_speed wave_dir
* 3 20160101000800 0.8 6.60 297.0
* 3 20160101000800 9.8 2.90 297.0
*/
float lon = 0.0f;
float lat = 0.0f;
int level = 0;
int region = 0;
for (String line : text) {
String ele[] = line.split(" ");
if (ele.length < 3) return;
if ("2".equals(ele[0].trim())) {
if(ele.length < 4) continue;
level = Integer.parseInt(ele[1]);
lat = getLonLat(ele[2]);
lon = getLonLat(ele[3]);
} else if ("3".equals(ele[0].trim())) {
if(ele.length < 5) continue;
region = (ele[1].hashCode())%100;
String timestap = System.nanoTime() + "";
String stamp = timestap.substring(timestap.length() - 9, timestap.length() - 2);
String rowkey = (String.format("%02d", region)) + ele[1] +stamp;
Put put = new Put(Bytes.toBytes(rowkey));
ImmutableBytesWritable putRowkey = new ImmutableBytesWritable(rowkey.getBytes());
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("time"), Bytes.toBytes(ele[1]));
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("lat"), Bytes.toBytes(lat));
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("lon"), Bytes.toBytes(lon));
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("depth"), Bytes.toBytes(Float.parseFloat(ele[2])));
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("wave_speed"), Bytes.toBytes(Float.parseFloat(ele[3])));
put.addColumn(Bytes.toBytes("element"), Bytes.toBytes("wave_dir"), Bytes.toBytes(Float.parseFloat(ele[4])));
ImmutableBytesWritable outkey = new ImmutableBytesWritable(rowkey.getBytes());
context.write(outkey, put);
}
}
}
}
public static void main(String[] args) throws Exception {
if(args.length < 3){
return;
}
System.setProperty("HADOOP_USER_NAME", "611");
long start = System.currentTimeMillis();
String inputPath = "hdfs://192.168.1.237:9000/dataset/gtspp/gtspp4_in202012.txt";
String outputPath = "hdfs://192.168.1.237:9000/dataset/out3";
String HbaseTable = "sea:element";
inputPath = args[0];
outputPath = args[1];
HbaseTable = args[2];
Configuration conf = new Configuration();
conf.set("fs.defaultFs", "hdfs://192.168.1.237:9000");
conf.set("hbase.zookeeper.quorum", "master,slave1");
conf.set("hbase.master", "master:6000");
conf.set("hbase.zookeeper.property.clientPort", "2183");
conf.set("mapreduce.app-submission.cross-platform", "true");
conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
Connection conn = ConnectionFactory.createConnection(conf);
Admin admin = conn.getAdmin();
HTable table = (HTable) conn.getTable(TableName.valueOf(HbaseTable));
final Path OutputPath=new Path(outputPath);
// delete outpath
FileSystem fs = FileSystem.get(new URI("hdfs://192.168.1.237:9000"), conf);
if(fs.exists(OutputPath))
fs.delete(OutputPath, true);
Job job = Job.getInstance(conf, "wave-load");
job.setMapperClass(WaveMapper.class);
job.setJarByClass(WaveDataInput.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(Put.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(HFileOutputFormat2.class);
FileInputFormat.setInputPaths(job, new Path(inputPath));
FileOutputFormat.setOutputPath(job, new Path(outputPath));
if(job.waitForCompletion(true)){
LoadIncrementalHFiles Loader=new LoadIncrementalHFiles(conf);
Loader.doBulkLoad(OutputPath, admin, table, conn.getRegionLocator(TableName.valueOf(HbaseTable)));
}
}
public static float getLonLat(String str){
int flag = 1;
if(str.contains("N") || str.contains("E")){
flag = 1;
}else if(str.contains("W") || str.contains("S")){
flag = -1;
}else {
return 0;
}
float value =Float.parseFloat(str.substring(0, str.length() - 1))/10000 ;
return value * flag;
}
}
这是 http://master:8088/cluster/app/application_1621909750051_0002 内容
2021-05-25 10:46:00,634 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Created MRAppMaster for application appattempt_1621909750051_0002_000002
2021-05-25 10:46:00,824 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Executing with tokens:
2021-05-25 10:46:00,825 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Kind: YARN_AM_RM_TOKEN, Service: , Ident: (appAttemptId { application_id { id: 2 cluster_timestamp: 1621909750051 } attemptId: 2 } keyId: -1575443230)
2021-05-25 10:46:01,055 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: Using mapred newApiCommitter.
2021-05-25 10:46:01,057 INFO [main] org.apache.hadoop.mapreduce.v2.app.MRAppMaster: OutputCommitter set in config null
2021-05-25 10:46:01,094 INFO [main] org.apache.hadoop.service.AbstractService: Service org.apache.hadoop.mapreduce.v2.app.MRAppMaster failed in state INITED; cause: org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
org.apache.hadoop.yarn.exceptions.YarnRuntimeException: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:518)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:498)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.callWithJobClassLoader(MRAppMaster.java:1593)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.createOutputCommitter(MRAppMaster.java:498)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.serviceInit(MRAppMaster.java:284)
at org.apache.hadoop.service.AbstractService.init(AbstractService.java:163)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$5.run(MRAppMaster.java:1551)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1762)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.initAndStartAppMaster(MRAppMaster.java:1548)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster.main(MRAppMaster.java:1481)
Caused by: java.lang.RuntimeException: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2267)
at org.apache.hadoop.mapreduce.task.JobContextImpl.getOutputFormatClass(JobContextImpl.java:222)
at org.apache.hadoop.mapreduce.v2.app.MRAppMaster$2.call(MRAppMaster.java:514)
... 11 more
Caused by: java.lang.ClassNotFoundException: Class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2171)
at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:2265)
... 13 more