I imported an 8 GB csv file using the importtsv command. Then I ran this command:
./hadoop jar /usr/local/hbase/hbase-0.94.10.jar completebulkload /app/hadoop/tmp/df/data/fb333 fb
After a while, it gave the following error:
ERROR mapreduce.LoadIncrementalHFiles: Encountered unrecoverable error from region server
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=14, exceptions:
Wed Oct 09 22:59:34 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36234 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:00:35 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36283 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:01:37 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36325 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:02:38 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hbase.RegionTooBusyException: failed to get a lock in 60000ms
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5889)
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5875)
at org.apache.hadoop.hbase.regionserver.HRegion.startBulkRegionOperation(HRegion.java:5834)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3628)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3611)
at org.apache.hadoop.hbase.regionserver.HRegionServer.bulkLoadHFiles(HRegionServer.java:2930)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:320)
at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1426)
Wed Oct 09 23:03:40 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36381 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:04:42 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36419 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:05:46 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36448 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:06:51 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36480 remote=localhost/127.0.0.1:50334]
How can I overcome this problem?
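For reference, the HFiles under /app/hadoop/tmp/df/data/fb333 were produced by an importtsv run along these lines (the column mapping, separator, and input path below are placeholders, not the exact values I used):
./hadoop jar /usr/local/hbase/hbase-0.94.10.jar importtsv -Dimporttsv.separator=, -Dimporttsv.bulk.output=/app/hadoop/tmp/df/data/fb333 -Dimporttsv.columns=HBASE_ROW_KEY,d:col1,d:col2 fb /path/to/input.csv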
Answer 0 (score: 0)
Just yesterday, after some struggle, I managed to generate HFiles with MapReduce and load them into HBase programmatically with LoadIncrementalHFiles, so I hope I can help.
Could you try these things first?
Before running completebulkload, check whether HFiles were actually generated in the output folder. Say your output folder is 'output' and the column family name is 'd'; then you should see HFiles under output/d/.
If they are there, run the completebulkload command. Suppose you still get the exception above: check whether the HFiles are still present in the output folder. If they are gone, in most cases the data has actually been loaded into HBase even though the console shows an exception. Check the row count of the HBase table.
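For example, with the output folder and table from your command, and with 'd' standing in for your actual column family name, the checks could look like this:
./hadoop fs -ls /app/hadoop/tmp/df/data/fb333/d
echo "count 'fb'" | /usr/local/hbase/bin/hbase shell
The first should list the generated HFiles before the load; the second reports the table row count after the load attempt.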
The reason I suggest this is that I hit a similar issue where LoadIncrementalHFiles loaded the files into HBase and removed them from the output folder, but then still tried to read HFiles from the output folder. That may be why you are seeing 'timeout while waiting for channel to be ready for read'.
If this still doesn't solve the problem, please post more logs so I can take a look.
Answer 1 (score: 0)
Here is a code sample I used to import a csv file:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class SampleUploader {

    private static final String NAME = "SampleUploader";

    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 100;
        private long count = 0;

        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            // Split the CSV line. Note that a plain split(",") does not handle
            // commas inside quoted fields; it is only good enough for simple input.
            String[] values = line.toString().split(",");
            String rowStr = values[0].replaceAll("\"", "");
            String titleStr = values[1].replaceAll("\"", "");
            String bodyStr = values[2].replaceAll("\"", "");
            String tagsStr = values[3].replaceAll("\"", "");

            // Extract each value
            byte[] row = Bytes.toBytes(rowStr.trim());
            byte[] title = Bytes.toBytes(titleStr);
            byte[] body = Bytes.toBytes(bodyStr);
            byte[] tags = Bytes.toBytes(tagsStr);

            Put put = new Put(row);
            try {
                put.add(Bytes.toBytes("st"), Bytes.toBytes("TITLE"), title);
                put.add(Bytes.toBytes("st"), Bytes.toBytes("BODY"), body);
                put.add(Bytes.toBytes("st"), Bytes.toBytes("TAGS"), tags);
            } catch (Exception e1) {
                System.out.println("PUT EXC");
                e1.printStackTrace();
            }

            // Uncomment below to disable the WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer crash.
            // put.setWriteToWAL(false);

            try {
                context.write(new ImmutableBytesWritable(row), put);
            } catch (InterruptedException e) {
                System.out.println("WRITE EXC");
                e.printStackTrace();
            }

            // Set status every checkpoint lines
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }

    /**
     * Job configuration.
     */
    public static Job configureJob(Configuration conf) throws IOException {
        Path inputPath = new Path("/home/coder/Downloads/Train3.csv");
        String tableName = "sodata";

        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // Map-only job: the mapper writes Puts straight into the table,
        // so no reducer is needed.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.master", "localhost:54310");
        // Set very high so that no line is dropped due to memory restrictions.
        conf.set("hbase.client.write.buffer", "1000000000");
        Job job = configureJob(conf);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
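A rough sketch of how to run it: the 'sodata' table with the 'st' column family used by the code must already exist, the jar name here is only an example, and the HBase jars need to be on the Hadoop classpath.
echo "create 'sodata', 'st'" | /usr/local/hbase/bin/hbase shell
./hadoop jar sampleuploader.jar SampleUploader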