I imported an 8 GB csv file using the importtsv command. Then I ran this command:
./hadoop jar /usr/local/hbase/hbase-0.94.10.jar completebulkload /app/hadoop/tmp/df/data/fb333 fb
After a while, it gave the following error:
ERROR mapreduce.LoadIncrementalHFiles: Encountered unrecoverable error from region server
org.apache.hadoop.hbase.client.RetriesExhaustedException: Failed after attempts=14, exceptions:
Wed Oct 09 22:59:34 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36234 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:00:35 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36283 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:01:37 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36325 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:02:38 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.hbase.RegionTooBusyException: failed to get a lock in 60000ms
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5889)
at org.apache.hadoop.hbase.regionserver.HRegion.lock(HRegion.java:5875)
at org.apache.hadoop.hbase.regionserver.HRegion.startBulkRegionOperation(HRegion.java:5834)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3628)
at org.apache.hadoop.hbase.regionserver.HRegion.bulkLoadHFiles(HRegion.java:3611)
at org.apache.hadoop.hbase.regionserver.HRegionServer.bulkLoadHFiles(HRegionServer.java:2930)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:320)
at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1426)
Wed Oct 09 23:03:40 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36381 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:04:42 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36419 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:05:46 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36448 remote=localhost/127.0.0.1:50334]
Wed Oct 09 23:06:51 EEST 2013, org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles$3@3cb075, java.net.SocketTimeoutException: Call to localhost/127.0.0.1:50334 failed on socket timeout exception: java.net.SocketTimeoutException: 60000 millis timeout while waiting for channel to be ready for read. ch : java.nio.channels.SocketChannel[connected local=/127.0.0.1:36480 remote=localhost/127.0.0.1:50334]
How can I overcome this problem?
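For reference, the HFiles under /app/hadoop/tmp/df/data/fb333 were produced by an importtsv run along these lines (the column mapping, separator, and input path below are placeholders, not the exact values I used):
./hadoop jar /usr/local/hbase/hbase-0.94.10.jar importtsv -Dimporttsv.separator=, -Dimporttsv.bulk.output=/app/hadoop/tmp/df/data/fb333 -Dimporttsv.columns=HBASE_ROW_KEY,d:col1,d:col2 fb /path/to/input.csv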
Answer 0 (score: 0)
Just yesterday, after some struggle, I managed to generate HFiles with MapReduce and load them into HBase programmatically with LoadIncrementalHFiles, so I hope I can help.
Could you try these things first?
Before running completebulkload, check whether HFiles were actually generated in the output folder. Say your output folder is 'output' and the column family name is 'd'; then you should see HFiles under output/d/.
If they are there, run the completebulkload command. Suppose you still get the exception above: check whether the HFiles are still present in the output folder. If they are gone, in most cases the data has actually been loaded into HBase even though the console shows an exception. Check the row count of the HBase table.
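For example, with the output folder and table from your command, and with 'd' standing in for your actual column family name, the checks could look like this:
./hadoop fs -ls /app/hadoop/tmp/df/data/fb333/d
echo "count 'fb'" | /usr/local/hbase/bin/hbase shell
The first should list the generated HFiles before the load; the second reports the table row count after the load attempt.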
The reason I suggest this is that I hit a similar issue where LoadIncrementalHFiles loaded the files into HBase and removed them from the output folder, but then still tried to read HFiles from the output folder. That may be why you are seeing 'timeout while waiting for channel to be ready for read'.
If this still doesn't solve the problem, please post more logs so I can take a look.
Answer 1 (score: 0)
Here is a code sample I used to import a csv file:
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class SampleUploader {

    private static final String NAME = "SampleUploader";

    static class Uploader extends
            Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {

        private long checkpoint = 100;
        private long count = 0;

        @Override
        public void map(LongWritable key, Text line, Context context)
                throws IOException {
            // Split the CSV line. Note that a plain split(",") does not handle
            // commas inside quoted fields; it is only good enough for simple input.
            String[] values = line.toString().split(",");
            String rowStr = values[0].replaceAll("\"", "");
            String titleStr = values[1].replaceAll("\"", "");
            String bodyStr = values[2].replaceAll("\"", "");
            String tagsStr = values[3].replaceAll("\"", "");

            // Extract each value
            byte[] row = Bytes.toBytes(rowStr.trim());
            byte[] title = Bytes.toBytes(titleStr);
            byte[] body = Bytes.toBytes(bodyStr);
            byte[] tags = Bytes.toBytes(tagsStr);

            Put put = new Put(row);
            try {
                put.add(Bytes.toBytes("st"), Bytes.toBytes("TITLE"), title);
                put.add(Bytes.toBytes("st"), Bytes.toBytes("BODY"), body);
                put.add(Bytes.toBytes("st"), Bytes.toBytes("TAGS"), tags);
            } catch (Exception e1) {
                System.out.println("PUT EXC");
                e1.printStackTrace();
            }

            // Uncomment below to disable the WAL. This will improve performance but
            // means you will experience data loss in the case of a RegionServer crash.
            // put.setWriteToWAL(false);

            try {
                context.write(new ImmutableBytesWritable(row), put);
            } catch (InterruptedException e) {
                System.out.println("WRITE EXC");
                e.printStackTrace();
            }

            // Set status every checkpoint lines
            if (++count % checkpoint == 0) {
                context.setStatus("Emitting Put " + count);
            }
        }
    }

    /**
     * Job configuration.
     */
    public static Job configureJob(Configuration conf) throws IOException {
        Path inputPath = new Path("/home/coder/Downloads/Train3.csv");
        String tableName = "sodata";

        Job job = new Job(conf, NAME + "_" + tableName);
        job.setJarByClass(Uploader.class);
        FileInputFormat.setInputPaths(job, inputPath);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(Uploader.class);
        // Map-only job: the mapper writes Puts straight into the table,
        // so no reducer is needed.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
        return job;
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.master", "localhost:54310");
        // Set very high so that no line is dropped due to memory restrictions.
        conf.set("hbase.client.write.buffer", "1000000000");
        Job job = configureJob(conf);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
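A rough sketch of how to run it: the 'sodata' table with the 'st' column family used by the code must already exist, the jar name here is only an example, and the HBase jars need to be on the Hadoop classpath.
echo "create 'sodata', 'st'" | /usr/local/hbase/bin/hbase shell
./hadoop jar sampleuploader.jar SampleUploader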