我正在尝试从一个 HBase 表中读取数据,做一些处理后把结果写入另一个表。代码如下:
'package examples;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
public class HbaseCombinerExample {
public static class MyMapper extends TableMapper<Text, FloatWritable> {
private Text text = new Text();
public void map(ImmutableBytesWritable row, Result value,Context context) throws IOException, InterruptedException {
int[] siteArray = {1, 8, 18, 22, 26, 35, 48, 51, 53, 81, 108, 110, 139, 146, 150, 154, 169, 181, 235, 240, 404, 405, 406, 407, 409, 410, 411, 671, 673, 695, 697, 698, 1001};
String siteId = new String(value.getValue(Bytes.toBytes("location"), Bytes.toBytes("site")));
String param = new String(value.getValue(Bytes.toBytes("data"), Bytes.toBytes("param_name")));
boolean bool = Arrays.asList(siteArray).contains(Integer.parseInt(siteId));
if(bool && param.equals("o3")) {
String year = new String(value.getValue(Bytes.toBytes("date"), Bytes.toBytes("year")));
String month = new String(value.getValue(Bytes.toBytes("date"), Bytes.toBytes("month")));
String day = new String(value.getValue(Bytes.toBytes("date"), Bytes.toBytes("day")));
String hour = new String(value.getValue(Bytes.toBytes("date"), Bytes.toBytes("hour")));
String pollution = new String(value.getValue(Bytes.toBytes("data"), Bytes.toBytes("value")));
String val = year+month+day+hour+siteId;
FloatWritable pollValue = new FloatWritable(Float.parseFloat(pollution));
text.set(val); // we can only emit Writables...
context.write(text, pollValue);
}
}
}
public static class MyTableReducer extends TableReducer<Text, FloatWritable, ImmutableBytesWritable> {
public void reduce(Text key, Iterable<FloatWritable> values, Context context) throws IOException, InterruptedException {
float i = 0;
float count = 0;
// ImmutableBytesWritable HKey = new ImmutableBytesWritable(Bytes.toBytes(key.toString()));
for (FloatWritable val : values) {
i += val.get();
// ++count;
}
// float avgPollution = i/count;
Put HPut = new Put(Bytes.toBytes(key.toString()));
HPut.add(Bytes.toBytes("data"), Bytes.toBytes("avgPollutionValue"),Bytes.toBytes(i));
context.write(null, HPut);
}
}
/**
 * Wires up and launches the "HourlyAvg" job: scan the source table through
 * MyMapper, aggregate with MyTableReducer, write results to the target table.
 */
public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job hourlyAvgJob = new Job(conf, "HourlyAvg");
    hourlyAvgJob.setJarByClass(HbaseCombinerExample.class);

    Scan tableScan = new Scan();
    // Scan's default caching of 1 means one RPC round-trip per row — terrible
    // for MapReduce; fetch 300 rows at a time and skip the block cache.
    tableScan.setCaching(300);
    tableScan.setCacheBlocks(false);

    String sourceTable = "airqualitys1";
    String targetTable = "bigd26-hbase-avg";
    TableMapReduceUtil.initTableMapperJob(
            sourceTable, tableScan, MyMapper.class, Text.class, FloatWritable.class, hourlyAvgJob);
    TableMapReduceUtil.initTableReducerJob(targetTable, MyTableReducer.class, hourlyAvgJob);
    hourlyAvgJob.setNumReduceTasks(1);

    if (!hourlyAvgJob.waitForCompletion(true)) {
        throw new IOException("error with job!");
    }
}
}'
问题是输出表中没有任何记录。MapReduce 作业运行时,我还看到了下面这条信息。
'任务ID:attempt_1397249742212_1380_m_000001_0,状态:FAILED'和 '无法在app_1397249742212_1407的任何已配置本地目录中初始化app-log目录'
任何信息/建议都会非常有用。