I am trying to run a data-cleaning program on my cluster with Hadoop MapReduce, but it fails with:

Container [pid=5932,containerID=container_1480660624412_0297_02_000001] is running beyond physical memory limits. Current usage: 2.0 GB of 2 GB physical memory used; 31.8 GB of 4.2 GB virtual memory used. Killing container.
I have already changed the configuration by setting

job.getConfiguration().setInt("mapreduce.map.memory.mb", 3072);
job.getConfiguration().setInt("mapreduce.reduce.memory.mb", 6144);
job.getConfiguration().setStrings("mapreduce.map.java.opts", "-Xmx3072m");
job.getConfiguration().setStrings("mapreduce.reduce.java.opts", "-Xmx6144m");

but it does not seem to work. The error is always "Current usage: 2.0 GB of 2 GB physical memory used; 31.8 GB of 4.2 GB virtual memory used".
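One thing I noticed: the killed container is container_1480660624412_0297_02_000001, and as far as I understand the first container (_000001) of an application attempt is the MapReduce ApplicationMaster, not a map or reduce task. Its 2 GB limit matches neither my 3072 MB map nor my 6144 MB reduce setting, and the 4.2 GB virtual limit looks like 2 GB times the default yarn.nodemanager.vmem-pmem-ratio of 2.1. If that is right, maybe I also have to raise the ApplicationMaster memory, roughly like this (just a sketch using the standard property names; the values are guesses and not tested):

```
// sketch: also raise the MRAppMaster container size and heap in run(),
// before Job.getInstance(conf); the defaults are 1536 MB and -Xmx1024m
conf.setInt("yarn.app.mapreduce.am.resource.mb", 4096);
conf.set("yarn.app.mapreduce.am.command-opts", "-Xmx3276m");
Job job = Job.getInstance(conf);
```

But I am not sure this is the real cause.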
I would like to know how to solve this. Perhaps someone can also suggest a basic pattern for processing data like this, because I suspect my current approach is not flexible enough. Thank you very much.
Data size: 6 TB. The cluster has 10 servers.
When the data is small, around 30 GB, the program runs fine.
mapred-site.xml:
```
<property>
<name>mapreduce.map.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx3072m</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx6144m</value>
</property>
```
My code:
```
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
public class DataCleanIdIconWeb1{
public static class QLMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
Text outputValue = new Text();
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
// the map body is omitted here; it just processes each input line and emits it
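// e.g. (a sketch, not the original code; cleanLine is a hypothetical helper):
// outputValue.set(cleanLine(value.toString()));
// context.write(outputValue, NullWritable.get());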
}
}
public static class QLCombiner extends Reducer<Text, NullWritable, Text, NullWritable> {
@Override
protected void reduce(Text key, Iterable<NullWritable> values,
Reducer<Text, NullWritable, Text, NullWritable>.Context context) throws IOException, InterruptedException {
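// the combiner only de-duplicates identical keys on the map side, so it just re-emits each key once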
// String line = key.toString();
// String[] split = line.split("\t");
context.write(key, NullWritable.get());
}
}
public static class QLReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
private MultipleOutputs<Text, NullWritable> mos;
@Override
protected void setup(Reducer<Text, NullWritable, Text, NullWritable>.Context context)
throws IOException, InterruptedException {
// initialize MultipleOutputs so the reducer can write to several named outputs
super.setup(context);
mos = new MultipleOutputs<Text, NullWritable>(context);
}
@Override
protected void cleanup(Reducer<Text, NullWritable, Text, NullWritable>.Context context)
throws IOException, InterruptedException {
// close MultipleOutputs so all named-output files are flushed
super.cleanup(context);
mos.close();
}
@Override
protected void reduce(Text key, Iterable<NullWritable> values, Context context)
throws IOException, InterruptedException {
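// route each line to a named output according to its number of tab-separated fields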
String line = key.toString();
String[] split = line.split("\t");
if (split.length == 5) {
// mos.write("iconRecord", key, NullWritable.get());
mos.write("iconRecord", key, NullWritable.get(), "iconRecord/icon");
} else if (split.length == 1) {
// mos.write("allID", key, NullWritable.get());
mos.write("AllID", key, NullWritable.get(), "AllID/AllID");
} else { // split.length == 6
// mos.write("webRecord", key, NullWritable.get());
mos.write("webRecord", key, NullWritable.get(), "webRecord/web");
}
}
}
public static void run(String originalDataPath, String dataCleanOutputFile) throws Exception {
System.out.println("Start dataClean");
//long startTime = System.currentTimeMillis();
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(DataCleanIdIconWeb1.class);
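// compress the intermediate map output with gzip to reduce shuffle I/O; leave the final job output uncompressed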
job.getConfiguration().setBoolean("mapreduce.map.output.compress", true);
job.getConfiguration().setClass("mapreduce.map.output.compress.codec", GzipCodec.class, CompressionCodec.class);
job.getConfiguration().setBoolean("mapreduce.output.fileoutputformat.compress", false);
job.getConfiguration().setStrings("mapreduce.reduce.shuffle.input.buffer.percent", "0.01");
job.getConfiguration().setInt("yarn.scheduler.maximum-allocation-mb",6144);
job.getConfiguration().setStrings("mapred.child.java.opts","-Xmx8192m");
job.getConfiguration().setInt("mapreduce.map.memory.mb",3072);
job.getConfiguration().setInt("mapreduce.reduce.memory.mb",6144);
job.getConfiguration().setStrings("mapreduce.map.java.opts","-Xmx3072m");
job.getConfiguration().setStrings("mapreduce.reduce.java.opts","-Xmx6144m");
job.getConfiguration().setBoolean("yarn.nodemanager.vmem-check-enabled",false);
job.setNumReduceTasks(30);
//job.getConfiguration().setInt("yarn.nodemanager.vmem-pmem-ratio",5);
job.getConfiguration().setStrings("mapreduce.job.jvm.numtasks","-1");
job.setMapperClass(QLMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
// FileInputFormat.setInputPaths(job, new
// Path("hdfs://10.1.18.202:9000/data/user_detail_201606"));
// FileInputFormat.setInputPaths(job, new
// Path("hdfs://10.1.18.202:9000/data/userAllDetail3M.txt"));
FileInputFormat.setInputPaths(job, new Path(originalDataPath));
// FileInputFormat.class, QLMapper.class);
// MultipleInputs.addInputPath(job, new Path(args[1]),
// FileInputFormat.class, QLMapper.class);
//
job.setCombinerClass(QLCombiner.class);
job.setReducerClass(QLReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
job.setInputFormatClass(TextInputFormat.class);
//job.setOutputFormatClass(NullOutputFormat.class);
LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
//job.setOutputFormatClass(TextOutputFormat.class);
// FileOutputFormat.setOutputPath(job, new
// Path("hdfs://10.1.18.202:9000/mrOutput/dataclean3mNew2"));
FileOutputFormat.setOutputPath(job, new Path(dataCleanOutputFile));
MultipleOutputs.addNamedOutput(job, "iconRecord", TextOutputFormat.class, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job, "AllID", TextOutputFormat.class, Text.class, NullWritable.class);
MultipleOutputs.addNamedOutput(job, "webRecord", TextOutputFormat.class, Text.class, NullWritable.class);
job.waitForCompletion(true);
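// waitForCompletion(true) blocks until the job finishes and prints progress; its boolean result indicates success or failure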
//long endTime = System.currentTimeMillis();
//System.out.println("DataClean Time: " + (endTime - startTime) / 1000f / 60f / 60f + " h");
}
public static void main(String[] args) throws Exception {
// String originalDataPath = "hdfs://10.1.18.202:9000/recommend/100data";
// String dataCleanOutputFile = "hdfs://10.1.18.202:9000/recommend/gameRecommend11.16/dataClean11.29";
String originalDataPath = "hdfs://pre/user/hdu/data/newRecord/originalData7";
String dataCleanOutputFile = "hdfs://pre//user/hdu/gamerecommend/dataClean/2016-12-16/step1";
DataCleanIdIconWeb1.run(originalDataPath, dataCleanOutputFile);
}
}
```