public class dewpoint extends Configured implements Tool
// Class-wide SLF4J logger; not referenced anywhere in the visible part of this file.
private static final Logger logger = LoggerFactory.getLogger(dewpoint.class);
// Cassandra keyspace and column family that run() passes to ConfigHelper.setInputColumnFamily.
static final String KEYSPACE = "weather";
static final String COLUMN_FAMILY = "user";
// Output directory of the first job; run() also registers it as the second job's input path.
private static final String OUTPUT_PATH1 = "/tmp/intermediate1";
// Output directory of the second job.
private static final String OUTPUT_PATH2 = "/tmp/intermediate2";
// NOTE(review): OUTPUT_PATH3 and INPUT_PATH1 are not used in the visible code, and INPUT_PATH1
// holds the same value as OUTPUT_PATH1 -- confirm whether they are still needed.
private static final String OUTPUT_PATH3 = "/tmp/intermediate3";
private static final String INPUT_PATH1 = "/tmp/intermediate1";
public static void main(String[] args) throws Exception
ToolRunner.run(new Configuration(), new dewpoint(), args);
public static class dpmap1 extends Mapper<Map<String, ByteBuffer>, Map<FloatWritable, ByteBuffer>, Text, DoubleWritable>
DoubleWritable val1 = new DoubleWritable();
Text word = new Text();
String date;
float temp;
public void map(Map<String, ByteBuffer> keys, Map<FloatWritable, ByteBuffer> columns, Context context) throws IOException, InterruptedException
for (Entry<String, ByteBuffer> key : keys.entrySet())
if (!"date".equals(key.getKey()))
date = ByteBufferUtil.string(key.getValue());
for (Entry<FloatWritable, ByteBuffer> column : columns.entrySet())
if (!"temprature".equals(column.getKey()))
temp = ByteBufferUtil.toFloat(column.getValue());
context.write(word, val1);
public static class dpred1 extends Reducer<Text, DoubleWritable, Text, DoubleWritable>
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException
double beta = 17.62;
double landa = 243.12;
DoubleWritable result1 = new DoubleWritable();
DoubleWritable result2 = new DoubleWritable();
for (DoubleWritable val : values){
// System.out.println(val.get());
beta *= val.get();
context.write(key, result1);
context.write(key, result2);
// Second-stage mapper: parses tab-separated fields out of the string form of `values` and writes
// the quotient temp2/temp1 under key2.
// NOTE(review): the closing braces of this class are missing from this copy of the file, so the
// exact nesting of the three loops and of the final write cannot be determined here.
public static class dpmap2 extends Mapper <Text, DoubleWritable, Text, DoubleWritable>{
// Output key. It is never assigned in the visible code, so it is written as an empty Text.
Text key2 = new Text();
// Only temp2 has an explicit initialiser; temp1 is a field and therefore also starts at 0.0.
// Both are instance state and keep their values from one map() call to the next.
double temp1, temp2 =0;
// NOTE(review): the class is declared with DoubleWritable as its input value type, but this
// method takes Iterable<DoubleWritable>. It therefore looks like an overload rather than an
// override of Mapper.map, in which case the framework would never call it -- confirm.
public void map(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException {
// Splits whatever values.toString() returns on tab characters.
String[] sp = values.toString().split("\t");
// i steps through sp in strides of 4 but is not used by any visible statement.
for (int i=0; i< sp.length; i+=4)
// temp1 is taken from indices 1, 5, 9, ...; this loop does not run at all when sp.length <= 1.
for(int j=1;j< sp.length; j+=4)
temp1 = Double.valueOf(sp[j]);
// temp2 is taken from indices 3, 7, 11, ...; this loop does not run when sp.length <= 3.
for (int k=3;k< sp.length; k+=4)
temp2 = Double.valueOf(sp[k]);
// If temp1 is still 0.0 the double division yields Infinity (or NaN when temp2 is also 0.0)
// rather than throwing.
context.write(key2, new DoubleWritable(temp2/temp1));
public static class dpred2 extends Reducer<Text, DoubleWritable, Text, DoubleWritable>
public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException
double alpha = 6.112;
double tmp = 0;
DoubleWritable result3 = new DoubleWritable();
for (DoubleWritable val : values){
tmp = alpha*(Math.pow(Math.E, val.get()));
context.write(key, result3);
// Tool entry point: configures a first job that reads from Cassandra and writes to OUTPUT_PATH1,
// then a second job that reads OUTPUT_PATH1 and writes to OUTPUT_PATH2, and returns 0.
// NOTE(review): no mapper/reducer/input-format settings and no submit/wait call are visible in
// this method, and its braces are missing from this copy of the file -- presumably lines were
// lost; confirm against the full source.
// NOTE(review): the return value is the constant 0, so callers cannot detect a failed job.
public int run(String[] args) throws Exception
// First job, named "DewPoint".
// NOTE(review): no jar is associated with the job here (e.g. via setJarByClass); the
// "No job jar file set" warning in the job log is consistent with that.
Job job1 = new Job(getConf(), "DewPoint");
FileOutputFormat.setOutputPath(job1, new Path(OUTPUT_PATH1));
// Cassandra input settings: RPC port 9160 on localhost, the KEYSPACE/COLUMN_FAMILY table,
// the Murmur3 partitioner, and a CQL page row size of 3.
ConfigHelper.setInputRpcPort(job1.getConfiguration(), "9160");
ConfigHelper.setInputInitialAddress(job1.getConfiguration(), "localhost");
ConfigHelper.setInputColumnFamily(job1.getConfiguration(), KEYSPACE, COLUMN_FAMILY);
ConfigHelper.setInputPartitioner(job1.getConfiguration(), "Murmur3Partitioner");
CqlConfigHelper.setInputCQLPageRowSize(job1.getConfiguration(), "3");
// The second job is only set up when the first one reports success.
// NOTE(review): isSuccessful() is queried without a visible submit/waitForCompletion on job1.
if (job1.isSuccessful()){
// Second job, also named "DewPoint"; it consumes the first job's output directory.
Job job2 = new Job(getConf(), "DewPoint");
FileInputFormat.addInputPath(job2, new Path(OUTPUT_PATH1));
FileOutputFormat.setOutputPath(job2, new Path(OUTPUT_PATH2));
return 0;
13/10/25 11:33:37 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/10/25 11:33:37 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
13/10/25 11:33:40 INFO mapred.JobClient: Running job: job_local1294015510_0001
13/10/25 11:33:41 INFO mapred.LocalJobRunner: Waiting for map tasks
13/10/25 11:33:41 INFO mapred.LocalJobRunner: Starting task: attempt_local1294015510_0001_m_000000_0
13/10/25 11:33:41 INFO util.ProcessTree: setsid exited with exit code 0
13/10/25 11:33:41 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@190a0d6
13/10/25 11:33:41 INFO mapred.MapTask: Processing split: ColumnFamilySplit((-9223372036854775808, '1684704676388456087] @[localhost])
13/10/25 11:33:41 INFO mapred.MapTask: io.sort.mb = 100
13/10/25 11:33:41 INFO mapred.JobClient: map 0% reduce 0%
13/10/25 11:33:43 INFO mapred.MapTask: data buffer = 79691776/99614720
13/10/25 11:33:43 INFO mapred.MapTask: record buffer = 262144/327680
13/10/25 11:33:44 INFO mapred.MapTask: Starting flush of map output
13/10/25 11:33:44 INFO mapred.MapTask: Finished spill 0
13/10/25 11:33:44 INFO mapred.Task: Task:attempt_local1294015510_0001_m_000000_0 is done. And is in the process of commiting
13/10/25 11:33:44 INFO mapred.LocalJobRunner:
13/10/25 11:33:44 INFO mapred.Task: Task 'attempt_local1294015510_0001_m_000000_0' done.
13/10/25 11:33:44 INFO mapred.LocalJobRunner: Finishing task: attempt_local1294015510_0001_m_000000_0
13/10/25 11:33:44 INFO mapred.LocalJobRunner: Starting task: attempt_local1294015510_0001_m_000001_0
13/10/25 11:33:44 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@9aba32
13/10/25 11:33:44 INFO mapred.MapTask: Processing split: ColumnFamilySplit((1684704676388456087, '-9223372036854775808] @[localhost])
13/10/25 11:33:44 INFO mapred.MapTask: io.sort.mb = 100
13/10/25 11:33:47 INFO mapred.JobClient: map 50% reduce 0%
13/10/25 11:33:47 INFO mapred.MapTask: data buffer = 79691776/99614720
13/10/25 11:33:47 INFO mapred.MapTask: record buffer = 262144/327680
13/10/25 11:33:47 INFO mapred.MapTask: Starting flush of map output
13/10/25 11:33:47 INFO mapred.MapTask: Finished spill 0
13/10/25 11:33:47 INFO mapred.Task: Task:attempt_local1294015510_0001_m_000001_0 is done. And is in the process of commiting
13/10/25 11:33:47 INFO mapred.LocalJobRunner:
13/10/25 11:33:47 INFO mapred.Task: Task 'attempt_local1294015510_0001_m_000001_0' done.
13/10/25 11:33:47 INFO mapred.LocalJobRunner: Finishing task: attempt_local1294015510_0001_m_000001_0
13/10/25 11:33:47 INFO mapred.LocalJobRunner: Map task executor complete.
13/10/25 11:33:48 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@17f11fb
13/10/25 11:33:48 INFO mapred.LocalJobRunner:
13/10/25 11:33:48 INFO mapred.Merger: Merging 2 sorted segments
13/10/25 11:33:48 INFO mapred.Merger: Down to the last merge-pass, with 2 segments left of total size: 204 bytes
13/10/25 11:33:48 INFO mapred.LocalJobRunner:
13/10/25 11:33:48 INFO mapred.Task: Task:attempt_local1294015510_0001_r_000000_0 is done. And is in the process of commiting
13/10/25 11:33:48 INFO mapred.LocalJobRunner:
13/10/25 11:33:48 INFO mapred.Task: Task attempt_local1294015510_0001_r_000000_0 is allowed to commit now
13/10/25 11:33:48 INFO output.FileOutputCommitter: Saved output of task 'attempt_local1294015510_0001_r_000000_0' to /tmp/intermediate1
13/10/25 11:33:48 INFO mapred.LocalJobRunner: reduce > reduce
13/10/25 11:33:48 INFO mapred.Task: Task 'attempt_local1294015510_0001_r_000000_0' done.
13/10/25 11:33:48 INFO mapred.JobClient: map 100% reduce 100%
13/10/25 11:33:48 INFO mapred.JobClient: Job complete: job_local1294015510_0001
13/10/25 11:33:48 INFO mapred.JobClient: Counters: 20
13/10/25 11:33:48 INFO mapred.JobClient: File Output Format Counters
13/10/25 11:33:48 INFO mapred.JobClient: Bytes Written=324
13/10/25 11:33:48 INFO mapred.JobClient: FileSystemCounters
13/10/25 11:33:48 INFO mapred.JobClient: FILE_BYTES_READ=1503
13/10/25 11:33:48 INFO mapred.JobClient: FILE_BYTES_WRITTEN=161938
13/10/25 11:33:48 INFO mapred.JobClient: File Input Format Counters
13/10/25 11:33:48 INFO mapred.JobClient: Bytes Read=0
13/10/25 11:33:48 INFO mapred.JobClient: Map-Reduce Framework
13/10/25 11:33:48 INFO mapred.JobClient: Map output materialized bytes=212
13/10/25 11:33:48 INFO mapred.JobClient: Map input records=8
13/10/25 11:33:48 INFO mapred.JobClient: Reduce shuffle bytes=0
13/10/25 11:33:48 INFO mapred.JobClient: Spilled Records=24
13/10/25 11:33:48 INFO mapred.JobClient: Map output bytes=120
13/10/25 11:33:48 INFO mapred.JobClient: Total committed heap usage (bytes)=485359616
13/10/25 11:33:48 INFO mapred.JobClient: CPU time spent (ms)=0
13/10/25 11:33:48 INFO mapred.JobClient: SPLIT_RAW_BYTES=208
13/10/25 11:33:48 INFO mapred.JobClient: Combine input records=8
13/10/25 11:33:48 INFO mapred.JobClient: Reduce input records=12
13/10/25 11:33:48 INFO mapred.JobClient: Reduce input groups=5
13/10/25 11:33:48 INFO mapred.JobClient: Combine output records=12
13/10/25 11:33:48 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
13/10/25 11:33:48 INFO mapred.JobClient: Reduce output records=10
13/10/25 11:33:48 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
13/10/25 11:33:48 INFO mapred.JobClient: Map output records=8
13/10/25 11:33:49 WARN mapred.JobClient: No job jar file set. User classes may not be found. See JobConf(Class) or JobConf#setJar(String).
13/10/25 11:33:49 INFO input.FileInputFormat: Total input paths to process : 1
13/10/25 11:33:49 INFO mapred.JobClient: Running job: job_local600426365_0002
13/10/25 11:33:49 INFO mapred.LocalJobRunner: Waiting for map tasks
13/10/25 11:33:49 INFO mapred.LocalJobRunner: Starting task: attempt_local600426365_0002_m_000000_0
13/10/25 11:33:49 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@18d30fb
13/10/25 11:33:49 INFO mapred.MapTask: Processing split: file:/tmp/intermediate1/part-r-00000:0+312
13/10/25 11:33:49 INFO mapred.MapTask: io.sort.mb = 100
13/10/25 11:33:50 INFO mapred.MapTask: data buffer = 79691776/99614720
13/10/25 11:33:50 INFO mapred.MapTask: record buffer = 262144/327680
13/10/25 11:33:50 INFO mapred.MapTask: Starting flush of map output
13/10/25 11:33:50 INFO mapred.MapTask: Finished spill 0
13/10/25 11:33:50 INFO mapred.Task: Task:attempt_local600426365_0002_m_000000_0 is done. And is in the process of commiting
13/10/25 11:33:50 INFO mapred.LocalJobRunner:
13/10/25 11:33:50 INFO mapred.Task: Task 'attempt_local600426365_0002_m_000000_0' done.
13/10/25 11:33:50 INFO mapred.LocalJobRunner: Finishing task: attempt_local600426365_0002_m_000000_0
13/10/25 11:33:50 INFO mapred.LocalJobRunner: Map task executor complete.
13/10/25 11:33:50 INFO mapred.Task: Using ResourceCalculatorPlugin : org.apache.hadoop.util.LinuxResourceCalculatorPlugin@d75c47
13/10/25 11:33:50 INFO mapred.LocalJobRunner:
13/10/25 11:33:50 INFO mapred.Merger: Merging 1 sorted segments
13/10/25 11:33:50 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 84 bytes
13/10/25 11:33:50 INFO mapred.LocalJobRunner:
13/10/25 11:33:50 INFO mapred.Task: Task:attempt_local600426365_0002_r_000000_0 is done. And is in the process of commiting
13/10/25 11:33:50 INFO mapred.LocalJobRunner:
13/10/25 11:33:50 INFO mapred.Task: Task attempt_local600426365_0002_r_000000_0 is allowed to commit now
13/10/25 11:33:50 INFO output.FileOutputCommitter: Saved output of task 'attempt_local600426365_0002_r_000000_0' to /tmp/intermediate2
13/10/25 11:33:50 INFO mapred.LocalJobRunner: reduce > reduce
13/10/25 11:33:50 INFO mapred.Task: Task 'attempt_local600426365_0002_r_000000_0' done.
13/10/25 11:33:50 INFO mapred.JobClient: map 100% reduce 100%
13/10/25 11:33:50 INFO mapred.JobClient: Job complete: job_local600426365_0002
13/10/25 11:33:50 INFO mapred.JobClient: Counters: 20
13/10/25 11:33:50 INFO mapred.JobClient: File Output Format Counters
13/10/25 11:33:50 INFO mapred.JobClient: Bytes Written=89
13/10/25 11:33:50 INFO mapred.JobClient: File Input Format Counters
13/10/25 11:33:50 INFO mapred.JobClient: Bytes Read=324
13/10/25 11:33:50 INFO mapred.JobClient: FileSystemCounters
13/10/25 11:33:50 INFO mapred.JobClient: FILE_BYTES_READ=2486
13/10/25 11:33:50 INFO mapred.JobClient: FILE_BYTES_WRITTEN=213321
13/10/25 11:33:50 INFO mapred.JobClient: Map-Reduce Framework
13/10/25 11:33:50 INFO mapred.JobClient: Map output materialized bytes=88
13/10/25 11:33:50 INFO mapred.JobClient: Map input records=10
13/10/25 11:33:50 INFO mapred.JobClient: Reduce shuffle bytes=0
13/10/25 11:33:50 INFO mapred.JobClient: Spilled Records=10
13/10/25 11:33:50 INFO mapred.JobClient: Map output bytes=144
13/10/25 11:33:50 INFO mapred.JobClient: CPU time spent (ms)=0
13/10/25 11:33:50 INFO mapred.JobClient: Total committed heap usage (bytes)=538705920
13/10/25 11:33:50 INFO mapred.JobClient: Combine input records=10
13/10/25 11:33:50 INFO mapred.JobClient: SPLIT_RAW_BYTES=101
13/10/25 11:33:50 INFO mapred.JobClient: Reduce input records=5
13/10/25 11:33:50 INFO mapred.JobClient: Reduce input groups=5
13/10/25 11:33:50 INFO mapred.JobClient: Combine output records=5
13/10/25 11:33:50 INFO mapred.JobClient: Physical memory (bytes) snapshot=0
13/10/25 11:33:50 INFO mapred.JobClient: Reduce output records=5
13/10/25 11:33:50 INFO mapred.JobClient: Virtual memory (bytes) snapshot=0
13/10/25 11:33:50 INFO mapred.JobClient: Map output records=10
答案 0 :(得分:1)
我想我找到问题所在了。你的第二个映射器输出的是 temp2 / temp1,而最终结果中出现无穷大(Infinity),说明除数 temp1 = 0。
我认为你会发现 sp 的长度为 1,这意味着给 temp1 赋值的循环(下标从 1 开始)一次也不会执行,所以 temp1 始终保持初始值 0。