MapReduce应用程序计数器输出1

时间:2017-04-15 13:53:53

标签: java mapreduce

我开发了一个MapReduce应用程序,想要计算输入数据的平均值和总和。但计数器只输出1。我检查过Reducer的for循环中的计数器值,其值是正确的,但输出文件中打印的却是1。下面是输入数据样本和我的代码。

输入数据

14974|Customer#000014974|cTBm50vGWOXsnoYdbLR9z|4|14-465-794-1875|8431.32|AUTOMOBILE|pending grouches. silent theodolites sleep furiously quick dependencies. dolphins maintain sly
14970|Customer#000014970|FG9Pxox q6cHPHGomY08u|3|13-185-927-7901|9054.14|AUTOMOBILE|ut the carefully even deposits. regular ideas beneath the deposits nag 
14963|Customer#000014963|w75qInZOQrR,WzgipSwdpueOM7qeu|6|16-462-356-2145|8397.42|MACHINERY|ly ironic packages: packages cajole ideas. ironic foxes boost. depe
14929|Customer#000014929|mht7IoZNn1Rcmbgwj3OjxqND3|11|21-970-694-9116|9615.16|MACHINERY| according to the final instructions. carefully even requests sleep across t
14904|Customer#000014904|g4Y,pOSAYE 1|9|19-348-888-7443|9924.56|AUTOMOBILE| final, even deposits wake fluffily along the blithely regular excuses. regular, even excuses unwind about 
14867|Customer#000014867| V01ThLgnisvKLqnyA7RLMxi|13|23-436-741-1980|9278.31|HOUSEHOLD| final dependencies sleep furiously along the carefully special accounts. requests engage fluffily amo
14856|Customer#000014856|kzt2v lzu,TvOhL|4|14-475-481-5051|9692.63|AUTOMOBILE|ts haggle blithely final, final foxes. furiously regular ideas nag slyly blithely pending deposi
14848|Customer#000014848|K6rA91M3M2HXTjxz46gJWuj|9|19-592-694-6275|9078.19|BUILDING|en, bold warthogs. silent, regular theodolites sleep quickly theodolites. slyl
4412|Customer#000004412|MNJ9DEIivjnbcGZk2W|7|17-665-838-5600|9781.29|MACHINERY| special, regular foxes above the quickly sp
1|Customer#000000001|IVhzIApeRb ot,c,E|15|25-989-741-2988|711.56|BUILDING|to the even, regular platelets. regular, ironic epitaphs nag e
2|Customer#000000002|XSTf4,NCwDVaWNe6tEgvwfmRchLXak|13|23-768-687-3665|121.65|AUTOMOBILE|l accounts. blithely ironic theodolites integrate boldly: caref
3|Customer#000000003|MG9kdTD2WBHm|1|11-719-748-3364|7498.12|AUTOMOBILE| deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov
4|Customer#000000004|XxVSJsLAGtn|4|14-128-190-5944|2866.83|MACHINERY| requests. final, regular ideas sleep final accou

代码

/**
 * Mapper that parses one '|'-delimited record per input line and, for records that pass
 * the balance/nation filter, emits the pair (field 6, field 1 + TAB + balance).
 */
public static class TokenizerMapper extends Mapper<LongWritable, Text,Text ,Text>{

           /**
            * Processes a single input line.
            *
            * @param key     input key supplied by the framework; not read by this method
            * @param value   the raw input line, split on '|'
            * @param context sink for the emitted (segment, "custKey\tbalance") pair
            */
           public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
               float balance = 0;
               String custKey = "";
               int nation = 0;

               // Fresh output holders are allocated on every call.
               Text word = new Text();
               Text segment = new Text();

               // The pipe must be escaped because split() takes a regular expression.
               String[] line = value.toString().split("\\|");
               // Fields 0..6 are needed below; shorter lines are reported and skipped.
               if (line.length < 7) { 
                   System.err.println("map: Not enough records");
                   return;
               }
               // NOTE(review): in the sample data field 1 looks like the customer name
               // ("Customer#0000...") and field 0 the numeric id -- confirm which one is intended.
               custKey = line[1];
               try {
                   nation = Integer.parseInt(line[3]);
                   balance = Float.parseFloat(line[5]);
               } catch (NumberFormatException e) {
                   // A non-numeric nation or balance: print the stack trace and skip the line.
                   e.printStackTrace();
                   return;
               }

               // Keep only balances above 8000 whose nation is strictly between 1 and 15.
               if(balance > 8000 && (nation < 15 && nation > 1)){ 
                 segment.set(line[6]);
                 // Value layout read back by the reducer: "<custKey>\t<balance>".
                 word.set(custKey + "\t" + balance);
                 context.write(segment,word);
               }
          }
        }

        /**
         * Reducer that, for each key, sums the balances carried in the values, counts the
         * values, and writes (key, avgBalance + TAB + count).
         */
        public static class AvgReducer extends Reducer<Text,Text,Text,Text> {

        /**
         * Aggregates all values that share one key.
         *
         * @param key     the grouping key
         * @param values  strings of the form "custKey\tbalance"
         * @param context sink for the single output record of this key
         */
        public void reduce(Text key, Iterable<Text> values,Context context) throws IOException, InterruptedException {

            float sumBalance = 0,avgBalance = 0;
            int count = 0;   

            for(Text v : values){
               // a[0] is the customer part, a[1] the balance.
               String[] a = v.toString().trim().split("\t");

                sumBalance += Float.parseFloat(a[1]);

                count++;

            }

              // Debug trace of the per-key value count on stdout.
              System.out.println("counter2 "+count);
              // NOTE(review): avgBalance is still 0 here, so for count > 1 this evaluates
              // 0 / count and yields 0; the sum is only used when count <= 1.
              avgBalance = count <= 1 ? sumBalance : avgBalance / count;

              context.write(key,new Text(avgBalance+"\t"+count));

           }
        }

CMD输出

counter 1counter 2counter 3counter 4counter 5counter 6counter 7counter 8counter 9counter 10counter 11counter 12counter 13counter 14counter 15counter 16counter 17counter 18counter 19counter 20counter 21counter 22counter 23counter 24counter 25counter 26counter 27counter 28counter 29counter 30counter 31counter 32counter 33counter 34counter 35counter 36counter 37counter 38counter 39counter 40counter 41counter 42counter 43counter 44counter 45counter 46counter 47counter 48counter 49counter 50counter 51counter 52counter 53counter 54counter 55counter 56counter 57counter 58counter 59counter 60counter 61counter 62counter 63counter 64counter 65counter 66counter 67counter 68counter 69counter 70counter 71counter 72counter 73counter 74counter 75counter 76counter 77counter 78counter 79counter 80counter 81counter 82counter 83counter 84counter 85counter 86counter 87counter 88counter 89counter 90counter 91counter 92counter 93counter 94counter 95counter 96counter 97counter 98counter 99counter 100counter 101counter 102counter 103counter 104counter 105counter 106counter 107counter 108counter 109counter 110counter 111counter 112counter 113counter 114counter 115counter 116counter 117counter 118counter 119counter 120counter 121counter 122counter 123counter 124counter 125counter 126counter 127counter 128counter 129counter 130counter 131counter 132counter 133counter 134counter 135counter 136counter 137counter 138counter 139counter 140counter 141counter 142counter 143counter 144counter 145counter 146counter 147counter 148counter 149counter 150counter 151counter 152counter 153counter 154counter 155counter 156counter 157counter 158counter 159counter 160counter 161counter 162counter 163counter 164counter 165counter 166counter 167counter 168counter 169counter 170counter 171counter 172counter 173counter 174counter 175counter 176counter 177counter 178counter 179counter 180counter 181counter 182counter 183counter 184counter 185counter 186counter 187counter 188counter 189counter 190counter 
191counter 192counter 193counter 194counter 195counter 196counter 197counter 198counter 199counter 200counter 201counter 202counter 203counter 204counter 205counter 206counter 207counter 208counter 209counter 210counter 211counter 212counter 213counter 214counter 215counter 216counter 217counter 218counter 219counter 220counter 221counter 222counter 223counter 224counter 225counter 226counter 227counter 228counter 229counter 230counter 231counter 232counter 233counter 234counter 235counter 236counter 237counter 238counter 239counter 240counter 241counter 242counter 243counter 244counter 245counter 246counter 247counter 248counter 249counter 250counter 251counter 252counter 253counter 254counter 255counter 256counter 257counter 258counter 259counter 260counter 261counter 262counter 263counter 264counter 265counter 266counter 267counter 268counter 269counter 270counter 271counter 272counter 273counter 274counter 275counter 276counter 277counter 278counter 279counter 280counter 281counter 282counter 283counter 284counter 285counter 286counter 287counter 288counter2 288
counter2 0
counter2 0
counter2 0
counter2 0
17/04/15 16:51:57 INFO mapred.MapTask: Finished spill 0
17/04/15 16:51:57 INFO mapred.Task: Task:attempt_local1738495890_0001_m_000000_0 is done. And is in the process of committing
17/04/15 16:51:57 INFO mapred.LocalJobRunner: map
17/04/15 16:51:57 INFO mapred.Task: Task 'attempt_local1738495890_0001_m_000000_0' done.
17/04/15 16:51:57 INFO mapred.LocalJobRunner: Finishing task: attempt_local1738495890_0001_m_000000_0
17/04/15 16:51:57 INFO mapred.LocalJobRunner: map task executor complete.
17/04/15 16:51:57 INFO mapred.LocalJobRunner: Waiting for reduce tasks
17/04/15 16:51:57 INFO mapred.LocalJobRunner: Starting task: attempt_local1738495890_0001_r_000000_0
17/04/15 16:51:57 INFO output.FileOutputCommitter: File Output Committer Algorithm version is 1
17/04/15 16:51:57 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux.
17/04/15 16:51:57 INFO mapred.Task:  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@748e52c
17/04/15 16:51:57 INFO mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@1615c2c3
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: MergerManager: memoryLimit=334338464, maxSingleShuffleLimit=83584616, mergeThreshold=220663392, ioSortFactor=10, memToMemMergeOutputsThreshold=10
17/04/15 16:51:57 INFO reduce.EventFetcher: attempt_local1738495890_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
17/04/15 16:51:57 INFO reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local1738495890_0001_m_000000_0 decomp: 94 len: 98 to MEMORY
17/04/15 16:51:57 INFO reduce.InMemoryMapOutput: Read 94 bytes from map-output for attempt_local1738495890_0001_m_000000_0
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 94, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->94
17/04/15 16:51:57 INFO reduce.EventFetcher: EventFetcher is interrupted.. Returning
17/04/15 16:51:57 INFO mapred.LocalJobRunner: 1 / 1 copied.
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
17/04/15 16:51:57 INFO mapred.Merger: Merging 1 sorted segments
17/04/15 16:51:57 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 81 bytes
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: Merged 1 segments, 94 bytes to disk to satisfy reduce memory limit
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: Merging 1 files, 98 bytes from disk
17/04/15 16:51:57 INFO reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
17/04/15 16:51:57 INFO mapred.Merger: Merging 1 sorted segments
17/04/15 16:51:57 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 81 bytes
17/04/15 16:51:57 INFO mapred.LocalJobRunner: 1 / 1 copied.
17/04/15 16:51:57 INFO Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
counter 1counter2 1
counter2 0
counter2 0
counter2 0
counter2 0

要在MapReduce中实现的SQL查询

select
c_mktsegment, count(c_custkey), avg(c_acctbal)
from
customer
where c_nationkey == '[NATION]' and c_acctbal > [BALANCE]
group by
c_mktsegment;

2 个答案:

答案 0 :(得分:3)

您只为其中一个reducer键递增了count。您的映射器没有输出任何AUTOMOBILE记录,因为您要求余额超过8000且国家编号位于(1,15)区间内。编辑:我现在看到您处理的数据远远多于您发布的7条样本记录。

在解决计数问题之后,下面这一行可能也是一个问题:

avgBalance = count <= 1 ? sumBalance : avgBalance / count; 

答案 1 :(得分:1)

我认为我的上一个答案中有一个拼写错误,您试图通过指定avgBalance = 0来解决此问题。

你应该用总和除以计数,而不是用平均值除以计数!

  float avgBalance = count <= 1 ? sumBalance : (sumBalance / count);

其次,您的计数器打印的是values的长度,而不是特定键对应的客户数量。

  

在MapReduce中实现的SQL查询
  where c_nationkey == '[NATION]'

顺便说一下,这并不是你的MapReduce实际执行的条件,你的代码里写的是: nation < 15 && nation > 1

除此之外,我已修复您的代码以生成此输出。

AUTOMOBILE  4   9275.662
BUILDING    1   9078.19
HOUSEHOLD   1   9278.31
MACHINERY   3   9264.623

这是解决方案

(使用HashSet来计算唯一客户)

/**
 * MapReduce job that groups filtered customer records by market segment and writes, for each
 * segment, the number of distinct customers and their average account balance.
 *
 * <p>Input lines are '|'-delimited with at least seven fields; the fields used are
 * 0 (customer key), 3 (nation key), 5 (account balance) and 6 (market segment).
 * Each output line is {@code segment TAB distinctCustomers TAB averageBalance}.
 */
public class AvgMapRed extends Configured implements Tool {

    public static final String APP_NAME = AvgMapRed.class.getSimpleName();

    public static void main(String[] args) throws Exception {
        final int status = ToolRunner.run(new Configuration(), new AvgMapRed(), args);
        System.exit(status);
    }

    /**
     * Configures and runs the job.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 when the job succeeds, 1 when it fails, 2 when the arguments are missing
     */
    @Override
    public int run(String[] args) throws Exception {
        // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.printf("Usage: %s <input path> <output path>%n", APP_NAME);
            return 2;
        }

        Configuration conf = getConf();
        Job job = Job.getInstance(conf, APP_NAME);
        job.setJarByClass(AvgMapRed.class);

        job.setMapperClass(TokenizerMapper.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(AverageReducer.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Emits {@code (segment, custKey TAB balance)} for every record whose balance is above
     * 8000 and whose nation key lies strictly between 1 and 15.
     */
    static class TokenizerMapper extends Mapper<LongWritable, Text, Text, Text> {

        /** Fields 0..6 are read, so a usable record has at least this many fields. */
        private static final int MIN_FIELDS = 7;

        // Reused across calls to avoid allocating two Text objects per record.
        private final Text word = new Text();
        private final Text segment = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // The pipe must be escaped because split() takes a regular expression.
            String[] tokens = value.toString().split("\\|");

            if (tokens.length < MIN_FIELDS) {
                // %n terminates the line so consecutive messages do not run together.
                System.err.printf("mapper: not enough records for %s%n", Arrays.toString(tokens));
                return;
            }
            // Field 0 is the customer key; field 1 is the customer name.
            String custKey = tokens[0];

            int nation;
            float balance;
            try {
                nation = Integer.parseInt(tokens[3]);
                balance = Float.parseFloat(tokens[5]);
            } catch (NumberFormatException e) {
                // Report the offending record itself; a bare stack trace does not show it.
                System.err.printf("mapper: skipping record with non-numeric nation or balance (%s): %s%n",
                        e.getMessage(), value);
                return;
            }

            if (balance > 8000 && (nation < 15 && nation > 1)) {
                segment.set(tokens[6]);
                // Value layout read back by the reducer: "<custKey>\t<balance>".
                word.set(custKey + "\t" + balance);
                context.write(segment, word);
            }

        }
    }

    /**
     * For each segment, writes the number of distinct customer keys and the mean of all
     * balances received for that segment.
     */
    static class AverageReducer extends Reducer<Text, Text, Text, Text> {

        private final Text output = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {

            // Accumulate in double: a float running sum loses precision over many values.
            double sumBalance = 0;
            int count = 0;

            // A set, so a customer key that appears more than once is counted once.
            // Fully qualified because this snippet carries no import block.
            java.util.Set<String> customers = new java.util.HashSet<>();

            for (Text v : values) {
                String[] a = v.toString().trim().split("\t");

                customers.add(a[0]);

                sumBalance += Double.parseDouble(a[1]);
                count++;
            }

            // Divide the SUM (not the average) by the number of values; guard against an empty group.
            float avgBalance = count == 0 ? 0f : (float) (sumBalance / count);

            output.set(customers.size() + "\t" + avgBalance);
            context.write(key, output);
        }
    }
}