part-r-00000 and the _SUCCESS file are 0 KB after running MapReduce in Hadoop

Time: 2017-10-09 11:46:13

Tags: java hadoop terminal mapreduce bigdata

17/10/09 19:40:55 INFO input.FileInputFormat: Total input paths to process : 1
17/10/09 19:40:55 INFO util.NativeCodeLoader: Loaded the native-hadoop library
17/10/09 19:40:55 WARN snappy.LoadSnappy: Snappy native library not loaded
17/10/09 19:40:56 INFO mapred.JobClient: Running job: job_201710090351_0026
17/10/09 19:40:57 INFO mapred.JobClient:  map 0% reduce 0%
17/10/09 19:41:00 INFO mapred.JobClient:  map 100% reduce 0%
17/10/09 19:41:07 INFO mapred.JobClient:  map 100% reduce 33%
17/10/09 19:41:08 INFO mapred.JobClient:  map 100% reduce 100%
17/10/09 19:41:08 INFO mapred.JobClient: Job complete: job_201710090351_0026
17/10/09 19:41:08 INFO mapred.JobClient: Counters: 28
17/10/09 19:41:08 INFO mapred.JobClient:   Map-Reduce Framework
17/10/09 19:41:08 INFO mapred.JobClient:     Spilled Records=0
17/10/09 19:41:08 INFO mapred.JobClient:     Map output materialized bytes=6
17/10/09 19:41:08 INFO mapred.JobClient:     Reduce input records=0
17/10/09 19:41:08 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=3778863104
17/10/09 19:41:08 INFO mapred.JobClient:     Map input records=8
17/10/09 19:41:08 INFO mapred.JobClient:     SPLIT_RAW_BYTES=107
17/10/09 19:41:08 INFO mapred.JobClient:     Map output bytes=0
17/10/09 19:41:08 INFO mapred.JobClient:     Reduce shuffle bytes=6
17/10/09 19:41:08 INFO mapred.JobClient:     Physical memory (bytes) snapshot=313819136
17/10/09 19:41:08 INFO mapred.JobClient:     Reduce input groups=0
17/10/09 19:41:08 INFO mapred.JobClient:     Combine output records=0
17/10/09 19:41:08 INFO mapred.JobClient:     Reduce output records=0
17/10/09 19:41:08 INFO mapred.JobClient:     Map output records=0
17/10/09 19:41:08 INFO mapred.JobClient:     Combine input records=0
17/10/09 19:41:08 INFO mapred.JobClient:     CPU time spent (ms)=890
17/10/09 19:41:08 INFO mapred.JobClient:     Total committed heap usage (bytes)=302514176
17/10/09 19:41:08 INFO mapred.JobClient:   File Input Format Counters 
17/10/09 19:41:08 INFO mapred.JobClient:     Bytes Read=892
17/10/09 19:41:08 INFO mapred.JobClient:   FileSystemCounters
17/10/09 19:41:08 INFO mapred.JobClient:     HDFS_BYTES_READ=999
17/10/09 19:41:08 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=109316
17/10/09 19:41:08 INFO mapred.JobClient:     FILE_BYTES_READ=6
17/10/09 19:41:08 INFO mapred.JobClient:   Job Counters 
17/10/09 19:41:08 INFO mapred.JobClient:     Launched map tasks=1
17/10/09 19:41:08 INFO mapred.JobClient:     Launched reduce tasks=1
17/10/09 19:41:08 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=8085
17/10/09 19:41:08 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
17/10/09 19:41:08 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=2769
17/10/09 19:41:08 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
17/10/09 19:41:08 INFO mapred.JobClient:     Data-local map tasks=1
17/10/09 19:41:08 INFO mapred.JobClient:   File Output Format Counters 
17/10/09 19:41:08 INFO mapred.JobClient:     Bytes Written=0

This is the log of the MapReduce job; the Java code follows. Something goes wrong before the reducer: part-r-00000 comes out empty, and the _SUCCESS file is empty too (an empty _SUCCESS file is normal, though; it is only a completion marker). Note the counters above: Map input records=8 but Map output records=0, so the mapper read all eight input lines yet never emitted a single record, and the reducer therefore had nothing to write.

package BigData;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class BusinessCategoryPA {

/*
 * Mapper Class
 */
public static class Map extends Mapper<LongWritable, Text, Text, NullWritable>{
    private Text businessCategory = new Text();     //Type of output key

    /*
     * Map function that emits each business category as a key with a null value
     */
    @Override
    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
        String[] business = value.toString().split("::");
        // Guard against malformed rows before indexing, then filter on the address column.
        if(business.length > 2 && business[1].contains("Palo Alto")){
            // Strip the "List(...)" wrapper and all spaces from the categories column.
            String businessCategoryList = business[2];
            businessCategoryList = businessCategoryList.replace("(", "");
            businessCategoryList = businessCategoryList.replace(")", "");
            businessCategoryList = businessCategoryList.replace("List", "");
            businessCategoryList = businessCategoryList.replace(" ", "");
            String[] categories = businessCategoryList.split(",");

            for(String item : categories){
                businessCategory.set(item);
                context.write(businessCategory, NullWritable.get());
            }
        }
    }
}

/*
 * Reducer Class
 */
public static class Reduce extends Reducer<Text, NullWritable, Text, NullWritable>{

    /*
     * Reduce function: each distinct category key is written exactly once,
     * so the output is the deduplicated list of categories
     */
    @Override
    public void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException{
        context.write(key, NullWritable.get());
    }
}

/* 
 * Driver program
 */
public static void main(String[] args) throws Exception {

    /*
     * Configuration of a job
     */
    Configuration conf = new Configuration();

    /*
     * Getting all the arguments
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: BusinessCategoryPA <in> <out>");
        System.exit(2);
    }

    /*
     * Create a job with name "BusinessCategoryPA"
     */
    Job job = new Job(conf, "BusinessCategoryPA");
    job.setJarByClass(BusinessCategoryPA.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    /*
     *  set output key type
     */
    job.setOutputKeyClass(Text.class);

    /*
     * set output value type
     */
    job.setOutputValueClass(NullWritable.class);

    /*
     * set the HDFS path of the input data
     */
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));

    /*
     * set the HDFS path for the output
     */
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    /*
     * Wait for job completion
     */
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}

How can I generate a CSV file as output?
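
As the job stands, its output is already CSV-compatible: the value type is NullWritable, so TextOutputFormat writes one category per line, i.e. a one-column CSV. Also note that Hadoop writes into an output directory (the /Yelp/output.csv path in the command further down is a directory containing part-r-00000, not a file), so to get a real .csv file you rename or copy the part file after the job finishes, e.g. with hadoop fs -mv (the target name here is just an example):

    hadoop fs -mv /Yelp/output.csv/part-r-00000 /Yelp/BusinessCategoryPA.csv

If the reducer is later changed to emit a value as well (say a count per category), the default tab separator between key and value can be switched to a comma. A minimal sketch, assuming the Hadoop 1.x property name that matches the mapred.JobClient log above (on Hadoop 2+ it is mapreduce.output.textoutputformat.separator):

    Configuration conf = new Configuration();
    // TextOutputFormat will now write "key,value" lines instead of "key<TAB>value".
    conf.set("mapred.textoutputformat.separator", ",");
    Job job = new Job(conf, "BusinessCategoryPA");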

Input file: Business.csv contains basic information about local businesses. It has the following columns:

"business_id"::"full_address"::"categories"

'business_id': (a unique identifier for the business)(eg: HIPGr2gSEN4T73tjz47hpw)
'full_address': (localized address)(eg. 1 Palmer Sq EPrinceton, NJ 08542)
'categories': [(localized category names)] (eg. List(Pubs, Bars, American (Traditional), Nightlife, Restaurants))
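
Joined with the "::" delimiter, a full record would presumably look like this (reconstructed from the per-column examples above, not copied from the real file):

    HIPGr2gSEN4T73tjz47hpw::1 Palmer Sq EPrinceton, NJ 08542::List(Pubs, Bars, American (Traditional), Nightlife, Restaurants)

Note that despite the .csv extension the fields are "::"-separated, which is what the mapper's split("::") expects, and that the mapper strips all spaces from the categories column, so "American (Traditional)" is emitted as "AmericanTraditional".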

hadoop jar '/home/hduser/Downloads/Hadoop/TopTenRatedBusiness.jar' bd.TopTenRatedBusiness /Yelp/input/business.csv /Yelp/output.csv/

I use this command to generate the output.
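
To see what the job actually wrote, list the output directory and print the part file (standard HDFS shell commands, using the paths from the command above):

    hadoop fs -ls /Yelp/output.csv
    hadoop fs -cat /Yelp/output.csv/part-r-00000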

1 Answer:

Answer 0 (score: 0)

Please re-check the condition if (business[1].contains("Palo Alto")) and verify that your input file really does contain "Palo Alto" in exactly the same format as you have written it here.
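
One quick way to check this without rerunning the job is to feed a sample line through the mapper's parsing logic in a plain Java program (a standalone sketch; the sample line here is assembled from the question's column examples, so paste a real line from business.csv instead):

    public class FilterCheck {
        public static void main(String[] args) {
            // Replace with an actual line copied out of business.csv.
            String line = "HIPGr2gSEN4T73tjz47hpw::1 Palmer Sq EPrinceton, NJ 08542::List(Pubs, Bars)";
            String[] business = line.split("::");
            System.out.println("fields  = " + business.length);
            if (business.length > 2) {
                System.out.println("address = " + business[1]);
                System.out.println("matches = " + business[1].contains("Palo Alto"));
            }
        }
    }

If "matches" prints false for a line you expect to pass, the address column's format (or the "::" delimiter itself) differs from what the mapper assumes.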