My input file is as follows:
**Regid Epoch Timestamp**
f77xvP 1509073785106
2YXsF7r 1509073795109
GUf98M1 1509073805109
JNFXto1 1509073815110
Here is what I am trying to do: read the above input file from HDFS and write it back to HDFS as separate files, partitioned by date/hour/minute.
From my research this can be done with Hive, but I would like to do it with a partitioner/reducer. Could someone point me in the right direction?
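In other words, instead of one flat output directory I would like each record to land in an HDFS file chosen by its date/hour/minute, roughly like this (the paths below are only an illustration of the layout I am after, not actual output):

    <output-dir>/2017-10-26/20/09/part-m-00000
    <output-dir>/2017-10-26/20/10/part-m-00000

Below is the map-only job I have so far; it only converts the epoch timestamp to a readable date and writes everything into a single output directory.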
package com.main;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.json.JSONObject;

public class JsonMain {

    public static Properties props;

    // Map-only job: parse each record, convert the epoch timestamp to a
    // human-readable date string, and emit regId -> "epoch  formattedDate".
    public static class Mapperclass extends Mapper<LongWritable, Text, Text, Text> {

        DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

        public void map(LongWritable mkey, Text mvalue, Context context)
                throws IOException, InterruptedException {
            try {
                JSONObject obj = new JSONObject(mvalue.toString());
                String regId = (String) obj.get("regId");
                String time = (String) obj.get("time");

                // Epoch millis -> formatted date string
                Date date = new Date(Long.parseLong(time));
                String datestring = dateFormat.format(date);

                context.write(new Text(regId), new Text(time + " " + datestring));
            } catch (Exception e) {
                // Malformed records are logged and skipped
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // props = new Properties();
        // FileInputStream propFile = new FileInputStream(args[0].trim());
        // props.load(propFile);

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Map Reduce");
        job.setJarByClass(JsonMain.class);
        job.setMapperClass(Mapperclass.class);

        // Map-only job: no reducers, mapper output goes straight to HDFS
        job.setNumReduceTasks(0);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
The above code writes the following to HDFS:
Ef77xvP 1509073785106 2017-10-26 20:09:45
2YXsF7r 1509073795109 2017-10-26 20:09:55
GUf98M1 1509073805109 2017-10-26 20:10:05
JNFXto1 1509073815110 2017-10-26 20:10:15
bkFbEK3 1509073825110 2017-10-26 20:10:25
LDze9od 1509073835110 2017-10-26 20:10:35
y0Zs8gF 1509073845111 2017-10-26 20:10:45
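To get from this flat output to one file per date/hour/minute, the direction I was considering is MultipleOutputs, with the base output path derived from the timestamp. What follows is only a rough, untested sketch built on my existing mapper: the "yyyy-MM-dd/HH/mm" directory format and the "/part" base name are my own assumptions, and I am not sure whether this or a custom Partitioner is the better route.

    // requires: import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
    public static class PartitioningMapper extends Mapper<LongWritable, Text, Text, Text> {

        private MultipleOutputs<Text, Text> mos;
        private final DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        // Per-record subdirectory, e.g. 2017-10-26/20/09 -- this layout is just my guess
        private final DateFormat pathFormat = new SimpleDateFormat("yyyy-MM-dd/HH/mm");

        @Override
        protected void setup(Context context) {
            mos = new MultipleOutputs<Text, Text>(context);
        }

        @Override
        protected void map(LongWritable mkey, Text mvalue, Context context)
                throws IOException, InterruptedException {
            try {
                JSONObject obj = new JSONObject(mvalue.toString());
                String regId = (String) obj.get("regId");
                String time = (String) obj.get("time");

                Date date = new Date(Long.parseLong(time));
                String datestring = dateFormat.format(date);

                // Write into a timestamp-derived subdirectory instead of the default
                // part file, e.g. <output-dir>/2017-10-26/20/09/part-m-00000
                String subDir = pathFormat.format(date);
                mos.write(new Text(regId), new Text(time + " " + datestring), subDir + "/part");
            } catch (Exception e) {
                // Malformed records are logged and skipped
                e.printStackTrace();
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            mos.close();
        }
    }

If this is the right idea, I assume the driver would also need LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class) so that empty default part files are not created, but I am unsure about that part as well.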