Bad Request 400 error when running a custom jar on EMR

Time: 2016-07-20 12:11:37

Tags: amazon-web-services hadoop mapreduce elastic-map-reduce

I get this error when running a custom jar on EMR.

Exception in thread "main" com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.model.AmazonS3Exception: Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400 Bad Request; Request ID: B042BB0B40A75966), S3 Extended Request ID: vr/DUr8HD3xjomauyzqvVdGuW3fHBP8PDUmTIAoVLUxrmsxh9H+OS9+cgo4OmHxaz/b8CSPGmuc=
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1389)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:902)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:607)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:376)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:338)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:287)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3826)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.headBucket(AmazonS3Client.java:1071)
    at com.amazon.ws.emr.hadoop.fs.shaded.com.amazonaws.services.s3.AmazonS3Client.doesBucketExist(AmazonS3Client.java:1029)
    at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.ensureBucketExists(Jets3tNativeFileSystemStore.java:138)
    at com.amazon.ws.emr.hadoop.fs.s3n.Jets3tNativeFileSystemStore.initialize(Jets3tNativeFileSystemStore.java:116)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at com.sun.proxy.$Proxy23.initialize(Unknown Source)
    at com.amazon.ws.emr.hadoop.fs.s3n.S3NativeFileSystem.initialize(S3NativeFileSystem.java:461)
    at com.amazon.ws.emr.hadoop.fs.EmrFileSystem.initialize(EmrFileSystem.java:110)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2703)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2737)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2719)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:375)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(FileInputFormat.java:485)
    at SentimentsDriver.run(SentimentsDriver.java:21)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
    at SentimentsDriver.main(SentimentsDriver.java:33)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:136)

Arguments: s3://sentimentproj/input/tweet.txt s3://sentimentproj/output
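For reference, a custom JAR step with these arguments can be added to a running cluster roughly as follows. This is a minimal sketch using the AWS SDK for Java 1.x; the jar location and the cluster id are placeholders, not values from the question:

    import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduce;
    import com.amazonaws.services.elasticmapreduce.AmazonElasticMapReduceClientBuilder;
    import com.amazonaws.services.elasticmapreduce.model.AddJobFlowStepsRequest;
    import com.amazonaws.services.elasticmapreduce.model.HadoopJarStepConfig;
    import com.amazonaws.services.elasticmapreduce.model.StepConfig;

    public class AddSentimentStep {
        public static void main(String[] args) {
            AmazonElasticMapReduce emr = AmazonElasticMapReduceClientBuilder.defaultClient();

            // The custom jar and its two arguments (input file, output directory).
            HadoopJarStepConfig jarStep = new HadoopJarStepConfig()
                    .withJar("s3://sentimentproj/sentiments.jar")   // placeholder jar path
                    .withArgs("s3://sentimentproj/input/tweet.txt",
                              "s3://sentimentproj/output");

            StepConfig step = new StepConfig()
                    .withName("SentimentAnalysis")
                    .withActionOnFailure("CONTINUE")
                    .withHadoopJarStep(jarStep);

            // Placeholder cluster id of the target EMR cluster.
            emr.addJobFlowSteps(new AddJobFlowStepsRequest()
                    .withJobFlowId("j-XXXXXXXXXXXXX")
                    .withSteps(step));
        }
    }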

SentimentsDriver:

    import org.apache.hadoop.conf.Configured;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.util.Tool;
    import org.apache.hadoop.util.ToolRunner;

    /* This class is responsible for running the MapReduce job. */
    public class SentimentsDriver extends Configured implements Tool {

        public int run(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: SentimentsDriver <input path> <output path>");
                System.exit(-1);
            }

            // Use the configuration supplied by ToolRunner instead of the
            // deprecated no-arg Job constructor.
            Job job = Job.getInstance(getConf());
            job.setJarByClass(SentimentsDriver.class);
            job.setJobName("SentimentAnalysis");

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            job.setMapperClass(SentimentsMapper.class);
            job.setReducerClass(SentimentsReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            // Call waitForCompletion only once and return the exit code;
            // main() decides whether to call System.exit.
            boolean success = job.waitForCompletion(true);
            return success ? 0 : 1;
        }

        public static void main(String[] args) throws Exception {
            SentimentsDriver driver = new SentimentsDriver();
            int exitCode = ToolRunner.run(driver, args);
            System.exit(exitCode);
        }
    }

1 Answer:

Answer 0 (score: 1):

The problem is most likely with the S3 bucket. Make sure the bucket was created in the same region as the EMR cluster. If the bucket and the cluster are in different regions, S3 typically throws this Bad Request 400 error.
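To confirm the bucket's region, here is a minimal sketch using the AWS SDK for Java 1.x; the bucket name is taken from the arguments in the question, and credentials are assumed to come from the default provider chain:

    import com.amazonaws.services.s3.AmazonS3;
    import com.amazonaws.services.s3.AmazonS3ClientBuilder;

    public class CheckBucketRegion {
        public static void main(String[] args) {
            // Client built from the default credential/region provider chain.
            AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

            // Returns "US" for buckets in us-east-1, otherwise the region
            // string, e.g. "eu-west-1". Compare this with the region of the
            // EMR cluster shown in the console.
            String bucketRegion = s3.getBucketLocation("sentimentproj");
            System.out.println("Bucket region: " + bucketRegion);
        }
    }

If the regions do not match, recreating the bucket in the cluster's region (or launching the cluster in the bucket's region) is the usual fix.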