使用下面的代码,我试图从S3 aws读取文件并将其加载到数据帧中。
import com.amazonaws.auth.BasicAWSCredentials
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.GetObjectRequest
import com.databricks.spark.avro._
val credentials = new BasicAWSCredentials("mykey", "mysecreteKey")
val s3Client = new AmazonS3Client(credentials);
val s3Object = s3Client.getObject(new GetObjectRequest("bucket", "home/ubuntu/file_1463718615376_0.avro"))
val myData= scala.io.Source.fromInputStream(s3Object.getObjectContent()).mkString
val dfData = sqlContext.read.avro(myData)
我能够对我机器上本地存储的文件做同样的事情。
scala> val visitorData = scala.io.Source.fromInputStream(s3Objectlv.getObjectContent()).mkString
java.nio.charset.MalformedInputException: Input length = 1
at java.nio.charset.CoderResult.throwException(CoderResult.java:281)
at sun.nio.cs.StreamDecoder.implRead(StreamDecoder.java:339)
at sun.nio.cs.StreamDecoder.read(StreamDecoder.java:178)
at java.io.InputStreamReader.read(InputStreamReader.java:184)
at java.io.BufferedReader.fill(BufferedReader.java:161)
at java.io.BufferedReader.read(BufferedReader.java:182)
at scala.io.BufferedSource$$anonfun$iter$1$$anonfun$apply$mcI$sp$1.apply$mcI$sp(BufferedSource.scala:38)
at scala.io.Codec.wrap(Codec.scala:68)
at scala.io.BufferedSource$$anonfun$iter$1.apply(BufferedSource.scala:38)
at scala.io.BufferedSource$$anonfun$iter$1.apply(BufferedSource.scala:38)
at scala.collection.Iterator$$anon$9.next(Iterator.scala:162)
at scala.collection.Iterator$$anon$17.hasNext(Iterator.scala:511)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
at scala.io.Source.hasNext(Source.scala:226)
at scala.collection.Iterator$class.foreach(Iterator.scala:727)
at scala.io.Source.foreach(Source.scala:178)
at scala.collection.TraversableOnce$class.addString(TraversableOnce.scala:320)
at scala.io.Source.addString(Source.scala:178)
at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:286)
at scala.io.Source.mkString(Source.scala:178)
at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:288)
at scala.io.Source.mkString(Source.scala:178)
at scala.collection.TraversableOnce$class.mkString(TraversableOnce.scala:290)
at scala.io.Source.mkString(Source.scala:178)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:63)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:68)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:70)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:72)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:74)
..........................