Running this example in the Spark (0.9.1) shell on the Hadoop 2 namenode:

scala> val file1 = sc.textFile("hdfs://testhadoopname1.myserver.com:9000/user/ubuntu/events/datepart=2014-04-11/2014-04-11-09-42.txt")
14/04/16 10:27:01 INFO storage.MemoryStore: ensureFreeSpace(74968) called with curMem=302142, maxMem=311387750
14/04/16 10:27:01 INFO storage.MemoryStore: Block broadcast_3 stored as values to memory (estimated size 73.2 KB, free 296.6 MB)
file1: org.apache.spark.rdd.RDD[String] = MappedRDD[7] at textFile at <console>:12
scala> file1.count()
Why do I get this?
java.io.IOException: Failed on local exception: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).; Host Details : local host is: "testhadoopname1.myserver.com/10.255.187.229"; destination host is: "testhadoopname1.myserver.com":9000;
	at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:764)
	at org.apache.hadoop.ipc.Client.call(Client.java:1351)
	at org.apache.hadoop.ipc.Client.call(Client.java:1300)
	at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:206)
	at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:622)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:186)
	at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
	at com.sun.proxy.$Proxy14.getFileInfo(Unknown Source)
	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getFileInfo(ClientNamenodeProtocolTranslatorPB.java:651)
	at org.apache.hadoop.hdfs.DFSClient.getFileInfo(DFSClient.java:1679)
	at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1106)
	at org.apache.hadoop.hdfs.DistributedFileSystem$17.doCall(DistributedFileSystem.java:1102)
	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
	at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1102)
	at org.apache.hadoop.fs.FileSystem.globStatusInternal(FileSystem.java:1701)
	at org.apache.hadoop.fs.FileSystem.globStatus(FileSystem.java:1647)
	at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:222)
	at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
	at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:140)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
	at scala.Option.getOrElse(Option.scala:120)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
	at org.apache.spark.rdd.MappedRDD.getPartitions(MappedRDD.scala:28)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:207)
	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:205)
	at scala.Option.getOrElse(Option.scala:120)
	at org.apache.spark.rdd.RDD.partitions(RDD.scala:205)
	at org.apache.spark.SparkContext.runJob(SparkContext.scala:898)
	at org.apache.spark.rdd.RDD.count(RDD.scala:726)
	at $iwC$$iwC$$iwC$$iwC.<init>(<console>:15)
	at $iwC$$iwC$$iwC.<init>(<console>:20)
	at $iwC$$iwC.<init>(<console>:22)
	at $iwC.<init>(<console>:24)
	at <init>(<console>:26)
	at .<init>(<console>:30)
	at .<clinit>(<console>)
	at .<init>(<console>:7)
	at .<clinit>(<console>)
	at $print(<console>)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:622)
	at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:772)
	at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1040)
	at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:609)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:640)
	at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:604)
	at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:793)
	at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:838)
	at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:750)
	at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:598)
	at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:605)
	at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:608)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:931)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
	at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:881)
	at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:881)
	at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:973)
	at org.apache.spark.repl.Main$.main(Main.scala:31)
	at org.apache.spark.repl.Main.main(Main.scala)
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message contained an invalid tag (zero).
	at com.google.protobuf.InvalidProtocolBufferException.invalidTag(InvalidProtocolBufferException.java:89)
	at com.google.protobuf.CodedInputStream.readTag(CodedInputStream.java:108)
	at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:1398)
	at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.<init>(RpcHeaderProtos.java:1362)
	at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1492)
	at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto$1.parsePartialFrom(RpcHeaderProtos.java:1487)
	at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:200)
	at com.google.protobuf.AbstractParser.parsePartialDelimitedFrom(AbstractParser.java:241)
	at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:253)
	at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:259)
	at com.google.protobuf.AbstractParser.parseDelimitedFrom(AbstractParser.java:49)
	at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseDelimitedFrom(RpcHeaderProtos.java:2364)
	at org.apache.hadoop.ipc.Client$Connection.receiveRpcResponse(Client.java:996)
	at org.apache.hadoop.ipc.Client$Connection.run(Client.java:891)
Answer 0 (score: 2)
The problem was that I was running an incompatible build of Hadoop (a 2.0.3-alpha release): its RPC wire format evidently did not match the Hadoop client libraries my Spark build was using, hence the protobuf parse error. I rebuilt the HDFS cluster on the latest Hadoop 2.3.0, using the Spark 0.9.1 standalone cluster manager, and re-ran the example above in spark-shell... it worked as advertised.
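One way to confirm this kind of mismatch from spark-shell is to compare the Hadoop client version Spark was built against with the version the cluster reports, and to call the HDFS client directly so the RPC failure reproduces without Spark in the loop. A minimal sketch (mine, not from the original answer), assuming the same NameNode URI and path as in the question:

// Print the Hadoop version of the client libraries on Spark's classpath.
// If this disagrees with the cluster's `hadoop version` (e.g. a 2.0.x-alpha
// cluster vs. a 2.2+ client), the protobuf RPC handshake can fail with
// "invalid tag (zero)" exactly as above.
import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.util.VersionInfo

println(VersionInfo.getVersion)

// A direct metadata call against the NameNode; with mismatched versions this
// throws the same InvalidProtocolBufferException, taking Spark out of the picture.
val fs = FileSystem.get(new URI("hdfs://testhadoopname1.myserver.com:9000"), new Configuration())
println(fs.exists(new Path("/user/ubuntu/events")))

If the two versions disagree, making them match on either side (here, moving the cluster to 2.3.0) is the fix.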