Adding a second HDFS cluster

Date: 2019-08-19 10:09:50

Tags: scala apache-spark hdfs apache-zookeeper

My code works with two HDFS clusters: one is set as the default, and I need to read certain files from the other one.

I built a separate filesystem configuration for the second cluster, but its namenode cannot be resolved.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

// Client-side configuration describing the second (HA) cluster.
val conf = new Configuration()
conf.set("fs.defaultFS", "hdfs://test1-hdfs")
conf.set("dfs.nameservices", "test1-hdfs")
conf.set("dfs.ha.namenodes.test1-hdfs", "nn1,nn2")
conf.set("dfs.namenode.rpc-address.test1-hdfs.nn1", "test1-hdfs-nn1.com:9820")
conf.set("dfs.namenode.rpc-address.test1-hdfs.nn2", "test1-hdfs-nn2.com:9820")
conf.set("dfs.client.failover.proxy.provider.test1-hdfs",
  "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")

// Resolve the nameservice URI, e.g. "hdfs://test1-hdfs"; filePath is defined elsewhere.
val fs = FileSystem.get(conf).getUri.toString
val path = new Path(filePath)
val fileSystem = FileSystem.get(path.toUri, conf).getUri.toString

// Read the file through Spark using the fully qualified URI.
val df = spark.read.csv(fs.concat(filePath))
df.printSchema()

If I set "fs.defaultFS" directly to "hdfs://test1-hdfs-nn1.com:9820" everything works, but that pins the client to a single namenode instead of going through the HA nameservice.
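Judging by the trace below, the UnknownHostException is raised inside an executor task (FileScanRDD, Executor$TaskRunner), so the standalone Configuration built above may simply never reach the code that opens the file: spark.read consults Spark's own Hadoop configuration, not a local conf object. A minimal sketch of one way around this, assuming Spark 2.x and reusing the spark and filePath values from the snippet above (registerNameservice is an illustrative name; if the default cluster is itself an HA nameservice, dfs.nameservices must list both names, comma-separated):

import org.apache.spark.sql.SparkSession

// Sketch: register the second HA nameservice on the Hadoop configuration
// that Spark distributes to its tasks, rather than on a private conf object.
def registerNameservice(spark: SparkSession): Unit = {
  val hc = spark.sparkContext.hadoopConfiguration
  hc.set("dfs.nameservices", "test1-hdfs") // append the default nameservice here if it has one
  hc.set("dfs.ha.namenodes.test1-hdfs", "nn1,nn2")
  hc.set("dfs.namenode.rpc-address.test1-hdfs.nn1", "test1-hdfs-nn1.com:9820")
  hc.set("dfs.namenode.rpc-address.test1-hdfs.nn2", "test1-hdfs-nn2.com:9820")
  hc.set("dfs.client.failover.proxy.provider.test1-hdfs",
    "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
}

registerNameservice(spark)
// With the nameservice registered, a fully qualified path resolves via HA failover.
val df = spark.read.csv("hdfs://test1-hdfs" + filePath)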


Caused by: java.lang.IllegalArgumentException: java.net.UnknownHostException: test1-hdfs
    at org.apache.hadoop.security.SecurityUtil.buildTokenService(SecurityUtil.java:378)
    at org.apache.hadoop.hdfs.NameNodeProxies.createNonHAProxy(NameNodeProxies.java:310)
    at org.apache.hadoop.hdfs.NameNodeProxies.createProxy(NameNodeProxies.java:176)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:678)
    at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:619)
    at org.apache.hadoop.hdfs.DistributedFileSystem.initialize(DistributedFileSystem.java:149)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.initialize(LineRecordReader.java:84)
    at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.<init>(HadoopFileLinesReader.scala:65)
    at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anonfun$readToUnsafeMem$1.apply(TextFileFormat.scala:119)
    at org.apache.spark.sql.execution.datasources.text.TextFileFormat$$anonfun$readToUnsafeMem$1.apply(TextFileFormat.scala:116)
    at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:148)
    at org.apache.spark.sql.execution.datasources.FileFormat$$anon$1.apply(FileFormat.scala:132)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:124)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:177)
    at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:101)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
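The same properties can also be supplied once, cluster-wide, through Spark's documented spark.hadoop.* property prefix, which is copied into the Hadoop Configuration seen by the driver and every executor at startup. A sketch, with the app name purely illustrative:

import org.apache.spark.sql.SparkSession

// Sketch: any "spark.hadoop.foo" property becomes "foo" in the Hadoop
// Configuration used on the driver and all executors.
val spark = SparkSession.builder()
  .appName("two-hdfs-clusters") // illustrative name
  .config("spark.hadoop.dfs.nameservices", "test1-hdfs")
  .config("spark.hadoop.dfs.ha.namenodes.test1-hdfs", "nn1,nn2")
  .config("spark.hadoop.dfs.namenode.rpc-address.test1-hdfs.nn1", "test1-hdfs-nn1.com:9820")
  .config("spark.hadoop.dfs.namenode.rpc-address.test1-hdfs.nn2", "test1-hdfs-nn2.com:9820")
  .config("spark.hadoop.dfs.client.failover.proxy.provider.test1-hdfs",
    "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
  .getOrCreate()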

0 Answers:

No answers yet.