我一直在尝试从couchbase读取数据,但由于身份验证问题而无法读取数据。
import com.couchbase.client.java.document.JsonDocument
import org.apache.spark.sql.SparkSession
import com.couchbase.spark._
object SparkRead {
def main(args: Array[String]): Unit = {
// The SparkSession is the main entry point into spark
val spark = SparkSession
.builder()
.appName("KeyValueExample")
.master("local[*]") // use the JVM as the master, great for testing
.config("spark.couchbase.nodes", "***********") // connect to couchbase on hostname
.config("spark.couchbase.bucket.beer-sample","") // open the travel-sample bucket with empty password
.config("spark.couchbase.username", "couchdb")
.config("spark.couchbase.password", "******")
.config("spark.couchbase.connectTimeout","30000")
.config("spark.couchbase.kvTimeout","10000")
.config("spark.couchbase.socketConnect","10000")
.getOrCreate()
spark.sparkContext
.couchbaseGet[com.couchbase.client.java.document.JsonDocument](Seq("airline_10123")) // Load documents from couchbase
.collect() // collect all data from the spark workers
.foreach(println) // print each document content
}
}
以下是构建文件
name := "KafkaSparkCouchReadWrite"
organization := "my.clairvoyant"
version := "1.0.0-SNAPSHOT"
scalaVersion := "2.11.11"
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "2.1.0",
"org.apache.spark" %% "spark-streaming" % "2.1.0",
"org.apache.spark" %% "spark-sql" % "2.1.0",
"org.apache.spark" % "spark-streaming-kafka-0-10_2.11" % "2.2.0",
"com.couchbase.client" %% "spark-connector" % "2.1.0",
"org.glassfish.hk2" % "hk2-utils" % "2.2.0-b27",
"org.glassfish.hk2" % "hk2-locator" % "2.2.0-b27",
"javax.validation" % "validation-api" % "1.1.0.Final",
"org.apache.kafka" %% "kafka" % "0.11.0.0",
"com.googlecode.json-simple" % "json-simple" % "1.1").map(_.excludeAll(ExclusionRule("org.glassfish.hk2"),ExclusionRule("javax.validation")))
错误日志
17/12/12 15:18:35 INFO BlockManager:初始化BlockManager:BlockManagerId(驱动程序,192.168.33.220,52402,无) 17/12/12 15:18:35 INFO SharedState:仓库路径是'文件:/ Users / sampat / Desktop / GitClairvoyant / cpdl3-poc / KafkaSparkCouchReadWrite / spark-warehouse /'。 17/12/12 15:18:35 INFO CouchbaseCore:CouchbaseEnvironment:{sslEnabled = false,sslKeystoreFile =' null',sslKeystorePassword = false,sslKeystore = null,bootstrapHttpEnabled = true,bootstrapCarrierEnabled = true,bootstrapHttpDirectPort = 8091 ,bootstrapHttpSslPort = 18091,bootstrapCarrierDirectPort = 11210,bootstrapCarrierSslPort = 11207,ioPoolSize = 8,computationPuffSize = 8,responseBufferSize = 16384,requestBufferSize = 16384,kvServiceEndpoints = 1,viewServiceEndpoints = 12,queryServiceEndpoints = 12,searchServiceEndpoints = 12,ioPool = NioEventLoopGroup,kvIoPool = null,viewIoPool = null,searchIoPool = null,queryIoPool = null,coreScheduler = CoreScheduler,memcachedHashingStrategy = DefaultMemcachedHashingStrategy,eventBus = DefaultEventBus,packageNameAndVersion = couchbase-java-client / 2.4.2(git:2.4.2,core:1.4.2) ),dcpEnabled = false,retryStrategy = BestEffort,maxRequestLifetime = 75000,retryDelay = ExponentialDelay {growBy 1.0 MICROSECONDS,2的幂; lower = 100,upper = 100000},reconnectDelay = ExponentialDelay {growBy 1.0 MILLISECONDS,2的幂; lower = 32,upper = 4096},observeIntervalDelay = ExponentialDelay {growBy 1.0 MICROSECONDS,2的幂; lower = 10,upper = 100000},keepAliveInterval = 30000,autoreleaseAfter = 2000,bufferPoolingEnabled = true,tcpNodelayEnabled = true,mutationTokensEnabled = false,socketConnectTimeout = 1000,dcpConnectionBufferSize = 20971520,dcpConnectionBufferAckThreshold = 0.2,dcpConnectionName = dcp / core-io,callbacksOnIoPool = false,disconnectTimeout = 25000,requestBufferWaitStrategy = com.couchbase.client.core.env.DefaultCoreEnvironment$2 @7b7b3edb,queryTimeout = 75000,viewTimeout = 75000,kvTimeout = 2500,connectTimeout = 5000,dnsSrvEnabled = false} 17/12/12 15:18:37 WARN端点:[null] [KeyValueEndpoint]:身份验证失败。 17/12/12 15:18:37 INFO端点:[null] [KeyValueEndpoint]:从Channel通知为非活动状态,尝试重新连接。 17/12/12 15:18:37 WARN ResponseStatusConverter:带有协议HTTP的未知ResponseStatus:401 17/12/12 15:18:37 WARN ResponseStatusConverter:带有协议HTTP的未知ResponseStatus:401 线程" main"中的例外情况com.couchbase.client.java.error.InvalidPasswordException:存储桶的密码" beer-sample"不符合。 在com.couchbase.client.java.CouchbaseAsyncCluster $ OpenBucketErrorHandler.call(CouchbaseAsyncCluster.java:601) 在com.couchbase.client.java.CouchbaseAsyncCluster $ OpenBucketErrorHandler.call(CouchbaseAsyncCluster.java:584) at rx.internal.operators.OperatorOnErrorResumeNextViaFunction $ 4.onError(OperatorOnErrorResumeNextViaFunction.java:140) at rx.internal.operators.OnSubscribeMap $ MapSubscriber.onError(OnSubscribeMap.java:88)
答案 0 :(得分:0)
示例代码:
import com.couchbase.client.java.document.JsonDocument
import com.couchbase.spark._
import org.apache.spark.sql.SparkSession
object SparkReadCouchBase {
def main(args: Array[String]): Unit = {
val spark = SparkSession
.builder()
.appName("KeyValueExample")
.master("local[*]") // use the JVM as the master, great for testing
.config("spark.couchbase.nodes", "127.0.0.1") // connect to couchbase on hostname
.config("spark.couchbase.bucket.travel-sample","") // open the travel-sample bucket with empty password
.config("com.couchbase.username", "*******")
.config("com.couchbase.password", "*******")
.config("com.couchbase.kvTimeout","10000")
.config("com.couchbase.connectTimeout","30000")
.config("com.couchbase.socketConnect","10000")
.getOrCreate()
println("=====================================================================================")
spark.sparkContext
.couchbaseGet[JsonDocument](Seq("airline_10123")) // Load documents from couchbase
.collect() // collect all data from the spark workers
.foreach(println) // print each document content
println("=====================================================================================")
}
}
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.*******.*****</groupId>
<artifactId>KafkaSparkCouch</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<java.version>1.8</java.version>
<spark.version>2.2.0</spark.version>
<scala.version>2.11.8</scala.version>
<scala.parent.version>2.11</scala.parent.version>
<kafka.client.version>0.11.0.0</kafka.client.version>
<fat.jar.name>SparkCouch</fat.jar.name>
<scala.binary.version>2.11</scala.binary.version>
<main.class>com.*******.demo.spark.couchbase.SparkReadCouchBaseTest</main.class>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.parent.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming_${scala.parent.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.parent.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.parent.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>com.couchbase.client</groupId>
<artifactId>spark-connector_${scala.parent.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka.client.version}</version>
</dependency>
<dependency>
<groupId>com.googlecode.json-simple</groupId>
<artifactId>json-simple</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-library</artifactId>
<version>${scala.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
<!--Create fat-jar file-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.4</version>
<configuration>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<phase>compile</phase>
</execution>
<execution>
<id>test-compile</id>
<goals>
<goal>testCompile</goal>
</goals>
<phase>test-compile</phase>
</execution>
<execution>
<phase>process-resources</phase>
<goals>
<goal>compile</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
<version>3.1.6</version>
<configuration>
<scalaCompatVersion>${scala.binary.version}</scalaCompatVersion>
<scalaVersion>${scala.version}</scalaVersion>
</configuration>
<!-- other settings-->
</plugin>
</plugins>
</build>
<repositories>
<repository>
<id>mavencentral</id>
<url>http://repo1.maven.org/maven2/</url>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
<repository>
<id>scala</id>
<name>Scala Tools</name>
<url>http://scala-tools.org/repo-releases/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>scala</id>
<name>Scala Tools</name>
<url>http://scala-tools.org/repo-releases/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</pluginRepository>
</pluginRepositories>
<name>KafkaSparkCouch</name>
答案 1 :(得分:0)
您需要将以下沙发床配置设置为系统属性:
System.setProperty("com.couchbase.connectTimeout", "30000");
System.setProperty("com.couchbase.kvTimeout", "10000");
System.setProperty("com.couchbase.socketConnect", "10000");