将Apache Spark与couchbase连接起来

时间:2016-10-17 09:56:41

标签: java maven apache-spark apache-spark-sql couchbase

我正在尝试将spark应用程序与Couchbase连接。为此,我正在应用以下代码。

// Fragment from the question: configure a SparkContext with Couchbase
// settings, build a JSON document of summary statistics, and upsert it.
double[] val=new double[3];
// NOTE(review): "com.couchbase.client.bucket" set to "password" looks wrong —
// the Spark connector expects "com.couchbase.bucket.<bucketName>" = "<password>";
// verify against the connector's configuration documentation.
SparkContext sc = new SparkContext(new SparkConf().setAppName("sql").setMaster("local").set("com.couchbase.nodes", "url").set("com.couchbase.client.bucket","password"));
        SQLContext sql = new SQLContext(sc);
    JsonObject content = JsonObject.create().put("mean", val[0]).put("median", val[1]).put("standardDeviation",
                    val[2]);
            JsonDocument doc=JsonDocument.create("docId", content);
// NOTE(review): `bucket` is never declared in this snippet — presumably a
// Couchbase Bucket opened elsewhere; the code as shown does not compile.
bucket.upsert(doc);

但我得到以下异常

Exception in thread "main" java.lang.NoClassDefFoundError: com/couchbase/client/java/document/json/JsonObject
        at com.cloudera.sparkwordcount.JavaWordCount.main(JavaWordCount.java:74)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:731)
        at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:181)
        at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:206)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:121)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.ClassNotFoundException: com.couchbase.client.java.document.json.JsonObject
        at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
        at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
        ... 10 more

我的maven依赖关系如下: -

    <!-- Dependencies as declared in the question. NOTE(review): java-client
         2.3.4 IS declared here, yet the stack trace shows NoClassDefFoundError
         at runtime under spark-submit — this points to a runtime classpath
         problem (the jar not shipped with the application, e.g. no shaded/fat
         jar and no "-\-packages"/"-\-jars" flag), not a missing compile-time
         dependency. TODO confirm how the job is submitted. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.6.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.10</artifactId>
      <version>1.6.1</version>
    </dependency>
    <dependency>
    <groupId>com.databricks</groupId>
    <artifactId>spark-csv_2.10</artifactId>
    <version>1.4.0</version>
</dependency>
<dependency>
    <groupId>com.couchbase.client</groupId>
    <artifactId>spark-connector_2.10</artifactId>
    <version>1.1.0</version>
</dependency>
   <dependency>
      <groupId>com.couchbase.client</groupId>
      <artifactId>java-client</artifactId>
      <version>2.3.4</version>
    </dependency>

请告诉我我遗漏了什么。

1 个答案:

答案 0 :(得分:2)

以下是使用Spark 1.6连接Couchbase所需的最低依赖关系

<!-- Minimal dependency set for Spark 1.6.x + the Couchbase Spark connector.
     The connector presumably pulls in the Couchbase java-client transitively,
     so no explicit java-client entry is needed — verify with `mvn dependency:tree`. -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_2.10</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_2.10</artifactId>
    <version>1.6.2</version>
</dependency>
<dependency>
    <groupId>com.couchbase.client</groupId>
    <artifactId>spark-connector_2.10</artifactId>
    <version>1.2.1</version>
</dependency>

以下示例程序演示了如何向 Couchbase 保存 JsonDocument 并将其读取回来。希望这会有所帮助。

import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import com.couchbase.client.java.document.JsonDocument;
import com.couchbase.client.java.document.json.JsonObject;
import com.couchbase.spark.japi.CouchbaseDocumentRDD;
import com.couchbase.spark.japi.CouchbaseSparkContext;

/**
 * Demo: write a single {@code JsonDocument} to Couchbase through the
 * Spark connector, then read it back by id and print it.
 */
public class CouchBaseDemo {
    public static void main(String[] args) {
        // Local Spark config wired to the "travel-sample" bucket (empty password).
        SparkConf sparkConf = new SparkConf()
                .setAppName("CouchBaseDemo")
                .setMaster("local")
                .set("com.couchbase.bucket.travel-sample", "");
        JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
        CouchbaseSparkContext couchbaseContext = CouchbaseSparkContext.couchbaseContext(sparkContext);

        // Build one JSON document and persist it via an RDD upsert.
        JsonObject content = JsonObject.create().put("new", "doc-content");
        JsonDocument docOne = JsonDocument.create("docOne", content);
        JavaRDD<JsonDocument> documents = sparkContext.parallelize(Arrays.asList(docOne));
        CouchbaseDocumentRDD.couchbaseDocumentRDD(documents).saveToCouchbase();

        // Fetch the same document back by id and dump it to stdout.
        List<JsonDocument> fetched = couchbaseContext.couchbaseGet(Arrays.asList("docOne")).collect();
        System.out.println(fetched);
    }
}