from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext, SparkSession
from pyspark.sql.types import *
from pyspark import SparkConf, SparkContext
import os
# Data source name of the Snowflake Spark connector. This is the class Spark
# failed to resolve in the reported ClassNotFoundException.
SNOWFLAKE_SOURCE_NAME = 'net.snowflake.spark.snowflake'

# Maven coordinates of the JDBC driver and the Spark connector. Without them on
# the classpath, `.load()` fails with
# "Failed to find data source: net.snowflake.spark.snowflake".
# NOTE(review): these versions target Spark 2.3 / Scala 2.11 -- pick the
# artifacts that match the Spark and Scala versions actually installed.
SNOWFLAKE_PACKAGES = ','.join([
    'net.snowflake:snowflake-jdbc:3.5.4',
    'net.snowflake:spark-snowflake_2.11:2.3.2',
])

# `spark.jars.packages` must be set before the JVM starts; if a session already
# exists, getOrCreate() returns it and this setting has no effect.
spark = (
    SparkSession.builder
    .master('local')
    .appName('test')
    .config('spark.driver.memory', '5G')
    .config('spark.jars.packages', SNOWFLAKE_PACKAGES)
    .getOrCreate()
)

# NOTE(review): `credentials` is not defined in this snippet. It has to be a
# mapping of connector options, because it is unpacked with `**` below.
sfOptions = credentials

# The original call was `.options(**)`, which is a syntax error; the option
# mapping has to be unpacked explicitly.
df = (
    spark.read.format(SNOWFLAKE_SOURCE_NAME)
    .options(**sfOptions)
    .option("query", "select * from xyz.xpy where year(ORDERDATE)=2018 limit 100")
    .load()
)
# verify
df.count()
# 然后我得到以下错误：
Py4JJavaError: An error occurred while calling o190.load.
: java.lang.ClassNotFoundException: Failed to find data source: net.snowflake.spark.snowflake. Please find packages at http://spark.apache.org/third-party-projects.html
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:657)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:194)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:167)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.base/java.lang.reflect.Method.invoke(Method.java:567)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.base/java.lang.Thread.run(Thread.java:830)
Caused by: java.lang.ClassNotFoundException: net.snowflake.spark.snowflake.DefaultSource
at java.base/java.net.URLClassLoader.findClass(URLClassLoader.java:436)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:588)
at java.base/java.lang.ClassLoader.loadClass(ClassLoader.java:521)
at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$20$$anonfun$apply$12.apply(DataSource.scala:634)
答案 0 :(得分:0)
尝试这样
步骤1：使用 Spark 2.3.0 和 Scala 2.11 创建集群。
步骤2：将 snowflake-jdbc-3.5.4.jar 附加到集群。 https://mvnrepository.com/artifact/net.snowflake/snowflake-jdbc/3.5.4
步骤3：将 spark-snowflake_2.11-2.3.2 连接器（驱动程序）附加到集群。 https://mvnrepository.com/artifact/net.snowflake/spark-snowflake_2.11/2.3.2
这是示例代码。
// Fold one term query per user id into a single query using the `|` operator.
// NOTE(review): NEST presumably overloads `|` on QueryBase as a logical OR that
// tolerates a null left operand -- confirm against the NEST version in use.
QueryBase query = null;
foreach (var userId in usersIds)
{
    TermQuery matchesUser = new TermQuery { Field = "userId", Value = userId };
    query = query | matchesUser;
}

// Sort on the "date" field in descending order.
List<ISort> byDateDescending = new List<ISort>
{
    new SortField { Field = "date", Order = SortOrder.Descending }
};

// Request starting at offset 0 and capped at 5 results.
// NOTE(review): the two constructor arguments look like index "users" and
// type "post" -- confirm.
SearchRequest searchRequest = new SearchRequest("users", "post")
{
    Query = new QueryContainer(query),
    From = 0,
    Size = 5,
    Sort = byDateDescending
};