我正在运行我的Spark代码,以便在安装了Spark 2.2.0的Amazon EMR 5.8.0中将数据保存到HBase。 在IntelliJ中运行,它工作正常,但在EMR集群中,它给我带来了这个错误:
java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame
代码
// Loads a ^A (0x01)-delimited CSV file and writes it into an existing Phoenix
// (HBase) table, using the target table's own schema for the output rows.
//
// Expected arguments: <zookeeperQuorum> <tableName> <inputPath>
require(args.length >= 3, "Usage: PhoenixSpark <zookeeperQuorum> <tableName> <inputPath>")
val zookeeperQuorum = args(0)
val tableName = args(1)
val inputPath = args(2)

val spark = SparkSession.builder
  .appName("PhoenixSpark")
  .getOrCreate

// try/finally guarantees the session is stopped even when a read or write fails.
try {
  // "\u0001" is the same character as the octal escape "\001", which is
  // deprecated in Scala 2.11 and rejected by later compilers.
  val df = spark.read
    .option("delimiter", "\u0001")
    .csv(inputPath)

  // Read the target table only to obtain its schema.
  val tableSchema = spark.read
    .format("org.apache.phoenix.spark")
    .option("table", tableName)
    .option("zkUrl", zookeeperQuorum)
    .load()
    .schema

  // Row key is "<_c3>_<_c5>_<_c0>".
  val rowKeyDf = df.withColumn(
    "row_key",
    concat(col("_c3"), lit("_"), col("_c5"), lit("_"), col("_c0")))

  // Put row_key first, followed by _c0 .. _c19, so that the positional order
  // lines up with tableSchema. A direct select replaces the temp view and the
  // hand-written SQL column list.
  val orderedColumns = col("row_key") +: (0 to 19).map(i => col(s"_c$i"))
  val correctedDf = rowKeyDf.select(orderedColumns: _*)

  // Re-wrap the rows with the Phoenix table's schema (column names and types).
  val finalDf = spark.createDataFrame(correctedDf.rdd, tableSchema)

  finalDf.write
    .format("org.apache.phoenix.spark")
    .mode("overwrite")
    .option("table", tableName)
    .option("zkUrl", zookeeperQuorum)
    .save()
} finally {
  spark.stop()
}
我的pom.xml正确地提到Spark版本为2.2.0
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the PhoenixSpark job: compiles Scala sources and packages
     a "jar-with-dependencies" assembly during the package phase. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.myntra.analytics</groupId>
    <artifactId>com.myntra.analytics</artifactId>
    <version>1.0-SNAPSHOT</version>
    <build>
        <plugins>
            <!-- Java sources are compiled for Java 8. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <!-- Compiles main and test Scala sources. -->
            <plugin>
                <groupId>org.scala-tools</groupId>
                <artifactId>maven-scala-plugin</artifactId>
                <executions>
                    <execution>
                        <goals>
                            <goal>compile</goal>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <!-- "package" command plugin -->
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.6</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <!-- Every dependency below uses "provided" scope: none of them is bundled
         into the assembly jar, so the cluster classpath must supply them at
         runtime. -->
    <dependencies>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <version>2.2.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <version>2.2.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-hive_2.11</artifactId>
            <version>2.2.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.phoenix</groupId>
            <artifactId>phoenix-spark</artifactId>
            <version>4.11.0-HBase-1.3</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
    <pluginRepositories>
        <pluginRepository>
            <id>scala-tools.org</id>
            <name>Scala-tools Maven2 Repository</name>
            <url>http://scala-tools.org/repo-releases</url>
        </pluginRepository>
    </pluginRepositories>
</project>
以下是来自EMR日志的堆栈跟踪,它显示了此错误。
17/09/28 23:20:18 ERROR ApplicationMaster: User class threw exception:
java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame
java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.getDeclaredMethod(Class.java:2128)
at java.io.ObjectStreamClass.getPrivateMethod(ObjectStreamClass.java:1475)
at java.io.ObjectStreamClass.access$1700(ObjectStreamClass.java:72)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:498)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:472)
at java.security.AccessController.doPrivileged(Native Method)
at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:472)
at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:369)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1134)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:43)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2287)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:370)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.map(RDD.scala:369)
at org.apache.phoenix.spark.PhoenixRDD.toDataFrame(PhoenixRDD.scala:131)
at org.apache.phoenix.spark.PhoenixRelation.schema(PhoenixRelation.scala:60)
at org.apache.spark.sql.execution.datasources.LogicalRelation$.apply(LogicalRelation.scala:77)
at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:415)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:172)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:146)
at com.mynra.analytics.chronicles.PhoenixSpark$.main(PhoenixSpark.scala:29)
at com.mynra.analytics.chronicles.PhoenixSpark.main(PhoenixSpark.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:635)
Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.DataFrame
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 41 more
答案 0 :(得分:2)
我们在Hortonworks HDP 2.6.3上遇到了同样的问题。原因似乎是org.apache.phoenix.spark类的类路径冲突。在HDP中,此软件包的Spark 1.6版本包含在phoenix-client.jar中。你需要通过在前面放置特定于Spark2的插件phoenix-spark2.jar 来覆盖它:
# phoenix-spark2.jar is listed before phoenix-client.jar on both the driver and
# executor classpaths so that its org.apache.phoenix.spark classes take precedence.
/usr/hdp/current/spark2-client/bin/spark-submit \
  --master yarn --deploy-mode client \
  --num-executors 2 --executor-cores 2 --driver-memory 3g --executor-memory 3g \
  --conf "spark.driver.extraClassPath=/usr/hdp/current/phoenix-client/phoenix-spark2.jar:/usr/hdp/current/phoenix-client/phoenix-client.jar:/etc/hbase/conf" \
  --conf "spark.executor.extraClassPath=/usr/hdp/current/phoenix-client/phoenix-spark2.jar:/usr/hdp/current/phoenix-client/phoenix-client.jar:/etc/hbase/conf" \
  --class com.example.test phoenix_test.jar
答案 1 :(得分:1)
我遇到了同样的问题,甚至在没有任何自定义代码的情况下,仅用spark-shell也能复现。经过一番折腾,我认为这是EMR 5.8(以及5.9)自带的Phoenix jar的问题。我不知道为什么他们的Phoenix客户端jar中会有对org.apache.spark.sql.DataFrame类的引用,因为在Spark 2.0.0中它已被改为Dataset[Row]的类型别名。(尤其是他们的Phoenix jar声称是4.11版本,本应已修复此问题。)
下面是对我有效的修复方法。我猜你也可以调整自己的构建,不使用集群提供的Phoenix,而是改用本地版本。
我做了什么来解决它:
I。我将本地的Phoenix客户端jar复制到了S3(我用的是4.10版本)。
II。我写了一个简单的安装shell脚本,并把它放在S3上:
aws s3 cp s3://<YOUR_BUCKET_GOES_HERE>/phoenix-4.10.0-HBase-1.2-client.jar /home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar
III。我创建了一个只是从第2步运行shell脚本的引导操作。
IV。我创建了一个JSON文件,将这个jar放入spark-default的执行器和驱动程序类路径中,并将它放在S3中:
[
{
"Classification": "spark-defaults",
"Properties": {
"spark.executor.extraClassPath": "/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*",
"spark.driver.extraClassPath": "/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*"
}
}
]
之后我启动了集群并打开了spark-shell。下面是它的输出,包括显示我所用jar的详细类路径信息,以及一个成功加载的DataFrame。
[hadoop@ip-10-128-7-183 ~]$ spark-shell -v
Using properties file: /usr/lib/spark/conf/spark-defaults.conf
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/lib/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
Adding default property: spark.sql.warehouse.dir=hdfs:///user/spark/warehouse
Adding default property: spark.executor.extraJavaOptions=-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
Adding default property: spark.history.fs.logDirectory=hdfs:///var/log/spark/apps
Adding default property: spark.eventLog.enabled=true
Adding default property: spark.shuffle.service.enabled=true
Adding default property: spark.driver.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
Adding default property: spark.yarn.historyServer.address=ip-10-128-7-183.columbuschildrens.net:18080
Adding default property: spark.stage.attempt.ignoreOnDecommissionFetchFailure=true
Adding default property: spark.resourceManager.cleanupExpiredHost=true
Adding default property: spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS=$(hostname -f)
Adding default property: spark.driver.extraJavaOptions=-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
Adding default property: spark.master=yarn
Adding default property: spark.blacklist.decommissioning.timeout=1h
Adding default property: spark.executor.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
Adding default property: spark.sql.hive.metastore.sharedPrefixes=com.amazonaws.services.dynamodbv2
Adding default property: spark.executor.memory=6144M
Adding default property: spark.driver.extraClassPath=/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
Adding default property: spark.eventLog.dir=hdfs:///var/log/spark/apps
Adding default property: spark.dynamicAllocation.enabled=true
Adding default property: spark.executor.extraClassPath=/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
Adding default property: spark.executor.cores=1
Adding default property: spark.history.ui.port=18080
Adding default property: spark.blacklist.decommissioning.enabled=true
Adding default property: spark.hadoop.yarn.timeline-service.enabled=false
Parsed arguments:
master yarn
deployMode null
executorMemory 6144M
executorCores 1
totalExecutorCores null
propertiesFile /usr/lib/spark/conf/spark-defaults.conf
driverMemory null
driverCores null
driverExtraClassPath /home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
driverExtraLibraryPath /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
driverExtraJavaOptions -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
supervise false
queue null
numExecutors null
files null
pyFiles null
archives null
mainClass org.apache.spark.repl.Main
primaryResource spark-shell
name Spark shell
childArgs []
jars null
packages null
packagesExclusions null
repositories null
verbose true
Spark properties used, including those specified through
--conf and those from the properties file /usr/lib/spark/conf/spark-defaults.conf:
(spark.blacklist.decommissioning.timeout,1h)
(spark.blacklist.decommissioning.enabled,true)
(spark.executor.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
(spark.hadoop.yarn.timeline-service.enabled,false)
(spark.executor.memory,6144M)
(spark.sql.warehouse.dir,hdfs:///user/spark/warehouse)
(spark.driver.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
(spark.yarn.historyServer.address,ip-10-128-7-183.columbuschildrens.net:18080)
(spark.eventLog.enabled,true)
(spark.history.ui.port,18080)
(spark.stage.attempt.ignoreOnDecommissionFetchFailure,true)
(spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS,$(hostname -f))
(spark.executor.extraJavaOptions,-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
(spark.resourceManager.cleanupExpiredHost,true)
(spark.shuffle.service.enabled,true)
(spark.history.fs.logDirectory,hdfs:///var/log/spark/apps)
(spark.driver.extraJavaOptions,-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
(spark.sql.hive.metastore.sharedPrefixes,com.amazonaws.services.dynamodbv2)
(spark.eventLog.dir,hdfs:///var/log/spark/apps)
(spark.executor.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
(spark.master,yarn)
(spark.dynamicAllocation.enabled,true)
(spark.executor.cores,1)
(spark.driver.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
Main class:
org.apache.spark.repl.Main
Arguments:
System properties:
(spark.blacklist.decommissioning.timeout,1h)
(spark.executor.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
(spark.blacklist.decommissioning.enabled,true)
(spark.hadoop.yarn.timeline-service.enabled,false)
(spark.executor.memory,6144M)
(spark.driver.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
(spark.sql.warehouse.dir,hdfs:///user/spark/warehouse)
(spark.yarn.historyServer.address,ip-10-128-7-183.columbuschildrens.net:18080)
(spark.eventLog.enabled,true)
(spark.history.ui.port,18080)
(spark.stage.attempt.ignoreOnDecommissionFetchFailure,true)
(spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS,$(hostname -f))
(SPARK_SUBMIT,true)
(spark.executor.extraJavaOptions,-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
(spark.app.name,Spark shell)
(spark.resourceManager.cleanupExpiredHost,true)
(spark.shuffle.service.enabled,true)
(spark.history.fs.logDirectory,hdfs:///var/log/spark/apps)
(spark.driver.extraJavaOptions,-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
(spark.jars,)
(spark.submit.deployMode,client)
(spark.executor.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
(spark.eventLog.dir,hdfs:///var/log/spark/apps)
(spark.sql.hive.metastore.sharedPrefixes,com.amazonaws.services.dynamodbv2)
(spark.master,yarn)
(spark.dynamicAllocation.enabled,true)
(spark.executor.cores,1)
(spark.driver.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
Classpath elements:
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/10/11 13:36:05 WARN yarn.Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
17/10/11 13:36:23 WARN metastore.ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
17/10/11 13:36:23 WARN metastore.ObjectStore: Failed to get database default, returning NoSuchObjectException
17/10/11 13:36:23 WARN metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
Spark context Web UI available at http://ip-10-128-7-183.columbuschildrens.net:4040
Spark context available as 'sc' (master = yarn, app id = application_1507728658269_0001).
Spark session available as 'spark'.
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 2.2.0
/_/
Using Scala version 2.11.8 (OpenJDK 64-Bit Server VM, Java 1.8.0_141)
Type in expressions to have them evaluated.
Type :help for more information.
scala> :paste
// Entering paste mode (ctrl-D to finish)
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.phoenix.spark._
import org.apache.spark.sql.DataFrame
var sqlContext = new SQLContext(sc);
val phoenixHost = "10.128.7.183:2181"
// Exiting paste mode, now interpreting.
warning: there was one deprecation warning; re-run with -deprecation for details
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.phoenix.spark._
import org.apache.spark.sql.DataFrame
sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@258ff54a
phoenixHost: String = 10.128.7.183:2181
scala> val variant_hg19_df = sqlContext.load("org.apache.phoenix.spark", Map("table" -> "VARIANT_ANNOTATION_HG19", "zkUrl" -> phoenixHost))
warning: there was one deprecation warning; re-run with -deprecation for details
variant_hg19_df: org.apache.spark.sql.DataFrame = [CHROMOSOME_ID: int, POSITION: int ... 36 more fields]