java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame

Asked: 2017-09-28 23:31:29

Tags: scala apache-spark hbase emr phoenix

I am running my Spark code to save data to HBase on Amazon EMR 5.8.0, which comes with Spark 2.2.0 installed. It works fine when run from IntelliJ, but on the EMR cluster it throws this error:


java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame

Code:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, concat, lit}

val zookeeperQuorum = args(0)
val tableName = args(1)
val inputPath = args(2)
val spark = SparkSession.builder
  .appName("PhoenixSpark")
  .getOrCreate

val df = spark.read
  .option("delimiter", "\001")
  .csv(inputPath)

val hBaseDf = spark.read
  .format("org.apache.phoenix.spark")
  .option("table", tableName)
  .option("zkUrl", zookeeperQuorum)
  .load()

val tableSchema = hBaseDf.schema
val rowKeyDf = df.withColumn("row_key", concat(col("_c3"), lit("_"), col("_c5"), lit("_"), col("_c0")))
rowKeyDf.createOrReplaceTempView("mytable")

val correctedDf = spark.sql("Select row_key, _c0, _c1, _c2, _c3, _c4, _c5, _c6, _c7," +
  "_c8, _c9, _c10, _c11, _c12, _c13, _c14, _c15, _c16, _c17, _c18, _c19 from mytable")
val rdd = correctedDf.rdd
val finalDf = spark.createDataFrame(rdd, tableSchema)

finalDf.write
  .format("org.apache.phoenix.spark")
  .mode("overwrite")
  .option("table", tableName)
  .option("zkUrl", zookeeperQuorum)
  .save()

spark.stop()

My pom.xml correctly specifies the Spark version as 2.2.0:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>org.myntra.analytics</groupId>
<artifactId>com.myntra.analytics</artifactId>
<version>1.0-SNAPSHOT</version>
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
                <source>1.8</source>
                <target>1.8</target>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.scala-tools</groupId>
            <artifactId>maven-scala-plugin</artifactId>
            <executions>
                <execution>
                    <goals>
                        <goal>compile</goal>
                        <goal>testCompile</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
        <!-- "package" command plugin -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

<dependencies>
    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-core_2.11</artifactId>
        <version>2.2.0</version>
        <scope>provided</scope>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-sql_2.11</artifactId>
        <version>2.2.0</version>
        <scope>provided</scope>
    </dependency>

    <dependency>
        <groupId>org.apache.spark</groupId>
        <artifactId>spark-hive_2.11</artifactId>
        <version>2.2.0</version>
        <scope>provided</scope>
    </dependency>

    <dependency>
        <groupId>org.apache.phoenix</groupId>
        <artifactId>phoenix-spark</artifactId>
        <version>4.11.0-HBase-1.3</version>
        <scope>provided</scope>
    </dependency>

</dependencies>

<pluginRepositories>
    <pluginRepository>
        <id>scala-tools.org</id>
        <name>Scala-tools Maven2 Repository</name>
        <url>http://scala-tools.org/repo-releases</url>
    </pluginRepository>
</pluginRepositories>
</project>

Below is the stack trace from the EMR logs showing this error:

17/09/28 23:20:18 ERROR ApplicationMaster: User class threw exception: 
java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame
java.lang.NoClassDefFoundError: org/apache/spark/sql/DataFrame
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.getDeclaredMethod(Class.java:2128)
at java.io.ObjectStreamClass.getPrivateMethod(ObjectStreamClass.java:1475)
at java.io.ObjectStreamClass.access$1700(ObjectStreamClass.java:72)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:498)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:472)
at java.security.AccessController.doPrivileged(Native Method)
at java.io.ObjectStreamClass.<init>(ObjectStreamClass.java:472)
at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:369)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1134)
at java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548)
at java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509)
at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:43)
at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:100)
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:295)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2287)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:370)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.map(RDD.scala:369)
at org.apache.phoenix.spark.PhoenixRDD.toDataFrame(PhoenixRDD.scala:131)
at org.apache.phoenix.spark.PhoenixRelation.schema(PhoenixRelation.scala:60)
at org.apache.spark.sql.execution.datasources.LogicalRelation$.apply(LogicalRelation.scala:77)
at org.apache.spark.sql.SparkSession.baseRelationToDataFrame(SparkSession.scala:415)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:172)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:146)
at com.mynra.analytics.chronicles.PhoenixSpark$.main(PhoenixSpark.scala:29)
at com.mynra.analytics.chronicles.PhoenixSpark.main(PhoenixSpark.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.yarn.ApplicationMaster$$anon$2.run(ApplicationMaster.scala:635)

Caused by: java.lang.ClassNotFoundException: org.apache.spark.sql.DataFrame
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 41 more

2 Answers:

Answer 0 (score: 2):

We hit the same problem on Hortonworks HDP 2.6.3. The cause appears to be a classpath conflict on the org.apache.phoenix.spark classes: in HDP, the Spark 1.6 build of this package is bundled inside phoenix-client.jar. You need to override it by placing the Spark2-specific plugin, phoenix-spark2.jar, in front of it on the classpath:

/usr/hdp/current/spark2-client/bin/spark-submit --master yarn-client --num-executors 2 --executor-cores 2 --driver-memory 3g --executor-memory 3g \
--conf "spark.driver.extraClassPath=/usr/hdp/current/phoenix-client/phoenix-spark2.jar:/usr/hdp/current/phoenix-client/phoenix-client.jar:/etc/hbase/conf" --conf "spark.executor.extraClassPath=/usr/hdp/current/phoenix-client/phoenix-spark2.jar:/usr/hdp/current/phoenix-client/phoenix-client.jar:/etc/hbase/conf" \
--class com.example.test phoenix_test.jar

Answer 1 (score: 1):

I ran into the same problem, and even saw it in spark-shell without any custom code. After some digging, I believe it's an issue with the Phoenix jar that ships with EMR 5.8 (and 5.9). I don't know why their Phoenix client jar still seems to carry a class reference to org.apache.spark.sql.DataFrame, since that class was changed to a type alias for Dataset[Row] in Spark 2.0.0. (Especially since their Phoenix jar claims to be 4.11, which should have this fixed.)
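To illustrate the alias point, here is a minimal sketch (not from the original answer; the object name is just for illustration, run in Spark 2.x local mode). DataFrame and Dataset[Row] are the same type at compile time, which is exactly why a jar compiled against Spark 1.x's real DataFrame class fails at runtime with NoClassDefFoundError on a Spark 2.x classpath:

    // In Spark 2.x, DataFrame exists only as a type alias in the
    // org.apache.spark.sql package object (roughly: type DataFrame = Dataset[Row]),
    // so no org/apache/spark/sql/DataFrame.class is shipped anymore.
    import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}

    object DataFrameAliasDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder
          .appName("DataFrameAliasDemo")
          .master("local[*]")
          .getOrCreate()
        import spark.implicits._

        val df: DataFrame = Seq((1, "a"), (2, "b")).toDF("id", "value")
        // Compiles because DataFrame and Dataset[Row] are the same type in Spark 2.x;
        // a library compiled against Spark 1.x expects a real DataFrame class instead.
        val ds: Dataset[Row] = df
        println(ds.schema.treeString)

        spark.stop()
      }
    }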

Below is what fixed it for me. I suspect you could do the same with your version, supplying a local Phoenix client jar instead of the one provided on the cluster.

What I did to fix it:

I. I copied my local Phoenix client jar to S3 (I had a 4.10 version).

II. I wrote a simple install shell script and put it on S3:

aws s3 cp s3://<YOUR_BUCKET_GOES_HERE>/phoenix-4.10.0-HBase-1.2-client.jar /home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar

III. I created a bootstrap action that simply runs the shell script from step II.

IV. I created a JSON file that adds this jar to the spark-defaults executor and driver classpaths, and placed it in S3:

[
    {
      "Classification": "spark-defaults",
      "Properties": {
          "spark.executor.extraClassPath": "/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*",
              "spark.driver.extraClassPath": "/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*"
      }
    }
]
V. When creating the cluster, I referenced the full S3 path of my JSON file in the "Edit software settings (optional)" -> "Load JSON from S3" section of the AWS console.
VI. I then launched the cluster and started a spark-shell. Below you can see its output, including the verbose classpath information showing the jar being picked up, and a DataFrame loading successfully.

    [hadoop@ip-10-128-7-183 ~]$ spark-shell -v
    Using properties file: /usr/lib/spark/conf/spark-defaults.conf
    SLF4J: Class path contains multiple SLF4J bindings.
    SLF4J: Found binding in [jar:file:/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: Found binding in [jar:file:/usr/lib/spark/jars/slf4j-log4j12-1.7.16.jar!/org/slf4j/impl/StaticLoggerBinder.class]
    SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
    Adding default property: spark.sql.warehouse.dir=hdfs:///user/spark/warehouse
    Adding default property: spark.executor.extraJavaOptions=-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
    Adding default property: spark.history.fs.logDirectory=hdfs:///var/log/spark/apps
    Adding default property: spark.eventLog.enabled=true
    Adding default property: spark.shuffle.service.enabled=true
    Adding default property: spark.driver.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
    Adding default property: spark.yarn.historyServer.address=ip-10-128-7-183.columbuschildrens.net:18080
    Adding default property: spark.stage.attempt.ignoreOnDecommissionFetchFailure=true
    Adding default property: spark.resourceManager.cleanupExpiredHost=true
    Adding default property: spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS=$(hostname -f)
    Adding default property: spark.driver.extraJavaOptions=-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
    Adding default property: spark.master=yarn
    Adding default property: spark.blacklist.decommissioning.timeout=1h
    Adding default property: spark.executor.extraLibraryPath=/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
    Adding default property: spark.sql.hive.metastore.sharedPrefixes=com.amazonaws.services.dynamodbv2
    Adding default property: spark.executor.memory=6144M
    Adding default property: spark.driver.extraClassPath=/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
    Adding default property: spark.eventLog.dir=hdfs:///var/log/spark/apps
    Adding default property: spark.dynamicAllocation.enabled=true
    Adding default property: spark.executor.extraClassPath=/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
    Adding default property: spark.executor.cores=1
    Adding default property: spark.history.ui.port=18080
    Adding default property: spark.blacklist.decommissioning.enabled=true
    Adding default property: spark.hadoop.yarn.timeline-service.enabled=false
    Parsed arguments:
      master                  yarn
      deployMode              null
      executorMemory          6144M
      executorCores           1
      totalExecutorCores      null
      propertiesFile          /usr/lib/spark/conf/spark-defaults.conf
      driverMemory            null
      driverCores             null
      driverExtraClassPath    /home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*
      driverExtraLibraryPath  /usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native
      driverExtraJavaOptions  -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p'
      supervise               false
      queue                   null
      numExecutors            null
      files                   null
      pyFiles                 null
      archives                null
      mainClass               org.apache.spark.repl.Main
      primaryResource         spark-shell
      name                    Spark shell
      childArgs               []
      jars                    null
      packages                null
      packagesExclusions      null
      repositories            null
      verbose                 true
    
    Spark properties used, including those specified through
     --conf and those from the properties file /usr/lib/spark/conf/spark-defaults.conf:
      (spark.blacklist.decommissioning.timeout,1h)
      (spark.blacklist.decommissioning.enabled,true)
      (spark.executor.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
      (spark.hadoop.yarn.timeline-service.enabled,false)
      (spark.executor.memory,6144M)
      (spark.sql.warehouse.dir,hdfs:///user/spark/warehouse)
      (spark.driver.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
      (spark.yarn.historyServer.address,ip-10-128-7-183.columbuschildrens.net:18080)
      (spark.eventLog.enabled,true)
      (spark.history.ui.port,18080)
      (spark.stage.attempt.ignoreOnDecommissionFetchFailure,true)
      (spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS,$(hostname -f))
      (spark.executor.extraJavaOptions,-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
      (spark.resourceManager.cleanupExpiredHost,true)
      (spark.shuffle.service.enabled,true)
      (spark.history.fs.logDirectory,hdfs:///var/log/spark/apps)
      (spark.driver.extraJavaOptions,-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
      (spark.sql.hive.metastore.sharedPrefixes,com.amazonaws.services.dynamodbv2)
      (spark.eventLog.dir,hdfs:///var/log/spark/apps)
      (spark.executor.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
      (spark.master,yarn)
      (spark.dynamicAllocation.enabled,true)
      (spark.executor.cores,1)
      (spark.driver.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
    
    
    Main class:
    org.apache.spark.repl.Main
    Arguments:
    
    System properties:
    (spark.blacklist.decommissioning.timeout,1h)
    (spark.executor.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
    (spark.blacklist.decommissioning.enabled,true)
    (spark.hadoop.yarn.timeline-service.enabled,false)
    (spark.executor.memory,6144M)
    (spark.driver.extraLibraryPath,/usr/lib/hadoop/lib/native:/usr/lib/hadoop-lzo/lib/native)
    (spark.sql.warehouse.dir,hdfs:///user/spark/warehouse)
    (spark.yarn.historyServer.address,ip-10-128-7-183.columbuschildrens.net:18080)
    (spark.eventLog.enabled,true)
    (spark.history.ui.port,18080)
    (spark.stage.attempt.ignoreOnDecommissionFetchFailure,true)
    (spark.yarn.appMasterEnv.SPARK_PUBLIC_DNS,$(hostname -f))
    (SPARK_SUBMIT,true)
    (spark.executor.extraJavaOptions,-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
    (spark.app.name,Spark shell)
    (spark.resourceManager.cleanupExpiredHost,true)
    (spark.shuffle.service.enabled,true)
    (spark.history.fs.logDirectory,hdfs:///var/log/spark/apps)
    (spark.driver.extraJavaOptions,-XX:+UseConcMarkSweepGC -XX:CMSInitiatingOccupancyFraction=70 -XX:MaxHeapFreeRatio=70 -XX:+CMSClassUnloadingEnabled -XX:OnOutOfMemoryError='kill -9 %p')
    (spark.jars,)
    (spark.submit.deployMode,client)
    (spark.executor.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
    (spark.eventLog.dir,hdfs:///var/log/spark/apps)
    (spark.sql.hive.metastore.sharedPrefixes,com.amazonaws.services.dynamodbv2)
    (spark.master,yarn)
    (spark.dynamicAllocation.enabled,true)
    (spark.executor.cores,1)
    (spark.driver.extraClassPath,/home/hadoop/phoenix-4.10.0-HBase-1.2-client.jar:/etc/hadoop/conf:/etc/hive/conf:/usr/lib/hadoop-lzo/lib/*:/usr/share/aws/aws-java-sdk/*:/usr/share/aws/emr/emrfs/conf:/usr/share/aws/emr/emrfs/lib/*:/usr/share/aws/emr/emrfs/auxlib/*)
    Classpath elements:
    
    
    
    Setting default log level to "WARN".
    To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
    17/10/11 13:36:05 WARN yarn.Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.
    17/10/11 13:36:23 WARN metastore.ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
    17/10/11 13:36:23 WARN metastore.ObjectStore: Failed to get database default, returning NoSuchObjectException
    17/10/11 13:36:23 WARN metastore.ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
    Spark context Web UI available at http://ip-10-128-7-183.columbuschildrens.net:4040
    Spark context available as 'sc' (master = yarn, app id = application_1507728658269_0001).
    Spark session available as 'spark'.
    Welcome to
          ____              __
         / __/__  ___ _____/ /__
        _\ \/ _ \/ _ `/ __/  '_/
       /___/ .__/\_,_/_/ /_/\_\   version 2.2.0
          /_/
    
    Using Scala version 2.11.8 (OpenJDK 64-Bit Server VM, Java 1.8.0_141)
    Type in expressions to have them evaluated.
    Type :help for more information.
    
    scala> :paste
    // Entering paste mode (ctrl-D to finish)
    
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext
    import org.apache.phoenix.spark._
    import org.apache.spark.sql.DataFrame
    
    var sqlContext = new SQLContext(sc);
    val phoenixHost = "10.128.7.183:2181"
    
    // Exiting paste mode, now interpreting.
    
    warning: there was one deprecation warning; re-run with -deprecation for details
    import org.apache.spark.SparkContext
    import org.apache.spark.sql.SQLContext
    import org.apache.phoenix.spark._
    import org.apache.spark.sql.DataFrame
    sqlContext: org.apache.spark.sql.SQLContext = org.apache.spark.sql.SQLContext@258ff54a
    phoenixHost: String = 10.128.7.183:2181
    
    scala> val variant_hg19_df = sqlContext.load("org.apache.phoenix.spark", Map("table" -> "VARIANT_ANNOTATION_HG19", "zkUrl" -> phoenixHost))
    warning: there was one deprecation warning; re-run with -deprecation for details
    variant_hg19_df: org.apache.spark.sql.DataFrame = [CHROMOSOME_ID: int, POSITION: int ... 36 more fields]
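(Side note: the deprecation warnings above are because sqlContext.load is deprecated in Spark 2.x. A rough equivalent using the SparkSession reader API, reusing the spark session and phoenixHost defined in the shell transcript above, would look like this:)

    // Sketch of the same check with the non-deprecated Spark 2.x reader API;
    // the table name and zkUrl are taken from the transcript above.
    val variant_hg19_df = spark.read
      .format("org.apache.phoenix.spark")
      .option("table", "VARIANT_ANNOTATION_HG19")
      .option("zkUrl", phoenixHost)
      .load()
    variant_hg19_df.printSchema()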