I need to use SQL analytic functions on a DataFrame, so I am trying to use HiveContext. For reference, here is my code:
import org.apache.spark._
import org.apache.spark.sql.hive.HiveContext

object hctest {
  def main(args: Array[String]): Unit = {
    val cf = new SparkConf().setAppName("example").setMaster("local[2]")
    val sc = new SparkContext(cf)
    val hc = new HiveContext(sc)
    //import hc.implicits._

    // Connection details for the local PostgreSQL database
    val URL = "jdbc:postgresql://localhost:5432/marks"
    val USER = "postgres"
    val PASS = "root"
    val table = "import.test_table"
    val prop = new java.util.Properties()
    prop.setProperty("user", USER)
    prop.setProperty("password", PASS)

    // Read the PostgreSQL table into a DataFrame over JDBC
    val df = hc.read.jdbc(URL, table, prop)
    df.show()
  }
}
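For context, once the HiveContext is created successfully, the kind of analytic query I want to run (inside main, after df is built) looks roughly like this sketch; the table name marks_table and the columns student_id, score, and exam_date are placeholders, not my real schema:

    // Sketch only: placeholder table/column names, meant to follow "val df = ..." above
    df.registerTempTable("marks_table")
    val result = hc.sql(
      """SELECT student_id, score,
        |       LAG(score, 1)  OVER (PARTITION BY student_id ORDER BY exam_date) AS prev_score,
        |       LEAD(score, 1) OVER (PARTITION BY student_id ORDER BY exam_date) AS next_score
        |FROM marks_table""".stripMargin)
    result.show()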
我在" val hc = new HiveContext(sc)"
获得以下例外17/02/16 17:33:52 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
17/02/16 17:33:52 WARN : Your hostname, FBSALT000058 resolves to a loopback/non-reachable address: fe80:0:0:0:c817:f8:8736:f861%17, but we couldn't find any external IP address!
17/02/16 17:33:54 INFO HiveMetaStore: Added admin role in metastore
17/02/16 17:33:54 INFO HiveMetaStore: Added public role in metastore
17/02/16 17:33:54 INFO HiveMetaStore: No user is added in admin role, since config is empty
17/02/16 17:33:54 INFO HiveMetaStore: 0: get_all_databases
17/02/16 17:33:54 INFO audit: ugi=IN0108 ip=unknown-ip-addr cmd=get_all_databases
17/02/16 17:33:54 INFO HiveMetaStore: 0: get_functions: db=default pat=*
17/02/16 17:33:54 INFO audit: ugi=IN0108 ip=unknown-ip-addr cmd=get_functions: db=default pat=*
17/02/16 17:33:54 INFO Datastore: The class "org.apache.hadoop.hive.metastore.model.MResourceUri" is tagged as "embedded-only" so does not have its own datastore table.
Exception in thread "main" java.lang.RuntimeException: java.lang.RuntimeException: Error while running command to get file permissions : ExitCodeException exitCode=-1073741515:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:538)
at org.apache.hadoop.util.Shell.run(Shell.java:455)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:715)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:808)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:582)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:557)
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:238)
at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:218)
at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:208)
at org.apache.spark.sql.hive.HiveContext.functionRegistry$lzycompute(HiveContext.scala:462)
at org.apache.spark.sql.hive.HiveContext.functionRegistry(HiveContext.scala:461)
at org.apache.spark.sql.UDFRegistration.<init>(UDFRegistration.scala:40)
at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:330)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:90)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
at hctest$.main(hctest.scala:11)
at hctest.main(hctest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:238)
at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:218)
at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:208)
at org.apache.spark.sql.hive.HiveContext.functionRegistry$lzycompute(HiveContext.scala:462)
at org.apache.spark.sql.hive.HiveContext.functionRegistry(HiveContext.scala:461)
at org.apache.spark.sql.UDFRegistration.<init>(UDFRegistration.scala:40)
at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:330)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:90)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
at hctest$.main(hctest.scala:11)
at hctest.main(hctest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
Caused by: java.lang.RuntimeException: Error while running command to get file permissions : ExitCodeException exitCode=-1073741515:
at org.apache.hadoop.util.Shell.runCommand(Shell.java:538)
at org.apache.hadoop.util.Shell.run(Shell.java:455)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:715)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:808)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:791)
at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:1097)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:582)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:557)
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
at org.apache.spark.sql.hive.client.ClientWrapper.<init>(ClientWrapper.scala:204)
at org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:238)
at org.apache.spark.sql.hive.HiveContext.executionHive$lzycompute(HiveContext.scala:218)
at org.apache.spark.sql.hive.HiveContext.executionHive(HiveContext.scala:208)
at org.apache.spark.sql.hive.HiveContext.functionRegistry$lzycompute(HiveContext.scala:462)
at org.apache.spark.sql.hive.HiveContext.functionRegistry(HiveContext.scala:461)
at org.apache.spark.sql.UDFRegistration.<init>(UDFRegistration.scala:40)
at org.apache.spark.sql.SQLContext.<init>(SQLContext.scala:330)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:90)
at org.apache.spark.sql.hive.HiveContext.<init>(HiveContext.scala:101)
at hctest$.main(hctest.scala:11)
at hctest.main(hctest.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:606)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:147)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:620)
at org.apache.hadoop.fs.RawLocalFileSystem$DeprecatedRawLocalFileStatus.getPermission(RawLocalFileSystem.java:557)
at org.apache.hadoop.hive.ql.session.SessionState.createRootHDFSDir(SessionState.java:599)
at org.apache.hadoop.hive.ql.session.SessionState.createSessionDirs(SessionState.java:554)
at org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:508)
... 17 more
17/02/16 17:33:56 INFO SparkContext: Invoking stop() from shutdown hook
17/02/16 17:33:56 INFO SparkUI: Stopped Spark web UI at http://192.168.20.45:4040
17/02/16 17:33:56 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
17/02/16 17:33:56 INFO MemoryStore: MemoryStore cleared
17/02/16 17:33:56 INFO BlockManager: BlockManager stopped
17/02/16 17:33:56 INFO BlockManagerMaster: BlockManagerMaster stopped
17/02/16 17:33:56 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
17/02/16 17:33:56 INFO SparkContext: Successfully stopped SparkContext
17/02/16 17:33:56 INFO ShutdownHookManager: Shutdown hook called
17/02/16 17:33:56 INFO ShutdownHookManager: Deleting directory C:\Users\in0108\AppData\Local\Temp\spark-48bc7af3-cabb-4526-87da-4b852a072406
17/02/16 17:33:56 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.
17/02/16 17:33:56 INFO RemoteActorRefProvider$RemotingTerminator: Remote daemon shut down; proceeding with flushing remote transports.
17/02/16 17:33:56 INFO RemoteActorRefProvider$RemotingTerminator: Remoting shut down.
17/02/16 17:33:56 ERROR ShutdownHookManager: Exception while deleting Spark temp dir: C:\Users\in0108\AppData\Local\Temp\spark-48bc7af3-cabb-4526-87da-4b852a072406
java.io.IOException: Failed to delete: C:\Users\in0108\AppData\Local\Temp\spark-48bc7af3-cabb-4526-87da-4b852a072406
at org.apache.spark.util.Utils$.deleteRecursively(Utils.scala:929)
at org.apache.spark.util.ShutdownHookManager$$anonfun$1$$anonfun$apply$mcV$sp$3.apply(ShutdownHookManager.scala:65)
at org.apache.spark.util.ShutdownHookManager$$anonfun$1$$anonfun$apply$mcV$sp$3.apply(ShutdownHookManager.scala:62)
at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:108)
at org.apache.spark.util.ShutdownHookManager$$anonfun$1.apply$mcV$sp(ShutdownHookManager.scala:62)
at org.apache.spark.util.SparkShutdownHook.run(ShutdownHookManager.scala:267)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(ShutdownHookManager.scala:239)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:239)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1$$anonfun$apply$mcV$sp$1.apply(ShutdownHookManager.scala:239)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1801)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply$mcV$sp(ShutdownHookManager.scala:239)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:239)
at org.apache.spark.util.SparkShutdownHookManager$$anonfun$runAll$1.apply(ShutdownHookManager.scala:239)
at scala.util.Try$.apply(Try.scala:161)
at org.apache.spark.util.SparkShutdownHookManager.runAll(ShutdownHookManager.scala:239)
at org.apache.spark.util.SparkShutdownHookManager$$anon$2.run(ShutdownHookManager.scala:218)
at org.apache.hadoop.util.ShutdownHookManager$1.run(ShutdownHookManager.java:54)
17/02/16 17:33:56 INFO ShutdownHookManager: Deleting directory C:\Users\in0108\AppData\Local\Temp\spark-e39cf6bb-08b6-40d7-9bb8-c2ca40c015dc
Process finished with exit code 1
Any suggestions on what I can do? I really need to use SQL analytic functions such as LEAD and LAG (and it also doesn't work when I use org.apache.spark.sql.expressions.Window).
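For completeness, my attempt via the DataFrame API looked roughly like the sketch below, again with placeholder column names. As I understand it, in Spark 1.x these window functions also require a working HiveContext rather than a plain SQLContext, which is why I am stuck on the error above:

    import org.apache.spark.sql.expressions.Window
    import org.apache.spark.sql.functions.{lag, lead}

    // Sketch only: placeholder column names; assumes the df from the program above
    val w = Window.partitionBy("student_id").orderBy("exam_date")
    val withNeighbors = df
      .withColumn("prev_score", lag("score", 1).over(w))
      .withColumn("next_score", lead("score", 1).over(w))
    withNeighbors.show()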