我在使用 pyspark 时遇到了问题。我想从 Oracle 数据库中检索数据,主要问题是如何构造 JDBC URL。
我尝试了两种方法,但都出错了。下面是我的源代码。您能帮我写出正确的请求吗?需要说明的是,我使用的是 Spark 1.5(无法使用 Spark 2.0 的功能)。非常感谢。
#####
# Load an Oracle table into a Spark DataFrame over JDBC (Spark 1.5 API).
#
# Meant for an interactive session in which a SparkContext named `sc` may
# already exist: that context is stopped and replaced by one built from the
# configuration below.
import datetime as dt
import glob
import os
import re

import numpy as np
import pandas as pd
from pyspark import SparkConf, SparkContext, SQLContext
# Star imports are kept from the original script so that names used by later
# code still resolve. NOTE(review): pyspark.sql.functions defines names such
# as sum/min/max that shadow the builtins -- prefer explicit imports.
from pyspark.sql.functions import *
from pyspark.sql.types import *

appName = 'Import-Data'

# Stop a context left over from a previous run. The only expected failure is
# that `sc` is not defined yet; any other error raised by stop() is allowed to
# propagate instead of being swallowed, because building a new context on top
# of a half-stopped one leads to "SparkContext has been shutdown" later on.
try:
    sc.stop()
except NameError:
    print('spark context does not exists')
else:
    print('existing spark context stopped')

conf = SparkConf().setAppName(appName)
conf.set("spark.executor.instances", "9")
conf.set("spark.executor.cores", "4")
conf.set("spark.executor.memory", "8g")
sc = SparkContext(conf=conf)

# Build the SQLContext once, after SQLContext has been imported and after the
# new SparkContext exists, so it is bound to the live context.
sqlsc = SQLContext(sc)

# Connect to the database.
# First way (YYYY is the user and XXXXXX is the password)
#MyDataFrame = sqlsc.read.load(source="jdbc",url="jdbc:oracle:thin://Server/DATABASE? user=YYYY&password=XXXXXX",dbtable="schema.table")
# Second way
# DataFrameReader.load() selects the data source through `format`; every other
# keyword is forwarded as a data-source option. `driver` names the JDBC driver
# class so it is loaded before the connection is opened.
MyDataFrame = sqlsc.read.load(
    format="jdbc",
    url="jdbc:oracle:thin:YYYY/XXXXXX@Server:1521/DATABASE",
    dbtable="Schema.table",
    driver="oracle.jdbc.OracleDriver",
)
#Here is the error I am facing:
Py4JJavaErrorTraceback (most recent call last)
<ipython-input-21-82abab7efad2> in <module>()
----> 1 MyDataFrame.show(5)
/usr/iop/current/spark-client/python/pyspark/sql/dataframe.py in show(self, n, truncate)
254 +---+-----+
255 """
--> 256 print(self._jdf.showString(n, truncate))
257
258 def __repr__(self):
/usr/iop/current/spark-client/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py in __call__(self, *args)
536 answer = self.gateway_client.send_command(command)
537 return_value = get_return_value(answer, self.gateway_client,
--> 538 self.target_id, self.name)
539
540 for temp_arg in temp_args:
/usr/iop/current/spark-client/python/pyspark/sql/utils.py in deco(*a, **kw)
34 def deco(*a, **kw):
35 try:
---> 36 return f(*a, **kw)
37 except py4j.protocol.Py4JJavaError as e:
38 s = e.java_exception.toString()
/usr/iop/current/spark-client/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
298 raise Py4JJavaError(
299 'An error occurred while calling {0}{1}{2}.\n'.
--> 300 format(target_id, '.', name), value)
301 else:
302 raise Py4JError(
Py4JJavaError: An error occurred while calling o152.showString.
: java.lang.IllegalStateException: SparkContext has been shutdown
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1814)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1835)
at org.apache.spark.SparkContext.runJob(SparkContext.scala:1848)
at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:215)
at org.apache.spark.sql.execution.Limit.executeCollect(basicOperators.scala:207)
at org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385)
at org.apache.spark.sql.DataFrame$$anonfun$collect$1.apply(DataFrame.scala:1385)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:56)
at org.apache.spark.sql.DataFrame.withNewExecutionId(DataFrame.scala:1903)
at org.apache.spark.sql.DataFrame.collect(DataFrame.scala:1384)
at org.apache.spark.sql.DataFrame.head(DataFrame.scala:1314)
at org.apache.spark.sql.DataFrame.take(DataFrame.scala:1377)
at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:178)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379)
at py4j.Gateway.invoke(Gateway.java:259)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:207)
at java.lang.Thread.run(Thread.java:745)
Here are the environment variables that are set:
# Environment for running Jupyter/pyspark.

# Put the per-user env_python2 and env_python3 bin directories first on PATH.
export PATH="/gpfs/user/$USER/env_python2/bin:/gpfs/user/$USER/env_python3/bin:$PATH"
# Add R
export PATH="/gpfs/user/common/R-devel/R-3.4.1/bin:$PATH"
# Library for Jupyter (sqlite). Exported once; the ${VAR:+...} form avoids a
# trailing ':' (which the loader treats as the current directory) when
# LD_LIBRARY_PATH was previously unset or empty.
export LD_LIBRARY_PATH="/gpfs/user/common/jupyter/sqlite/sqlite/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
# Jars added to the Spark classpath: Oracle JDBC driver and spark-csv.
# NOTE(review): SPARK_CLASSPATH is deprecated in Spark 1.x in favour of
# spark.driver.extraClassPath / spark.executor.extraClassPath -- confirm
# before migrating.
export SPARK_CLASSPATH=/soft/ora1120/db/jdbc/lib/ojdbc6.jar:/gpfs/user/e547041/jupyter/toolbox/spark-csv_2.10-0.1.jar
注意:我是在 Spark 1.5 环境下使用 Jupyter。
答案 0 :(得分:0)
您的代码清楚地表明JVM SparkContext
没有运行
Py4JJavaError: An error occurred while calling o152.showString.
: java.lang.IllegalStateException: SparkContext has been shutdown
如果未正确停止Python SparkContext
,或者由于某些配置问题而无法启动Java SparkContext
,则会发生这种情况。
在这种状态下,任何类型的操作(不一定是 jdbc)
都会失败。
要解决此问题,您应该确定为什么上下文无法正确启动。查看您发布的代码(如果没有上下文和适当的缩进,很难对其进行全面分析),很可能
try:
sc.stop()
使驱动程序处于未定义状态。