Error when running a SQL query in pyspark using Python

Posted: 2019-11-12 12:23:04

Tags: pyspark pyspark-sql pyspark-dataframes

Hi, I have written some code in which I create a dataframe from the TERR.txt file. Now I am trying to run a SQL query against it, but I am getting an error.

Could you please help me resolve this issue?

Code:

from pyspark.context import SparkContext
from pyspark.sql import HiveContext
from pyspark.sql import SQLContext
from pyspark.sql.functions import *
from pyspark.sql.window import Window
from pyspark.sql.session import SparkSession
from pyspark.sql import Row
from pyspark.sql import DataFrame
from pyspark import SparkConf

conf = SparkConf()

sc = SparkContext(conf=conf)
spark = SparkSession(sc)
sqlContext = SQLContext(sc)
sc.setLogLevel("ERROR")

df = (sqlContext.read.format('csv')
      .option("delimiter", "|")
      .options(header='true', multiline='true', escape='"', inferSchema='false')
      .load("/test/TERR.txt"))

print(df)
df.show()  # show() prints the rows itself; wrapping it in print() would only print "None"

query = "select a.idasnmitory_id,a.dscrptnasnmitory_desc,a.nmasterr_nm,a.pstn_type,a.parnt_terrasparnt_nm_id,b.nmasparnt_terr_nm,a.start_dt,a.type,CASE WHEN substr(a.nm,1,6) IN ('105-30','105-31','105-32','105-41','105-42','105-43','200-CD','200-CG','200-CO','200-CP','200-CR','200-DG') THEN 'JBI' WHEN substr(a.nm,1,6) IN ('100-SC','105-05','105-06','105-07','105-08','105-13','105-71','105-72','105-73') THEN 'JP' WHEN substr(a.nm,1,6) IN ('103-16') THEN 'JT' WHEN substr(a.nm,1,6) IN ('105-51','200-HA','200-HF','200-HT','105-HT') THEN 'JSA' WHEN substr(a.nm,1,6) IN ('105-61','200-PR') THEN 'PR' WHEN substr(a.nm,1,3) IN ('302') THEN 'Canada-MEM' WHEN substr(a.nm,1,3) IN ('301') THEN 'Canada-MSL'ELSE'Unspecified'END AS DEPARTMENT,CASE WHEN substr(a.nm,1,6) IN ('105-06','105-07','105-08') THEN 'CVMMSL' WHEN substr(a.nm,1,6) IN ('100-SC','105-13') THEN 'CVMCSS' WHEN substr(a.nm,1,6) IN ('105-41','200-CD') THEN 'DermMSL' WHEN substr(a.nm,1,6) IN ('105-42','200-CG') THEN 'GastroMSL' WHEN substr(a.nm,1,6) IN ('105-31') THEN 'HemeOncMSL' WHEN substr(a.nm,1,6) IN ('200-DG') THEN 'ImmMD' WHEN substr(a.nm,1,6) IN ('103-16') THEN 'IDMSL' WHEN substr(a.nm,1,6) IN ('200-CP') THEN 'ImmOps' WHEN substr(a.nm,1,6) IN ('105-05','105-71','105-72','105-73') THEN 'NeuroMSL' WHEN substr(a.nm,1,6) IN ('105-30','200-CO') THEN 'OncMSL' WHEN substr(a.nm,1,6) IN ('105-61','200-PR') THEN 'PuertoRicoMSL' WHEN substr(a.nm,1,6) IN ('105-43','200-CR') THEN 'RheumMSL' WHEN substr(a.nm,1,6) IN ('105-51','200-HF') THEN 'RWVEField' WHEN substr(a.nm,1,6) IN ('105-32') THEN 'SolidTumorMSL' WHEN substr(a.nm,1,6) IN ('200-HT','105-HT') THEN 'RWVEPopHealth' WHEN substr(a.nm,1,6) IN ('301-PC') THEN 'Canada-PCMSL' WHEN substr(a.nm,1,6) IN ('301-VR') THEN 'Canada-VR/ONCMSL' WHEN substr(a.nm,1,6) IN ('301-SO') THEN 'Canada-Hematology(Myeloid)MSL' WHEN substr(a.nm,1,6) IN ('301-ON') THEN 'Canada-Hematology(Lymphoid)MSL' WHEN substr(a.nm,1,6) IN ('301-IP') THEN 'Canada-CNSMSL' WHEN substr(a.nm,1,6) IN ('301-RD') THEN 'Canada-RheumMSL' WHEN substr(a.nm,1,6) IN ('301-IB') THEN 'Canada-GastroMSL' WHEN substr(a.nm,1,6) IN ('301-DE') THEN 'Canada-DermMSL' WHEN substr(a.nm,1,6) IN ('301-SE') THEN 'Canada-BiologicsMSL' WHEN substr(a.nm,1,6) IN ('302-PC') THEN 'Canada-PCMEM' WHEN substr(a.nm,1,6) IN ('302-VR') THEN 'Canada-VR/ONCMEM' WHEN substr(a.nm,1,6) IN ('302-SO') THEN 'Canada-Hematology(Myeloid)MEM' WHEN substr(a.nm,1,6) IN ('302-ON') THEN 'Canada-Hematology(Lymphoid)MEM' WHEN substr(a.nm,1,6) IN ('302-IP') THEN 'Canada-CNSMEM' WHEN substr(a.nm,1,6) IN ('302-RD') THEN 'Canada-RheumMEM' WHEN substr(a.nm,1,6) IN ('302-IB') THEN 'Canada-GastroMEM' WHEN substr(a.nm,1,6) IN ('302-DE') THEN 'Canada-DermMEM' WHEN substr(a.nm,1,6) IN ('302-SE') THEN 'Canada-BiologicsMEM'ELSE'Unspecified'END AS FRANCHISE from outbound.terr a left outer joIN outbound.terr b on a.parnt_terr=b.id"

df.createOrReplaceTempView("terr")

df_output = spark.sql(query)

Error:

 Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/usr/lib/spark/python/pyspark/sql/session.py", line 710, in sql
    return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
  File "/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
  File "/usr/lib/spark/python/pyspark/sql/utils.py", line 69, in deco
    raise AnalysisException(s.split(': ', 1)[1], stackTrace)
pyspark.sql.utils.AnalysisException: u"Table or view not found: `outbound`.`terr`; line 1 pos 2791;\n'Project
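
For reference, createOrReplaceTempView registers the dataframe only under the exact name passed to it, and spark.sql can resolve the view only by that same name; the query above looks up outbound.terr while the view is registered as terr. Below is a minimal sketch of that name matching, using a small made-up dataframe (the column names and values are placeholders, not the real TERR data):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Hypothetical stand-in data, only to illustrate temp-view name resolution.
df = spark.createDataFrame([(1, 0, "105-30"), (2, 1, "301-PC")],
                           ["id", "parnt_terr", "nm"])

# The view is visible only under the name given here.
df.createOrReplaceTempView("terr")

spark.sql("select * from terr").show()      # resolves against the temp view "terr"
# spark.sql("select * from outbound.terr")  # raises AnalysisException unless a table
#                                           # named outbound.terr exists in the catalog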

0 Answers:

No answers yet.