我正在 PySpark 上运行下面的 SQL 查询,但出现了如下所示的解析错误(ParseException)。
请问应该如何解决?
# Spark SQL text for the query: an outer SELECT that computes
# DENSE_RANK() OVER (ORDER BY PROD_NM, CNTRY) AS SYSTEM_ID on top of a derived
# table built from UNIONed SELECTs against vw_prod2, vm_mdm_product /
# vm_mdm_countries and vw_prod.
# NOTE(review): the text opens a derived table with "from(" four times and
# contains no ")" that closes any of them, so the parentheses are unbalanced.
# NOTE(review): the "select DENSE_RANK() ... from(" segment appears four times,
# each nested inside the previous one as a further UNION branch; those branches
# list 7 columns while the sibling branches list 6. This looks like an
# accidental paste duplication -- confirm the intended query.
# NOTE(review): as shown, the string literal continues onto a second line,
# which a plain double-quoted Python string cannot do; presumably a line wrap
# introduced when the snippet was pasted -- confirm.
query = "select DENSE_RANK() OVER(ORDER BY PROD_NM, CNTRY) AS SYSTEM_ID, id AS SOURCE_ID,source_name,prod_nm,CNTRY,source_entity,entity_name from(SELECT distinct id, 'AMPIL' as SOURCE_NAME,prod_nm, 'PROD2' AS Source_Entity,'PRODUCT' AS ENTITY_NAME,CASE WHEN OPRTNG_CMPNYS = 'Janssen Canada' THEN 'Canada' WHEN OPRTNG_CMPNYS LIKE 'Janssen US%' THEN 'United States' END AS CNTRY FROM vw_prod2 UNION SELECT mdm_id , 'MDM' AS SOURCE_NAME, product_name AS PROD_NM, 'MDM_PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, COUNTRY_NAME FROM vm_mdm_product PROD, vm_mdm_countries WHERE PROD.COUNTRY_ID = vm_mdm_countries.COUNTRY_ID UNION SELECT distinct id, 'AMPIL' as SOURCE_NAME, nm AS PROD_NM, 'PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, CNTRY FROM vw_prod union select DENSE_RANK() OVER(ORDER BY PROD_NM, CNTRY) AS SYSTEM_ID, id AS SOURCE_ID,source_name,prod_nm,CNTRY,source_entity,entity_name from(SELECT distinct id, 'AMPIL' as SOURCE_NAME,prod_nm, 'PROD2' AS Source_Entity,'PRODUCT' AS ENTITY_NAME,CASE WHEN OPRTNG_CMPNYS = 'Janssen Canada' THEN 'Canada' WHEN OPRTNG_CMPNYS LIKE 'Janssen US%' THEN 'United States' END AS CNTRY FROM vw_prod2 UNION SELECT mdm_id , 'MDM' AS SOURCE_NAME, product_name AS PROD_NM, 'MDM_PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, COUNTRY_NAME FROM vm_mdm_product PROD, vm_mdm_countries WHERE PROD.COUNTRY_ID = vm_mdm_countries.COUNTRY_ID UNION SELECT distinct id, 'AMPIL' as SOURCE_NAME, nm AS PROD_NM, 'PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, CNTRY FROM vw_prod union select DENSE_RANK() OVER(ORDER BY PROD_NM, CNTRY) AS SYSTEM_ID, id AS SOURCE_ID,source_name,prod_nm,CNTRY,source_entity,entity_name from(SELECT distinct id, 'AMPIL' as SOURCE_NAME,prod_nm, 'PROD2' AS Source_Entity,'PRODUCT' AS ENTITY_NAME,CASE WHEN OPRTNG_CMPNYS = 'Janssen Canada' THEN 'Canada' WHEN OPRTNG_CMPNYS LIKE 'Janssen US%' THEN 'United States' END AS CNTRY FROM vw_prod2 UNION SELECT mdm_id , 'MDM' AS SOURCE_NAME, product_name AS PROD_NM, 'MDM_PROD' AS 
Source_Entity,'PRODUCT' AS ENTITY_NAME, COUNTRY_NAME FROM vm_mdm_product PROD, vm_mdm_countries WHERE PROD.COUNTRY_ID = vm_mdm_countries.COUNTRY_ID UNION SELECT distinct id, 'AMPIL' as SOURCE_NAME, nm AS PROD_NM, 'PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, CNTRY FROM vw_prod union select DENSE_RANK() OVER(ORDER BY PROD_NM, CNTRY) AS SYSTEM_ID, id AS SOURCE_ID,source_name,prod_nm,CNTRY,source_entity,entity_name from(SELECT distinct id, 'AMPIL' as SOURCE_NAME,prod_nm, 'PROD2' AS Source_Entity,'PRODUCT' AS ENTITY_NAME,CASE WHEN OPRTNG_CMPNYS = 'Janssen Canada' THEN 'Canada' WHEN OPRTNG_CMPNYS LIKE 'Janssen US%' THEN 'United States' END AS CNTRY FROM vw_prod2 UNION SELECT mdm_id , 'MDM' AS SOURCE_NAME, product_name AS PROD_NM, 'MDM_PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, COUNTRY_NAME FROM vm_mdm_product PROD, vm_mdm_countries WHERE PROD.COUNTRY_ID = vm_mdm_countries.COUNTRY_ID UNION SELECT distinct id, 'AMPIL' as SOURCE_NAME, nm AS PROD_NM, 'PROD' AS Source_Entity,'PRODUCT' AS ENTITY_NAME, CNTRY FROM vw_prod"
# Pass the query text to sqlContext.sql and bind the result to df.
df = sqlContext.sql(query)
错误:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/spark/python/pyspark/sql/context.py", line 353, in sql
return self.sparkSession.sql(sqlQuery)
File "/usr/lib/spark/python/pyspark/sql/session.py", line 710, in sql
return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
File "/usr/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
File "/usr/lib/spark/python/pyspark/sql/utils.py", line 73, in deco
raise ParseException(s.split(': ', 1)[1], stackTrace)
pyspark.sql.utils.ParseException: u"\nmismatched input 'from' expecting <EOF>(line 1, pos 133)
答案 0 :(得分:0)
您的查询中缺少几个右括号“)”:每一个用 `from(` 打开的子查询都必须有一个对应的“)”来闭合,请逐一检查并补全。