我通过 Python 的 impala.dbapi
发起了一个 Hive 查询,按如下方式可以正常运行:
import os
import pandas as pd
from impala.dbapi import connect
from impala.util import as_pandas
from datetime import datetime
# Hive credentials are taken from the environment; a missing variable
# raises KeyError before any connection is attempted.
user = os.environ['HIVE_USER']
password = os.environ['HIVE_PASSWORD']

# Minimal query: fetch a single row from dejavu.tracking_events.
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''

# Open the connection on port 10000 using the PLAIN auth mechanism.
conn = connect(
    host='ecprdbhdp02-clientgw.kenshooprd.local',
    port=10000,
    user=user,
    password=password,
    auth_mechanism='PLAIN',
)
cursor = conn.cursor()

# Execute the query and load the cursor's result set into a DataFrame.
cursor.execute(up_to_date_query)
df = as_pandas(cursor)
df.head()
但是,当我像下面这样在查询中加入
"ADD JAR" 语句时:
# Variant that fails: an ADD JAR statement is prepended to the SELECT and the
# two statements, separated by ';', are passed as one string to a single
# cursor.execute() call.
up_to_date_query = '''
ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar;
select * from dejavu.tracking_events limit 1
'''
我收到以下错误:
---------------------------------------------------------------------------
HiveServer2Error Traceback (most recent call last)
<ipython-input-10-1e512abcc69e> in <module>()
4 auth_mechanism='PLAIN')
5 cursor = conn.cursor()
----> 6 cursor.execute(up_to_date_query)
7 df = as_pandas(cursor)
8 df.head()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, operation, parameters, configuration)
300 # PEP 249
301 self.execute_async(operation, parameters=parameters,
--> 302 configuration=configuration)
303 log.debug('Waiting for query to finish')
304 self._wait_to_finish() # make execute synchronous
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute_async(self, operation, parameters, configuration)
341 self._last_operation = op
342
--> 343 self._execute_async(op)
344
345 def _debug_log_state(self):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _execute_async(self, operation_fn)
360 self._reset_state()
361 self._debug_log_state()
--> 362 operation_fn()
363 self._last_operation_active = True
364 self._debug_log_state()
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in op()
338 op = self.session.execute(self._last_operation_string,
339 configuration,
--> 340 async=True)
341 self._last_operation = op
342
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in execute(self, statement, configuration, async)
1025 confOverlay=configuration,
1026 runAsync=async)
-> 1027 return self._operation('ExecuteStatement', req)
1028
1029 def get_databases(self, schema='.*'):
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _operation(self, kind, request)
955
956 def _operation(self, kind, request):
--> 957 resp = self._rpc(kind, request)
958 return self._get_operation(resp.operationHandle)
959
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in _rpc(self, func_name, request)
923 response = self._execute(func_name, request)
924 self._log_response(func_name, response)
--> 925 err_if_rpc_not_ok(response)
926 return response
927
/home/yehoshaphats/anaconda/lib/python2.7/site-packages/impala/hiveserver2.pyc in err_if_rpc_not_ok(resp)
702 resp.status.statusCode != TStatusCode.SUCCESS_WITH_INFO_STATUS and
703 resp.status.statusCode != TStatusCode.STILL_EXECUTING_STATUS):
--> 704 raise HiveServer2Error(resp.status.errorMessage)
705
706
HiveServer2Error: Error while processing statement: null
请注意,直接在 Hive 中(通过 Hue
控制台)运行同样的查询时,查询可以正常工作。
我搜索过类似的问题,但似乎没有人问过完全相同的问题 :(
提前致谢!
答案 0 :(得分:0)
似乎是 `;`
分隔符导致了问题,我只是把两条语句拆开、分别执行,如下:
# Send each statement through its own execute() call, with no ';' separator.
add_jar_statement = 'ADD JAR hdfs://BICluster/user/yossis/udfs/hive-udf-0.1-SNAPSHOT.jar'
cursor.execute(add_jar_statement)

# With the jar registered on this cursor's session, run the SELECT on its own.
up_to_date_query = '''
select * from dejavu.tracking_events limit 1
'''
cursor.execute(up_to_date_query)