我在Python中有一个pandas数据框(DataFrame)。我想把这个数据框创建/加载到一个Hive表中。
我知道可以先从pandas数据框创建spark数据框,然后再创建Hive表。
但我想使用纯python的方式来完成,而不使用pyspark。
我已经在本地的python环境中安装了pyhive和pyhs2。
使用pyhive:
conn_1 = hive.Connection(host=hive_host, port=10000, username=username)
我收到以下错误:
NotImplementedError: Wrong number of arguments for overloaded function 'Client_setAttr'.
Possible C/C++ prototypes are:
setAttr(saslwrapper::Client *,std::string const &,std::string const &)
setAttr(saslwrapper::Client *,std::string const &,uint32_t)
使用pyhs2:
conn_1 = pyhs2.connect(host='host', port=10000,authMechanism="NOSASL", user='hive', database='default')
返回
TTransportException: TSocket read 0 bytes
我该如何连接到Hive,并使用pandas数据框创建Hive表?
Error for pyhive
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
<ipython-input-7-bf6a49c722d5> in <module>()
----> 1 conn_1 = hive.Connection(host=hive_host, port=10000, username=username)
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhive\hive.pyc in __init__(self, host, port, username, database, auth, configuration, kerberos_service_name, password, thrift_transport)
157
158 try:
--> 159 self._transport.open()
160 open_session_req = ttypes.TOpenSessionReq(
161 client_protocol=protocol_version,
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\thrift_sasl\__init__.pyc in open(self)
72 type=TTransportException.NOT_OPEN,
73 message="Already open!")
---> 74 self.sasl = self.sasl_client_factory()
75
76 ret, chosen_mech, initial_response = self.sasl.start(self.mechanism)
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhive\hive.pyc in sasl_factory()
134 def sasl_factory():
135 sasl_client = sasl.Client()
--> 136 sasl_client.setAttr('host', host)
137 if sasl_auth == 'GSSAPI':
138 sasl_client.setAttr('service', kerberos_service_name)
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\sasl\saslwrapper.pyc in setAttr(*args)
89 __swig_destroy__ = _saslwrapper.delete_Client
90 __del__ = lambda self : None;
---> 91 def setAttr(*args): return _saslwrapper.Client_setAttr(*args)
92 def init(*args): return _saslwrapper.Client_init(*args)
93 def start(*args): return _saslwrapper.Client_start(*args)
NotImplementedError: Wrong number of arguments for overloaded function 'Client_setAttr'.
Possible C/C++ prototypes are:
setAttr(saslwrapper::Client *,std::string const &,std::string const &)
setAttr(saslwrapper::Client *,std::string const &,uint32_t)
Error for pyhs2
---------------------------------------------------------------------------
TTransportException Traceback (most recent call last)
<ipython-input-6-01e06bdcc707> in <module>()
----> 1 conn_1 = pyhs2.connect(host='host', port=10000,authMechanism="NOSASL", user='hive', database='default')
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhs2\__init__.pyc in connect(*args, **kwargs)
5 """
6 from .connections import Connection
----> 7 return Connection(*args, **kwargs)
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhs2\connections.pyc in __init__(self, host, port, authMechanism, user, password, database, configuration, timeout)
45 self.client = TCLIService.Client(TBinaryProtocol(transport))
46 transport.open()
---> 47 res = self.client.OpenSession(TOpenSessionReq(username=user, password=password, configuration=configuration))
48 self.session = res.sessionHandle
49 if database is not None:
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhs2\TCLIService\TCLIService.pyc in OpenSession(self, req)
152 """
153 self.send_OpenSession(req)
--> 154 return self.recv_OpenSession()
155
156 def send_OpenSession(self, req):
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhs2\TCLIService\TCLIService.pyc in recv_OpenSession(self)
163
164 def recv_OpenSession(self, ):
--> 165 (fname, mtype, rseqid) = self._iprot.readMessageBegin()
166 if mtype == TMessageType.EXCEPTION:
167 x = TApplicationException()
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\thrift\protocol\TBinaryProtocol.pyc in readMessageBegin(self)
146 raise TProtocolException(type=TProtocolException.BAD_VERSION,
147 message='No protocol version header')
--> 148 name = self.trans.readAll(sz)
149 type = self.readByte()
150 seqid = self.readI32()
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\thrift\transport\TTransport.pyc in readAll(self, sz)
58 have = 0
59 while (have < sz):
---> 60 chunk = self.read(sz - have)
61 have += len(chunk)
62 buff += chunk
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\thrift\transport\TTransport.pyc in read(self, sz)
159 if len(ret) != 0:
160 return ret
--> 161 self.__rbuf = BufferIO(self.__trans.read(max(sz, self.__rbuf_size)))
162 return self.__rbuf.read(sz)
163
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\thrift\transport\TSocket.pyc in read(self, sz)
130 if len(buff) == 0:
131 raise TTransportException(type=TTransportException.END_OF_FILE,
--> 132 message='TSocket read 0 bytes')
133 return buff
134
TTransportException: TSocket read 0 bytes
尝试@Alvaro Joao的代码后出现以下错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-19-a1d7696d3c89> in <module>()
----> 1 cursor = hive.connect(dsn= hive_host+':10000', user=username,password=password).cursor()
C:\Users\viru\AppData\Local\Continuum\Anaconda2\lib\site-packages\pyhive\hive.pyc in connect(*args, **kwargs)
62 :returns: a :py:class:`Connection` object.
63 """
---> 64 return Connection(*args, **kwargs)
65
66
TypeError: __init__() got an unexpected keyword argument 'dsn'