我使用 pyarrow 库编写了一段 Python 代码,尝试连接 HDFS,但运行时出现了下面的错误:
代码:
import pyarrow
import os
import posixpath
import sys
from pyarrow.util import implements
from pyarrow.filesystem import FileSystem
import pyarrow.lib as lib
pyarrow.hdfs.connect(host='xx.xx.xx.xx', port=22, user='cloudera', kerb_ticket=None, driver='libhdfs', extra_conf=None)
错误:
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
----> 1 pyarrow.hdfs.connect(host='10.40.1.8', port=22, user='cloudera', kerb_ticket=None, driver='libhdfs')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyarrow\hdfs.py in connect(host, port, user, kerb_ticket, driver, extra_conf)
    213     fs = HadoopFileSystem(host=host, port=port, user=user,
    214                           kerb_ticket=kerb_ticket, driver=driver,
--> 215                           extra_conf=extra_conf)
    216     return fs
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyarrow\hdfs.py in __init__(self, host, port, user, kerb_ticket, driver, extra_conf)
     36                  driver='libhdfs', extra_conf=None):
     37         if driver == 'libhdfs':
---> 38             _maybe_set_hadoop_classpath()
     39
     40         self._connect(host, port, user, kerb_ticket, driver, extra_conf)
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyarrow\hdfs.py in _maybe_set_hadoop_classpath()
    138         classpath = _hadoop_classpath_glob(hadoop_bin)
    139     else:
--> 140         classpath = _hadoop_classpath_glob('hadoop')
    141
    142     os.environ['CLASSPATH'] = classpath.decode('utf-8')
~\AppData\Local\Continuum\anaconda3\lib\site-packages\pyarrow\hdfs.py in _hadoop_classpath_glob(hadoop_bin)
    163
    164     hadoop_classpath_args = (hadoop_bin, 'classpath', '--glob')
--> 165     return subprocess.check_output(hadoop_classpath_args)
    166
    167
~\AppData\Local\Continuum\anaconda3\lib\subprocess.py in check_output(timeout, *popenargs, **kwargs)
    393
    394     return run(*popenargs, stdout=PIPE, timeout=timeout, check=True,
--> 395                **kwargs).stdout
    396
    397
~\AppData\Local\Continuum\anaconda3\lib\subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs)
    470         kwargs['stderr'] = PIPE
    471
--> 472     with Popen(*popenargs, **kwargs) as process:
    473         try:
    474             stdout, stderr = process.communicate(input, timeout=timeout)
~\AppData\Local\Continuum\anaconda3\lib\subprocess.py in __init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
    773                                 c2pread, c2pwrite,
    774                                 errread, errwrite,
--> 775                                 restore_signals, start_new_session)
    776         except:
    777             # Cleanup if the child failed starting.
~\AppData\Local\Continuum\anaconda3\lib\subprocess.py in _execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite,
   1176                                          env,
   1177                                          os.fspath(cwd) if cwd is not None else None,
-> 1178                                          startupinfo)
   1179             finally:
   1180                 # Child is launched. Close the parent's copy of those pipe

FileNotFoundError: [WinError 2] The system cannot find the file specified
如何解决此问题并连接到HDFS?