使用Pydoop API的Python代码如下所示。我有一个文本文件/home/progen/test.txt
。在运行脚本将此文件复制到HDFS时,我收到错误。
import pydoop.hdfs as hdfs
local_path = '/home/progen/test.txt'
hdfs_path = '/spark_user/'
host = 'master'
port = 9000
hdfsobj = hdfs.hdfs(host, port, user='spark', groups=['supergroup'])
hdfsobj.copy(local_path, hdfsobj, hdfs_path)
hdfsCopyImpl(src=/home/progen/test.txt, dst=/spark_user/, deleteSource=0): FileUtil#copy error:
java.io.FileNotFoundException: File does not exist: /home/progen/test.txt
at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1122)
at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
Traceback (most recent call last):
File "hdfs.py", line 8, in <module>
hdfsobj.copy(local_path, hdfsobj, hdfs_path)
File "/usr/local/lib/python2.7/dist-packages/pydoop/hdfs/fs.py", line 304, in copy
return self.fs.copy(from_path, to_hdfs, to_path)
hdfsCopyImpl(src=/home/progen/test.txt, dst=/spark_user/, deleteSource=0): FileUtil#copy error:
java.io.FileNotFoundException: File does not exist: /home/progen/test.txt
at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1122)
at org.apache.hadoop.hdfs.DistributedFileSystem$18.doCall(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.getFileStatus(DistributedFileSystem.java:1114)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:337)
at org.apache.hadoop.fs.FileUtil.copy(FileUtil.java:289)
Traceback (most recent call last):
File "hdfs.py", line 8, in <module>
hdfsobj.copy(local_path, hdfsobj, hdfs_path)
File "/usr/local/lib/python2.7/dist-packages/pydoop/hdfs/fs.py", line 304, in copy
return self.fs.copy(from_path, to_hdfs, to_path)