I want to read all files from an HDFS location and process them one by one with Spring Batch. Currently I am using a MultiResourceItemReader to read files from the local file system and process them.
Answer 0 (score: 0)
Copy the files from the HDFS location down to the local file system first, and then read them from the local file system with Spring Batch.
// Copy the files from HDFS to the local file system and expose them as Spring Resources
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.core.io.FileSystemResource;
import org.springframework.core.io.Resource;

private Resource[] getMultipleResourceItemreader() {
    ArrayList<Resource> resources = new ArrayList<>();
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://localhost:9000");
    configuration.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    System.setProperty("HADOOP_USER_NAME", "xxxx");
    System.setProperty("hadoop.home.dir", "D:\\rajesh\\softwares\\winutils");
    try {
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), configuration);
        // "hdfsfilelocation" is a placeholder for the HDFS source directory
        FileStatus[] files = fs.listStatus(new Path("hdfsfilelocation"));
        for (FileStatus file : files) {
            // Copy each file into the local staging directory
            fs.copyToLocalFile(file.getPath(), new Path(batchConfigurationProperties.getCsvFilePath()));
            // Remove the .crc checksum files that copyToLocalFile writes alongside each copy
            deleteTempFile(batchConfigurationProperties.getCsvFilePath(), ".crc");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    // Wrap every regular file in the local staging directory as a FileSystemResource
    File csvDirectory = new File(batchConfigurationProperties.getCsvFilePath());
    File[] csvFileList = csvDirectory.listFiles();
    for (File file : csvFileList) {
        if (file.isFile()) {
            resources.add(new FileSystemResource(file.getPath()));
        }
    }
    return resources.toArray(new Resource[resources.size()]);
}
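
Once the files are staged locally, the returned Resource array can be plugged straight into the MultiResourceItemReader mentioned in the question, which hands each file in turn to a delegate reader. A minimal sketch, assuming a hypothetical record type MyRecord and a flatFileItemReader() bean configured elsewhere for the CSV layout:

import org.springframework.batch.item.file.MultiResourceItemReader;
import org.springframework.context.annotation.Bean;

@Bean
public MultiResourceItemReader<MyRecord> multiResourceItemReader() {
    MultiResourceItemReader<MyRecord> reader = new MultiResourceItemReader<>();
    // Resources copied down from HDFS by the method above
    reader.setResources(getMultipleResourceItemreader());
    // Delegate that parses each individual CSV file (assumed to exist)
    reader.setDelegate(flatFileItemReader());
    return reader;
}

The delegate is reopened for each resource, so the files are processed sequentially in the order of the array.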