Is there a way to count the number of lines in an HDFS directory from Java, the same way we do at the command prompt by running the following command?
hadoop fs -cat /abc/def/* | wc -l
Specifically, using the Hadoop API rather than writing map-reduce or Spark code.
Answer 0 (score: 4)
Something like this should work:
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LineCounter {

    public static void main(String[] args) throws IOException {
        // Load the client-side cluster configuration
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream("hdfs-site.xml"));
        conf.addResource(new FileInputStream("core-site.xml"));

        // Pin the FileSystem implementations so they resolve correctly
        // even when the job is packaged as an uber/shaded jar
        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        FileSystem fs = FileSystem.get(conf);
        Path pt = new Path("/some/path");

        // List the direct children of the directory and count the lines in each file
        FileStatus[] status = fs.listStatus(pt);
        long count = 0;
        for (FileStatus f : status) {
            if (f.isFile()) {
                FSDataInputStream inputStream = fs.open(f.getPath());
                try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
                    while (reader.readLine() != null) {
                        count++;
                    }
                }
            }
        }
        System.out.println("Total lines: " + count);
    }
}
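
Note that the shell command in the question uses a glob (/abc/def/*), while listStatus only enumerates the direct children of one directory. As a minimal sketch (the pattern /abc/def/* is taken from the question; it assumes the client configuration is on the classpath), FileSystem.globStatus can expand the wildcard the same way hadoop fs -cat does:

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobLineCounter {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Expand the same wildcard pattern the shell command uses
        FileStatus[] matches = fs.globStatus(new Path("/abc/def/*"));

        long count = 0;
        if (matches != null) { // globStatus may return null if the path does not exist
            for (FileStatus f : matches) {
                if (f.isFile()) {
                    try (BufferedReader reader =
                            new BufferedReader(new InputStreamReader(fs.open(f.getPath())))) {
                        while (reader.readLine() != null) {
                            count++;
                        }
                    }
                }
            }
        }
        System.out.println("Total lines: " + count);
    }
}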