HDFS file line count

Date: 2017-11-07 12:35:15

Tags: hadoop mapreduce bigdata

Is there a way to count the number of lines in an HDFS directory from Java, the same way we do at the command prompt with the following command?

hadoop fs -cat /abc/def/* | wc -l

Specifically, using the Hadoop API rather than writing MapReduce or Spark code.

1 answer:

Answer 0 (score: 4)

Something like this should work:

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LineCounter {

    public static void main(String[] args) throws IOException {

        // Load the client-side cluster configuration
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream("hdfs-site.xml"));
        conf.addResource(new FileInputStream("core-site.xml"));

        conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());

        FileSystem fs = FileSystem.get(conf);
        Path pt = new Path("/some/path");

        // Count the lines of every regular file directly under the directory
        FileStatus[] status = fs.listStatus(pt);

        long count = 0;

        for (FileStatus f : status) {
            if (f.isFile()) {
                FSDataInputStream inputStream = fs.open(f.getPath());
                BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
                try {
                    while (reader.readLine() != null) {
                        count++;
                    }
                } finally {
                    reader.close();
                }
            }
        }

        System.out.println(count);
    }

}
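
For reference, the shell command in the question uses a glob (/abc/def/*), while listStatus above lists a single directory. The Hadoop API also exposes FileSystem.globStatus, which expands a glob pattern on the client side much like the shell does. Below is a minimal sketch along those lines (the class name GlobLineCounter is mine, and it assumes core-site.xml and hdfs-site.xml are on the classpath so the default Configuration picks them up):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobLineCounter {

    public static void main(String[] args) throws IOException {
        // Reads fs.defaultFS and friends from config files on the classpath
        FileSystem fs = FileSystem.get(new Configuration());

        long count = 0;

        // globStatus expands the wildcard, mirroring the behavior of
        // `hadoop fs -cat /abc/def/*`; it may return null when nothing matches
        FileStatus[] matches = fs.globStatus(new Path("/abc/def/*"));
        if (matches != null) {
            for (FileStatus f : matches) {
                if (f.isFile()) {
                    try (BufferedReader reader = new BufferedReader(
                            new InputStreamReader(fs.open(f.getPath())))) {
                        while (reader.readLine() != null) {
                            count++;
                        }
                    }
                }
            }
        }

        System.out.println(count);
    }
}

Note that both versions read every byte of every file through the client, just like the hadoop fs -cat | wc -l pipeline, so for very large directories a distributed job would still be the faster option.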