如何在不将整个文件读入RAM的情况下逐行读取文件

时间:2019-02-14 21:56:07

标签: java groovy file-io inputstream

我正在尝试找到最有效的方法,用于在Java / Groovy中读取大文件、处理数据,并对输出执行某些外部功能。根据我查阅的资料,可以使用BufferedReader、Scanner、FileIterator或Stream来完成此操作,但到目前为止,在每个测试用例中都发生了堆内存溢出。我不确定我执行的步骤顺序是否正确,或者这是否是正常行为。如果有人能帮我判断是我的代码写错了,还是我忽略了其他方法,我将不胜感激。源文件只是一个名为test.csv的1GB CSV文件。

package test;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.LineIterator;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.ResponseBody;
import org.springframework.web.bind.annotation.RestController;
import sftp.SftpConnector;
import sftp.SftpHandler;
import sftp.fileInfo;
import java.io.*;
import java.nio.file.Paths;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Properties;
import java.util.Scanner;
import java.util.logging.Logger;
import java.nio.file.Files;
import java.util.stream.Stream;

/**
 * REST controller exposing a set of experimental endpoints: two that exercise
 * an SFTP connection and six that print a large CSV file to standard output
 * line by line using different JDK / commons-io reading strategies.
 *
 * <p>Every reading endpoint releases its file handle when it finishes, whether
 * it completes normally or fails. Note that all of these strategies
 * materialise one complete line as a {@code String} at a time, so a file that
 * contains no line terminators is effectively read as a single line.
 */
@RestController
public class TestController {

    /** Location of the SFTP connection settings read by {@link #getProperties()}. */
    private static final String CONNECTION_PROPERTIES_PATH = "C:\\tmp\\connection2.properties";

    /** CSV file streamed to standard output by the line-reading endpoints. */
    private static final String CSV_PATH = "C:\\tmp\\test.csv";

    /** Character-buffer size handed to the reader in {@link #trueBufferedReader()}. */
    private static final int READER_BUFFER_CHARS = 10000000;

    /** SFTP client; stays {@code null} when construction failed in the constructor. */
    private SftpConnector connector;

    /**
     * Builds the SFTP connector from the connection properties file.
     *
     * <p>A failure is only reported on standard output; the controller is still
     * created, with {@link #connector} left {@code null}. The SFTP endpoints
     * detect that state and report it instead of failing with a
     * {@code NullPointerException}.
     */
    @Autowired
    public TestController() {
        try {
            this.connector = new SftpConnector(this.getProperties());
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
    }

    /**
     * Loads the SFTP connection settings from {@value #CONNECTION_PROPERTIES_PATH}.
     *
     * @return the loaded properties; empty (or partially filled) when the file
     *         is missing or cannot be read, in which case a message is printed
     *         to standard output
     */
    public Properties getProperties() {
        Properties prop = new Properties();
        // try-with-resources: the original never closed this stream.
        try (InputStream in = new FileInputStream(new File(CONNECTION_PROPERTIES_PATH))) {
            prop.load(in);
        } catch (FileNotFoundException fnf) {
            System.out.println("Could not find the connection.properties prop file.");
        } catch (IOException io) {
            System.out.println("Could not open the connection.properties file.");
        }
        return prop;
    }

    /**
     * Opens and then closes the SFTP connection as a connectivity check.
     *
     * @return a success message, or the message(s) of whatever exception was
     *         raised while opening or closing the connection
     */
    @GetMapping(value = "/test")
    @ResponseBody
    public String testConnection() {
        String response = "";
        try {
            requireConnector().openSFTPConnection();
            response = "Connection has been opened";
        } catch (Exception ex) {
            response = ex.getMessage();
        } finally {
            // Null-safe check: the connector may never have been created.
            if (isSftpSessionConnected()) {
                try {
                    this.connector.closeSFTPConnection();
                } catch (Exception ex) {
                    response = response + ex.getMessage();
                }
            }
        }
        return response;
    }

    /**
     * Downloads every {@code *.xml} file from {@code /IB_Test} to {@code C:\tmp\}
     * while logging progress through an {@code SftpHandler} attached to the
     * global logger for the duration of the request.
     *
     * @return a success message, or the message(s) of whatever exception was
     *         raised along the way
     */
    @GetMapping(value = "/testLogger")
    @ResponseBody
    public String testLogger() {
        String response = "";
        Logger log = Logger.getLogger(Logger.getGlobal().getName());
        SftpHandler handler = null;
        try {
            handler = new SftpHandler(this.getProperties());
            log.addHandler(handler);
        } catch (Exception ex) {
            response = ex.getMessage();
        }
        try {
            log.info("Opening the SFTP Channel");
            requireConnector().openSFTPConnection();
            log.info("Channel is opened.  Getting the list of XML's");
            Collection<fileInfo> files = this.connector.ls("/IB_Test", "*.xml");
            log.info("List retrieved.  Downloading each xml file");
            for (fileInfo file : files) {
                log.info("Downloading " + file.getName());
                this.connector.get("/IB_Test/" + file.getName(), "C:\\tmp\\");
                log.info("Downloaded");
            }
            log.info("All XML's have been downloaded.");
            response = "All files have been downloaded";
        } catch (Exception ex) {
            log.severe("Exception thrown:" + ex.getMessage());
            response = ex.getMessage();
        } finally {
            try {
                if (isSftpSessionConnected()) {
                    try {
                        log.info("Attempting to close the SFTP connection");
                        this.connector.closeSFTPConnection();
                    } catch (Exception ex) {
                        log.severe("Exception thrown: " + ex.getMessage());
                        response = response + ex.getMessage();
                    }
                }
            } finally {
                // The global logger outlives this request: detach the handler so
                // handlers do not pile up (and duplicate output) on every call.
                // NOTE(review): SftpHandler may also need close() to release its
                // own resources - confirm against its implementation.
                if (handler != null) {
                    log.removeHandler(handler);
                }
            }
        }
        return response;
    }

    /**
     * Prints the CSV file line by line using {@link Scanner}.
     *
     * @return a fixed completion message, regardless of whether reading failed
     *         (failures are printed to standard output)
     */
    @GetMapping(value = "/testScanner")
    @ResponseBody
    public String testScanner() {
        try (InputStream input = new FileInputStream(new File(CSV_PATH));
             Scanner sc = new Scanner(input)) {
            while (sc.hasNextLine()) {
                System.out.println(sc.nextLine());
            }
            // Scanner suppresses read errors and simply stops; surface them.
            IOException suppressed = sc.ioException();
            if (suppressed != null) {
                throw suppressed;
            }
        } catch (Exception ex) {
            System.out.println(ex.getMessage());
        }
        return "File has completed upload line by line...";
    }

    /**
     * Prints the CSV file line by line using {@link Files#lines}.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/testFilesNIO")
    @ResponseBody
    public String testFilesNIO() {
        String output = "";
        // Files.lines keeps the file open until the stream is closed.
        try (Stream<String> lines = Files.lines(Paths.get(CSV_PATH))) {
            lines.forEach(line -> System.out.println(line));
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file, minus its first line, using
     * {@link Files#newBufferedReader} and {@link BufferedReader#lines()}.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/testFilesNIOwBufferedReader")
    @ResponseBody
    public String testFilesNIOwBufferedReader() {
        String output = "";
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(CSV_PATH))) {
            reader.lines().skip(1).forEach(line -> System.out.println(line));
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file line by line using a classic {@link BufferedReader}
     * loop with a {@value #READER_BUFFER_CHARS}-character buffer.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/trueBufferedReader")
    @ResponseBody
    public String trueBufferedReader() {
        String output = "";
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(CSV_PATH))),
                READER_BUFFER_CHARS)) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Prints the CSV file line by line using commons-io {@link LineIterator}.
     *
     * @return an empty string on success, otherwise the exception message
     */
    @GetMapping(value = "/lineIterator")
    @ResponseBody
    public String lineIterator() {
        String output = "";
        try {
            LineIterator it = FileUtils.lineIterator(new File(CSV_PATH));
            try {
                while (it.hasNext()) {
                    System.out.println(it.nextLine());
                }
            } finally {
                LineIterator.closeQuietly(it);
            }
        } catch (Exception ex) {
            output = ex.getMessage();
        }
        return output;
    }

    /**
     * Returns the connector, failing with a descriptive exception when the
     * constructor was unable to create it.
     */
    private SftpConnector requireConnector() {
        if (this.connector == null) {
            throw new IllegalStateException(
                    "SFTP connector was not initialised; check the connection properties.");
        }
        return this.connector;
    }

    /** Reports whether there is a connector whose session exists and is connected. */
    private boolean isSftpSessionConnected() {
        return this.connector != null
                && this.connector.getSession() != null
                && this.connector.getSession().isConnected();
    }
}

预期结果是看到将CSV文件的每一行打印到控制台。

4 个答案:

答案 0 :(得分:1)

// Read the file one line at a time; try-with-resources closes the reader
// when the loop finishes or an exception is thrown.
Charset yourCharset = Charset.forName("UTF-8");
try (BufferedReader br = Files.newBufferedReader(your_file, yourCharset)) {
    // readLine() returns null once the end of the file is reached.
    for (String current = br.readLine(); current != null; current = br.readLine()) {
        System.out.println(current);
    }
} catch (IOException ex) {
    // handle exception
}

答案 1 :(得分:1)

或者在Groovy中:

// Open the file as UTF-8 text and print it line by line; each line is
// passed to the inner closure as the implicit parameter `it`.
yourFile.withReader('UTF-8') { reader ->
    reader.eachLine {
        println it
    }
}

答案 2 :(得分:1)

在所有测试中,您正在逐行读取文件以将其传输到System.out

如果您需要将输入流传输到输出流,则可以使用这种方法,它将是最快的:

// Copy the file's bytes straight to System.out without splitting into lines.
yourFile.withInputStream { input ->
    System.out << input
}

请注意,System.out的运行速度很慢...

答案 3 :(得分:0)

原来是我的CSV文件本身有问题:行尾没有换行符,结果整个文档被当作一行读取。