给定InputStream替换字符并生成OutputStream

时间:2016-12-23 13:25:35

标签: java inputstream outputstream

我有很多大文件,需要通过替换其中的某些字符将它们转换为CSV。

我正在寻找一种可靠的方法:给定一个InputStream,返回一个OutputStream,并将其中所有的字符c1替换为c2。

这里的诀窍是并行读写,我不能将整个文件放在内存中。

如果我想同时读写,我是否需要在单独的线程中运行它?

非常感谢您的建议。

3 个答案:

答案 0 :(得分:3)

要将数据从输入流复制到输出流,您可以边读边写:一次处理一个字节(或字符),或者一次处理一行。

这是一个读取文件的示例,该文件将所有“x”字符转换为“y”。

BufferedInputStream in = new BufferedInputStream(new FileInputStream("input.dat"));
BufferedOutputStream out = new BufferedOutputStream(new FileOutputStream("output.dat"));
int ch;
while((ch = in.read()) != -1) {
        if (ch == 'x') ch = 'y';
        out.write(ch);
}
out.close();
in.close();

或者如果可以使用Reader并一次处理一行,那么可以使用这个方法:

BufferedReader reader = new BufferedReader(new FileReader("input.dat"));
PrintWriter writer = new PrintWriter(
      new BufferedOutputStream(new FileOutputStream("output.dat")));
String str;
while ((str = reader.readLine()) != null) {
    str = str.replace('x', 'y');     // replace character at a time
    str = str.replace("abc", "ABC"); // replace string sequence
    writer.println(str);
}
writer.close();
reader.close();

BufferedInputStream 和 BufferedReader 都会预读数据,并在缓冲区中保留约8K的数据(前者为字节,后者为字符)以提高性能。因此可以处理非常大的文件,而任何时刻内存中只保留约8K的数据。

答案 1 :(得分:1)

            FileWriter writer = new FileWriter("Report.csv");
            BufferedReader reader = new BufferedReader(new InputStreamReader(YOURSOURCE, Charsets.UTF_8));
            String line;
            while ((line = reader.readLine()) != null) {
                line.replace('c1', 'c2');
                writer.append(line);
                writer.append('\n');
            }
            writer.flush();
            writer.close();

答案 2 :(得分:0)

您可以在此处找到相关答案:Filter (search and replace) array of bytes in an InputStream

我采用了该帖子中 @aioobe 的答案,并据此用Java实现了一个可替换内容的输入流模块,您可以在我的 GitHub Gist 中找到它:https://gist.github.com/lhr0909/e6ac2d6dd6752871eb57c4b083799947

也将源代码放在这里:

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;

/**
 * Created by simon on 8/29/17.
 *
 * <p>An input stream filter that replaces every occurrence of one byte sequence with
 * another while the data is being read, holding at most {@code search.length} bytes of
 * look-ahead in memory.
 *
 * <p>Matching scans left to right over the wrapped stream's bytes. After a match the
 * whole search sequence is consumed and the replacement is emitted as-is; replacement
 * output is never scanned again for further matches.
 *
 * <p>Mark/reset is not supported. Instances keep unsynchronized mutable state.
 */
public class ReplacingInputStream extends FilterInputStream {

    /** Value returned by {@code read()} at end of stream. */
    private static final int EOF = -1;

    /** Look-ahead bytes (each 0..255) read from the wrapped stream but not yet emitted. */
    private final Queue<Integer> inQueue = new LinkedList<>();
    /** Bytes (each 0..255) ready to be returned to the caller. */
    private final Queue<Integer> outQueue = new LinkedList<>();
    private final byte[] search;
    private final byte[] replacement;
    /** Set once the wrapped stream has reported end of stream; it is not read again. */
    private boolean endReached;

    /**
     * Creates a filter that replaces {@code search} with {@code replacement}, both encoded
     * with the platform default charset ({@link String#getBytes()}).
     *
     * @param in          the stream to read from
     * @param search      the text to look for; must not be empty
     * @param replacement the text to emit in place of each match; may be empty
     * @throws IllegalArgumentException if {@code search} encodes to zero bytes
     */
    public ReplacingInputStream(InputStream in, String search, String replacement) {
        this(in, search.getBytes(), replacement.getBytes());
    }

    /**
     * Creates a filter that replaces the byte sequence {@code search} with {@code replacement}.
     *
     * @param in          the stream to read from
     * @param search      the bytes to look for; must not be empty
     * @param replacement the bytes to emit in place of each match; may be empty
     * @throws IllegalArgumentException if {@code search} is empty
     */
    public ReplacingInputStream(InputStream in, byte[] search, byte[] replacement) {
        super(in);

        // An empty pattern would "match" without consuming input and never terminate.
        if (search.length == 0) {
            throw new IllegalArgumentException("search pattern must not be empty");
        }

        // Defensive copies so later changes to the caller's arrays cannot affect matching.
        this.search = search.clone();
        this.replacement = replacement.clone();
    }

    /** Returns true when the look-ahead queue starts with the complete search sequence. */
    private boolean isMatchFound() {
        Iterator<Integer> iterator = inQueue.iterator();

        for (byte b : search) {
            // Queue entries are unsigned (0..255); mask the signed byte before comparing,
            // otherwise pattern bytes >= 0x80 could never match.
            if (!iterator.hasNext() || (b & 0xFF) != iterator.next()) {
                return false;
            }
        }

        return true;
    }

    /** Fills the look-ahead queue up to the search length, or until the source is exhausted. */
    private void readAhead() throws IOException {
        while (!endReached && inQueue.size() < search.length) {
            int next = super.read();

            if (next == EOF) {
                endReached = true;
            } else {
                inQueue.offer(next);
            }
        }
    }

    /**
     * Reads the next byte of the filtered stream.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        // Loop because a match with an empty replacement produces no output byte.
        while (outQueue.isEmpty()) {
            readAhead();

            if (isMatchFound()) {
                for (int i = 0; i < search.length; i++) {
                    inQueue.remove();
                }

                for (byte b : replacement) {
                    // Emit unsigned values so 0xFF is not mistaken for end of stream.
                    outQueue.offer(b & 0xFF);
                }
            } else if (inQueue.isEmpty()) {
                // Nothing buffered and the source is exhausted.
                return EOF;
            } else {
                outQueue.add(inQueue.remove());
            }
        }

        return outQueue.remove();
    }

    /** Reads up to {@code b.length} filtered bytes; see {@link #read(byte[], int, int)}. */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} filtered bytes into {@code b} starting at {@code off}.
     * Mirrors the base {@link InputStream} implementation but goes through this class's
     * {@link #read()} so that replacements are applied.
     *
     * @return the number of bytes stored, 0 if {@code len} is 0, or -1 at end of stream
     * @throws IOException               if reading the first byte fails
     * @throws NullPointerException      if {@code b} is null
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit in {@code b}
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException();
        } else if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }

        int c = read();
        if (c == EOF) {
            return EOF;
        }
        b[off] = (byte) c;

        int i = 1;
        try {
            for (; i < len; i++) {
                c = read();
                if (c == EOF) {
                    break;
                }
                b[off + i] = (byte) c;
            }
        } catch (IOException ignored) {
            // Deliberate, as in InputStream.read(byte[], int, int): a failure after the
            // first byte is treated like end of stream so the bytes already stored are
            // still delivered to the caller.
        }
        return i;
    }

    /**
     * Skips up to {@code n} bytes of the filtered stream by reading and discarding them,
     * so that skipped data still passes through the replacement logic.
     *
     * @return the number of bytes actually skipped
     */
    @Override
    public long skip(long n) throws IOException {
        long skipped = 0;
        while (skipped < n && read() != EOF) {
            skipped++;
        }
        return skipped;
    }

    /** Mark/reset would bypass the look-ahead buffers, so it is reported as unsupported. */
    @Override
    public boolean markSupported() {
        return false;
    }
}