加快文件读取速度

时间:2015-08-18 13:45:52

标签: java mongodb csv neo4j

我要将我的数据从MongoDB传递给Neo4j。 所以,我在.csv中导出了MongoDB文档。正如你可以阅读here我对阵列统一有问题。 所以我写了一个java程序来解决这个问题。
这是从MongoDB导出的.csv(注意统一数组的不同):

_id,official_name,common_name,country,started_by.day,started_by.month,started_by.year,championship,stadium.name,stadium.capacity,palmares.first_prize,palmares.second_prize,palmares.third_prize,palmares.fourth_prize,average_age,squad_value,foreigners,uniform.0,uniform.1,uniform.2
0,yaDIXxLAOV,WWYWLqPcYM,QsVwiNmeGl,7,9,1479,oYKGgstIMv,qskcxizCkd,8560,10,25,9,29,16,58,6,first_colour,second_colour,third_colour

以下是在Neo4j中导入的方式:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;

public class ConvertireCSV {

    public static void main(String[] args) throws IOException {

        FileReader f;
        f=new FileReader("output.csv");

        BufferedReader b;
        b=new BufferedReader(f);

        String firstLine= b.readLine();
        int uniform = firstLine.indexOf("uniform");
        firstLine=firstLine.substring(0, uniform);
        firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n";

        String line="";
        String csv="";

        while(true) {
            line=b.readLine();
            if(line==null)
                break;
            int u = line.indexOf("\"[");
            line=line.substring(0, u);
            line=line + "first_colour,second_colour,third_colour \n";
            csv=csv+line;                   
        }

        File file = new File("outputForNeo4j.csv");

        if(file.createNewFile()) {
            PrintWriter pw = new PrintWriter(file); 
            pw.println(firstLine + csv);
            System.out.println("New file \"outputForNeo4j.csv\" created.");
            pw.flush();
            pw.close();
        }
    }
}

我的代码有效,但我必须转换.k文件的500k行,而程序太慢了(20分钟后它仍在工作:/):

.selectized

如何让它更快?

3 个答案:

答案 0 :(得分:1)

 csv=csv+line;

字符串连接是昂贵的操作。我建议使用bufferedWriter。 像这样的东西:

FileReader f;
     f=new FileReader("output.csv");

     BufferedReader b;
     BufferedWriter out;
     b=new BufferedReader(f);
     try{
      out = new BufferedWriter(new FileWriter("outputForNeo4j.csv"));
     } catch(Exception e){
        //cannot create file 
     }
     System.out.println("New file \"outputForNeo4j.csv\" created.");

     String firstLine= b.readLine();
     int uniform = firstLine.indexOf("uniform");
     firstLine=firstLine.substring(0, uniform);
     firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n";

     String line="";
     String csv="";

     out.write(firstLine);

     while(true) {
         line=b.readLine();
         if(line==null)
             break;
         int u = line.indexOf("\"[");
         line=line.substring(0, u);
         line=line + "first_colour,second_colour,third_colour \n";
        out.write(line);               
     }         
        out.flush();

     }

答案 1 :(得分:1)

一些改进代码的基本方法:

  1. 确保您的变量达到了所需的最小范围。如果您不在循环之外需要line,请不要在循环之外声明它。
  2. 简单字符串的连接通常很慢。使用StringBuilder来加快速度。
  3. 为什么要缓冲字符串呢?好像浪费了记忆。只需打开输出流到目标文件,并在处理时将行写入新文件。
  4. <强>示例:

    我认为你不需要第一点的例子。 第二件事看起来像这样:

    ...
    StringBuilder csv = new StringBuilder();
    while(true) {
        ...
        csv.append(line);
    }
    ...
    if(file.createNewFile()) {
        ...
        pw.println(firstLine + csv.toString());
        ...
    }
    

    对于第三点,重写会更广泛:

    public static void main(String[] args) throws IOException {
        FileReader f;
        f=new FileReader("output.csv");
    
        BufferedReader b;
        b=new BufferedReader(f);
    
        String firstLine= b.readLine();
        int uniform = firstLine.indexOf("uniform");
        firstLine=firstLine.substring(0, uniform);
        firstLine = firstLine + "uniform.0,uniform.1,uniform.2\n";
    
        File file = new File("outputForNeo4j.csv");
        if(!file.createNewFile()) {
            // all work would be for nothing! Bailing out.
            return;
        }
    
        PrintWriter pw = new PrintWriter(file); 
        pw.print(firstLine);
    
        while(true) {
            String line=b.readLine();
            if(line==null)
                break;
            int u = line.indexOf("\"[");
            line=line.substring(0, u);
            line=line + "first_colour,second_colour,third_colour \n";
            pw.print(line);                   
        }
    
        System.out.println("New file \"outputForNeo4j.csv\" created.");
        pw.flush();
        pw.close();
        b.close()
    }
    

答案 2 :(得分:1)

结果:
test0:运行:241次迭代,avarage milis = 246
test1:运行:249次迭代,avarage milis = 118
test2:运行:269次迭代,avarage milis = 5
test3:运行:241次迭代,avarage milis = 2

import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.Random;

public class Tester {

private static final String filePath = "c:\\bigFile.txt";
//private static final String filePath = "c:\\bigfileNewLine.txt";

private static final int numOfMethods = 4;
private static final int numOfIter = 1000;
public Tester() throws NoSuchMethodException {
    System.out.println("Tester.Tester");

    int[] milisArr = new int [numOfMethods];
    int[] actualRun = new int [numOfMethods];

    Random rnd = new Random(System.currentTimeMillis());
    Long startMs = 0l, endMs = 0l;
    Method[] method = new Method[numOfMethods];
    for (int i = 0; i < numOfMethods; i++)
        method[i] = this.getClass().getMethod("test" + i);

    int testCount = 0;
    while (testCount++ < numOfIter) {
        int testMethod = rnd.nextInt(numOfMethods);
        Method m = method[testMethod];
        try {
            System.gc();
            startMs = System.currentTimeMillis();
            String retval = (String) m.invoke(null);
            endMs = System.currentTimeMillis();

        } catch (IllegalAccessException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        } catch (InvocationTargetException e) {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
        milisArr[testMethod] += (endMs - startMs);
        actualRun[testMethod]++;
        System.out.println("Test name: " + m.getName() + " testCount=" + testCount + " Of " + numOfIter + " iteration, Total time :" + (endMs - startMs) / 1000.0 + " seconds");
    }

    System.out.println("Test Summery :");
    for (int i = 0; i < numOfMethods; i++)
        System.out.println("test" + i + " :  Runs: " + actualRun[i] + " iterations ,avarage milis = " + milisArr[i]/numOfIter);

}


public static String test0() throws IOException {
    InputStream file = getInputStream();
    StringBuffer textBuffer = new StringBuffer();
    int c;
    while ((c = file.read()) != -1)
        textBuffer.append((char) c);


    file.close();
    return textBuffer.toString();
}

public static String test1() throws IOException {
    Reader reader = new FileReader(new File(filePath));
    BufferedReader br = new BufferedReader(reader);
    String line = br.readLine();
    String result = line;
    while (line != null) {
        line = br.readLine();
        if (line == null) {
        } else {
            result = result + "\n" + line;
        }
    }
    br.close();
    reader.close();
    return result;
}


public static String test2() throws IOException {
    byte[] buf = new byte[1024];
    int l;

    InputStream is = getInputStream();
    StringBuffer tmpBuf = new StringBuffer();
    while ((l = is.read(buf)) != -1) {
        tmpBuf.append(new String(buf, 0, l));
    }

    is.close();
    return tmpBuf.toString();
}

public static String test3() throws IOException {
    File source = new File(filePath);
    final DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(source)));
    final byte[] buffer = new byte[(int) source.length()];
    dis.readFully(buffer);
    dis.close();
    return new String(buffer, "UTF-8");
}

private static InputStream getInputStream() {
    try {
        return new FileInputStream(filePath);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        return null;
    }
}

public static void main(String[] args) {
    try {
        new Tester();
    } catch (NoSuchMethodException e) {
        e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
    }
}

}