我试图在文件(文件B)中搜索来自另一个文件(文件A)的字符串。如果在文件A中找到该字符串,则从文件B中打印整行,并在读取行时将其进度更新为相应的JProgressBar。
以下代码正常运行正常,但问题在于性能。处理大文件时,扫描仅需要大约15分钟。
我真的在寻找一种处理大型文件的方法,例如500K行。
请建议是否可以增强此功能以处理大型文件或我的代码的哪一部分导致速度变慢。
import java.awt.BorderLayout;
import java.awt.EventQueue;
import java.awt.TextField;
import javax.swing.JFrame;
import javax.swing.JPanel;
import javax.swing.border.EmptyBorder;
import javax.swing.JFileChooser;
import javax.swing.JProgressBar;
import javax.swing.JTextArea;
import javax.swing.JButton;
import java.awt.Font;
import javax.swing.JTextField;
import javax.swing.JLabel;
import javax.swing.JScrollPane;
import java.awt.event.ActionListener;
import java.awt.event.ActionEvent;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.time.LocalDateTime;
public class Test_MultiJProgressBars_MultiFileReads extends JFrame {
private JPanel contentPane;
private JTextField textField_File1;
private JTextField textField_File2;
private JProgressBar progressBar_F1;
private JProgressBar progressBar_F2;
private JTextArea textArea_File1;
/**
* Launch the application.
*/
public static void main(String[] args) {
EventQueue.invokeLater(new Runnable() {
public void run() {
try {
Test_MultiJProgressBars_MultiFileReads frame = new Test_MultiJProgressBars_MultiFileReads();
frame.setVisible(true);
} catch (Exception e) {
e.printStackTrace();
}
}
});
}
/**
* Create the frame.
*/
public void FileLineCount (JTextField TexFieldName, JProgressBar ProgressBarName) throws IOException {
File FileX = new File (TexFieldName.getText());
FileReader Fr = new FileReader(FileX);
LineNumberReader Lnr = new LineNumberReader(Fr);
int lineNumber =0 ;
while (Lnr.readLine() !=null) {
lineNumber++;
}
// Setting Maximum Value on ProgressBar
ProgressBarName.setMaximum(lineNumber);
System.out.println("Total line in file : "+lineNumber);
Lnr.close();
}
public void ScanFileForMatches() {
File My_Refernce_File = new File (textField_File1.getText());
File My_Source_File = new File (textField_File2.getText());
int F1_JP_v = 0;
int F2_JP_v = 0;
try {
BufferedReader F1_br = new BufferedReader(new FileReader(My_Refernce_File));
String F1_br_Line;
String F2_br_Line = null;
while ((F1_br_Line = F1_br.readLine()) !=null) {
//System.out.println("File 1 : "+F1_br_Line+"\n");
F1_JP_v++;
progressBar_F1.setValue(F1_JP_v);
try {
BufferedReader F2_br = new BufferedReader(new FileReader(My_Source_File));
while ((F2_br_Line = F2_br.readLine()) !=null) {
F2_JP_v++;
progressBar_F2.setValue(F2_JP_v);
if (F1_br_Line.contains(F2_br_Line)) {
System.out.println("MATCHED --- File 1:"+F1_br_Line+" File 2:"+F2_br_Line+"\n");
textArea_File1.append(LocalDateTime.now()+" : SYSOUT : MATCHED --- File 1:= "+F1_br_Line"\n");
} else {
System.out.println("NOMATCH --- File 1:"+F1_br_Line+" File 2:"+F2_br_Line+"\n");
}
// Reset Progressbar after each Loop.
progressBar_F2.setValue(0);
}
// Set Progressbar to last value in the loop.
progressBar_F2.setValue(F2_JP_v);
F2_br.close();
} catch (Exception e) {
// TODO: handle exception
}
}
F1_br.close();
} catch (Exception e) {
// TODO: handle exception
}
}
public Test_MultiJProgressBars_MultiFileReads() {
setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
setBounds(100, 100, 799, 568);
contentPane = new JPanel();
contentPane.setBorder(new EmptyBorder(5, 5, 5, 5));
setContentPane(contentPane);
contentPane.setLayout(null);
progressBar_F1 = new JProgressBar();
progressBar_F1.setStringPainted(true);
progressBar_F1.setBounds(10, 96, 763, 50);
contentPane.add(progressBar_F1);
progressBar_F2 = new JProgressBar();
progressBar_F2.setStringPainted(true);
progressBar_F2.setBounds(10, 169, 763, 50);
contentPane.add(progressBar_F2);
JScrollPane scrollPane = new JScrollPane();
scrollPane.setBounds(10, 264, 763, 109);
contentPane.add(scrollPane);
textArea_File1 = new JTextArea();
scrollPane.setViewportView(textArea_File1);
JScrollPane scrollPane_1 = new JScrollPane();
scrollPane_1.setBounds(10, 409, 763, 110);
contentPane.add(scrollPane_1);
JTextArea textArea_FIle2 = new JTextArea();
scrollPane_1.setViewportView(textArea_FIle2);
JButton btnStart = new JButton("SCAN");
btnStart.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent arg0) {
// Call FileLineCount Method and setMaximum value on respective JPorgress Bars.
try {
FileLineCount(textField_File1,progressBar_F1);
FileLineCount(textField_File2,progressBar_F2);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
// Call ScanFileForMatches to Scan files and Update JProgress Bars.
Thread t1 = new Thread (new Runnable() {
@Override
public void run() {
// TODO Auto-generated method stub
//ScanFileForMatches();
ScanFileForMatches_TEST();
}
});
t1.start();
}
});
btnStart.setFont(new Font("Tahoma", Font.BOLD, 11));
btnStart.setBounds(684, 10, 89, 57);
contentPane.add(btnStart);
textField_File1 = new JTextField();
textField_File1.setBounds(10, 10, 486, 23);
contentPane.add(textField_File1);
textField_File1.setColumns(10);
textField_File2 = new JTextField();
textField_File2.setBounds(10, 44, 486, 23);
contentPane.add(textField_File2);
textField_File2.setColumns(10);
JButton btnFile_File1 = new JButton("File 1");
btnFile_File1.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent arg0) {
JFileChooser JFC_File1 = new JFileChooser();
JFC_File1.showOpenDialog(null);
File JFC_File1_Name = JFC_File1.getSelectedFile();
textField_File1.setText(JFC_File1_Name.getAbsolutePath());
}
});
btnFile_File1.setBounds(506, 10, 89, 23);
contentPane.add(btnFile_File1);
JButton btnFile_File2 = new JButton("File 2");
btnFile_File2.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent arg0) {
JFileChooser JFC_File2 = new JFileChooser();
JFC_File2.showOpenDialog(null);
File JFC_File2_Name = JFC_File2.getSelectedFile();
textField_File2.setText(JFC_File2_Name.getAbsolutePath());
}
});
btnFile_File2.setBounds(506, 44, 89, 23);
contentPane.add(btnFile_File2);
JLabel lblFile = new JLabel("File 1 Progress");
lblFile.setBounds(20, 78, 137, 14);
contentPane.add(lblFile);
JLabel lblFile_1 = new JLabel("File 2 Progress");
lblFile_1.setBounds(20, 150, 137, 14);
contentPane.add(lblFile_1);
JLabel lblFileLog = new JLabel("File 2 Log");
lblFileLog.setBounds(20, 384, 147, 14);
contentPane.add(lblFileLog);
JLabel lblFileLog_1 = new JLabel("File 1 Log");
lblFileLog_1.setBounds(20, 239, 147, 14);
contentPane.add(lblFileLog_1);
}
}
答案 0 :(得分:1)
您当前的解决方案是通过 file1 进行线性迭代,并且每行迭代遍历 file2 。这有效地导致O(F1*F2)
的运行时间:运行所需的时间将通过文件中的行(F1和F2)的数字来缩放 二次 。每次检查匹配时, file2 都会被加入内存,非常昂贵。
更好的解决方案是将 file2 读入内存(例如 ArrayList )并对其进行排序:
Collections.sort(file2);
然后 file1 可以像您当前那样进行迭代,并且对于每一行,使用二进制搜索来检查 file2 中是否存在该字符串:
for (String s1 : file1) int index = Collections.binarySearch(file2, s1);
如果s1位于 file2 中,则索引将为非负数。
此解决方案采用线性时间而不是二次,因此在较大输入上更好地缩放 。
如果您希望改善排序所需的时间,请考虑MSD Sort而不是Collections.sort
。这只是一个小小的进步,但是嘿,这很重要。
答案 1 :(得分:0)
您可以尝试对文件A中的行进行排序,即您正在搜索的文件。这样,您就可以在其中执行二进制搜索(inspiration)。
第二步,我会创建两个线程:
B-reader在内存中取一行行(而不是一行)。然后它启动一个A-reader线程,执行二进制搜索,而B-reader继续通过B来获取下一个行块。
您可以在第一场比赛结束后结束内线圈(如果您被允许)。
我会尝试减少try块的大小,这可能会阻止一些JVM优化。即使性能没有太大变化,我也没有看到包含在try块中的任何不能触发异常的指令(ref)。
您应该检测代码以了解大部分时间花在哪里,以便您可以对代码的这一部分进行微调。