我用Java编写代码来读取文件的内容。但它适用于小行文件,不能超过1000行文件。请告诉我我在下面的程序中犯了什么错误。
程序:
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class aaru
{
public static void main(String args[]) throws FileNotFoundException
{
File sourceFile = new File("E:\\parser\\parse3.txt");
File destinationFile = new File("E:\\parser\\new.txt");
FileInputStream fileIn = new FileInputStream(sourceFile);
FileOutputStream fileOut = new FileOutputStream(destinationFile);
DataInputStream dataIn = new DataInputStream(fileIn);
DataOutputStream dataOut = new DataOutputStream(fileOut);
String str = "";
String[] st;
String sub[] = null;
String word = "";
String contents = "";
String total = "";
String stri = "";
try
{
while ((contents = dataIn.readLine()) != null)
{
total = contents.replaceAll(",", "");
String str1 = total.replaceAll("--", "");
String str2 = str1.replaceAll(";", "");
String str3 = str2.replaceAll("&", "");
String str4 = str3.replaceAll("^", "");
String str5 = str4.replaceAll("#", "");
String str6 = str5.replaceAll("!", "");
String str7 = str6.replaceAll("/", "");
String str8 = str7.replaceAll(":", "");
String str9 = str8.replaceAll("]", "");
String str10 = str9.replaceAll("\\?", "");
String str11 = str10.replaceAll("\\*", "");
String str12 = str11.replaceAll("\\'", "");
Pattern pattern =
Pattern.compile("\\s+", Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE);
Matcher matcher = pattern.matcher(str12);
//boolean check = matcher.find();
String result = str12;
Pattern p = Pattern.compile("^www\\.|\\@");
Matcher m = p.matcher(result);
stri = m.replaceAll(" ");
int i;
int j;
st = stri.split("\\.");
for (i = 0; i < st.length; i++)
{
st[i] = st[i].trim();
/*if(st[i].startsWith(" "))
st[i]=st[i].substring(1,st[i].length);*/
sub = st[i].split(" ");
if (sub.length > 1)
{
for (j = 0; j < sub.length - 1; j++)
{
word = word + sub[j] + "," + sub[j + 1] + "\r\n";
}
}
else
{
word = word + st[i] + "\r\n";
}
}
}
System.out.println(word);
dataOut.writeBytes(word + "\r\n");
fileIn.close();
fileOut.close();
dataIn.close();
dataOut.close();
} catch (Exception e)
{
System.out.print(e);
}
}
}
答案 0 :(得分:3)
为什么你的代码不能读取完整文件并不是很明显,但这里有两个提示:
首先:不要使用DataInputStream
来读取整行。而是将FileInputStream
包裹在InputStreamReader
(理想情况下提供编码)和BufferedReader
(由DataInputStream.readLine()
的JavaDoc记录):
像这样:
BufferedReader reader = new BufferedReader(new InputStreamReader(fileIn, "UTF-8"));
第二:当你不知道如何处理异常时至少打印它的堆栈跟踪如下:
catch(Exception e)
{
e.printStackTrace();
}