我正在将 PDF 文件转换为文本,并删除只含页码的行,但问题是这样会留下连续 2 行的空行。所以我想删除连续出现 2 行或更多的空行,而保留单独出现的 1 行空行。我的代码是:
// Open the file
FileInputStream fstream = new FileInputStream("C:\\Users\\Vivek\\Desktop\\novels\\Me1.txt");
BufferedReader br = new BufferedReader(new InputStreamReader(fstream));
String strLine;
String s=null;
//Read File Line By Line
while ((strLine = br.readLine()) != null) {
String pattern = "^[0-9]+[\\s]*$";
strLine=strLine.replaceAll(pattern, " ");
writeResult("C:\\Users\\Vivek\\Desktop\\novels\\doci.txt",strLine);
}
//Close the input stream
br.close();
}
/**
 * Appends {@code text} followed by a line separator to the given file,
 * creating the file first if it does not exist yet.
 *
 * <p>Failures are reported on the console instead of being thrown, so a
 * caller looping over many lines keeps running.
 *
 * @param writeFileName path of the file to append to
 * @param text          the line to append (without a trailing line separator)
 */
public static void writeResult(String writeFileName, String text)
{
    File log = new File(writeFileName);
    try {
        if (!log.exists()) {
            System.out.println("We had to make a new file.");
            log.createNewFile();
        }
        // Second FileWriter argument 'true' selects append mode; try-with-resources
        // guarantees the writer is closed on every path.
        try (PrintWriter out = new PrintWriter(new FileWriter(log, true))) {
            out.append(text);
            out.println();
            // PrintWriter never throws on write errors; it only records them,
            // so the error flag has to be checked explicitly.
            if (out.checkError()) {
                System.out.println("COULD NOT LOG!!");
                System.err.println("Write error while appending to " + writeFileName);
            }
        }
    } catch (IOException e) {
        System.out.println("COULD NOT LOG!!");
        // Keep the cause visible instead of silently discarding it.
        System.err.println("Failed to append to " + writeFileName + ": " + e);
    }
}
请帮助我。
答案 0 :(得分:1)
您可以像 SkrewEverything 建议的那样,在方法中使用一个连续空行计数器。
或使用这样的正则表达式进行后处理:
package testingThings;
import java.awt.Desktop;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Small demo that collapses every run of blank lines in a text file into a
 * single blank line.
 */
public class EmptyLinesReducer {

    /**
     * Two or more consecutive line terminators, i.e. at least one blank line.
     * The optional {@code \r} makes it match both LF and CRLF files; a
     * CRLF-only pattern would leave LF files untouched.
     */
    private static final String CONSECUTIVE_LINE_BREAKS = "(?:\\r?\\n){2,}";

    /** Charset used for both reading and writing so non-ASCII text round-trips unchanged. */
    private static final String CHARSET_NAME = "UTF-8";

    /**
     * Reads {@code in} as UTF-8, replaces each run of two or more line breaks
     * with exactly two {@code \n} characters (one blank line), and writes the
     * result as UTF-8 to {@code text_with_reduced_empty_lines.txt} in the
     * current working directory.
     *
     * @param in the file to read
     * @return the path of the file that was written
     * @throws UnsupportedEncodingException if UTF-8 is not available
     * @throws IOException if reading or writing fails
     */
    public Path reduceEmptyLines(Path in) throws UnsupportedEncodingException, IOException {
        Path path = Paths.get("text_with_reduced_empty_lines.txt");
        String originalContent = new String(Files.readAllBytes(in), CHARSET_NAME);
        String reducedContent = originalContent.replaceAll(CONSECUTIVE_LINE_BREAKS, "\n\n");
        // Encode with the same charset used for decoding; the no-arg getBytes()
        // would fall back to the platform default.
        Files.write(path, reducedContent.getBytes(CHARSET_NAME));
        return path;
    }

    /**
     * Writes a sample file {@code text_with_multiple_empty_lines.txt} in the
     * current working directory containing four text lines separated by
     * 3, 1 and 5 empty lines respectively.
     *
     * @return the path of the file that was written
     * @throws IOException if the file cannot be opened for writing
     */
    public Path createFileWithEmptyLines() throws IOException {
        Path path = Paths.get("text_with_multiple_empty_lines.txt");
        // try-with-resources closes (and flushes) the writer on every path.
        try (PrintWriter out = new PrintWriter(new FileWriter(path.toFile()))) {
            out.println("line1");
            printEmptyLines(out, 3);
            out.println("line2");
            printEmptyLines(out, 1);
            out.println("line3");
            printEmptyLines(out, 5);
            out.println("line4");
        }
        return path;
    }

    /** Writes {@code count} empty lines to {@code out}. */
    private static void printEmptyLines(PrintWriter out, int count) {
        for (int i = 0; i < count; i++) {
            out.println();
        }
    }

    /**
     * Creates the sample file, reduces its empty lines and opens the result
     * in the default desktop application when one is available.
     */
    public static void main(String[] args) throws UnsupportedEncodingException, IOException {
        EmptyLinesReducer app = new EmptyLinesReducer();
        Path in = app.createFileWithEmptyLines();
        Path out = app.reduceEmptyLines(in);
        // Desktop.getDesktop() throws on headless systems, so check before
        // opening the default program for this file.
        if (Desktop.isDesktopSupported() && Desktop.getDesktop().isSupported(Desktop.Action.OPEN)) {
            Desktop.getDesktop().open(out.toFile());
        } else {
            System.out.println("Result written to " + out.toAbsolutePath());
        }
    }
}