使用Java编辑HTML文件

时间:2016-11-07 21:04:01

标签: java jsoup html-parsing

我能够解析HTML文件并进行必要的更改,但我无法将其存储为输出html文件,即更改在控制台中可见,但在html文件中不可见。 我的要求是将一个html文件作为输入,编辑它然后存储带有新更改的html文件。到目前为止,我所能做的就是在控制台中显示更改。

package html_editer;
import java.util.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.*;
import org.jsoup.select.*;
import java.io.*;

/**
 * Reads an HTML file, appends configured values after label words found in the
 * text of the document body, and prints the resulting markup to standard output.
 *
 * <p>The modified markup is only accumulated in {@link #buildHTML} and printed by
 * {@link #main(String[])}; this class does not write anything back to disk.
 */
public class editer
{
    /** Directory (with trailing separator) that contains the input file. */
    static String htmlLocation = "C:\\Desktop\\hello\\";
    /** Input file name without the ".html" extension. */
    static String fileName = "x";
    /** Raw markup as read from disk by {@link #readHTML(String, String)}. */
    static StringBuilder buildTmpHTML = new StringBuilder();
    /** Markup produced by {@link #modifyHTML()}. */
    static StringBuilder buildHTML = new StringBuilder();
    /** Value appended after words containing "Name". */
    static String name = "Ankit";
    /** Value appended after words containing "Address" or "Residence". */
    static String address = "India";
    /** Value appended after words containing "Phone-Number" or "PhoneNumber". */
    static String phoneNumber = "1234567890";

    /**
     * Reads the configured file, rewrites its text and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        readHTML(htmlLocation, fileName);
        modifyHTML();

        // The result is only printed; no output file is produced here.
        System.out.println(buildHTML.toString());

        buildTmpHTML.setLength(0);
        buildHTML.setLength(0);

        System.exit(0);
    }

    /**
     * Appends the content of {@code directory + fileName + ".html"} to
     * {@link #buildTmpHTML}. On an I/O failure the stack trace is printed and the
     * JVM exits with status 1.
     *
     * @param directory directory prefix, concatenated as-is in front of the file name
     * @param fileName  file name without the ".html" extension
     */
    public static void readHTML(String directory, String fileName)
    {
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(directory + fileName + ".html")))
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                // readLine() strips the terminator; put one back so that text on
                // adjacent lines is not fused together.
                buildTmpHTML.append(line).append('\n');
            }
        }
        catch (IOException e)
        {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Parses {@link #buildTmpHTML}, rewrites every text node below {@code <body>}
     * via {@link #appendValuesToLabels(String)} and appends the serialized
     * document to {@link #buildHTML}.
     */
    public static void modifyHTML()
    {
        Document doc = Jsoup.parse(buildTmpHTML.toString());

        // Collect first, modify afterwards, so the tree is not touched mid-traversal.
        final List<TextNode> nodesToChange = new ArrayList<TextNode>();

        doc.body().traverse(new NodeVisitor()
        {
            @Override
            public void head(Node node, int depth)
            {
                // Nothing to do on the way down.
            }

            @Override
            public void tail(Node node, int depth)
            {
                if (node instanceof TextNode)
                {
                    nodesToChange.add((TextNode) node);
                }
            }
        });

        for (TextNode textNode : nodesToChange)
        {
            String original = textNode.getWholeText();
            String updated = appendValuesToLabels(original);
            if (!updated.equals(original))
            {
                // Keep the node a TextNode so that characters such as '<' and '&'
                // are escaped again when the document is serialized.
                textNode.text(updated);
            }
        }

        buildHTML.append(doc.html());
    }

    /**
     * Returns {@code text} with the configured value appended after every distinct
     * whitespace-separated word that contains one of the known labels.
     *
     * @param text unescaped text of a single text node
     * @return the text with values inserted; equal to {@code text} when no word matches
     */
    private static String appendValuesToLabels(String text)
    {
        // De-duplicate so that a word occurring several times is expanded only once.
        Set<String> units = new HashSet<String>(Arrays.asList(text.trim().split("\\s+")));

        String newText = text;
        for (String rpl : units)
        {
            // String.replace is literal: words taken from the document must not be
            // interpreted as regular expressions (or '$' group references).
            if (rpl.contains("Name"))
            {
                newText = newText.replace(rpl, rpl + " " + name);
            }
            if (rpl.contains("Address") || rpl.contains("Residence"))
            {
                newText = newText.replace(rpl, rpl + " " + address);
            }
            if (rpl.contains("Phone-Number") || rpl.contains("PhoneNumber"))
            {
                newText = newText.replace(rpl, rpl + " " + phoneNumber);
            }
        }
        return newText;
    }
}

0 个答案:

没有答案