Question

我有以下XML：

<customer>
   <name>Customer name</name>
   <address>
      <postalcode>94510</postalcode>
      <town>Green Bay</town>
   </address>
   <phone>0645878787</phone>
</customer>

我想只使用正则表达式（REGEX）：如果邮政编码是 94510，就将整个 <address>..</address> 标记替换为空字符串。

我有

// Placeholder from the question: s stands for the XML document shown above.
String s = "<the xml above here/>"
// The asker can only choose the values of `source` and `target`.
// NOTE(review): String.replace matches `source` literally; a regex needs replaceAll/replaceFirst.
s = s.replace(source, target);

我只能控制 source 和 target 这两个参数。是否有正则表达式可以解决这个问题？

谢谢

Answer 1

如上所述，请不要使用正则表达式处理 XML。以下是您应采取的方法（代码改编自这里和这里）：

// Sample document: one customer whose <address> contains the postal code to match.
String str = "<customer>\n" +
        "   <name>Customer name</name>\n" +
        "   <address>\n" +
        "      <postalcode>94510</postalcode>\n" +
        "      <town>Green Bay</town>\n" +
        "   </address>\n" +
        "   <phone>0645878787</phone>\n" +
        "</customer>";

// Encode explicitly as UTF-8: the document has no XML declaration, so the parser
// assumes UTF-8, while a bare getBytes() would use the platform default charset.
ByteArrayInputStream bais =
        new ByteArrayInputStream(str.getBytes(java.nio.charset.StandardCharsets.UTF_8));
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(bais);

//optional, but recommended
//read this - http://stackoverflow.com/questions/13786607/normalization-in-dom-parsing-with-java-how-does-it-work
doc.getDocumentElement().normalize();

System.out.println("Root element :" + doc.getDocumentElement().getNodeName());

// getElementsByTagName returns a LIVE list: removing an element shifts the
// following ones down by one index. Walking backwards keeps the indices that
// are still to be visited stable, so no <address> element is skipped.
NodeList nList = doc.getElementsByTagName("address");
for (int i = nList.getLength() - 1; i >= 0; i--) {
    Node address = nList.item(i);
    NodeList children = address.getChildNodes();
    for (int j = 0; j < children.getLength(); j++) {
        Node current = children.item(j);
        if ("postalcode".equals(current.getNodeName())
                && "94510".equals(current.getTextContent())) {
            address.getParentNode().removeChild(address);
            // The address is detached now; stop scanning its children so a second
            // matching child cannot trigger another removal on a null parent.
            break;
        }
    }
}

// Serialize the modified DOM back into an indented string.
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(doc);
transformer.transform(source, result);

String xmlString = result.getWriter().toString();
System.out.println(xmlString);

输出结果为：

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<customer>
   <name>Customer name</name>

   <phone>0645878787</phone>
</customer>

如果你真的必须使用正则表达式，请看下面的内容：

// Sample document: one customer whose <address> contains the postal code to match.
String str = "<customer>\n" +
        "   <name>Customer name</name>\n" +
        "   <address>\n" +
        "      <postalcode>94510</postalcode>\n" +
        "      <town>Green Bay</town>\n" +
        "   </address>\n" +
        "   <phone>0645878787</phone>\n" +
        "</customer>";

// Pattern breakdown:
//   (?s)                       - let '.' match newlines as well
//   \s*                        - swallow the indentation in front of <address>
//   <address>                  - opening tag
//   (?:(?!</address>).)*?      - any text that does NOT run past this element's </address>,
//                                so the match cannot start at an earlier, unrelated address
//   <postalcode>94510</postalcode>
//   .*?</address>              - the rest of this element up to its closing tag
// The whole match is replaced by the empty string, so no particular sibling
// (such as <phone>) has to follow the address.
String source = "(?s)\\s*<address>(?:(?!</address>).)*?<postalcode>94510</postalcode>.*?</address>";
String target = "";

System.out.println(str.replaceAll(source, target));

输出：

<customer>
   <name>Customer name</name>
   <phone>0645878787</phone>
</customer>

Answer 2

在没有外部库的情况下，我可以看到最直接的方法是使用XPath表达式来选择应该删除的节点，然后删除它们。这在Java中相当冗长，但并不是非常复杂：

import java.io.*;
import javax.xml.parsers.*;
import javax.xml.xpath.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;
import javax.xml.transform.dom.*;
import org.w3c.dom.*;

/**
 * Demo: parses a small customer document, removes every {@code <address>} element
 * whose {@code <postalcode>} child is exactly {@code 94510}, and prints the
 * resulting XML to standard output.
 */
public class Foo {
  // Error handling should be done, but I can't know what you want to happen
  // in case of broken XML.
  /**
   * Runs the demo.
   *
   * @param args ignored
   * @throws Exception if parsing, XPath evaluation or serialization fails
   */
  public static void main(String[] args) throws Exception {
    String xml =
        "<customer>\n"
      + "   <name>Customer name</name>\n"
      + "   <address>\n"
      + "      <postalcode>94510</postalcode>\n"
      + "      <town>Green Bay</town>\n"
      + "   </address>\n"
      + "   <phone>0645878787</phone>\n"
      + "</customer>";

    // XPath expression: It selects all address nodes under /customer
    // that have a postalcode child whose text is exactly "94510".
    // The literal is quoted on purpose: an unquoted 94510 makes XPath compare
    // numerically, which would also match values such as "094510" or "94510.0".
    String selection = "/customer/address[postalcode='94510']";

    // Lots of fluff -- the XML API is full of factories; don't mind them.
    // What all this does is to parse the document from the string.
    // The bytes are encoded as UTF-8 explicitly because the document carries no
    // XML declaration, so the parser assumes UTF-8 regardless of the platform charset.
    InputStream     source   = new ByteArrayInputStream(
        xml.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    Document        document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source);

    // Create a list of nodes that match our XPath expression
    XPathExpression xpath    = XPathFactory.newInstance().newXPath().compile(selection);
    NodeList        nodes    = (NodeList) xpath.evaluate(document, XPathConstants.NODESET);

    // Remove all those nodes from the document
    for (int i = 0; i < nodes.getLength(); ++i) {
      Node n = nodes.item(i);
      n.getParentNode().removeChild(n);
    }

    // And finally print the document back into a string.
    StringWriter writer = new StringWriter();
    Transformer  tform  = TransformerFactory.newInstance().newTransformer();

    tform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    tform.transform(new DOMSource(document), new StreamResult(writer));

    // This is our result.
    String processed_xml = writer.getBuffer().toString();

    System.out.println(processed_xml);
  }
}

使用正则表达式：当父 XML 标记包含某个字符串时，将该标记替换掉

2 个答案: