匹配XML

时间:2015-12-30 11:17:19

标签: regex xml tags

如何删除空标记,即 reg 标记和第二个(空的)not 标记?

INPUT:

<reg><exp></exp></reg>
<not>me</not>
<again><not></not><yes>y</yes></again>

OUTPUT:

<not>me</not>
<again><yes>y</yes></again>

https://regex101.com/r/wY5bU2/1

2 个答案:

答案 0 :(得分:1)

为了删除空标记(并不认为它是有效的XML):

true

答案 1 :(得分:0)

import java.io.ByteArrayInputStream;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;

import javax.xml.transform.ErrorListener;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Templates;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import org.apache.log4j.Logger;



/**
 * Removes empty nodes from an XML string by running it through a fixed XSL
 * transformation.
 *
 * <p>The stylesheet is compiled once into a {@link Templates} object when the
 * singleton is created; each call obtains a fresh {@link Transformer} from it.
 * All public methods are best-effort: if the stylesheet could not be loaded or
 * the transformation fails, the failure is logged and the input is returned
 * unchanged.
 */
public class XMLUtil
{
    /**
     * Logger for this class. Declared before {@link #INSTANCE} because the
     * constructor may log while the singleton is being created.
     */
    private static final Logger LOGGER = Logger.getLogger(XMLUtil.class);
    /**
     * XSLT applied to the input. It contains three templates: one that drops
     * attributes whose value is blank, an identity-style copy guarded by an
     * {@code xsl:if}, and one that drops elements having no non-blank text and
     * no attributes anywhere in their subtree (emitting an {@code xsl:message}).
     */
    private static final String XSLCONTENT = "<?xml version = '1.0'?>"
            + "<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"   "
            + "version=\"1.0\"> "
            + "<xsl:output method=\"xml\" indent=\"yes\" /> "
            + "<xsl:template match=\"@*[not(normalize-space(.))]\"/> "
            + "<xsl:template match=\"@*|node()\">       "
            + "<xsl:if test=\". != '' or ./@* != ''\">          "
            + "<xsl:copy>               "
            + "<xsl:apply-templates select=\"@*|node()\" />         "
            + "</xsl:copy>      "
            + "</xsl:if>    "
            + "</xsl:template>  "
            + "<xsl:template match=\"*[not(descendant-or-self::*[text()[normalize-space()] | @*])]\">       "
            + "<xsl:message>!MESSAGE : Node <xsl:value-of select=\"local-name()\" /> removed - since it is EMPTY</xsl:message>  "
            + "</xsl:template>"
            + "</xsl:stylesheet>";
    /**
     * The single shared instance. Must stay below {@link #LOGGER}, which the
     * constructor uses.
     */
    private static final XMLUtil INSTANCE = new XMLUtil();

    /**
     * Returns the shared instance.
     * @return Singleton instance of XMLUtil
     */
    public static final XMLUtil getInstance()
    {
        return INSTANCE;
    }

    /**
     * Compiled stylesheet, or {@code null} when compilation failed; in that
     * case every transformation request returns its input unchanged.
     */
    private final Templates xslTemplate;
    /**
     * Listener installed on every transformer to log transformation errors.
     */
    private final MessageHandler errorHandler = new MessageHandler();

    /**
     * Compiles {@link #XSLCONTENT}. A compilation failure is logged rather than
     * propagated, so the singleton is still created.
     */
    private XMLUtil()
    {
        Templates compiled = null;
        try
        {
            TransformerFactory tFactory = TransformerFactory.newInstance();
            // Input documents come from callers, so restrict the processor
            // (external entity / DTD access, entity expansion limits).
            // Fully-qualified to avoid touching the file's import list.
            tFactory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
            Source xslSrc = new StreamSource(new StringReader(XSLCONTENT));
            compiled = tFactory.newTemplates(xslSrc);
        }
        catch (TransformerConfigurationException e)
        {
            LoggingUtils.logError(LOGGER,"Error in loading XSLT to remove empty XML nodes",e);
        }
        xslTemplate = compiled;
    }

    /**
     * Applies the stylesheet to {@code inputString}.
     *
     * @param inputString XML text to clean; may be {@code null}
     * @param encoding charset name used to turn the text into the byte stream
     *                 handed to the XSLT processor
     * @return the transformed XML; the original {@code inputString} when the
     *         stylesheet is unavailable or the transformation fails;
     *         {@code null} when {@code inputString} is {@code null}
     */
    private String doXSLTTransformation(String inputString,String encoding)
    {
        // Nothing to transform, or no stylesheet to transform with.
        if (inputString == null || xslTemplate == null)
        {
            return inputString;
        }
        try
        {
            // Workaround kept from the original implementation: a line break is
            // inserted between adjacent tags before transforming.
            // NOTE(review): presumably needed because the xsl:if test in the
            // copy template is false for an element whose string value is empty
            // and which has no attributes of its own, even if its children
            // carry attributes; the inserted whitespace makes the string value
            // non-empty. The replacement is purely textual, so "><" inside
            // CDATA sections or comments is altered as well.
            String inputStringVal = inputString.replace("><", ">\n<");
            Source xmlSrc = new StreamSource(new ByteArrayInputStream(
                    inputStringVal.getBytes(encoding)));
            Writer outWriter = new StringWriter();
            Result xmlResult = new StreamResult(outWriter);
            Transformer transformer = xslTemplate.newTransformer();
            transformer.setErrorListener(errorHandler);
            transformer.transform(xmlSrc, xmlResult);
            return outWriter.toString();
        }
        catch (Exception ex)
        {
            // Deliberately broad: this method is best-effort and falls back to
            // the untouched input on any failure (bad encoding name, malformed
            // XML, processor error).
            LoggingUtils.logError(LOGGER,"Error in applying XSL transformation",ex);
            return inputString;
        }
    }

    /**
     * Removes empty tags from XML supplied as a byte array.
     * @param requestBytes  Input XML bytes with (possibly) empty tags; may be
     *                      {@code null}
     * @param encoding Byte encoding like "UTF-8"
     * @return Transformed XML string without empty tags, the decoded input
     *         unchanged if the transformation fails, or {@code null} when
     *         {@code requestBytes} is {@code null}
     * @throws UnsupportedEncodingException If {@code encoding} is not a
     *         supported charset name
     */
    public String removeEmptyTags(byte[] requestBytes, String encoding)
        throws UnsupportedEncodingException
    {
        if (requestBytes == null)
        {
            // Mirror the String overloads, which return null for null input.
            return null;
        }
        return doXSLTTransformation(new String(requestBytes,encoding),encoding);
    }

    /**
     * Removes empty tags from an XML string. Default encoding is UTF-8.
     * @param requestBytes  Input XML string with (possibly) empty tags
     * @return Transformed XML string without empty tags, or the input
     *         unchanged if the transformation fails
     */
    public String removeEmptyTags(String requestBytes)
    {
        return removeEmptyTags(requestBytes,"UTF-8");
    }

    /**
     * Removes empty tags from an XML string.
     * @param requestBytes  Input XML string with (possibly) empty tags
     * @param encoding Character encoding used to feed the text to the
     *                 XSLT processor
     * @return Transformed XML string without empty tags, or the input
     *         unchanged if the transformation fails
     */
    public String removeEmptyTags(String requestBytes, String encoding)
    {
        return doXSLTTransformation(requestBytes,encoding);
    }

    /**
     * Error listener that logs errors and fatal errors and ignores warnings.
     * None of the callbacks rethrows the exception it receives.
     */
    private static class MessageHandler implements ErrorListener
    {
        /** Logs a recoverable transformation error. */
        @Override
        public void error(TransformerException ex)
            throws TransformerException
        {
            LoggingUtils.logError(LOGGER,
                    "Error in XSLT Transformation to remove empty XML nodes", ex);
        }

        /**
         * Logs a fatal transformation error.
         * NOTE(review): the exception is not rethrown; whether the processor
         * keeps going afterwards is implementation-dependent - confirm this is
         * intended.
         */
        @Override
        public void fatalError(TransformerException ex)
            throws TransformerException
        {
            LoggingUtils.logError(LOGGER,
                    "Fatal Error in XSLT Transformation to remove empty XML nodes", ex);
        }

        /** Ignores warnings. */
        @Override
        public void warning(TransformerException arg0)
            throws TransformerException
        {
            //Skip warning message.
        }
    }
}

调用removeEmptyTags方法。这使用了XSL转换 - 性能稍差,但可以完成你的工作:)

**** 请将 LoggingUtils 类(class)的引用替换为您自己的日志语句(或 System.out 输出等):)