我正在尝试实现有意义的XML比较。我想比较两种不同的XML,以了解它们是否“有意义”相等。
示例XML 1:
import tkinter as tk
class TextLineNumbers(tk.Canvas):
    """Canvas that draws the line number of each line displayed in a text widget.

    The canvas does nothing by itself: call ``attach`` to give it a text
    widget, then call ``redraw`` whenever the text content or view changes.
    """

    def __init__(self, *args, **kwargs):
        """Create the canvas; all arguments are forwarded to ``tk.Canvas``."""
        super().__init__(*args, **kwargs)
        self.textwidget = None

    def attach(self, text_widget):
        """Remember ``text_widget`` as the widget whose lines are numbered."""
        self.textwidget = text_widget

    def redraw(self, *args):
        """Redraw the line numbers.

        Extra positional arguments are accepted and ignored so the method
        can be used directly as a callback.
        """
        self.delete("all")

        # Nothing to number until a text widget has been attached; without
        # this guard the lookups below would fail on ``None``.
        if self.textwidget is None:
            return

        # Start from the text index at pixel (0, 0) of the text widget.
        index = self.textwidget.index("@0,0")
        while True:
            dline = self.textwidget.dlineinfo(index)
            if dline is None:
                # No display info for this index: stop drawing.
                break
            y = dline[1]
            linenum = str(index).split(".")[0]
            # The number is drawn 4 pixels in from the left edge.
            self.create_text(4, y, anchor="nw", text=linenum)
            index = self.textwidget.index("%s+1line" % index)
class CustomText(tk.Text):
    """Text widget that emits a ``<<Change>>`` virtual event.

    The underlying Tcl widget command is renamed and replaced by an alias
    to the ``widget_proxy`` Tcl procedure defined below. The proxy forwards
    every call to the real command and then generates ``<<Change>>`` for
    insert/replace/delete, for ``mark set insert``, and for xview/yview
    ``moveto``/``scroll`` commands.
    """

    def __init__(self, *args, **kwargs):
        """Create the widget (arguments go to ``tk.Text``) and install the proxy."""
        super().__init__(*args, **kwargs)
        interp = self.tk

        # Define the Tcl-side proxy procedure.
        interp.eval('''
proc widget_proxy {widget widget_command args} {
# call the real tk widget command with the real args
set result [uplevel [linsert $args 0 $widget_command]]
# generate the event for certain types of commands
if {([lindex $args 0] in {insert replace delete}) ||
([lrange $args 0 2] == {mark set insert}) ||
([lrange $args 0 1] == {xview moveto}) ||
([lrange $args 0 1] == {xview scroll}) ||
([lrange $args 0 1] == {yview moveto}) ||
([lrange $args 0 1] == {yview scroll})} {
event generate $widget <<Change>> -when tail
}
# return the result from the real widget command
return $result
}
''')

        # Rename the real widget command to _<path> and alias the original
        # name to the proxy so every widget command goes through it.
        widget_path = str(self)
        interp.eval('''
rename {widget} _{widget}
interp alias {{}} ::{widget} {{}} widget_proxy {widget} _{widget}
'''.format(widget=widget_path))
class LinedText(tk.Frame):
    """Frame holding a ``CustomText``, a vertical scrollbar and an optional
    line-number gutter that can be shown or hidden at run time.
    """

    class Settings:
        """Display settings for a ``LinedText`` instance."""

        def __init__(self):
            # The line-number gutter starts hidden.
            self.hide_linenumbers = True

    def __init__(self, *args, **kwargs):
        """Build the child widgets; all arguments are forwarded to ``tk.Frame``."""
        super().__init__(*args, **kwargs)
        self.settings = self.Settings()
        self.linenumbers = None  # created on demand by show()

        self.text = CustomText(self)
        # The scrollbar must be a child of this frame. Without an explicit
        # parent it is created in the default root window and packed there,
        # outside the frame it belongs to.
        self.vsb = tk.Scrollbar(
            self, orient="vertical", command=self.text.yview
        )
        self.vsb.pack(side="right", fill="y")
        self.text.configure(yscrollcommand=self.vsb.set)
        self.text.tag_configure("bigfont", font=("Helvetica", "24", "bold"))

        # Redraw the gutter when the text changes or the widget is resized.
        self.text.bind("<<Change>>", self._on_change)
        self.text.bind("<Configure>", self._on_change)

        # Sample content.
        self.text.insert("end", "one\ntwo\nthree\n")
        self.text.insert("end", "four\n", ("bigfont",))
        self.text.insert("end", "five\n")
        self.text.focus()
        self.text.pack(side="right", fill="both", expand=True)

    def hide(self, event=None):
        """Remove the line-number gutter if it is currently shown.

        ``event`` is accepted and ignored so the method can be used both as
        a button command and as an event binding.
        """
        if self.settings.hide_linenumbers:
            return
        self.settings.hide_linenumbers = True
        if self.linenumbers is not None:
            # show() always builds a fresh canvas, so destroy this one
            # instead of only unpacking it; otherwise every hide/show cycle
            # leaves an orphaned Canvas child behind.
            self.linenumbers.destroy()
            self.linenumbers = None

    def show(self, event=None):
        """Create and pack the line-number gutter if it is not shown yet.

        ``event`` is accepted and ignored (see ``hide``).
        """
        if self.linenumbers is None:
            self.linenumbers = TextLineNumbers(self, width=30)
            self.linenumbers.attach(self.text)
            self.linenumbers.pack(side="left", fill="y")
            self.settings.hide_linenumbers = False

    def _on_change(self, event):
        """Redraw the gutter, if one exists, after a text change or resize."""
        if self.linenumbers is not None:
            self.linenumbers.redraw()
if __name__ == "__main__":
    # Demo window: the lined text editor on top, Hide/Show buttons below.
    root = tk.Tk()

    top_frame = tk.Frame(root)
    text = LinedText(top_frame)
    text.pack(expand=1, fill="both")
    top_frame.pack(side="top", expand=1, fill="both")

    bottom_frame = tk.Frame(root)
    # Both buttons are packed to the right, "Hide" first, so "Show" ends
    # up to the left of "Hide".
    for label, callback in (("Hide", text.hide), ("Show", text.show)):
        button = tk.Button(bottom_frame, text=label, command=callback)
        button.pack(side="right")
    bottom_frame.pack(side="bottom", fill="x")

    root.mainloop()
XML 2:
<?xml version="1.0" encoding="UTF-8"?>
<al:moAttribute>
<al:name>impiId</al:name>
<al:value>616731935012345678</al:value>
</al:moAttribute>
<al:moAttribute>
<al:name>impuId</al:name>
<al:value>tel:+16167319350</al:value>
</al:moAttribute>
在这个例子中,两个XML都是“有意义的”相等,但只是元素序列不同。我想比较两者,以了解它们是否几乎相等。
我尝试了这个解决方案:
Best way to compare 2 XML documents in Java
我试过了:
<?xml version="1.0" encoding="UTF-8"?>
<al:moAttribute>
<al:name>impuId</al:name>
<al:value>tel:+16167319350</al:value>
</al:moAttribute>
<al:moAttribute>
<al:name>impiId</al:name>
<al:value>616731935012345678</al:value>
</al:moAttribute>
但是如果XML的顺序不同,则XML比较返回false。
有什么建议吗?
答案 0 :(得分:1)
任何XML层面的工具都会假定元素的顺序是有意义的。如果您知道在您的特定词汇表(vocabulary)中元素的顺序并不重要,那么您需要一个能够理解该词汇表的工具。因此,最好的办法是编写一个规范化转换(通常用XSLT),消除文档之间无关紧要的差异(例如,按某个合适的键对元素排序),这样在使用标准XML工具比较时(也许是在XML规范化(canonicalisation)之后),两个文档就会被判定为相等。
答案 1 :(得分:0)
您可以使用jaxb来实现目标(例如http://www.mkyong.com/java/jaxb-hello-world-example/)
1. 使用jaxb从给定的两个xml文件构造两个java对象
2. 在每个java对象中,您都有对应xml文件的al:value列表(您只关心这个)
3. 比较这两个列表,请参考 Simple way to find if two different lists contain exactly the same elements?
通过这样做,您将克服顺序问题
答案 2 :(得分:0)
你可能会发现xmlunit的RecursiveElementNameAndTextQualifier在这里很有用。这是一个片段
// Global XMLUnit settings: ignore whitespace, comments and attribute order.
XMLUnit.setIgnoreWhitespace(true);
XMLUnit.setIgnoreComments(true);
XMLUnit.setIgnoreAttributeOrder(true);
// The ".." arguments are placeholders: supply the actual document sources.
Document docx1 = XMLUnit.buildDocument(..);
Document docx2 = XMLUnit.buildDocument(..);
Diff diff = new Diff(docx1, docx2);
DifferenceEngine engine = new DifferenceEngine(diff);
// Pair up elements by name and nested text rather than by position, so
// sibling order does not matter.
ElementQualifier qualifier = new RecursiveElementNameAndTextQualifier();
diff = new Diff(docx1, docx2, engine, qualifier);
diff.overrideDifferenceListener(new DifferenceListener()
{
    @Override public int differenceFound(Difference difference)
    {
        // Do something with the difference, e.g. return processDiff(difference);
        // The method is declared int, so it must return one of the
        // DifferenceListener.RETURN_* constants; accepting the difference
        // leaves the engine's own verdict unchanged.
        return RETURN_ACCEPT_DIFFERENCE;
    }
    @Override public void skippedComparison(Node node, Node node1)
    {
        // no-op
    }
});
// check diff.identical() || diff.similar();
答案 3 :(得分:0)
伙计这对我来说绝对是完美的。 它显示了变化所在的差异。
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.net.URL;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.custommonkey.xmlunit.DetailedDiff;
import org.custommonkey.xmlunit.Diff;
import org.custommonkey.xmlunit.Difference;
import org.custommonkey.xmlunit.XMLUnit;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
/**
 * Compares two XML files with XMLUnit and prints whether they are similar or
 * identical, followed by every individual difference found.
 */
public class Xmlreader {

    /** Control document used when no path is given on the command line. */
    private static final String DEFAULT_CONTROL_PATH = "C:/Users/sravanlx/Desktop/base.xml";

    /** Test document used when no second path is given on the command line. */
    private static final String DEFAULT_TEST_PATH = "C:/Users/sravanlx/Desktop/base2.xml";

    /**
     * Entry point.
     *
     * @param args optional: {@code args[0]} is the path of the control XML
     *             file and {@code args[1]} the path of the XML file compared
     *             against it; the built-in default is used for each one
     *             that is missing
     * @throws SAXException if either file is not well-formed XML
     * @throws IOException if either file cannot be read
     * @throws ParserConfigurationException if the XML parser cannot be created
     */
    public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException {
        String controlPath = args.length > 0 ? args[0] : DEFAULT_CONTROL_PATH;
        String testPath = args.length > 1 ? args[1] : DEFAULT_TEST_PATH;

        XMLUnit.setIgnoreWhitespace(true);
        XMLUnit.setIgnoreComments(true);
        XMLUnit.setIgnoreAttributeOrder(true);

        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        dbf.setCoalescing(true);
        dbf.setIgnoringElementContentWhitespace(true);
        dbf.setIgnoringComments(true);
        DocumentBuilder db = dbf.newDocumentBuilder();

        Document doc1 = db.parse(new File(controlPath));
        doc1.normalizeDocument();
        Document doc2 = db.parse(new File(testPath));
        // Normalize both documents, not just the first, so the two sides
        // are prepared for comparison in the same way.
        doc2.normalizeDocument();

        Diff diff = new Diff(doc1, doc2);
        System.out.println("Similar? " + diff.similar());
        System.out.println("Identical? " + diff.identical());

        DetailedDiff detDiff = new DetailedDiff(diff);
        List<?> differences = detDiff.getAllDifferences();
        for (Object object : differences) {
            Difference difference = (Difference) object;
            System.out.println("***********************");
            System.out.println(difference);
            System.out.println("***********************");
        }
    }
}
答案 4 :(得分:0)
我已经用一个XSLT解决了这个问题,它放在我的github上,采用无序树比较。基本上,它会输出任意两个xml文件之间匹配和不匹配的项,并给出每一项相对于树根的位置。 例如:
<a>
<c/>
<e/>
</a>
并且:
<a>
<e/>
<c/>
</a>
将被视为相等。 您只需要修改样式表顶部的file变量,即可选择要比较的XML文件。 https://github.com/sflynn1812/xslt-diff-turbo
从效率的角度来看,任何树比较算法的速度都取决于两棵树中差异的数量。
当前将其应用到您的示例中,建议您先去除xml命名空间,因为当前不支持该命名空间。