我一直搞不清楚如何将XML数据解析为MATLAB结构体。
使用现成的xml2struct函数,我能够做到这一点。但是,叶节点的内容是char类型,而我希望它是double、uint8,或XML元素属性中声明的任何其他类型。
以这个XML数据为例:
<?xml version="1.0"?>
<catalog>
<book id="bk101" type="struct" size="1 1">
<genre>Computer</genre>
<price type="double">44.95</price>
<publish_date>2000-10-01</publish_date>
<description>An in-depth look at creating applications
with XML.</description>
</book>
<book id="bk102" type="struct" size="1 1">
<genre>Fantasy</genre>
<price type="number" size="1 1">112</price>
<publish_date>2000-12-16</publish_date>
<description>A former architect battles corporate zombies,
an evil sorceress, and her own childhood to become queen
of the world.</description>
</book>
</catalog>
我想编写一个MATLAB代码,可以将上述数据解析为一个结构,但是根据其属性,直接在结构中将第一本书的价格作为double数据类型,将第二本书的价格作为uint8数据类型。
但是,使用该函数直接解析XML数据时,我得到的catalog.book{1,1}.price的值是char类型的'44.95'。
直接使用MATLAB函数可以读取XML的属性,但我不清楚该如何扩展xml2struct函数,使其根据元素的属性转换价格等内容的数据类型。
有人可以提示我可以在什么地方操纵XML内容以更改函数中的数据类型吗?我已经逐步调试了一下,只是为了了解函数的工作原理,但我似乎仍然找不到解决方法...
下面是完整的xml2struct函数代码:
function [ s ] = xml2struct( file )
%XML2STRUCT Read an XML document into a nested MATLAB structure.
%   [ s ] = xml2struct( file )
%
%   file is either the name of an XML file (the '.xml' extension may be
%   left off) or an already parsed Java DOM object of class
%   org.apache.xerces.dom.DeferredDocumentImpl or
%   org.apache.xerces.dom.DeferredElementImpl.
%
%   The returned structure is whatever the local function parseChildNodes
%   builds for the document node: one field per element name, with
%   elements that repeat under the same parent collected in a cell array.
%
%   Called without arguments, the function clears the command window,
%   prints this help text and returns.
%
%   An error is raised when the file cannot be found, with or without the
%   '.xml' extension appended.
%
% Written by W. Falkena, ASTI, TUDelft, 21-08-2010
% Attribute parsing speed increased by 40% by A. Wanner, 14-6-2011
% Added CDATA support by I. Smirnov, 20-3-2012
%
% Modified by X. Mo, University of Wisconsin, 12-5-2012

    % No input: behave like "help xml2struct".
    if (nargin < 1)
        clc;
        help xml2struct
        return
    end

    alreadyParsed = isa(file, 'org.apache.xerces.dom.DeferredDocumentImpl') ...
        || isa(file, 'org.apache.xerces.dom.DeferredElementImpl');

    if alreadyParsed
        % The caller handed us a Java XML object; use it directly.
        domRoot = file;
    else
        notFound = (exist(file, 'file') == 0);

        % The extension may have been omitted: append it and look again.
        if notFound && ~contains(file, '.xml')
            file = [file '.xml'];
        end
        if notFound && (exist(file, 'file') == 0)
            error(['The file ' file ' could not be found']);
        end

        % Read the XML file into a DOM document.
        domRoot = xmlread(file);
    end

    % Convert the DOM tree into a MATLAB structure.
    s = parseChildNodes(domRoot);
end
% ----- Subfunction parseChildNodes -----
function [children,ptext,textflag] = parseChildNodes(theNode)
%PARSECHILDNODES Collect the child elements and character data of a DOM node.
%   [children,ptext,textflag] = parseChildNodes(theNode)
%
%   children - struct with one field per child element name. A name that
%              occurs once holds that element's value; a name that occurs
%              several times holds a 1-by-N cell array in document order.
%              An element's value is its text struct when it has text,
%              otherwise its own children struct; an 'Attributes' field is
%              added when the element carries attributes.
%   ptext    - struct with the character data found directly under
%              theNode: field 'Text_Me' for text nodes, 'CDATA' for CDATA
%              sections and 'Comment' for comments. Whitespace-only
%              content is skipped; multiple pieces are concatenated.
%   textflag - field name reported by getNodeData for the last visited
%              child ('Text' when theNode has no children).
%
%   NOTE(review): plain text is stored under 'Text_Me', not 'Text' as the
%   example in the xml2struct help header suggests -- confirm which name
%   callers expect before changing it.

    children = struct;
    ptext = struct;
    textflag = 'Text';

    if ~hasChildNodes(theNode)
        return
    end

    childNodes = getChildNodes(theNode);
    numChildNodes = getLength(childNodes);

    for count = 1:numChildNodes
        theChild = item(childNodes,count-1);
        [text,name,attr,childs,textflag] = getNodeData(theChild);

        % Accept both the sanitised and the raw DOM name of a CDATA node so
        % that it is never used as a (invalid) structure field name.
        isCData   = strcmp(name,'#cdata_dash_section') || strcmp(name,'#cdata-section');
        isComment = strcmp(name,'#comment');
        isCharData = strcmp(name,'#text') || isComment || isCData;

        if ~isCharData
            % Build the value of this element: its text takes precedence
            % over its nested children.
            value = childs;
            if (~isempty(text) && ~isempty(fieldnames(text)))
                value = text;
            end
            % Attach attributes for every occurrence, not only the first
            % one, so repeated elements keep their attributes too.
            if (~isempty(attr))
                value.('Attributes') = attr;
            end

            if (isfield(children,name))
                % XML allows the same element several times under one
                % parent: keep each occurrence in its own cell.
                if (~iscell(children.(name)))
                    children.(name) = {children.(name)};
                end
                children.(name){end+1} = value;
            else
                % First occurrence of this element name.
                children.(name) = value;
            end
        else
            % Character data that belongs to theNode itself.
            ptextflag = 'Text_Me';
            if isCData
                ptextflag = 'CDATA';
            elseif isComment
                ptextflag = 'Comment';
            end

            % Ignore content that consists of whitespace only.
            if (~isempty(regexprep(text.(textflag),'[\s]*','')))
                if (~isfield(ptext,ptextflag) || isempty(ptext.(ptextflag)))
                    ptext.(ptextflag) = text.(textflag);
                else
                    % Content such as <e>Text <!--Comment--> More text</e>
                    % arrives in pieces: simply append them.
                    ptext.(ptextflag) = [ptext.(ptextflag) text.(textflag)];
                end
            end
        end
    end
end
% ----- Subfunction getNodeData -----
function [text,name,attr,childs,textflag] = getNodeData(theNode)
%GETNODEDATA Gather name, attributes, children and text of one DOM node.
%   [text,name,attr,childs,textflag] = getNodeData(theNode)
%
%   text     - struct with the node's character data as returned by
%              parseChildNodes; for a node without children and without
%              text it holds the node's text content under field textflag.
%   name     - node name made usable as a structure field name:
%              '-' -> '_dash_', ':' -> '_colon_', '.' -> '_dot_'.
%   attr     - struct of attributes from parseAttributes, or [] when the
%              node has none.
%   childs   - struct of child elements from parseChildNodes.
%   textflag - field name returned by parseChildNodes.

    % Node names may contain characters that are illegal in MATLAB field
    % names; without this substitution a name such as 'my-tag' makes the
    % dynamic field assignment in parseChildNodes fail, and the CDATA node
    % name never matches the '#cdata_dash_section' check there.
    name = toCharArray(getNodeName(theNode))';
    name = strrep(name, '-', '_dash_');
    name = strrep(name, ':', '_colon_');
    name = strrep(name, '.', '_dot_');

    attr = parseAttributes(theNode);
    if (isempty(fieldnames(attr)))
        attr = [];
    end

    % Recurse into the child nodes.
    [childs,text,textflag] = parseChildNodes(theNode);

    if (isempty(fieldnames(childs)) && isempty(fieldnames(text)))
        % Childless node: take its text content directly. toCharArray is
        % used instead of char(getData(theNode)) because the original
        % authors measured it to be faster.
        text.(textflag) = toCharArray(getTextContent(theNode))';
    end
end
% ----- Subfunction parseAttributes -----
function attributes = parseAttributes(theNode)
%PARSEATTRIBUTES Return the attributes of a DOM node as a structure.
%   attributes = parseAttributes(theNode)
%
%   attributes has one field per XML attribute, holding the attribute
%   value as a char row vector. It is an empty struct (no fields) when the
%   node has no attributes. Attribute names are made usable as field
%   names: '-' -> '_dash_', ':' -> '_colon_', '.' -> '_dot_'.

    attributes = struct;
    if ~hasAttributes(theNode)
        return
    end

    theAttributes = getAttributes(theNode);
    numAttributes = getLength(theAttributes);

    for count = 1:numAttributes
        % Suggestion of Adrian Wanner: the string form of an attribute node
        % is name="value"; splitting it is faster than calling getName and
        % getValue separately.
        str = toCharArray(toString(item(theAttributes,count-1)))';
        k = strfind(str,'=');

        % Everything before the first '=' is the name. Names such as
        % 'xmlns:xsi' or 'xml:lang' are not valid field names, so replace
        % the offending characters before using the name as a field.
        attr_name = str(1:(k(1)-1));
        attr_name = strrep(attr_name, '-', '_dash_');
        attr_name = strrep(attr_name, ':', '_colon_');
        attr_name = strrep(attr_name, '.', '_dot_');

        % Skip '="' after the name and drop the closing quote.
        attributes.(attr_name) = str((k(1)+2):(end-1));
    end
end
编辑:我找到了答案。要根据属性转换数据类型,应该在getNodeData子函数中进行修改,具体来说,就是在其中调用parseChildNodes之后添加下面的条件块。
%parse child nodes
[childs,text,textflag] = parseChildNodes(theNode);

% Convert the node's text according to its 'type' attribute. The guard on
% isfield(text, textflag) skips typed elements that carry no text under
% that field (e.g. container elements), which would otherwise raise a
% "reference to non-existent field" error.
if isfield(attr, 'type') && isfield(text, textflag)
    % Trim first: leading/trailing whitespace would otherwise produce empty
    % tokens that str2double turns into NaN.
    tokens = strsplit(strtrim(text.(textflag)));
    switch attr.type
        case 'double'
            text.(textflag) = str2double(tokens);
        case 'int'
            % str2number does not exist in MATLAB; parse with str2double
            % and cast. Tokens that are not numeric become NaN and are
            % cast to 0 by int32.
            text.(textflag) = int32(str2double(tokens));
    end
end
答案 0 :(得分:0)
我认为您需要根据price的Attributes.type值转换数据类型。
下面是使用xml2struct的示例代码。它检查每个price元素的Attributes.type字段,并根据类型('number'或'double')转换数据类型。默认情况下,MATLAB将数值视为double,因此使用str2double转换后,价格112将是double类型。我认为double就可以了,但如果您想将其作为uint8,请使用uint8(str2double(price.Text))。
% Parse the catalog and add a numeric 'Value' field to every <price>,
% typed according to the price's 'type' attribute.
xmlhandler = xml2struct('yourXmlFile.xml');
books = xmlhandler.catalog.book;

% A single <book> is returned as a plain struct rather than a cell array;
% wrap it so the loop below works for one or many books.
if ~iscell(books)
    books = {books};
end

% NOTE(review): this reads the price text from the 'Text' field; confirm
% that the xml2struct version in use stores element text under that name.
% NOTE: the converted values live in the local variable 'books'; assign it
% back (xmlhandler.catalog.book = books) if xmlhandler should be updated.
for ii = 1:numel(books)
    price = books{ii}.price;

    % Prices without attributes have no 'Attributes' field at all.
    hasType = isfield(price, 'Attributes') && isfield(price.Attributes, 'type');
    if ~hasType
        continue
    end

    % Add the Value field depending on the declared data type.
    switch price.Attributes.type
        case 'number'
            % uint8 saturates: values above 255 become 255.
            books{ii}.price.Value = uint8(str2double(price.Text));
            %books{ii}.price.Value = str2double(price.Text); % If double is fine
        case 'double'
            books{ii}.price.Value = str2double(price.Text);
        otherwise
            % Unrecognised type: leave the price untouched.
    end
end
以上示例代码为price添加了Value字段;如果只想直接替换Text字段,请写成books{1, ii}.price.Text = str2double(price.Text)。