我有一个XML字符串,其中包含一些特殊字符(<,>,&),因此无法使用jQuery的$.parseXML进行解析。
这是示例XML字符串
<?xml version="1.0" encoding="UTF-8"?>
<BackgroundCheck userId="{Username}" password="{Password}">
<BackgroundSearchPackage action="submit" type="{PackageName}">
<ReferenceId>ab<</ReferenceId>
<UserArea>
<PositionDetail>
<EmploymentState>{StateJob}</EmploymentState>
<ProposedSalary>{AnnualSalary}</ProposedSalary>
</PositionDetail>
</UserArea>
<PersonalData>
<PersonName>
<GivenName>{FirstName}</GivenName>
<MiddleName>{MiddleName}</MiddleName>
<FamilyName>{LastName}</FamilyName>
<Affix>{Generation}</Affix>
</PersonName>
<EmailAddress>{Email}</EmailAddress>
<DemographicDetail>
<GovernmentId countryCode="US" issuingAuthority="SSN">{SSN}</GovernmentId>
<DateOfBirth>{DateOfBirth}</DateOfBirth>
</DemographicDetail>
{Aliases}
{PostalAddress}
</PersonalData>
<Screenings useConfigurationDefaults="no">
{Screenings}
<AdditionalItems type="x:interface">
<Text>{Search&Type}</Text>
</AdditionalItems>
<AdditionalItems type="x:return_xml_results">
<Text>yes</Text>
</AdditionalItems>
<AdditionalItems type="x:embed_credentials">
<Text>true</Text>
</AdditionalItems>
<AdditionalItems type="x:integration_type">
<Text>Sample XML</Text>
</AdditionalItems>
<AdditionalItems type="x:postback_url">
<Text>{CallbackURL}</Text>
</AdditionalItems>
{AdditionalItems}
</Screenings>
{Documentation}
</BackgroundSearchPackage>
</BackgroundCheck>
注意第4行标记ReferenceId的值,它包含特殊字符,因此无法将此字符串解析为XML。
我需要的是用转义序列(&lt;,&gt;,&amp;)替换这些特殊字符。我遇到的最接近的是这个
how to escape xml entities in javascript?
但是这个答案假设我们已经有了XML节点值。
我的要求不同:我拿到的是字符串形式的完整XML,我只想替换节点值,而不触及标记本身(标记中也包含<,>)。
这是我尝试使用jQuery
// Attempt from the question: wrap every parsed node in a <p>, read the
// serialized markup back through .html(), and store it in `xml`.
$(xml).each(function() {
  var t = $(this).wrap('<p/>').parent().html();
  // NOTE(review): String.prototype.replace returns a new string and the
  // result of this chain is discarded, so `t` is not modified here.
  // Presumably any escaping observed in `xml` comes from the .html()
  // serialization itself. The chain is kept as in the original attempt,
  // with the entity literals restored.
  t.replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
  xml = t;
});
这很好用,这个代码的唯一问题是它将XML标记转换为小写。我认为这是因为jQuery的行为。
请就此给出修复方法或解决方案。谢谢
答案 0 :(得分:0)
// Shared DOMParser instance: replaceIllegalXML() uses it for the lenient
// "text/html" parse and the driver code uses it for the final "text/xml" parse.
var oParser = new DOMParser();
/**
 * Escape XML-reserved characters that appear raw inside element text so the
 * string can afterwards be parsed as XML.
 *
 * The input is first parsed with the lenient "text/html" mode of the shared
 * `oParser`, which yields the document's text content even when the text
 * holds a stray "<" or "&". Each non-empty text line that needs escaping is
 * then located in the source as `>text<` and replaced by its escaped form.
 *
 * Limitations: only text that sits on a single line and is directly
 * delimited by ">" and "<" (no surrounding whitespace) is repaired; only the
 * first matching occurrence is rewritten per text line.
 *
 * @param {string} t - XML source whose text nodes may contain raw <, >, &, " or '.
 * @returns {string} The source with the located text nodes entity-escaped.
 */
function replaceIllegalXML(t) {
  var oDOM = oParser.parseFromString(t, "text/html");
  var allTexts = oDOM.documentElement.textContent.split("\n");
  for (var i = 0; i < allTexts.length; i++) {
    var raw = allTexts[i].trim();
    // "&" must be escaped first so the ampersands introduced by the other
    // entities are not escaped a second time.
    var repl = raw.replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&apos;');
    if (repl !== raw) {
      // Plain substring search and splice: the text is data, so it must be
      // treated neither as a regular expression nor as a replacement
      // pattern (where "$&", "$1", ... would be interpreted).
      var needle = ">" + raw + "<";
      var pos = t.indexOf(needle);
      if (pos !== -1) {
        t = t.slice(0, pos) + ">" + repl + "<" + t.slice(pos + needle.length);
      }
    }
  }
  return t;
}
// Sample input for replaceIllegalXML(). It is deliberately not well-formed
// XML: the ReferenceId text contains a raw "<" and one Text element contains
// a raw "&". The {Name} tokens are left exactly as they appear in the sample.
var t = `<?xml version="1.0" encoding="UTF-8"?>
<BackgroundCheck userId="{Username}" password="{Password}">
<BackgroundSearchPackage action="submit" type="{PackageName}">
<ReferenceId>ab<</ReferenceId>
<UserArea>
<PositionDetail>
<EmploymentState>{StateJob}</EmploymentState>
<ProposedSalary>{AnnualSalary}</ProposedSalary>
</PositionDetail>
</UserArea>
<PersonalData>
<PersonName>
<GivenName>{FirstName}</GivenName>
<MiddleName>{MiddleName}</MiddleName>
<FamilyName>{LastName}</FamilyName>
<Affix>{Generation}</Affix>
</PersonName>
<EmailAddress>{Email}</EmailAddress>
<DemographicDetail>
<GovernmentId countryCode="US" issuingAuthority="SSN">{SSN}</GovernmentId>
<DateOfBirth>{DateOfBirth}</DateOfBirth>
</DemographicDetail>
{Aliases}
{PostalAddress}
</PersonalData>
<Screenings useConfigurationDefaults="no">
{Screenings}
<AdditionalItems type="x:interface">
<Text>{Search&Type}</Text>
</AdditionalItems>
<AdditionalItems type="x:return_xml_results">
<Text>yes</Text>
</AdditionalItems>
<AdditionalItems type="x:embed_credentials">
<Text>true</Text>
</AdditionalItems>
<AdditionalItems type="x:integration_type">
<Text>Sample XML</Text>
</AdditionalItems>
<AdditionalItems type="x:postback_url">
<Text>{CallbackURL}</Text>
</AdditionalItems>
{AdditionalItems}
</Screenings>
{Documentation}
</BackgroundSearchPackage>
</BackgroundCheck>`
// Repair the sample string, then check that it now parses as XML.
t = replaceIllegalXML(t);
var newDOM = oParser.parseFromString(t, "text/xml");
// A failed XML parse is reported through a <parsererror> element, but where
// that element sits in the returned document depends on the browser, so look
// for it anywhere in the document rather than only at the root.
var nok = newDOM.getElementsByTagName("parsererror").length > 0;
if (nok) console.log("xml parsing failed");
else console.log(newDOM.getElementsByTagName("ReferenceId")[0].textContent);
答案 1 :(得分:0)
我终于实现了我所需要的。感谢@mplungjan。正如他所指出的那样,XML文件必须有效才能被解析,并且在创建XML时应该有效。
我的情况有所不同:我别无选择,只能先在JavaScript中修复无效的XML字符串,然后才能解析它。
我不得不用一种不太干净的取巧办法(dirty hack)来实现它。使用JavaScript(或任何其他编程语言)解析无效的XML字符串时,我们会得到一条错误信息,其中指出了错误内容及其所在的行号。
这就是我做的事情
// DOMParser instance used by process(), both for the "text/xml" parse of the
// input and for re-parsing the parser's error markup as "text/html".
var oParser = new DOMParser();
/**
 * Backslash-escape every regular-expression metacharacter in `str` so the
 * result can be embedded in a RegExp source and match `str` literally.
 *
 * @param {string} str - Text to be matched literally.
 * @returns {string} `str` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(str) {
  var metaChars = /[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g;
  // "$&" re-inserts the matched character after the added backslash.
  var escaped = str.replace(metaChars, "\\$&");
  return escaped;
}
/**
 * Entity-escape the element text found on one line of an XML string.
 *
 * The text is taken as everything between the first ">" and the last "<" on
 * the given line, so the line is expected to hold a single element such as
 * `<Tag>text</Tag>`. Only that line is rewritten; all other lines are
 * returned untouched.
 *
 * @param {string} str - Complete XML source, lines separated by "\n".
 * @param {number|string} line - 1-based number of the line to repair.
 * @returns {?string} The repaired source, or null when the line does not
 *   exist, contains no `>...<` span, or needs no escaping (i.e. no progress
 *   can be made).
 */
function remove_error(str, line) {
  var allTexts = str.split("\n");
  var index = Number(line) - 1;
  if (!Number.isInteger(index) || index < 0 || index >= allTexts.length) {
    return null;
  }
  var illegal = allTexts[index];
  var found = illegal.match(/>(.*)</);
  if (!found) {
    return null;
  }
  var extract = found[1];
  // Leave the predefined XML entities and numeric character references
  // alone so text that is already partly escaped is not escaped twice.
  var fix_extract = extract
    .replace(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
  if (fix_extract === extract) {
    return null;
  }
  // Splice by position inside the reported line: this avoids building a
  // RegExp from the text, avoids "$" replacement patterns, and cannot hit an
  // identical snippet on some other line.
  var start = found.index + 1;
  allTexts[index] = illegal.slice(0, start) + fix_extract +
    illegal.slice(start + extract.length);
  return allTexts.join("\n");
}
// State shared across the recursive process() calls:
//   fixed       - result of the most recent remove_error() call
//   final_fixed - the string that finally parsed without a parser error
var fixed = "", final_fixed = "";

/**
 * Parse `orig_str` as XML and, while the parser reports an error, repair the
 * reported line with remove_error() and try again recursively.
 *
 * The line number is read from the parser's error markup by looking for
 * "line <N> at" inside the first <div> of the <parsererror> element.
 * NOTE(review): that message layout is produced by the browser's parser and
 * is presumably browser-specific — confirm for every browser that must be
 * supported.
 *
 * Side effects: logs the error text, may call alert(), and writes an
 * "Invalid XML" heading into the welcome page whenever a parser error was
 * seen at this level (even if a later attempt succeeds — NOTE(review):
 * confirm that this is intended).
 *
 * @param {string} orig_str - XML source to parse/repair.
 * @returns {string|undefined} The first version of the string that parses
 *   cleanly, or undefined when it could not be repaired.
 */
function process(orig_str) {
  // Drop any result left behind by an earlier call so a failed run cannot
  // return a stale string. Nested calls reset it too, but the deepest
  // successful call assigns it last.
  final_fixed = "";
  var newDOM = oParser.parseFromString(orig_str, "text/xml");
  var error = newDOM.getElementsByTagName("parsererror");
  if (error && error.length) {
    if (error[0] && error[0].innerHTML) {
      var err_html = error[0].innerHTML;
      var parse_err = oParser.parseFromString(err_html, "text/html");
      // The error markup may not contain a <div>; treat that as "no line
      // number available" instead of dereferencing undefined.
      var err_div = parse_err.getElementsByTagName("div")[0];
      var err_log = err_div ? err_div.innerHTML : "";
      console.log(err_log);
      // Only the first reported line is used; a local match variable
      // replaces the former implicit global `m`.
      var line_match = /line\s*(\d+)\s*at/.exec(err_log);
      if (line_match) {
        fixed = remove_error(orig_str, line_match[1]);
        // Recurse only when the repair actually changed something,
        // otherwise the same error would be reported forever.
        if (fixed && fixed !== orig_str) {
          process(fixed);
        }
      }
    } else {
      alert('XML could not be parsed');
      return;
    }
    $('.welcome-page section.welcome .inner').html("<h3 class='text-center'>Invalid XML</h3>");
  } else {
    final_fixed = orig_str;
  }
  if (final_fixed) {
    return final_fixed;
  }
}
// Run the repair routine on the raw XML and stop when nothing parseable
// came back.
// NOTE(review): `res` is not defined anywhere in this snippet and the bare
// `return` implies the fragment lives inside an enclosing function — confirm
// against the full source.
// Despite its name, `newDOM` receives whatever process() returns (the
// repaired string or undefined), not a Document.
var newDOM = process(res[0][0]);
if (!newDOM) {
alert('XML could not be parsed');
return;
}
我知道我所做的只是一种取巧的办法(hack),但我没有任何其他选择。
PS-欢迎对此答案进行任何修改。