我上传了许多XML文件到我的Google云端硬盘。它们将与网络上的任何人共享。我列出了Google表格中的链接。 (如下图所示。)
我正在尝试使用Script from GitHub解析XML文件。 (请参阅下面的完整脚本。)但是,当我使用=xPath("@url",E2)
调用工作表的单元格F2中的函数时,出现错误:
TypeError: 在对象 https://drive.google.com/uc?id=1heoXLD9uQOZitQHTuhS3xuM8oZduQPZJ 中找不到函数 getRootElement。(第49行)
@url属性肯定存在于XML文件中。但是,我不知道为什么函数找不到它。 The link to the XML file is here
An editable copy of the Google Sheet with the script is here.
以下是GitHub的Google脚本:
/**
* Add simple XPath XML parsing to google scripts. Essentially be able to
* use the xpath notation that works in the IMPORTXML formula but from
* script code instead.
*
* NOTE: this is a GOOGLE SCRIPT library - this WILL NOT WORK outside of
* google scripts because it uses the apps script XML Service
* https://developers.google.com/apps-script/reference/xml-service/
*
* Usage:
*
* In your script, go to Resources -> Libraries then enter the following
* Project Key or Script ID in the 'Find a Library' box.
*
* Project Key: M1YVJTfv66XpF5AoIeE9zsAopJxr71Kma
* Script ID: 1EyZK520ihKS4JWE1B47Ra0fU4B4m9vAHX0FWMZ50xNUJsU_R9VRIsqf3
*
*/
/**
* Returns the value (or list of values) at the given path in
* the given xmlFile.
*
* Example:
*
* <xml>
* <foo>
* <bar>
* <baz what='wrong'>thing1</baz>
* <baz what='test'>thing2</baz>
* </bar>
* </foo>
* <fizz>buzz</fizz>
* </xml>
*
* var xml = readRemoteXML('https://test-xml-file.xml');
*
* var simple = xPath('fizz', xml);
* simple; // 'buzz'
*
* var list_values = xPath('foo/bar//baz', xml);
* list_values; // ['thing1', 'thing2']
*
* var attribute_after_list = xPath('foo/bar//baz[1]/@what', xml);
* attribute_after_list; // test
*
* // A URL string may be passed instead of a parsed document, which is what
* // a spreadsheet formula such as =xPath("@url", E2) supplies:
* var from_url = xPath('fizz', 'https://test-xml-file.xml');
*
* @param {string} path Path expression, relative to the root element.
* @param {Object|string} xmlFile Parsed XML document (anything exposing
*     getRootElement()), or the URL of an XML file to fetch and parse.
* @return {*} Whatever xPathStep returns for the root element.
* @throws {TypeError} If xmlFile is neither a URL string nor an object
*     exposing getRootElement().
*/
function xPath(path, xmlFile) {
  var xmlDocument = xmlFile;

  // Spreadsheet cells hand over the URL text, not a document object:
  // fetch and parse it first so the call does not die on getRootElement.
  if (typeof xmlFile === 'string') {
    xmlDocument = readRemoteXML(xmlFile);
  }

  if (!xmlDocument || !xmlDocument.getRootElement) {
    throw new TypeError(
      'xPath: xmlFile must be a parsed XML document or a URL string, got ' +
      (xmlFile === null ? 'null' : typeof xmlFile)
    );
  }

  return xPathStep(path, xmlDocument.getRootElement());
}
/**
* Recursive path parsing - you probably want to use xPath instead of using
* this function directly.
*
* Consumes the first '/'-separated segment of path against node and recurses
* on the remainder. Supported segments:
*   name      first child element called name (node.getChild)
*   name[i]   entry i of node.getChildren(name); i is ZERO-based
*   //name    every direct child called name (a list); //name[i] picks one
*   @attr     value of attribute attr on the current node (ends the walk)
*
* @param {string} path Remaining path expression.
* @param {Object|Array} node Current element, or a list of elements.
* @return {*} Text or attribute value; an array when a list step was taken;
*     undefined when the path does not resolve to anything.
*/
function xPathStep(path, node) {
  // A previous '//' step produced a list: resolve the path for each entry.
  if (Array.isArray(node)) {
    return node.map(function (singleNode) {
      return xPathStep(path, singleNode);
    });
  }
  if (!node) {
    return;
  }

  var paths = path.split('/');
  var firstChild = paths[0];
  var remainingPath = paths.slice(1).join('/');
  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/);
  var attributeMatch = firstChild.match(/@(\w+)/);
  var nextNode;

  if (indexMatch) {
    // name[i]: out-of-range indexes simply yield undefined.
    nextNode = node.getChildren(indexMatch[1])[Number(indexMatch[2])];
  } else if (firstChild === '') {
    // Empty segment (from '//' or a leading '/'): the next segment names the
    // children to collect, so it is consumed here as well.
    var tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];
      remainingPath = paths.slice(2).join('/');
    }
    // '//name[i]' must select by name and then index; passing the literal
    // 'name[i]' to getChildren never matches anything.
    var indexedTag = tagName.match(/^(\w+)\[(\d+)\]$/);
    if (indexedTag) {
      nextNode = node.getChildren(indexedTag[1])[Number(indexedTag[2])];
    } else {
      nextNode = node.getChildren(tagName);
    }
  } else if (attributeMatch) {
    // '@attr': return the value directly, including the empty string; a
    // missing attribute resolves to undefined instead of throwing.
    var attribute = node.getAttribute(attributeMatch[1]);
    return attribute ? attribute.getValue() : undefined;
  } else {
    nextNode = node.getChild(firstChild);
  }

  if (remainingPath !== '') {
    return xPathStep(remainingPath, nextNode);
  }

  // End of the path: report text content. A list step yields a list of
  // texts; a child that does not exist yields undefined.
  if (Array.isArray(nextNode)) {
    return nextNode.map(function (listedNode) {
      return listedNode.getText && listedNode.getText();
    });
  }
  if (!nextNode) {
    return;
  }
  return nextNode.getText && nextNode.getText();
}
/**
* Download the file located at xmlFileUrl and hand its text to the XML
* parser.
*
* @param {string} xmlFileUrl Address of the XML file to fetch.
* @return {Object} The document produced by XmlService.parse.
*/
function readRemoteXML(xmlFileUrl) {
  var response = UrlFetchApp.fetch(xmlFileUrl);
  var xmlText = response.getContentText();
  var parsedDocument = XmlService.parse(xmlText);
  return parsedDocument;
}
答案 0 :(得分:0)
非常感谢您的函数,它对我很有用。我在此基础上继续开发,现在回到这里分享我的成果。
我希望该函数能够理解更多的路径语法元素,例如:`//tag`、`tag1/./title`、`tag2/.../title`、`tag[@attrib='value']`、`tag[contains(text(),'foo')]`。因此,我必须使用数组,在这个递归函数的各层深度之间来回传递信息。
因为我是法国人,所以脚本中的注释是法语,抱歉。希望我的贡献能帮助一些开发人员在 Google 脚本中使用 XPath。它在我的 Google Apps Script 界面中可以完美运行,但是从 Google 表格函数调用 myImportXml() 时我遇到了一个问题:当 xpath 的深度超过 2 时,得不到任何结果! :(
// Script-wide state: nextNodes is reset by myImportXml on every call;
// log is the verbosity flag that xPathThrowDepthXml compares against 3.
var nextNodes;
var log = 0;
/**
 * Fetches the XML at url, parses it and evaluates path against its root
 * element with xPathThrowDepthXml, using the root element's namespace.
 * Both arguments fall back to built-in sample values when null/undefined.
 *
 * @param {string} url Address of the XML document.
 * @param {string} path Path expression to evaluate.
 * @return {*} Whatever xPathThrowDepthXml returns.
 */
function myImportXml(url,path){
  // Start every evaluation with an empty script-wide node table.
  nextNodes = [];

  // Sample document and query, used when the caller omits an argument.
  var sourceUrl = (url == null)
    ? "http://api.allocine.fr/rest/v3/movie?media=mp4-lc&partner=YW5kcm9pZC12Mg&profile=large&version=2&code=265621"
    : url;
  var query = (path == null) ? "//nationality" : path;

  var xmlText = UrlFetchApp.fetch(sourceUrl).getContentText();
  var rootElement = XmlService.parse(xmlText).getRootElement();
  var rootNamespace = rootElement.getNamespace();
  return xPathThrowDepthXml(query, rootElement, rootNamespace);
}
/**
 * Returns the texts / attribute values found at the given xpath-like path,
 * starting from the given XML element.
 *
 * The first '/'-separated step of path is resolved against node; the
 * function then recurses on every matched element with the rest of the path.
 * Results are therefore nested one array level per step that had a
 * remainder; branches that produce nothing are left out.
 *
 * Steps understood (one predicate per step):
 *   tag                        children named tag
 *   tag[2]                     entry 2 of the children named tag (ZERO-based)
 *   tag[@attr]                 children named tag that carry attribute attr
 *   tag[@attr="value"]         ... whose attribute attr equals value
 *   tag[contains(text(),"x")]  children named tag whose text contains x
 *   @attr                      value of attribute attr on the current element
 *   . .. ...                   go up one level PER DOT
 *   //tag  (or /tag, ///tag)   every descendant named tag; a predicate
 *                              written after the tag is ignored in this form
 *
 * Matched elements are kept in a list local to each call. The earlier
 * revision stored them in the script-wide nextNodes table indexed by depth
 * and removed only one entry per call, so elements matched under one parent
 * leaked into the results of the next parent.
 *
 * When the script-wide variable log equals 3 the current depth is written
 * with Logger.log.
 *
 * @param {string} path Path expression.
 * @param {Object|Array} node Element to start from, or a list of elements
 *     (each entry is then evaluated separately).
 * @param {Object} mynamespace Namespace handed to node.getChildren().
 * @param {number=} niveau Depth of the calling level; omit on the first call.
 * @return {Array|undefined} Nested array of strings; undefined when node is
 *     null/undefined.
 */
function xPathThrowDepthXml(path, node,mynamespace,niveau) {
  var depth = (niveau == null) ? 0 : niveau + 1;
  if (typeof log !== 'undefined' && log == 3) Logger.log(depth);

  // A list of nodes: evaluate the same path for each entry.
  if (Array.isArray(node)) {
    return node.map(function (singleNode) {
      return xPathThrowDepthXml(path, singleNode, mynamespace, niveau);
    });
  }
  if (!node) {
    return;
  }

  // Content entries returned by getDescendants() are not all elements;
  // returns the entry as an element, or null when it is something else.
  function toElement(content) {
    if (content == null) return null;
    var element;
    try {
      element = content.asElement ? content.asElement() : content;
    } catch (er) {
      // Deliberate best effort: anything that cannot be viewed as an
      // element is simply not a candidate.
      return null;
    }
    return (element && element.getName) ? element : null;
  }

  // Returns the attribute of element called attributeName, or null.
  function findAttribute(element, attributeName) {
    var attributes = element.getAttributes();
    for (var a = 0; a < attributes.length; a++) {
      if (attributes[a].getName() == attributeName) return attributes[a];
    }
    return null;
  }

  // '///' collapses to '//', and a leading '//' to '/', so that
  // /tag, //tag and ///tag all mean "descendants named tag".
  var paths = path.replace(/\/{3,}/g, "//").replace(/^\/{2,}/, "/").split('/');
  var firstChild = paths[0];
  var remainingPath = paths.slice(1).join('/');

  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/);
  var attributeMatch = firstChild.match(/^@(\w+)/);
  var searchAttributeMatch = firstChild.match(/(\w+)\[@(\w+)\]/);
  var searchAttributeEgalMatch = firstChild.match(/(\w+)\[@(\w+)=['"](.*)["']\]/);
  var searchTextContainsMatch = firstChild.match(/(\w+)\[contains\(text\(\),['"](.*)['"]\)\]/);

  var matches = [];        // elements selected by this step
  var attributeValue = ''; // set only by an '@attr' step
  var children, child, attribute, i;

  if (indexMatch) {
    // tag[i]: an index past the end selects nothing.
    child = node.getChildren(indexMatch[1], mynamespace)[Number(indexMatch[2])];
    if (child) matches.push(child);
  } else if (firstChild === '') {
    // Empty step: the following step names the descendants to collect.
    var tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];
      remainingPath = paths.slice(2).join('/');
    }
    var bracket = tagName.match(/^([^\[]*)\[/);
    if (bracket) tagName = bracket[1];
    var descendants = node.getDescendants();
    for (i = 0; i < descendants.length; i++) {
      child = toElement(descendants[i]);
      if (child && child.getName() == tagName) matches.push(child);
    }
  } else if (searchAttributeMatch) {
    children = node.getChildren(searchAttributeMatch[1], mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() == searchAttributeMatch[1] &&
          findAttribute(child, searchAttributeMatch[2])) {
        matches.push(child);
      }
    }
  } else if (searchAttributeEgalMatch) {
    children = node.getChildren(searchAttributeEgalMatch[1], mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() != searchAttributeEgalMatch[1]) continue;
      attribute = findAttribute(child, searchAttributeEgalMatch[2]);
      if (attribute && attribute.getValue() == searchAttributeEgalMatch[3]) {
        matches.push(child);
      }
    }
  } else if (attributeMatch) {
    // '@attr' reads from the current element itself (works on the root
    // too); a missing attribute leaves the value empty.
    attribute = node.getAttribute ? node.getAttribute(attributeMatch[1]) : null;
    if (attribute) attributeValue = attribute.getValue();
  } else if (firstChild.split(".").length - 1 == firstChild.length) {
    // Step made only of dots: climb one level per dot, stopping quietly
    // once there is no parent left.
    var ancestor = node;
    for (var n = 0; n < firstChild.length && ancestor; n++) {
      ancestor = ancestor.getParentElement();
    }
    if (ancestor) matches.push(ancestor);
  } else {
    // Plain child name, optionally restricted by contains(text(), ...).
    var wantedTag = firstChild;
    var textWanted = null;
    if (searchTextContainsMatch) {
      wantedTag = searchTextContainsMatch[1];
      textWanted = searchTextContainsMatch[2];
    }
    children = node.getChildren(wantedTag, mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() != wantedTag) continue;
      if (textWanted === null || String(child.getText()).indexOf(textWanted) !== -1) {
        matches.push(child);
      }
    }
  }

  var result = [];
  if (attributeValue) {
    result.push(attributeValue);
    return result;
  }
  for (i = 0; i < matches.length; i++) {
    if (remainingPath !== '') {
      var sub = xPathThrowDepthXml(remainingPath, matches[i], mynamespace, depth);
      // Leave out branches in which nothing matched.
      if (sub != null && sub.length !== 0) result.push(sub);
    } else {
      var text = matches[i].getText && matches[i].getText();
      if (text != null) result.push(text);
    }
  }
  return result;
}
答案 1 :(得分:0)
我解决了我的问题,即当xpath的深度超过2时,使用Google工作表函数调用myImportXml()的答案中止了。
原因是我的数组 nextNodes 使用了全局变量。我改为在函数的递归调用之间传入并返回该数组。因此,如下面的代码所示,我的脚本不再需要全局变量。
我忘了说明为什么要重新实现 Google 表格已有的 IMPORTXML 函数:IMPORTXML 无法处理不是格式良好(well-formed)XML 的 HTML 页面。而使用我的脚本,我们可以在解析之前先对文档进行修改,然后再进行 XPath 查询。
/**
 * Fetches the XML at url, parses it and evaluates path against its root
 * element with xPathThrowDepthXml, using the root element's namespace.
 * Both arguments fall back to built-in sample values when null/undefined.
 *
 * @param {string} url Address of the XML document.
 * @param {string} path Path expression to evaluate.
 * @return {*} Whatever xPathThrowDepthXml returns.
 */
function myImportXml(url,path){
  // Sample document and query, used when the caller omits an argument.
  var sourceUrl = (url == null)
    ? "http://api.allocine.fr/rest/v3/movie?media=mp4-lc&partner=YW5kcm9pZC12Mg&profile=large&version=2&code=265621"
    : url;
  var query = (path == null) ? "//nationality" : path;

  var xmlText = UrlFetchApp.fetch(sourceUrl).getContentText();
  var rootElement = XmlService.parse(xmlText).getRootElement();
  var rootNamespace = rootElement.getNamespace();
  return xPathThrowDepthXml(query, rootElement, rootNamespace);
}
//https://stackoverflow.com/questions/48185348/google-script-xml-parsing-error-cannot-find-function/58177338#58177338
/**
 * Returns the texts / attribute values found at the given xpath-like path,
 * starting from the given XML element. Needs no script-wide variables, so
 * it can be called from a spreadsheet function.
 *
 * The first '/'-separated step of path is resolved against node; the
 * function then recurses on every matched element with the rest of the path.
 * Results are therefore nested one array level per step that had a
 * remainder; branches that produce nothing are left out.
 *
 * Steps understood (one predicate per step):
 *   tag                        children named tag
 *   tag[2]                     entry 2 of the children named tag (ZERO-based)
 *   tag[@attr]                 children named tag that carry attribute attr
 *   tag[@attr="value"]         ... whose attribute attr equals value
 *   tag[contains(text(),"x")]  children named tag whose text contains x
 *   @attr                      value of attribute attr on the current element
 *   . .. ...                   go up one level PER DOT
 *   //tag  (or /tag, ///tag)   every descendant named tag; a predicate
 *                              written after the tag is ignored in this form
 *
 * Matched elements are kept in a list local to each call. The previous
 * bookkeeping shared one table indexed by depth and removed only one entry
 * per call, so elements matched under one parent leaked into the results of
 * the next parent.
 *
 * @param {string} path Path expression.
 * @param {Object|Array} node Element to start from, or a list of elements
 *     (each entry is then evaluated separately).
 * @param {Object} mynamespace Namespace handed to node.getChildren().
 * @param {number=} niveau Depth of the calling level; omit on the first call.
 * @param {Array=} nextNodes Kept for call compatibility only: passed along
 *     and handed back untouched.
 * @param {number=} log When equal to 3, the depth is written with Logger.log.
 * @return {Array|undefined} On the first call (niveau omitted, or -1) the
 *     nested array of strings; on deeper calls the pair [result, nextNodes];
 *     undefined when node is null/undefined.
 */
function xPathThrowDepthXml(path, node,mynamespace,niveau,nextNodes,log) {
  var isFirstCall = (niveau == null);
  var depth = isFirstCall ? 0 : niveau + 1;
  if (isFirstCall) nextNodes = [];
  if (log == 3) Logger.log(depth);

  // A list of nodes: evaluate the same path for each entry.
  if (Array.isArray(node)) {
    return node.map(function (singleNode) {
      return xPathThrowDepthXml(path, singleNode, mynamespace, niveau, nextNodes, log);
    });
  }
  if (!node) {
    return;
  }

  // Content entries returned by getDescendants() are not all elements;
  // returns the entry as an element, or null when it is something else.
  function toElement(content) {
    if (content == null) return null;
    var element;
    try {
      element = content.asElement ? content.asElement() : content;
    } catch (er) {
      // Deliberate best effort: anything that cannot be viewed as an
      // element is simply not a candidate.
      return null;
    }
    return (element && element.getName) ? element : null;
  }

  // Returns the attribute of element called attributeName, or null.
  function findAttribute(element, attributeName) {
    var attributes = element.getAttributes();
    for (var a = 0; a < attributes.length; a++) {
      if (attributes[a].getName() == attributeName) return attributes[a];
    }
    return null;
  }

  // '///' collapses to '//', and a leading '//' to '/', so that
  // /tag, //tag and ///tag all mean "descendants named tag".
  var paths = path.replace(/\/{3,}/g, "//").replace(/^\/{2,}/, "/").split('/');
  var firstChild = paths[0];
  var remainingPath = paths.slice(1).join('/');

  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/);
  var attributeMatch = firstChild.match(/^@(\w+)/);
  var searchAttributeMatch = firstChild.match(/(\w+)\[@(\w+)\]/);
  var searchAttributeEgalMatch = firstChild.match(/(\w+)\[@(\w+)=['"](.*)["']\]/);
  var searchTextContainsMatch = firstChild.match(/(\w+)\[contains\(text\(\),['"](.*)['"]\)\]/);

  var matches = [];        // elements selected by this step
  var attributeValue = ''; // set only by an '@attr' step
  var children, child, attribute, i;

  if (indexMatch) {
    // tag[i]: an index past the end selects nothing.
    child = node.getChildren(indexMatch[1], mynamespace)[Number(indexMatch[2])];
    if (child) matches.push(child);
  } else if (firstChild === '') {
    // Empty step: the following step names the descendants to collect.
    var tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];
      remainingPath = paths.slice(2).join('/');
    }
    var bracket = tagName.match(/^([^\[]*)\[/);
    if (bracket) tagName = bracket[1];
    var descendants = node.getDescendants();
    for (i = 0; i < descendants.length; i++) {
      child = toElement(descendants[i]);
      if (child && child.getName() == tagName) matches.push(child);
    }
  } else if (searchAttributeMatch) {
    children = node.getChildren(searchAttributeMatch[1], mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() == searchAttributeMatch[1] &&
          findAttribute(child, searchAttributeMatch[2])) {
        matches.push(child);
      }
    }
  } else if (searchAttributeEgalMatch) {
    children = node.getChildren(searchAttributeEgalMatch[1], mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() != searchAttributeEgalMatch[1]) continue;
      attribute = findAttribute(child, searchAttributeEgalMatch[2]);
      if (attribute && attribute.getValue() == searchAttributeEgalMatch[3]) {
        matches.push(child);
      }
    }
  } else if (attributeMatch) {
    // '@attr' reads from the current element itself (works on the root
    // too); a missing attribute leaves the value empty.
    attribute = node.getAttribute ? node.getAttribute(attributeMatch[1]) : null;
    if (attribute) attributeValue = attribute.getValue();
  } else if (firstChild.split(".").length - 1 == firstChild.length) {
    // Step made only of dots: climb one level per dot, stopping quietly
    // once there is no parent left.
    var ancestor = node;
    for (var n = 0; n < firstChild.length && ancestor; n++) {
      ancestor = ancestor.getParentElement();
    }
    if (ancestor) matches.push(ancestor);
  } else {
    // Plain child name, optionally restricted by contains(text(), ...).
    var wantedTag = firstChild;
    var textWanted = null;
    if (searchTextContainsMatch) {
      wantedTag = searchTextContainsMatch[1];
      textWanted = searchTextContainsMatch[2];
    }
    children = node.getChildren(wantedTag, mynamespace);
    for (i = 0; i < children.length; i++) {
      child = children[i];
      if (child.getName() != wantedTag) continue;
      if (textWanted === null || String(child.getText()).indexOf(textWanted) !== -1) {
        matches.push(child);
      }
    }
  }

  var result = [];
  if (attributeValue) {
    result.push(attributeValue);
  } else {
    for (i = 0; i < matches.length; i++) {
      if (remainingPath !== '') {
        // Deeper calls answer with the pair [result, nextNodes].
        var mem = xPathThrowDepthXml(remainingPath, matches[i], mynamespace, depth, nextNodes, log);
        var sub = mem ? mem[0] : null;
        // Leave out branches in which nothing matched.
        if (sub != null && sub.length !== 0) result.push(sub);
      } else {
        var text = matches[i].getText && matches[i].getText();
        if (text != null) result.push(text);
      }
    }
  }

  if (depth === 0) {
    return result;
  }
  return [result, nextNodes];
}