Question

我上传了许多XML文件到我的Google云端硬盘。它们将与网络上的任何人共享。我列出了Google表格中的链接。（如下图所示。）

我正在尝试使用Script from GitHub解析XML文件。（请参阅下面的完整脚本。）但是，当我使用=xPath("@url",E2)调用工作表的单元格F2中的函数时，出现错误：

TypeError：在对象中找不到函数getRootElement https://drive.google.com/uc?id=1heoXLD9uQOZitQHTuhS3xuM8oZduQPZJ。（第49行）。

@url属性肯定存在于XML文件中。但是，我不知道为什么函数找不到它。 The link to the XML file is here

An editable copy of the Google Sheet with the script is here.

以下是GitHub的Google脚本：

/**
 * Add simple XPath XML parsing to google scripts. Essentially be able to
 * use the xpath notation that works in the IMPORTXML formula but from
 * script code instead.
 *
 * NOTE: this is a GOOGLE SCRIPT library - this WILL NOT WORK outside of
 * google scripts because it uses the apps script XML Service
 * https://developers.google.com/apps-script/reference/xml-service/
 *
 * Usage:
 *
 *   In your script, go to Resources -> Libraries then enter the following
 *   Project Key or Script ID in the 'Find a Library' box.
 *
 *   Project Key: M1YVJTfv66XpF5AoIeE9zsAopJxr71Kma
 *   Script ID: 1EyZK520ihKS4JWE1B47Ra0fU4B4m9vAHX0FWMZ50xNUJsU_R9VRIsqf3
 *
 */

/**
 * Returns the value (or list of values) at the given path in
 * the given xmlFile
 *
 * Example:
 *
 *     <xml>
 *       <foo>
 *         <bar>
 *           <baz what='wrong'>thing1</baz>
 *           <baz what='test'>thing2</baz>
 *         </bar>
 *       </foo>
 *       <fizz>buzz</fizz>
 *      </xml>
 *
 *     var xml = readRemoteXML('https://test-xml-file.xml');
 *
 *     var simple = xPath('fizz', xml);
 *     simple; // 'fizz'
 *
 *     var list_values = xPath('foo/bar//baz', xml);
 *     list_values; // ['thing1', 'thing2']
 *
 *     var attribute_after_list = xPath('foo/bar//baz[1]/@what', xml);
 *     attribute_after_list; // test
 *
 */
function xPath(path, xmlFile) {
  var root = xmlFile.getRootElement();
  return xPathStep(path, root);
}


/**
 * Recursive path parsing - you probably want to use xPath instead of using
 * this function directly.
 */
function xPathStep(path, node) {
  // if node is an array, return the result for each entry
  if (Array.isArray(node)) {
    return node.map(function(singleNode) {
      return xPathStep(path, singleNode);
    });
  }

  if (!node) {
    return;
  }

  var nextNode, nodeValue;
  var paths = path.split('/');
  var firstChild = paths[0];
  var remainingPath = paths.slice(1).join('/');

  // if child ends with [\d] - find a list, return this index
  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/);
  var attributeMatch = firstChild.match(/@(\w+)/);

  if (indexMatch) {
    var tagName = indexMatch[1];
    var index = indexMatch[2];

    var children = node.getChildren(tagName);
    nextNode = children[index];
  } else if (firstChild === '') {
    // if another name is next, use as a matching tag (and remove from path)
    var tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];
      remainingPath = paths.slice(2).join('/');
    }
    nextNode = node.getChildren(tagName);
  } else if (attributeMatch) {
    // @ means attribute
    var attributeName = attributeMatch[1];
    nodeValue = node.getAttribute(attributeName).getValue();
  } else {
    nextNode = node.getChild(firstChild);
  }

  var result;
  if (nodeValue) {
    result = nodeValue;
  } else if (remainingPath !== '') {
    result = xPathStep(remainingPath, nextNode);
  } else {
    result = nextNode.getText && nextNode.getText();
  }

  return result;
}


/**
 * Read a remote file at xmlFileUrl, parse it as xml, and
 * return an XMLService document object.
 */
function readRemoteXML(xmlFileUrl) {
  var content = UrlFetchApp.fetch(xmlFileUrl).getContentText();
  return XmlService.parse(content);
}

Answer 1

为您的功能打了很多功，对我来说如此，我继续开发它，然后回到这里分享我的工作

我希望该函数可以理解更多路径语法元素，例如： //标签标签1 /./标题标签2 /.../标题标签[@ attrib ='value'] 标签[包含（text（），'foo'] 因此，我必须使用数组来保持信息在此递归函数处理的深度之间进行发送和返回。

因为我是法语，所以我在脚本中的注释是法语。抱歉，但是我希望我的贡献可以帮助一些开发人员在Google脚本中使用xpath 如果在我的Google应用程序脚本界面中可以完美地工作，但是自从Google工作表函数调用myImportXml（）以来，我遇到了一个问题：当xpath的深度超过2时，我认为我没有任何答案！：（

var nextNodes;var log=0; // variables globales necessaires à la function xPathThrowDepthXml
    function myImportXml(url,path){
      nextNodes=[]; // table à déclarer globalement
      if (url==null) url="http://api.allocine.fr/rest/v3/movie?media=mp4-lc&partner=YW5kcm9pZC12Mg&profile=large&version=2&code=265621"; // to test
      if (path==null) path="//nationality"; // to test
      var xmlDocument=XmlService.parse(UrlFetchApp.fetch(url).getContentText());
      var root=xmlDocument.getRootElement();
      var mynamespace=root.getNamespace();
      var items=xPathThrowDepthXml(path, root,mynamespace);
      return items;
    }
    function xPathThrowDepthXml(path, node,mynamespace,niveau) {
       /* Retourne les texte et attribut selon le chemin xpath fourni, pour le noeud xml donné, avec son namespace
       TOUT ce qui est géré fonctionne ! :)
        Sauts d'éléments, même dès la racine, tag[2], tag[@attrib] , rating[@note]/@note
        /tag1/tag2 ou //tag1/tag2 ou ///tag1/tag2  sont equivalents
        Syntaxes xpath comprises: tag[@class="meta-body-item"] , tag[contains(text(),"Nationalité")]/span
        un ou plusieurs points pour remonter dans la hierarchie    (tag1/.../title  tag1/./././title  sont équivalents )
        Ex de requette complexe qui est comprise: div[@class="meta-body-item"]/span[@class="light"][contains(text(),"Box Office")]/../span[2]
      */
      var nextNodesDebug,NodNamDebug,firstChild,nodeValues,tagName,paths,remainingPath
      var nextNodesCopi=nextNodes;var textWanted=null;
      if (niveau==null)niveau=-1;
      niveau++;
      if (log==3) Logger.log(niveau);
      nextNodes.push([]); // Faudrait vérifier si on ajoute pas trop de push que de niveaux !!!!!!!!!!!!!!!!!!
      if (Array.isArray(node)) {   // if node is an array, return the result for each entry  Ne semble pas agir
        return node.map(function(path,singleNode,mynamespace,niveau) {
          return xPathThrowDepthXml(path, singleNode,mynamespace,niveau);
        });
      }

      if (!node) {
        return;
      }
      paths = path.replace(/\/{3,}/g, "//").replace(/^\/{2,}/, "/").split('/');
      //paths = path.split('/');
      firstChild= paths [0];
      var firstChildDebug=firstChild;
      remainingPath = paths .slice(1).join('/');

      // if child ends with [\d] - find a list, return this index
      var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/); //tag[2]
      var attributeMatch = firstChild.match(/^@(\w+)/); //  /@attribu pour avoir la valeur de tel attribut
      var searchAttributeMatch = firstChild.match(/(\w+)\[@(\w+)\]/);  // tag[@attribu] Pour avoir l'element possédant un attribut nommé d'une telle  façon
      var searchAttributeEgalMatch = firstChild.match(/(\w+)\[@(\w+)=['"](.*)["']\]/);  // tag[@attribu] Pour avoir l'element possédant un attribut nommé d'une telle  façon et ayant telle valeure
      var searchTextContainsMatch = firstChild.match(/(\w+)\[contains\(text\(\),['"](.*)['"]\)\]/);  // tag[contains(text(),'Affiche')] Pour avoir l'element dont le texte est telle chaîne

      if (indexMatch) {
        tagName = indexMatch[1];
        var index = indexMatch[2];
        var children = node.getChildren(tagName,mynamespace);
        nextNodesDebug=children[index];NodNamDebug=nextNodesDebug.getName();
        nextNodes[niveau].push(nextNodesDebug);  
      } else if (firstChild === '') { // Cas des saut d'elements par comme newsList//title
        // if another name is next, use as a matching tag (and remove from path)
        tagName = '';
        if (paths.length > 1) {
          tagName = paths[1];

          remainingPath = paths.slice(2).join('/');
        }
        if (tagName.match(/(.*)\[/)) tagName=tagName.match(/(.*)\[/)[1];
        //nextNodesDebug=node.getChildren(tagName,mynamespace);
        nextNodesDebug=node.getDescendants(); //if (nextNodesDebug=="") 
        //NodNamDebug=nextNodesDebug.getName();
        for (var itag in nextNodesDebug){
          try{ var nodeName=nextNodesDebug[itag].getName();}catch(er){var nodeName="";}
          if (nodeName==tagName) {
            nextNodes[niveau].push(nextNodesDebug[itag]);
          }
        }
      } else if (searchAttributeMatch) {// cherche l'element tagName ayant un attribut nommé attribNameWanted
        tagName = searchAttributeMatch[1];
        var attribNameWanted = searchAttributeMatch[2]; 
        var children= node.getChildren(tagName,mynamespace);
        for (var itag in children){
          if (children[itag].getName()==tagName){
            var nodeName=children[itag].getName();
            var attribs = children[itag].getAttributes(); 
            for (var iAtrib in attribs){
              if (attribs[iAtrib].getName()==attribNameWanted){ // on tient là un élément nommé tagName dont l'attribut est nommé attribNameWanted
                nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
                nextNodes[niveau].push(nextNodesDebug);//Logger.log("N"+nextNodes[niveau]);
              }
            }
          }
        }
      }  else if (searchAttributeEgalMatch) {// cherche l'element tagName ayant un attribut nommé attribNameWanted 
        tagName = searchAttributeEgalMatch[1];
        var attribNameWanted = searchAttributeEgalMatch[2]; 
        var valueAttribWanted = searchAttributeEgalMatch[3]; 
        var children= node.getChildren(tagName,mynamespace);
        for (var itag in children){
          if (children[itag].getName()==tagName){
            var nodeName=children[itag].getName();
            var attribs = children[itag].getAttributes(); 
            for (var iAtrib in attribs){
              if (attribs[iAtrib].getName()==attribNameWanted){ // on tient là un élément nommé tagName dont l'attribut est nommé attribNameWanted
                var atval=attribs[iAtrib].getValue();
                if(attribs[iAtrib].getValue()==valueAttribWanted){
                  nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
                  nextNodes[niveau].push(nextNodesDebug);//Logger.log("N"+nextNodes[niveau]);
                }
              }
            }
          }
        }
      } else if (attributeMatch) {
        // @ means attribute
        var parent=node.getParentElement().getName();
        var attributeName = attributeMatch[1];
        //Logger.log("niv"+niveau);Logger.log("nod"+node.getDescendants())
        var nodeValuesDebug=""
        try{ nodeValuesDebug=node.getAttribute(attributeName).getValue();}catch(er){}
        nodeValues=nodeValuesDebug;

      } else if (  firstChild.split(".").length-1==firstChild.length) { // firsChild=="." ou ".." ou .....  ->Remonter d'autant de niveaux
        var parent=node
          for (var n=0;n<firstChild.length;n++){
          parent=parent.getParentElement();
        }
        nextNodes[niveau].push(parent);
      } else {
        // simple child to search - Mais il faut plutôt try de chercher des children si has children !!!!!!!!!!!!!!!!!!!!!
        textWanted=null;
        if (searchTextContainsMatch){
          firstChild=searchTextContainsMatch[1];
          textWanted=searchTextContainsMatch[2];
        }
        var children= node.getChildren(firstChild,mynamespace);
        if (children.length!=0){
          for (var itag in children){
            if (children[itag].getName()==firstChild){  // &&itag<3 LIMITATION POUR LE DEBOGADE (A RETIRER)
              nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
              if (log==3) Logger.log(niveau+"nextNodpush"+itag+NodNamDebug);
              if (textWanted==null){
                nextNodes[niveau].push(nextNodesDebug);//Logger.log(niveau+"->Nodes"+nextNodes[niveau]);  
              }
              if (children[itag].getText()==textWanted){
                nextNodes[niveau].push(nextNodesDebug);//Logger.log(niveau+"->Nodes"+nextNodes[niveau]);  
              }
            }
          }
        }
      }

      var result=[];

      if (nodeValues) {
        result.push(nodeValues);
      }
      else{
        if (log==3) Logger.log(niveau+"for nextNode"+nextNodes[niveau].length);
        for (var inextNode in nextNodes[niveau]){
          if (remainingPath !== '') {

              var mem=xPathThrowDepthXml(remainingPath, nextNodes[niveau][inextNode],mynamespace,niveau)
              if (log==3) Logger.log(niveau+"Result.PUSH MEM"+mem);
              if (mem!="") result.push(mem); // Pb: Multiplie les reponse pour mediaList/media/title

          } else {
            var inextNodeTextDebug=""
            if (nextNodes[niveau][inextNode]!=null) { 
              inextNodeTextDebug=nextNodes[niveau][inextNode].getText && nextNodes[niveau][inextNode].getText();
              if (inextNodeTextDebug!=null)result.push(inextNodeTextDebug);
              if (log==3) Logger.log(niveau+"Result.pushTEXT="+inextNodeTextDebug);
            }
          }
        }
        nextNodes[niveau].pop();
      }
      return result;
    }

Answer 2

我解决了我的问题，即当xpath的深度超过2时，使用Google工作表函数调用myImportXml（）的答案中止了。

原因是我的数组nextNodes使用了全局变量[] 我通过在函数的递归调用之间添加数组return et send来替换它。因此，我的脚本不再需要全局变量作为下面的代码

我忘了告诉我为什么我重新创建了现有的Google工作表函数IMPORTXML！ IMPORTXML无法处理未充分验证XML的HTML页面。因此，使用我的脚本，我们可以在解析xml文档之前对XML文档进行修改，并进行xPath研究。

function myImportXml(url,path){
  if (url==null) url="http://api.allocine.fr/rest/v3/movie?media=mp4-lc&partner=YW5kcm9pZC12Mg&profile=large&version=2&code=265621"; // to test
  if (path==null) path="//nationality"; // to test
  var xmlDocument=XmlService.parse(UrlFetchApp.fetch(url).getContentText());
  var root=xmlDocument.getRootElement();
  var mynamespace=root.getNamespace();
  var items=xPathThrowDepthXml(path, root,mynamespace);
  return items;
}
function xPathThrowDepthXml(path, node,mynamespace,niveau,nextNodes,log) {
//https://stackoverflow.com/questions/48185348/google-script-xml-parsing-error-cannot-find-function/58177338#58177338
   /* Retourne les texte et attribut selon le chemin xpath fourni, pour le noeud xml donné, avec son namespace
   TOUT ce qui est géré fonctionne ! :)
    tag[2], tag[@attrib] , rating[@note]/@note
    Sauts d'éléments, même dès la racine, 
    /tag1/tag2 ou //tag1/tag2 ou ///tag1/tag2  sont equivalents
    Syntaxes xpath comprises: tag[@class="meta-body-item"] , tag[contains(text(),"Nationalité")]/span
    un ou plusieurs points pour remonter dans la hierarchie    (tag1/.../title  tag1/./././title  sont équivalents )
    Ex de requette complexe qui est comprise: div[@class="meta-body-item"]/span[@class="light"][contains(text(),"Box Office")]/../span[2]
    Plus de variable ni tableau à déclarer globalement. Ceux ci sont maintenant envoyés récursivement et bidirectionnelement 
     Ainsi le script répond à un appel dune function de feuille de calcul
  */
  var nextNodesDebug,NodNamDebug,firstChild,nodeValues,tagName,paths,remainingPath
  var nextNodesCopi=nextNodes;var textWanted=null;
  if (niveau==null){
    niveau=-1;
    nextNodes=[];  
  }
  niveau++;
  if (log==3) Logger.log(niveau);
  nextNodes.push([]);
  if (Array.isArray(node)) {   // if node is an array, return the result for each entry - This does not seem to occur !
    return node.map(function(path,singleNode,mynamespace,niveau) {
      return xPathThrowDepthXml(path, singleNode,mynamespace,niveau,nextNodes,log);
    });
  }

  if (!node) {
    return;
  }
  paths = path.replace(/\/{3,}/g, "//").replace(/^\/{2,}/, "/").split('/');
  firstChild= paths [0];
  var firstChildDebug=firstChild;
  remainingPath = paths .slice(1).join('/');

  var indexMatch = firstChild.match(/(\w+)\[(\d+)\]/); //tag[2]  if child ends with [\d] - find a list, return this index

  var attributeMatch = firstChild.match(/^@(\w+)/); //  /@attribu  - to give the value of a named attribut
  var searchAttributeMatch = firstChild.match(/(\w+)\[@(\w+)\]/);  // tag[@attribu] - to give element having a named attribut 
  var searchAttributeEgalMatch = firstChild.match(/(\w+)\[@(\w+)=['"](.*)["']\]/);  // tag[@attribu] - to give element having a named attribut and a wanted value
  var searchTextContainsMatch = firstChild.match(/(\w+)\[contains\(text\(\),['"](.*)['"]\)\]/);  // tag[contains(text(),'Affiche')] - to give element having a wanted text

  if (indexMatch) {
    tagName = indexMatch[1];
    var index = indexMatch[2];
    var children = node.getChildren(tagName,mynamespace);
    nextNodesDebug=children[index];NodNamDebug=nextNodesDebug.getName();
    nextNodes[niveau].push(nextNodesDebug);  
  } else if (firstChild === '') { // skiping element cases like newsList//title
    // if another name is next, use as a matching tag (and remove from path)
    tagName = '';
    if (paths.length > 1) {
      tagName = paths[1];

      remainingPath = paths.slice(2).join('/');
    }
    if (tagName.match(/(.*)\[/)) tagName=tagName.match(/(.*)\[/)[1];
    nextNodesDebug=node.getDescendants(); 
    for (var itag in nextNodesDebug){
      try{ var nodeName=nextNodesDebug[itag].getName();}catch(er){var nodeName="";}
      if (nodeName==tagName) {
        nextNodes[niveau].push(nextNodesDebug[itag]);
      }
    }
  } else if (searchAttributeMatch) {// search the tagNAme element having a wanted atribut named attribNameWanted
    tagName = searchAttributeMatch[1];
    var attribNameWanted = searchAttributeMatch[2]; 
    var children= node.getChildren(tagName,mynamespace);
    for (var itag in children){
      if (children[itag].getName()==tagName){
        var nodeName=children[itag].getName();
        var attribs = children[itag].getAttributes(); 
        for (var iAtrib in attribs){
          if (attribs[iAtrib].getName()==attribNameWanted){ // here we have a tagNAme element having a wanted atribut named attribNameWanted
            nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
            nextNodes[niveau].push(nextNodesDebug);//Logger.log("N"+nextNodes[niveau]);
          }
        }
      }
    }
  }  else if (searchAttributeEgalMatch) {// search the tagNAme element having a wanted attribut named attribNameWanted  that have the value valueAttribWanted
    tagName = searchAttributeEgalMatch[1];
    var attribNameWanted = searchAttributeEgalMatch[2]; 
    var valueAttribWanted = searchAttributeEgalMatch[3]; 
    var children= node.getChildren(tagName,mynamespace);
    for (var itag in children){
      if (children[itag].getName()==tagName){
        var nodeName=children[itag].getName();
        var attribs = children[itag].getAttributes(); 
        for (var iAtrib in attribs){
          if (attribs[iAtrib].getName()==attribNameWanted){ // here we have a tagNAme element having a wanted attribut named attribNameWanted  that have the value valueAttribWanted
            var atval=attribs[iAtrib].getValue();
            if(attribs[iAtrib].getValue()==valueAttribWanted){
              nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
              nextNodes[niveau].push(nextNodesDebug);//Logger.log("N"+nextNodes[niveau]);
            }
          }
        }
      }
    }
  } else if (attributeMatch) {
    // @ means attribute
    var parent=node.getParentElement().getName();
    var attributeName = attributeMatch[1];
    //Logger.log("niv"+niveau);Logger.log("nod"+node.getDescendants())
    var nodeValuesDebug=""
    try{ nodeValuesDebug=node.getAttribute(attributeName).getValue();}catch(er){}
    nodeValues=nodeValuesDebug;

  } else if (  firstChild.split(".").length-1==firstChild.length) { // firsChild=="." or ".." or .....  -> go up one or more levels
    var parent=node
      for (var n=0;n<firstChild.length;n++){
      parent=parent.getParentElement();
    }
    nextNodes[niveau].push(parent);
  } else {
    // simple child to search - But we must try to find children
    textWanted=null;
    if (searchTextContainsMatch){ // if we must finding element having a wanted text
      firstChild=searchTextContainsMatch[1];
      textWanted=searchTextContainsMatch[2];
    }
    var children= node.getChildren(firstChild,mynamespace);
    if (children.length!=0){
      for (var itag in children){
        if (children[itag].getName()==firstChild){  // &&itag<3 limitation can be added here for degug purposes
          nextNodesDebug=children[itag];NodNamDebug=nextNodesDebug.getName();
          if (log==3) Logger.log(niveau+"nextNodpush"+itag+NodNamDebug);
          if (textWanted==null){
            nextNodes[niveau].push(nextNodesDebug);//Logger.log(niveau+"->Nodes"+nextNodes[niveau]);  
          }
          if (children[itag].getText()==textWanted){
            nextNodes[niveau].push(nextNodesDebug);//Logger.log(niveau+"->Nodes"+nextNodes[niveau]);  
          }
        }
      }
    }
  }

  var result=[];

  if (nodeValues) {
    result.push(nodeValues);
  }
  else{
    if (log==3) Logger.log(niveau+"for nextNode"+nextNodes[niveau].length);
    for (var inextNode in nextNodes[niveau]){
      if (remainingPath !== '') {

          var mem=xPathThrowDepthXml(remainingPath, nextNodes[niveau][inextNode],mynamespace,niveau,nextNodes,log)
          nextNodes=mem[1]
          if (log==3) Logger.log(niveau+"Result.PUSH MEM"+mem[0]);
          if (mem!="") result.push(mem[0]);

      } else {
        var inextNodeTextDebug=""
        if (nextNodes[niveau][inextNode]!=null) { 
          inextNodeTextDebug=nextNodes[niveau][inextNode].getText && nextNodes[niveau][inextNode].getText();
          if (inextNodeTextDebug!=null)result.push(inextNodeTextDebug);
          if (log==3) Logger.log(niveau+"Result.pushTEXT="+inextNodeTextDebug);
        }
      }
    }
    nextNodes[niveau].pop();
  }
  if (niveau==0){return result
  } else {
    return [result,nextNodes]
  }
}

Google Script XML解析错误：无法找到函数

2 个答案: