对于Google文档/云端硬盘中的“普通文档”(例如包含段落、列表、表格),其中的外部链接分散在整个内容中,如何使用Google Apps脚本编制这些链接的列表?
具体来说,我想在每个网址中搜索 oldText,并将其替换为 newText,以此更新文档中所有损坏的链接——只修改网址,不修改显示的文字。
我认为开发文档中的“替换文本”(replacing text)部分不是我需要的——我是否需要扫描文档的每个元素?我可以直接用 editAsText 并使用 HTML 正则表达式吗?如能提供示例将不胜感激。
答案 0 :(得分:15)
这个过程相当痛苦!代码已作为一个 gist 的一部分提供。
是的,我无法拼写。
这是一个实用程序函数,它扫描文档中的所有LinkUrls,并将它们返回到数组中。
/**
 * Get an array of all LinkUrls in the document. The function is
 * recursive, and if no element is provided, it will default to
 * the active document's Body element.
 *
 * Each maximal run of consecutive characters that share the same link URL
 * becomes one entry. Runs that reach the end of a Text element and
 * single-character runs are reported too, and `endOffsetInclusive` is always
 * set (it equals `startOffset` for a one-character link).
 *
 * @param {Element} element The document element to operate on.
 *                          Defaults to the active document's Body.
 * @returns {Array} Array of objects, vis
 *                  {element,
 *                   startOffset,
 *                   endOffsetInclusive,
 *                   url}
 */
function getAllLinks(element) {
    var links = [];
    element = element || DocumentApp.getActiveDocument().getBody();

    if (element.getType() === DocumentApp.ElementType.TEXT) {
        var textObj = element.editAsText();
        var text = element.getText();
        var curUrl = null; // the link run currently being extended, if any

        for (var ch = 0; ch < text.length; ch++) {
            var url = textObj.getLinkUrl(ch);

            // Still inside the same link: just move its end forward.
            if (curUrl !== null && url === curUrl.url) {
                curUrl.endOffsetInclusive = ch;
                continue;
            }

            // The run (if any) ended here, either because the link stopped or
            // because a different link starts immediately after it.
            if (curUrl !== null) {
                links.push(curUrl);
                curUrl = null;
            }

            if (url != null) {
                curUrl = {
                    element: element,
                    url: String(url), // grab a copy
                    startOffset: ch,
                    endOffsetInclusive: ch
                };
            }
        }

        // A link that runs to the last character has no trailing non-link
        // character to close it, so flush it explicitly.
        if (curUrl !== null) {
            links.push(curUrl);
        }
    }
    else if (typeof element.getNumChildren === 'function') {
        // Only container elements have children; elements without
        // getNumChildren() are leaves and are skipped.
        var numChildren = element.getNumChildren();
        for (var i = 0; i < numChildren; i++) {
            links = links.concat(getAllLinks(element.getChild(i)));
        }
    }

    return links;
}
下面这个实用程序以 getAllLinks 为基础,实现查找和替换功能。
/**
 * Replace all or part of UrlLinks in the document.
 *
 * The search pattern is always applied as a regular expression, both for
 * deciding whether a URL matches and for performing the replacement. A string
 * pattern is compiled with `new RegExp(searchPattern)`, so only the first
 * match in each URL is replaced; pass a RegExp with the `g` flag to replace
 * every match.
 *
 * @param {String|RegExp} searchPattern the regex pattern to search for;
 *                                      an empty or missing pattern changes nothing
 * @param {String} replacement          the text to use as replacement
 *
 * @returns {Number} number of Urls changed
 */
function findAndReplaceLinks(searchPattern, replacement) {
    // An empty pattern would match (and rewrite) every URL in the document.
    if (searchPattern == null || searchPattern === '') {
        return 0;
    }
    var pattern = (searchPattern instanceof RegExp) ? searchPattern : new RegExp(searchPattern);
    var newText = (replacement == null) ? '' : replacement;

    var links = getAllLinks();
    var numChanged = 0;
    for (var l = 0; l < links.length; l++) {
        var link = links[l];
        var newUrl = link.url.replace(pattern, newText);
        if (newUrl === link.url) {
            continue; // no match, or the replacement left the URL as it was
        }
        // A one-character link may come without an end offset; it then ends
        // where it starts.
        var end = (typeof link.endOffsetInclusive === 'number') ? link.endOffsetInclusive : link.startOffset;
        link.element.setLinkUrl(link.startOffset, end, newUrl);
        numChanged++;
    }
    return numChanged;
}
为了演示这些实用程序的使用,这里有几个UI扩展:
/**
 * Adds the 'Utils' menu, with its two link utilities, to the document UI.
 */
function onOpen() {
    var menu = DocumentApp.getUi().createMenu('Utils');
    // Each call returns the menu to continue with, so keep threading it through.
    menu = menu.addItem('List Links', 'sidebarLinks');
    menu = menu.addItem('Replace Link Text', 'searchReplaceLinks');
    menu.addToUi();
}
/**
 * Builds a small UiApp dialog with "Find" and "Replace" text boxes and a
 * submit button, then shows it in the document UI. The submit button gets a
 * server handler named 'myClickHandler'.
 */
function searchReplaceLinks() {
    var docUi = DocumentApp.getUi();
    var dialog = UiApp.createApplication()
        .setWidth(250)
        .setHeight(100)
        .setTitle('Change Url text');
    var formPanel = dialog.createFormPanel();
    var fields = dialog.createFlowPanel();

    // One caption followed by one named text box.
    var addField = function(caption, fieldName) {
        fields.add(dialog.createLabel(caption));
        fields.add(dialog.createTextBox().setName(fieldName));
    };
    addField("Find: ", "searchPattern");
    addField("Replace: ", "replacement");

    var closeHandler = dialog.createServerHandler('myClickHandler');
    fields.add(dialog.createSubmitButton("Submit").addClickHandler(closeHandler));

    formPanel.add(fields);
    dialog.add(formPanel);
    docUi.showDialog(dialog);
}
/**
 * Click handler for the dialog's submit button: closes the active UiApp
 * application and returns it.
 *
 * @param {Object} e The event object (unused).
 * @returns {Object} The application that was closed.
 */
function myClickHandler(e) {
    var dialog = UiApp.getActiveApplication();
    dialog.close();
    return dialog;
}
/**
 * Form-submission handler: runs the link find-and-replace with the submitted
 * values, refreshes the sidebar list of links, and reports how many URLs
 * were changed.
 *
 * @param {Object} e Event object; reads e.parameter.searchPattern and
 *                   e.parameter.replacement.
 */
function doPost(e) {
    var numChanged = findAndReplaceLinks(e.parameter.searchPattern, e.parameter.replacement);
    sidebarLinks(); // Update list
    var ui = DocumentApp.getUi();
    ui.alert(
        'Results',
        "Changed " + numChanged + " urls.",
        ui.ButtonSet.OK);
}
/**
 * Shows a custom HTML user interface in a sidebar in the Google Docs editor,
 * listing the URL of every link found by getAllLinks(), one paragraph each.
 *
 * The URLs come from document content, so they are added with
 * appendUntrusted() and are shown as text rather than interpreted as HTML.
 */
function sidebarLinks() {
    var links = getAllLinks();
    var sidebar = HtmlService
        .createHtmlOutput()
        .setTitle('URL Links')
        .setWidth(350 /* pixels */);

    // Display list of links, url only.
    for (var l = 0; l < links.length; l++) {
        sidebar.append('<p>');
        sidebar.appendUntrusted(links[l].url);
        sidebar.append('</p>');
    }

    DocumentApp.getUi().showSidebar(sidebar);
}
答案 1 :(得分:4)
我为您的第一个问题(即如何遍历文档正文中的所有链接)提供了另一个更短的答案。这段示例代码返回当前文档正文中所有链接组成的一个扁平数组,其中每个链接由一个对象表示,该对象的各个属性分别指向:文本元素(text)、包含该文本的段落元素或列表项元素(paragraph)、链接在文本中出现位置的偏移索引(startOffset)以及URL本身(url)。希望您能轻松地按自己的需要进行调整。
它使用 getTextAttributeIndices() 方法,而不是逐个字符地迭代文本,因此预期会比之前的答案快得多。
编辑:自最初发布此答案以来,我对该函数做了几次修改。它现在还会:(1)为每个链接包含 endOffsetInclusive 属性(请注意,对于一直延伸到文本元素末尾的链接,它可能是 null——在这种情况下,可以改用 link.text.length-1);(2)在文档的所有部分(不仅是正文)中查找链接;(3)包含 section 和 isFirstPageSection 属性,以指示链接所在的位置;(4)接受参数 mergeAdjacent,当它设置为 true 时,对于链接到同一URL的连续文本,只返回一个链接条目(例如,若文本的一部分样式与其他部分不同,这些文本本会被视为多个独立的链接)。
为了包含所有部分下的链接,引入了一个新的实用函数 iterateSections()。
答案 2 :(得分:1)
我折腾了一番,并整合了 @Mogsdad 的答案——下面是一个相当复杂的版本:
// Underscore-style utility object; the logging and menu helpers in this script call it as `_`.
var _ = Underscorejs.load(); // loaded via http://googleappsdeveloper.blogspot.com/2012/11/using-open-source-libraries-in-apps.html, rolled my own
// Handle to the document editor's UI, shared by the helpers in this script (prompts, menus, alerts).
var ui = DocumentApp.getUi();
// #region --------------------- Utilities -----------------------------
/**
 * Helper module for finding and rewriting link URLs in the active document.
 *
 * Exposes:
 *   updateLink(link, oldText, newText, start, end)
 *   updateLinksMenu()
 *   findAllElementsFor(el, test)
 *
 * Relies on names defined elsewhere in the script: `ui`, `log`,
 * `gDocUiHelper` and `DocumentApp`.
 */
var gDocsHelper = (function(P, un) {
    // heavily based on answer https://stackoverflow.com/a/18731628/1037948

    // Maximum number of characters of surrounding text shown for a link.
    var PREVIEW_LENGTH = 100;

    // Escape a value for safe interpolation into the sidebar's HTML.
    var escapeHtml = function(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    };

    // Returns a function (evaluated lazily, e.g. only when logging) that
    // previews up to PREVIEW_LENGTH characters of text starting at `offset`.
    var updatedLinkText = function(link, offset) {
        return function() {
            var text = link.getText();
            var from = offset || 0;
            return 'Text: ' + text.substring(from, from + PREVIEW_LENGTH) + ((text.length - from) > PREVIEW_LENGTH ? '...' : '');
        };
    };

    /**
     * Replace every literal occurrence of `oldText` in the URL of one link.
     *
     * @param {Text}   link    Text element containing the link.
     * @param {String} oldText Literal text to look for in the URL.
     * @param {String} newText Literal text to put in its place.
     * @param {Number} start   Offset of the link's first character (default 0).
     * @param {Number} end     Offset of the link's last character, inclusive.
     *                         If omitted or smaller than `start`, the end is
     *                         found by scanning forward while the following
     *                         characters carry the same URL.
     * @returns {Object|Boolean} {old, new, getText} when the URL was changed;
     *                           false when there is no link at `start`,
     *                           `oldText` is empty, or the URL does not
     *                           contain `oldText`.
     */
    P.updateLink = function updateLink(link, oldText, newText, start, end) {
        var first = start || 0;
        var oldLink = link.getLinkUrl(first);
        if (!oldLink || !oldText || oldLink.indexOf(oldText) < 0) return false;

        // Literal replace-all: URL text such as "page?id=" must not be
        // interpreted as a regular expression.
        var newLink = oldLink.split(oldText).join(newText);

        var last = end;
        if (typeof last !== 'number' || last < first) {
            var length = link.getText().length;
            last = first;
            while (last + 1 < length && link.getLinkUrl(last + 1) === oldLink) last++;
        }

        link.setLinkUrl(first, last, newLink);

        log(true, "Updating Link: ", oldLink, newLink, first, last, updatedLinkText(link, first));

        return { old: oldLink, "new": newLink, getText: updatedLinkText(link, first) };
    };

    // Update one discovered link, report the change in the sidebar, and
    // record the link's original URL in `urls`.
    var updateLinkResult = function(text, oldText, newText, link, urls, sidebar, updateResult) {
        // and may as well update the link while we're here
        if (false !== (updateResult = P.updateLink(text, oldText, newText, link.start, link.end))) {
            sidebar.append('<li>' + escapeHtml(updateResult['old']) + ' → ' + escapeHtml(updateResult['new']) + ' at ' + escapeHtml(updateResult['getText']()) + '</li>');
        }

        urls.push(link.url); // so multiple links get added to list
    };

    /**
     * Menu action: prompt for the old and new URL text, rewrite every
     * matching link in the document body, and list the changes in a sidebar.
     */
    P.updateLinksMenu = function() {
        // https://developers.google.com/apps-script/reference/base/prompt-response
        var oldText = ui.prompt('Old link text to replace').getResponseText();
        var newText = ui.prompt('New link text to replace with').getResponseText();

        log('Replacing: ' + oldText + ', ' + newText);
        var sidebar = gDocUiHelper.createSidebar('Update All Links', '<h3>Replacing</h3><p><code>' + escapeHtml(oldText) + '</code> → <code>' + escapeHtml(newText) + '</code></p><hr /><ol>');

        // current doc available to script
        var doc = DocumentApp.getActiveDocument().getBody();//.getActiveSection();

        var links = P.findAllElementsFor(doc, function(text) {
            var i = -1, n = text.getText().length, link = false, url, urls = [];

            // scan the text element for links
            while (++i < n) {
                url = text.getLinkUrl(i);

                // getLinkUrl keeps returning the URL while INSIDE the link,
                // so just extend the run we are already tracking
                if (url && false !== link && url === link.url) {
                    link.end = i;
                    continue;
                }

                // just left the link (or ran into a different one): update it
                if (false !== link) {
                    updateLinkResult(text, oldText, newText, link, urls, sidebar);
                    link = false; // reset "counter"
                }

                // a new link starts here; a one-character link ends where it starts
                if (url) link = { start: i, end: i, url: url };
            }

            // once we've reached the end of the text, must also check to see if the last thing we found was a link
            if (false !== link) updateLinkResult(text, oldText, newText, link, urls, sidebar);

            return urls;
        });

        sidebar.append('</ol><p><strong>' + links.length + ' links reviewed</strong></p>');
        gDocUiHelper.attachSidebar(sidebar);

        log(links);
    };

    /**
     * Run `test` on every Text element found under `el` and collect the
     * results.
     *
     * @param {Element}  el   Element to search with findElement().
     * @param {Function} test Called with each text element (via
     *                        editAsText()). A truthy array result is
     *                        concatenated onto the output; any other truthy
     *                        result is pushed.
     * @returns {Array} Collected results; empty when `test` is not given.
     */
    P.findAllElementsFor = function(el, test) {
        // generic utility function to find all text elements; heavily based on https://stackoverflow.com/a/18731628/1037948
        var results = [], searchResult = null, result;

        // findElement already searches descendants, so no separate recursion
        // over the children is needed.
        // https://developers.google.com/apps-script/reference/document/body#findElement(ElementType)
        while (searchResult = el.findElement(DocumentApp.ElementType.TEXT, searchResult)) {
            var t = searchResult.getElement().editAsText(); // .asParagraph()

            // check to add to list
            if (test && (result = test(t))) {
                if (Array.isArray(result)) results = results.concat(result);
                else results.push(result);
            }
        }

        return results;
    };

    return P;
})({});
// really? it can't handle object properties?
/**
 * Global wrapper that simply calls gDocsHelper.updateLinksMenu(). The menu
 * entry registered below refers to this function by name.
 */
function gDocsUpdateLinksMenu() {
gDocsHelper.updateLinksMenu();
}
// Queue an 'Update links' item under the 'Zaus' menu, pointing at the wrapper above.
// NOTE(review): gDocUiHelper must already be defined when this statement runs — confirm script/file order.
gDocUiHelper.addMenu('Zaus', [ ['Update links', 'gDocsUpdateLinksMenu'] ]);
// #endregion --------------------- Utilities -----------------------------
为了完整性,我在下面创建了用于创建菜单,侧边栏等的“额外”实用程序类:
/**
 * Logging helper built on Logger.log.
 *
 * Function arguments are called and their return value is logged instead, so
 * expensive messages are only built when logging actually happens.
 *
 * - If the first argument is exactly `true`, all arguments (including that
 *   `true`) are joined with '; ' and written as a single log entry.
 * - Otherwise each argument is written as its own log entry.
 *
 * (To silence logging, make this function return immediately.)
 */
var log = function() {
    var items = Array.prototype.slice.call(arguments);

    // Resolve lazily-evaluated entries.
    var resolve = function(item) {
        return _.isFunction(item) ? item() : item;
    };

    if (items[0] !== true) {
        _.each(items, function(item) {
            Logger.log(resolve(item));
        });
        return;
    }

    Logger.log(_.map(items, function(item) { return resolve(item); }).join('; '));
};
// #region --------------------- Menu -----------------------------
/**
 * Helper module for menus and sidebars.
 *
 * Exposes:
 *   addMenu(menuTitle, menuItems)   queue items for a menu
 *   onOpen()                        build every queued menu
 *   addMenuToSheet(spreadsheet, title, items)
 *   createSidebar(title, content, options)
 *   attachSidebar(sidebar)
 *
 * Relies on names defined elsewhere in the script: `ui`, `_`, `log`,
 * `HtmlService` and `DocumentApp`.
 */
var gDocUiHelper = (function(P, un) {
    var DEFAULT_SIDEBAR_WIDTH = 350; // pixels

    /**
     * Create one menu and add it to the UI.
     *
     * @param {Object} spreadsheet Unused; kept so existing callers still work.
     * @param {String} title       Menu title.
     * @param {Array}  items       Menu items, each either [name, functionName]
     *                             or {name, functionName}. Malformed items are
     *                             logged and reported in an alert, then skipped.
     */
    P.addMenuToSheet = function addMenu(spreadsheet, title, items) {
        var menu = ui.createMenu(title);
        // make sure menu items are correct format
        _.each(items, function(v, k) {
            var err = [];

            // provided in format [ [name, fn],... ] instead
            if (_.isArray(v)) {
                if (v.length === 2) {
                    menu.addItem(v[0], v[1]);
                }
                else {
                    err.push('Menu item ' + k + ' missing name or function: ' + v.join(';'));
                }
            }
            else {
                if (!v.name) err.push('Menu item ' + k + ' lacks name');
                if (!v.functionName) err.push('Menu item ' + k + ' lacks function');

                if (!err.length) menu.addItem(v.name, v.functionName);
            }

            if (err.length) {
                log(err);
                ui.alert(err.join('; '));
            }
        });
        menu.addToUi();
    };

    // list of things to hook into: menu title -> queued menu items
    var initializers = {};

    /**
     * Queue menu items under a menu title; the menus are built in onOpen().
     *
     * @param {String} menuTitle Title of the menu to add to.
     * @param {Array}  menuItems Items in either format accepted by addMenuToSheet.
     */
    P.addMenu = function(menuTitle, menuItems) {
        if (initializers[menuTitle] === un) {
            initializers[menuTitle] = [];
        }
        initializers[menuTitle] = initializers[menuTitle].concat(menuItems);
    };

    /**
     * Create a sidebar with the given title and initial HTML content.
     *
     * @param {String} title   Sidebar title.
     * @param {String} content HTML appended as-is.
     * @param {Object} options Optional. `width`: width in pixels (default 350);
     *                         `on`: when truthy, show the sidebar immediately.
     * @returns {HtmlOutput} The sidebar, so more content can be appended.
     */
    P.createSidebar = function(title, content, options) {
        var sidebar = HtmlService
            .createHtmlOutput()
            .setTitle(title)
            .setWidth((options && options.width) ? options.width : DEFAULT_SIDEBAR_WIDTH);

        sidebar.append(content);

        if (options && options.on) DocumentApp.getUi().showSidebar(sidebar);

        return sidebar;
    };

    /**
     * Show a sidebar created with createSidebar().
     *
     * @param {HtmlOutput} sidebar The sidebar to show.
     */
    P.attachSidebar = function(sidebar) {
        DocumentApp.getUi().showSidebar(sidebar);
    };

    /**
     * Build every menu queued with addMenu().
     */
    P.onOpen = function() {
        log(initializers);
        _.each(initializers, function(v, k) {
            // addMenuToSheet ignores its first argument, so nothing needs to
            // be looked up for it here.
            P.addMenuToSheet(null, k, v);
        });
    };

    return P;
})({});
// #endregion --------------------- Menu -----------------------------
/**
 * Delegates to gDocUiHelper.onOpen(), which adds the queued custom menus
 * to the UI.
 *
 * NOTE(review): presumably invoked automatically as the "on open" trigger.
 * The original comment referred to a spreadsheet, but the surrounding helpers
 * use DocumentApp — confirm the host application.
 */
function onOpen() {
gDocUiHelper.onOpen();
}
答案 3 :(得分:1)
我在让 Mogsdad 的解决方案正常工作时遇到了一些麻烦。具体来说,它会漏掉那些一直延伸到父元素末尾的链接,因为这类链接后面没有非链接字符来终止它。我实现了一个解决此问题、并返回标准 Range(范围)对象的版本。在此分享,希望对其他人有用。
/**
 * Collects every linked stretch of text under `element` into a Range.
 *
 * For a Text element, each run of consecutive characters sharing one link URL
 * is added as a partial-element range (start offset to inclusive end offset).
 * For any other element that exposes getNumChildren(), the ranges of all of
 * its children are merged, recursively.
 *
 * @param {Element} element The element to scan.
 * @returns {Range} A range covering all links found.
 */
function getAllLinks(element) {
    var builder = DocumentApp.getActiveDocument().newRange();

    // Containers: merge whatever the children contribute.
    if (element.getType() !== DocumentApp.ElementType.TEXT) {
        if (element.getNumChildren) {
            for (var childIndex = 0; childIndex < element.getNumChildren(); childIndex++) {
                builder.addRange(getAllLinks(element.getChild(childIndex)));
            }
        }
        return builder.build();
    }

    // Text: walk the characters and track the run currently being built.
    var content = element.getText();
    var runs = [];
    var openRun = null;  // run in progress, if any
    var lastUrl = null;  // URL of the previous character

    for (var pos = 0; pos < content.length; pos++) {
        var urlHere = element.getLinkUrl(pos);

        if (urlHere === null) {
            // Outside any link: close the run in progress.
            if (openRun !== null) runs.push(openRun);
            openRun = null;
        }
        else {
            // A different URL than the previous character starts a new run.
            if (urlHere !== lastUrl) {
                if (openRun !== null) runs.push(openRun);
                openRun = { url: String(urlHere), startOffset: pos };
            }
            openRun.endOffsetInclusive = pos;
        }

        lastUrl = urlHere;
    }

    // The text may end while still inside a link.
    if (openRun !== null) runs.push(openRun);

    for (var r = 0; r < runs.length; r++) {
        builder.addElement(element, runs[r].startOffset, runs[r].endOffsetInclusive);
    }
    return builder.build();
}
答案 4 :(得分:1)
这是一种无需脚本即可实现相同目标的快速而肮脏的方法:
Google Docs 导出的 RTF 格式非常完整——在往返转换时我没有发现任何保真度损失。它的优点是:所有超链接、格式以及与文档有关的其他信息,都以一种便于编辑、也便于使用正则表达式工具处理的形式完全暴露出来。
答案 5 :(得分:0)
你是对的...搜索和替换不适用于此处。 使用setLinkUrl()https://developers.google.com/apps-script/reference/document/container-element#setLinkUrl(String)
基本上,你必须递归地遍历各个元素(元素可以包含其他元素),并对每个元素使用 getLinkUrl() 获取旧的网址(oldText);如果返回值不为 null,则调用 setLinkUrl(newText) 设置新网址……这样显示的文本会保持不变。
答案 6 :(得分:0)
此Excel宏列出了Word文档中的链接。您需要先将数据复制到Word文档中。
' Lists the address of every hyperlink in a Word document, one per row,
' going down from cell A1 of the active worksheet.
'
' The Word.Application / Word.Document declarations are early-bound, so the
' project needs a reference to the Microsoft Word object library.
Sub getLinks()
    Dim wApp As Word.Application, wDoc As Word.Document
    ' Long rather than Integer: Integer overflows above 32,767 hyperlinks.
    Dim i As Long, r As Range
    Const filePath = "C:\test\test.docx"

    Set wApp = CreateObject("Word.Application")
    'wApp.Visible = True
    ' The document is only read, so open it read-only.
    Set wDoc = wApp.Documents.Open(filePath, ReadOnly:=True)

    Set r = Range("A1")
    For i = 1 To wDoc.Hyperlinks.Count
        r.Value = wDoc.Hyperlinks(i).Address
        Set r = r.Offset(1, 0)
    Next i

    ' Close the document explicitly, without saving, before quitting Word.
    wDoc.Close SaveChanges:=False
    wApp.Quit
    Set wDoc = Nothing
    Set wApp = Nothing
End Sub