我需要定期将某个指定网站上的所有链接保存到一个 Google 文档(Google Doc)中。我花了好几个小时尝试实现,但我是初学者,试过的方法都行不通。如果能得到任何建议,我将不胜感激。
下面是我的一次尝试(不过最好忽略它,因为它根本无法运行):
/**
 * Attempts to copy every link found in a web page's source into a new
 * Google Doc named 'links'. Takes no arguments and returns nothing.
 *
 * NOTE(review): this attempt cannot run to completion; the findText call on
 * the fetched string fails. See the notes inside the body.
 */
function save_links() {
// create a google doc file named 'links'
var doc = DocumentApp.create('links');
// download the page and keep its source code as a string
var str = UrlFetchApp.fetch('https://www.the_website_in_question').getContentText();
// find the first link
// NOTE(review): str is a plain JavaScript string, and strings have no findText
// method, so this call throws a TypeError. findText looks borrowed from the
// DocumentApp text-search API (confirm); for a string, use String.prototype.match
// with a regular expression instead.
var link = str.findText('https:\/\/.*\/');
// append each found link to the document, then look for the next one
while (link != null) {
// presumably link was meant to be a DocumentApp search result exposing getElement() (confirm)
var foundLink = link.getElement().asText();
doc.getBody().appendParagraph(foundLink);
// NOTE(review): the next search is called on the previous result rather than on
// the page text, and the pattern here is 'http:' while the first search used 'https:'
link = link.findText('http:\/\/.*\/', link);
}
}
答案 0 :(得分:2)
请注意,Google Apps Script 是一种基于 JavaScript 的脚本语言。getContentText() 返回的是普通的 JavaScript 字符串,它没有 findText 方法;改用正确的正则表达式并配合字符串的 match 方法,就可以正常工作:
/**
 * Saves every URL found in a web page's source to a new Google Doc named 'links'.
 *
 * Fetches the page, extracts everything that looks like a URL with a regular
 * expression, and appends each match as its own paragraph, in the order the
 * matches occur in the page source. Duplicate URLs are kept.
 * Takes no arguments and returns nothing.
 */
function save_links() {
  // create a google doc file named 'links'
  var doc = DocumentApp.create('links');
  // save the source code of the website in question to a string
  var str = UrlFetchApp.fetch('https://riyafa.wordpress.com/').getContentText();
  // Matches http(s)/ftp/file URLs as well as bare "www." / "ftp." hosts.
  // One level of parentheses is allowed inside the URL, and the final character
  // class excludes punctuation such as . , ! ? : so that sentence punctuation
  // following a URL is not captured.
  var regExp=/(?:(?:https?|ftp|file):\/\/|www\.|ftp\.)(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[-A-Z0-9+&@#\/%=~_|$?!:,.])*(?:\([-A-Z0-9+&@#\/%=~_|$?!:,.]*\)|[A-Z0-9+&@#\/%=~_|$])/igm;
  // match() returns null (not an empty array) when nothing matches,
  // so fall back to an empty list before reading .length
  var theResult = str.match(regExp) || [];
  var body = doc.getBody();
  // save every link to the google doc file
  // (declared index loop: avoids the implicit global and for...in over an array)
  for (var i = 0; i < theResult.length; i++) {
    body.appendParagraph(theResult[i]);
  }
}