我在HTML中有一个文本框,用户可以在其中输入任何类型的文本。它也可以包含URL。
如何使用JavaScript从该文本中将URL提取到数组中?
This URL为如何替换文本提供了一个很好的示例。但我想将它们提取到一个数组中,以便我可以处理它们。
var userText = "Hello, I can't http://google.com for c*ap"
var urls = getUrlsFromText(userText);
console.log("Urls!",urls)
--> must give ['http://google.com']
编辑:不重复,因为我在发布问题之前发现正则表达式检测到URL。我需要一种方法将匹配从字符串中提取到数组,
答案 0 :(得分:3)
只需使用String.match():
function getUrlsFromText(input) {
return input.match(/* your URL regex here */);
}
很容易找到网址的正则表达式,例如https://mathiasbynens.be/demo/url-regex
答案 1 :(得分:2)
您可以使用此代码:
function getURLsFromString(s) {
var re = /((([A-Za-z]{3,9}:(?:\/\/)?)(?:[-;:&=+$,\w]+@)?[A-Za-z0-9.-]+|(?:www\.|[-;:&=+$,\w]+@)[A-Za-z0-9.-]+)((?:\/[+~%\/.\w-]*)?\??(?:[-+=&;%@.\w]*)#?\w*)?)/gm;
var m;
var arr = [];
while ((m = re.exec(str)) !== null) {
if (m.index === re.lastIndex) {
re.lastIndex++;
}
arr.push(m[0]);
}
return arr;
}
var str = 'Hello, I can\'t http://google.com for c*ap';
console.log(getURLsFromString(str));

我清理了正则表达式,因为有太多的转义,-
有一个问题。
如果您更喜欢Diego Perini's URL regex,请使用
var re = RegExp(
// protocol identifier
"(?:(?:https?|ftp)://)" +
// user:pass authentication
"(?:\\S+(?::\\S*)?@)?" +
"(?:" +
// IP address exclusion
// private & local networks
"(?!(?:10|127)(?:\\.\\d{1,3}){3})" +
"(?!(?:169\\.254|192\\.168)(?:\\.\\d{1,3}){2})" +
"(?!172\\.(?:1[6-9]|2\\d|3[0-1])(?:\\.\\d{1,3}){2})" +
// IP address dotted notation octets
// excludes loopback network 0.0.0.0
// excludes reserved space >= 224.0.0.0
// excludes network & broacast addresses
// (first & last IP address of each class)
"(?:[1-9]\\d?|1\\d\\d|2[01]\\d|22[0-3])" +
"(?:\\.(?:1?\\d{1,2}|2[0-4]\\d|25[0-5])){2}" +
"(?:\\.(?:[1-9]\\d?|1\\d\\d|2[0-4]\\d|25[0-4]))" +
"|" +
// host name
"(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)" +
// domain name
"(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*" +
// TLD identifier
"(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))" +
// TLD may end with dot
"\\.?" +
")" +
// port number
"(?::\\d{2,5})?" +
// resource path
"(?:[/?#]\\S*)?",
"gi"
);
或者它的1-liner:
var re = /(?:(?:https?|ftp):\/\/)(?:\S+(?::\S*)?@)?(?:(?!(?:10|127)(?:\.\d{1,3}){3})(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)(?:\.(?:[a-z\u00a1-\uffff0-9]-*)*[a-z\u00a1-\uffff0-9]+)*(?:\.(?:[a-z\u00a1-\uffff]{2,}))\.?)(?::\d{2,5})?(?:[\/?#]\S*)?/gi;