我正在构建一个将URL存储在数据库中的NodeJS应用程序。我想使用URL作为主键,以避免重复存储。为此,我需要把网址规范化为尽可能简单的形式,去掉多余的斜杠、查询参数和前缀。
如何将下面列出的所有URL转换为与第一个URL相同的字符串?有没有稳妥的办法,同样能处理我在下面没有列出的其他变体?
https://website.com/coolpage/938921/
https://www.website.com/coolpage/938921/
http://website.com/coolpage/938921/
https://website.com/coolpage/938921/
https://website.com/coolpage/938921/?awesome=1
答案 0 :(得分:2)
使用 Node.js 标准的 url 模块。
解决方案:
require('url');
/**
 * Reduce a URL to a canonical "host/path" string suitable for de-duplication.
 *
 * Only the host and pathname of the parsed URL are used, so the scheme,
 * query string and fragment are dropped. A leading "www." is removed from
 * the host, runs of slashes in the path are collapsed to one, and trailing
 * slashes are removed.
 *
 * @param {string} url - Absolute URL to normalize.
 * @returns {string} The normalized "host/path" string without a trailing slash.
 * @throws {TypeError} If `url` cannot be parsed by the `URL` constructor.
 */
function getBaseUrl(url) {
  const { host, pathname } = new URL(url);
  // Anchor the match so only a leading "www." label is stripped; an unanchored
  // replace would also hit "awww.example.com" or a "www." inside the path.
  const bareHost = host.replace(/^www\./, '');
  // Collapse every run of slashes (not just pairs), then drop a trailing one.
  const cleanPath = pathname.replace(/\/{2,}/g, '/').replace(/\/$/, '');
  return `${bareHost}${cleanPath}`;
}
测试:
// Sample inputs: several spellings of the same page (scheme, "www." prefix,
// query string and redundant slashes vary).
const urls = [
  "https://website.com/coolpage/938921/",
  "https://www.website.com/coolpage/938921/",
  "http://website.com/coolpage/938921/",
  "https://website.com/coolpage/938921/",
  "https://website.com/coolpage/938921/?awesome=1",
  "https://website.com/coolpage/938921?awesome=1",
  "https:///website.com//coolpage//938921//"
];

// Print each input's index next to its normalized form.
urls.forEach((rawUrl, index) => {
  console.log(`${index}: ${getBaseUrl(rawUrl)}`);
});
控制台输出:
0: website.com/coolpage/938921
1: website.com/coolpage/938921
2: website.com/coolpage/938921
3: website.com/coolpage/938921
4: website.com/coolpage/938921
5: website.com/coolpage/938921
6: website.com/coolpage/938921
答案 1 :(得分:0)
下面这个函数可以实现您需要的功能:
/**
 * Reduce a URL string to "host/path" using plain string operations.
 *
 * The query string and fragment are cut off, a leading "scheme://" is
 * removed, and empty path segments (from doubled or trailing slashes) are
 * dropped. The host is kept verbatim, so a "www." prefix is NOT removed.
 *
 * @param {string} url - URL to normalize; the scheme is optional.
 * @returns {string} Host and path joined by single slashes, with no trailing
 *   slash; an empty string if nothing remains.
 */
function convertURL(url) {
  // Cut the query/fragment first so a missing trailing slash cannot cause
  // the last real path segment to be discarded along with it.
  const withoutQuery = url.split(/[?#]/, 1)[0];
  // Remove the scheme by syntax rather than by looking for ".com", so hosts
  // under any top-level domain are recognized.
  const withoutScheme = withoutQuery.replace(/^[a-z][a-z0-9+.-]*:\/+/i, '');
  return withoutScheme
    .split('/')
    .filter((segment) => segment.length > 0)
    .join('/');
}
// Demo: normalize one sample URL and print the result.
const normalized = convertURL('https://website.com/coolpage/938921/?awesome=1');
console.log(normalized);
答案 2 :(得分:0)
您可以先使用 String.prototype.replace() 配合正则表达式 /\/+/g,将一个或多个连续的正斜杠替换为单个 /;
然后使用 String.prototype.match() 配合正则表达式
/[a-z0-9]+\.[a-z0-9]+(?=\/+)\/[a-z0-9]+(?=\/+)\/[a-z0-9]+/ig
来匹配URL的主机名和路径名。
// Sample inputs: several spellings of the same page.
const urls = [
  "https://website.com/coolpage/938921/",
  "https://www.website.com/coolpage/938921/",
  "http://website.com/coolpage/938921/",
  "https://website.com/coolpage/938921/",
  "https://website.com/coolpage/938921/?awesome=1",
  "https://website.com/coolpage/938921?awesome=1",
  "https:///website.com//coolpage//938921//"
];
// The normalized form every input is expected to reduce to.
const _URL = "website.com/coolpage/938921";
// Matches any run of one or more forward slashes.
const replaceForwardSlashes = /\/+/g;
// Matches "name.tld/segment/segment" made of ASCII letters and digits only;
// note it requires exactly two path segments after the host.
const matchHostAndPathNames = /[a-z0-9]+\.[a-z0-9]+(?=\/+)\/[a-z0-9]+(?=\/+)\/[a-z0-9]+/ig;
// One entry per input: the array of matches, or null when nothing matched.
const matchedURLS = urls.map(url => url.replace(replaceForwardSlashes, '/').match(matchHostAndPathNames));
// Flatten before building the Set. Spreading the match arrays straight into
// `new Set(...)` passes them as separate constructor arguments, and Set only
// reads the first one, so the uniqueness check would always pass.
const flatMatches = [].concat(...matchedURLS);
const allIdentical = new Set(flatMatches).size === 1;
// Compare the matched string itself instead of relying on array-to-string
// coercion through `==`.
const allExpected = matchedURLS.every(m => m !== null && m.length === 1 && m[0] === _URL);
console.log(matchedURLS, allIdentical, allExpected);