目前,我可以使用以下正则表达式从任何网址中提取“域名”:
// Apply the plugin class defined below.
apply plugin:AddDepPlugin
/**
 * Plugin applied to a Gradle instance. For the projects of that instance it
 * adds extra dependencies and Maven repositories to a project's buildscript
 * and later applies the plugins listed in addDeps.
 */
class AddDepPlugin implements Plugin<Gradle> {
// Maps a plugin id (key) to the dependency notation (value) that is added to
// the buildscript classpath configuration.
def addDeps = [
"org.ensime.gradle": "gradle.plugin.net.coacoas.gradle:ensime-gradle:0.2.2",
"com.github.dcendents.android-maven": "com.github.dcendents:android-maven-plugin:1.2"]
// URLs registered as Maven repositories on the buildscript.
def addRepos = ["https://plugins.gradle.org/m2/"]
/**
 * Registers a whenPluginAdded callback for every project of the given Gradle
 * instance; the callback does its work on the 1st and on the 8th invocation.
 */
void apply(Gradle gradle) {
// Callback counter. It is declared once per apply() call, so it is shared by
// the callbacks of all projects rather than being per-project.
def add = 0
gradle.allprojects { project ->
// NOTE(review): `plugins` is presumably resolved against the project via the
// closure delegate - confirm.
plugins.whenPluginAdded { t ->
// First callback only: inject the dependencies and repositories.
if (++add == 1) {
// Return value is discarded. NOTE(review): presumably called for a side
// effect (e.g. forcing initialization) - confirm before removing.
project.getBuildScriptSource()
def bs = project.getBuildscript()
// Return value is discarded here as well - same caveat as above.
bs.getDependencies()
def repo = bs.getRepositories()
// Read the "classpathConfiguration" field of the buildscript object's class
// reflectively, bypassing access checks.
def ccf = bs.class.getDeclaredField("classpathConfiguration")
ccf.setAccessible(true)
def cc = ccf.get(bs)
// Add one dependency per addDeps value to that configuration.
addDeps.each { k,v-> cc.dependencies.add(project.dependencies.create(v))}
// Register each URL in addRepos as a Maven repository.
addRepos.each { k-> repo.maven { -> setUrl(k) } }
}
// Eighth callback only: apply every plugin id in addDeps that does not start
// with "x". NOTE(review): the reason for the value 8 is not visible here.
if (add == 8)
addDeps.each { k,v ->
if (!k.startsWith("x")) project.apply([plugin: k])
}
}
}
}
}
但是我也得到了子域名,我想避免。例如,如果我有网站:
我目前得到:
(我使用的正则表达式是:)
/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n\?\=]+)/im
对于最后两个,我想排除 freds 和 josh 子域部分,只提取真正的域名 meatmarket.co.uk。
我确实找到了另一个尝试用 PHP 解决这个问题的 Stack Overflow 帖子,遗憾的是我不懂 PHP。这可以翻译成 JS 吗(顺便说一下,我实际使用的是 Google Apps Script)?
meatmarket.co.uk
答案 0 :(得分:12)
也就是说,除非结果只有两个部分,否则你需要从结果中去掉第一段主机名?
只需用一个符合该条件的正则表达式,对第一次匹配的结果再做一次后处理:
/**
 * Extract a domain from a URL or bare host string.
 *
 * The host is captured after an optional http(s) scheme, optional userinfo
 * ("user:pass@") and an optional leading "www."; it ends at the first ":",
 * "/", "?", "=", "#" or newline. If the captured host has three or more
 * dot-separated labels, its first label is dropped, so
 * "freds.meatmarket.co.uk" becomes "meatmarket.co.uk". Only one label is
 * removed from the captured host.
 *
 * Limitation: this is a label-count heuristic, not a public-suffix lookup, so
 * a bare "example.co.uk" is reduced to "co.uk".
 *
 * @param {string} url - URL or host string to inspect.
 * @returns {string|undefined} The domain, or undefined when no host is found.
 */
function domain_from_url(url) {
  // Userinfo may not span "/", "?" or "#"; otherwise an "@" inside the path or
  // query would be mistaken for credentials. The host also stops at "#" so a
  // fragment is never returned as part of the domain.
  const hostMatch = url.match(
    /^(?:https?:\/\/)?(?:[^@\/\n?#]+@)?(?:www\.)?([^:\/\n?=#]+)/im
  );
  if (!hostMatch) {
    return undefined;
  }

  const host = hostMatch[1];
  // Matches only when at least two dots remain, i.e. three or more labels.
  const withoutFirstLabel = host.match(/^[^.]+\.(.+\..+)$/);
  return withoutFirstLabel ? withoutFirstLabel[1] : host;
}
// Demo: print the extracted domain for each sample input, one per line.
for (const sampleUrl of [
  "www.google.com",
  "yahoo.com/something",
  "freds.meatmarket.co.uk?someparameter",
  "josh.meatmarket.co.uk/asldf/asdf",
]) {
  console.log(domain_from_url(sampleUrl));
}
// Expected output:
// google.com
// yahoo.com
// meatmarket.co.uk
// meatmarket.co.uk
答案 1 :(得分:0)
试试这个:
https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.([a-z]{2,6}){1}
答案 2 :(得分:0)
尝试把正则表达式中的 www 换成更通用的模式:
/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:[^.]+\.)?([^:\/\n\?\=]+)/im
编辑:
如果您一定要在正则表达式中保留 www,可以试试这个:
/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?(?:[^.]+\.)?([^:\/\n\?\=]+)/im
答案 3 :(得分:0)
/**
 * Extract the hostname from a URL or scheme-less host string.
 *
 * Removes, in order: a leading "scheme://", everything from the first "/",
 * "?" or "#" onwards, any "user:pass@" credentials, and a ":port" suffix.
 *
 * Limitation: a bracketed IPv6 literal such as "[::1]" is not supported,
 * because the host is cut at the first ":".
 *
 * @param {string} url - URL or host string to inspect.
 * @returns {string} The hostname; an empty string when none is present.
 */
export const extractHostname = url => {
  // Treat "://" as a scheme separator only when it comes before any path,
  // query or fragment; otherwise it belongs to, e.g., a URL embedded in a
  // query string and the input itself has no scheme.
  const schemeEnd = url.indexOf("://");
  const hasScheme = schemeEnd > -1 && url.search(/[\/?#]/) === schemeEnd + 1;
  const afterScheme = hasScheme ? url.slice(schemeEnd + 3) : url;

  // The authority ends at the first path, query or fragment delimiter.
  const authority = afterScheme.split(/[\/?#]/)[0];

  // Drop "user:pass@" credentials (lastIndexOf yields -1 when absent, so the
  // slice keeps the whole string), then drop the ":port" suffix.
  const hostAndPort = authority.slice(authority.lastIndexOf("@") + 1);
  return hostAndPort.split(":")[0];
};
/**
 * Reduce a URL to its root domain using a label-count heuristic.
 *
 * The hostname is obtained from extractHostname. Hostnames with one or two
 * labels, and IPv4 literals, are returned unchanged. Otherwise the last two
 * labels are kept, or the last three when both of the last two labels are
 * exactly two characters long (treated as a country-code suffix such as
 * "co.uk").
 *
 * Limitation: this is not a public-suffix lookup. Suffixes like "com.au" are
 * not recognized, and a two-letter second-level label under a two-letter TLD
 * is always treated as part of the suffix.
 *
 * @param {string} url - URL or host string to inspect.
 * @returns {string} The root domain derived from the hostname.
 */
export const extractRootDomain = url => {
  const hostname = extractHostname(url);

  // An IPv4 literal has no subdomains to strip; trimming it to its last two
  // octets would produce a meaningless value.
  if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)) {
    return hostname;
  }

  const labels = hostname.split('.');
  if (labels.length <= 2) {
    return hostname;
  }

  const [thirdLast, secondLast, last] = labels.slice(-3);
  const lastTwo = `${secondLast}.${last}`;

  // Two 2-character trailing labels are assumed to be a country-code suffix
  // (e.g. "co.uk"), so one more label is kept in front of them.
  if (secondLast.length === 2 && last.length === 2) {
    return `${thirdLast}.${lastTwo}`;
  }
  return lastTwo;
};