JavaScript 正则表达式:仅从 URL 中提取域名

时间:2016-01-15 19:06:06

标签: javascript regex

目前,我可以使用以下正则表达式从任何网址中提取“域名”:

/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?([^:\/\n\?\=]+)/im

但是这样也会把子域名一并提取出来,而这正是我想避免的。例如,如果我有以下网址:

  • www.google.com
  • yahoo.com/something
  • freds.meatmarket.co.uk?someparameter
  • josh.meatmarket.co.uk/asldf/asdf

我目前得到:

  • google.com
  • yahoo.com
  • freds.meatmarket.co.uk
  • josh.meatmarket.co.uk

对于最后两个,我想去掉 freds 和 josh 这样的子域部分,只提取真正的域名,也就是 meatmarket.co.uk。

我确实找到了另一个尝试用 PHP 解决这个问题的 Stack Overflow 帖子,遗憾的是我不懂 PHP。它可以翻译成 JS 吗(顺便说一下,我实际使用的是 Google Apps Script)?

meatmarket.co.uk

4 个答案:

答案 0 :(得分:12)

也就是说,除非结果只有两个部分,否则你需要去掉结果中的第一段主机名?

只需在第一次匹配之后,再用一个符合该条件的正则表达式对结果做一次后处理:

/**
 * Extracts a domain from a URL or bare host string, dropping one leading
 * sub-domain label when the host has three or more labels.
 *
 * Heuristic only: no public-suffix data is consulted, so a host that is
 * already a registrable domain with a two-part suffix (e.g.
 * "meatmarket.co.uk") is shortened to "co.uk".
 *
 * @param {string} url - URL or host, with or without an http(s) scheme.
 * @returns {string|undefined} The extracted domain, or undefined when no
 *     host can be found at the start of a line of the input.
 */
function domain_from_url(url) {
    // Optional http(s) scheme, optional userinfo, optional "www.", then the
    // host up to the first ":", "/", "?", "=", "#" or newline.
    // The userinfo part may not cross "/", "?" or "#"; otherwise an "@" that
    // appears later in the path or query string would be taken for the
    // userinfo separator and the wrong host would be captured.
    var hostMatch = url.match(/^(?:https?:\/\/)?(?:[^@\/\n\?#]+@)?(?:www\.)?([^:\/\n\?\=#]+)/im);
    if (!hostMatch) {
        return undefined;
    }
    var host = hostMatch[1];

    // A dotted-quad IPv4 address has no sub-domain to strip; removing its
    // first octet would leave a meaningless value such as "168.0.1".
    if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(host)) {
        return host;
    }

    // With three or more dot-separated labels, discard the first one.
    var parentMatch = host.match(/^[^\.]+\.(.+\..+)$/);
    return parentMatch ? parentMatch[1] : host;
}

// Demo: print the extracted domain for each sample input from the question.
[
    "www.google.com",
    "yahoo.com/something",
    "freds.meatmarket.co.uk?someparameter",
    "josh.meatmarket.co.uk/asldf/asdf"
].forEach(function (sampleUrl) {
    console.log(domain_from_url(sampleUrl))
})

// Expected output, one line per sample, in order:
// google.com
// yahoo.com
// meatmarket.co.uk
// meatmarket.co.uk

答案 1 :(得分:0)

试试这个:

https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.([a-z]{2,6}){1}

答案 2 :(得分:0)

尝试把正则表达式中的 www 替换为更通用的模式:

/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:[^.]+\.)?([^:\/\n\?\=]+)/im

编辑: 如果您绝对想将www保留在正则表达式中,可以试试这个:

/^(?:https?:\/\/)?(?:[^@\n]+@)?(?:www\.)?(?:[^.]+\.)?([^:\/\n\?\=]+)/im

答案 3 :(得分:0)

/**
 * Extracts the host name from a URL-like string.
 *
 * Accepts full URLs ("https://host:443/path"), protocol-relative URLs
 * ("//host/path") and bare hosts ("host/path?query"). The scheme, userinfo,
 * port, path, query string and fragment are all removed.
 *
 * Bracketed IPv6 literals are not supported: the host is cut at the first ":".
 *
 * @param {string} url - URL or host string to inspect.
 * @returns {string} The host name, or "" when the input contains none.
 */
export const extractHostname = (url) => {
    // Remove a scheme only when it is at the very start of the string. A
    // "://" that occurs later (for example inside a query parameter of a
    // scheme-less URL) must not be treated as the URL's own scheme.
    const withoutScheme = url.replace(/^(?:[a-z][a-z0-9+.-]*:)?\/\//i, '');

    // The authority component ends at the first "/", "?" or "#".
    const authority = withoutScheme.split(/[/?#]/)[0];

    // Drop an optional "user:password@" prefix.
    const hostAndPort = authority.slice(authority.lastIndexOf('@') + 1);

    // Drop an optional ":port" suffix.
    return hostAndPort.split(':')[0];
};

/**
 * Reduces a URL to its root domain by keeping only the trailing labels of the
 * host name returned by extractHostname.
 *
 * Normally the last two labels are kept ("www.google.com" -> "google.com").
 * When both of the last two labels are exactly two characters long (e.g.
 * "co.uk", "me.uk") they are treated as a country-code suffix and three
 * labels are kept. This is a length heuristic, not a public-suffix lookup,
 * so suffixes such as "com.au" are not recognised.
 *
 * @param {string} url - URL or host string to inspect.
 * @returns {string} The root domain; hosts with at most two labels and IPv4
 *     addresses are returned unchanged.
 */
export const extractRootDomain = (url) => {
    const hostname = extractHostname(url);

    // A dotted-quad IPv4 address has no root domain; keeping only its last
    // two "labels" would yield a meaningless value such as "1.1".
    if (/^\d{1,3}(?:\.\d{1,3}){3}$/.test(hostname)) {
        return hostname;
    }

    const labels = hostname.split('.');
    if (labels.length <= 2) {
        // No sub-domain present.
        return hostname;
    }

    const topLevel = labels[labels.length - 1];
    const secondLevel = labels[labels.length - 2];

    // Two 2-character trailing labels are taken to be a ccTLD pair.
    const looksLikeCcTld = secondLevel.length === 2 && topLevel.length === 2;
    const keepCount = looksLikeCcTld ? 3 : 2;

    return labels.slice(-keepCount).join('.');
};