我在google和stackoverflow中搜索了很多次,我发现了很多问题和答案,但我发现所有错误都存在。
所以我试着编写自己的算法,我不确定。看起来很有效。 我使用了一个infinte循环。这是一个问题吗?如果是,如何避免这样的循环。
如果我的代码中存在任何错误,我需要你的帮助。
注意:英语不是我的母语。
最诚挚的问候。
/**
* @description get pure domain or domain.extension
* @param $p_domain
* @param $ext :: if 1 => with extension
* @return string|false
*/
public function get_pure_domain($p_domain, $ext=1)
{
//remove protocols
$clean = array("http://", "https://", "ftp://", "@", "ftps://", ":");
$domain = str_replace($clean, "", $p_domain);
//remove slashes before dot
$dot_len=strpos($domain,'.');
if ( strpos( substr($domain,0,$dot_len) ,'/')) return false;
if (!$dot_len)
return false;
// if slashes exist remove after
if (strpos($domain, "/"))
$domain=substr($domain, 0, strpos($domain, "/") );
$clean_domain=$domain;
// explode with dot
$items = explode(".", $domain);
//select probable domain without extension
$sub_domain_count=count($items)-2;
$pure_domain=$items[$sub_domain_count];
//if selected domain matched an extension, select provious (go back -1 in array)
$d_count=0;
while(1==1)
{
$d_count++;
if ( $this->validate_ext($pure_domain) )
$pure_domain=$items[--$sub_domain_count];
else
break;
//break infinite loop after 50
if ($d_count==50) break;
}
// if requested, return with extension
if ($ext==1)
{
$extension_start=strpos($clean_domain,$pure_domain)+strlen($pure_domain);
$pure_domain=$pure_domain.substr($clean_domain,$extension_start);
}
//total parts cannot be greater than 3
$items = explode(".", $pure_domain);
if(count($items)>2)
{
$pure_domain = $items[count($items) - 3] . '.' . $items[count($items) - 2] . '.' . $items[count($items) - 1];
//search for last two parts in extensions if not exist return these two parts as domain
if ( !$this->validate_ext($items[count($items) - 2] . '.' . $items[count($items) - 1]) )
{
if ($ext==1)
$pure_domain=$items[count($items) - 2] . '.' . $items[count($items) - 1];
else
$pure_domain=$items[count($items) - 2];
}
}
//if extensions and domain are same algorithm may take subdomain as domain
$items = explode(".", $pure_domain);
if(count($items)>1)
{
if ($items[count($items) - 1] == $items[count($items) - 2])
{
if ($ext==1)
$pure_domain = $items[count($items) - 2] . '.' . $items[count($items) - 1];
else
$pure_domain=$items[count($items) - 2];
}
}
//if domain numeric may be an ip return false
if (is_numeric($pure_domain))
return false;
//return result
return $pure_domain;
}
public function validate_ext($ext)
{
if (strpos($ext,".")) str_replace('.','',$ext);
if (in_array($ext, Data::$extensions))
return true;
else
return false;
}
class Data
{
//18-12-2014 all domains extension
public static $extensions = array("abogado", "ac", "academy", "accountants", "active", "actor", "ad", "adult", "ae", "aero", "af", "ag", "agency", "ai",
"airforce", "al", "allfinanz", "alsace", "am", "an", "android", "ao", "aq", "aquarelle", "ar", "archi", "army", "arpa", "as", "asia", "associates",
"at", "attorney", "au", "auction", "audio", "autos", "aw", "ax", "axa", "az", "ba", "band", "bar", "bargains", "bayern", "bb", "bd", "be", "beer", "berlin",
"best", "bf", "bg", "bh", "bi", "bid", "bike", "bio", "biz", "bj", "black", "blackfriday", "bloomberg", "blue", "bm", "bmw", "bn", "bnpparibas", "bo", "boo",
"boutique", "br", "brussels", "bs", "bt", "budapest", "build", "builders", "business", "buzz", "bv", "bw", "by", "bz", "bzh", "ca", "cab", "cal", "camera",
"camp", "cancerresearch", "capetown", "capital", "caravan", "cards", "care", "career", "careers", "cartier", "casa", "cash", "cat", "catering", "cc",
"cd", "center", "ceo", "cern", "cf", "cg", "ch", "channel", "cheap", "christmas", "chrome", "church", "ci", "citic", "city", "ck", "cl", "claims", "cleaning",
"click", "clinic", "clothing", "club", "cm", "cn", "co", "coach", "codes", "coffee", "college", "cologne", "com", "community", "company", "computer", "condos",
"construction", "consulting", "contractors", "cooking", "cool", "coop", "country", "cr", "credit", "creditcard", "cricket", "crs", "cruises", "cu",
"cuisinella", "cv", "cw", "cx", "cy", "cymru", "cz", "dad", "dance", "dating", "day", "de", "deals", "degree", "delivery", "democrat", "dental", "dentist",
"desi", "diamonds", "diet", "digital", "direct", "directory", "discount", "dj", "dk", "dm", "dnp", "do", "domains", "doosan", "durban", "dvag", "dz", "eat",
"ec", "edu", "education", "ee", "eg", "email", "emerck", "energy", "engineer", "engineering", "enterprises", "equipment", "er", "es", "esq", "estate", "et",
"eu", "eurovision", "eus", "events", "everbank", "exchange", "expert", "exposed", "fail", "farm", "fashion", "feedback", "fi", "finance", "financial",
"firmdale", "fish", "fishing", "fitness", "fj", "fk", "flights", "florist", "flsmidth", "fly", "fm", "fo", "foo", "forsale", "foundation", "fr", "frl",
"frogans", "fund", "furniture", "futbol", "ga", "gal", "gallery", "garden", "gb", "gbiz", "gd", "ge", "gent", "gf", "gg", "gh", "gi", "gift", "gifts", "gives",
"gl", "glass", "gle", "global", "globo", "gm", "gmail", "gmo", "gmx", "gn", "google", "gop", "gov", "gp", "gq", "gr", "graphics", "gratis", "green", "gripe",
"gs", "gt", "gu", "guide", "guitars", "guru", "gw", "gy", "hamburg", "haus", "healthcare", "help", "here", "hiphop", "hiv", "hk", "hm", "hn", "holdings",
"holiday", "homes", "horse", "host", "hosting", "house", "how", "hr", "ht", "hu", "ibm", "id", "ie", "il", "im", "immo", "immobilien", "in", "industries",
"info", "ing", "ink", "institute", "insure", "int", "international", "investments", "io", "iq", "ir", "irish", "is", "it", "iwc", "je", "jetzt", "jm", "jo",
"jobs", "joburg", "jp", "juegos", "kaufen", "ke", "kg", "kh", "ki", "kim", "kitchen", "kiwi", "km", "kn", "koeln", "kp", "kr", "krd", "kred", "kw", "ky", "kz",
"la", "lacaixa", "land", "latrobe", "lawyer", "lb", "lc", "lds", "lease", "legal", "lgbt", "li", "lidl", "life", "lighting", "limited", "limo", "link", "lk",
"loans", "london", "lotto", "lr", "ls", "lt", "ltda", "lu", "luxe", "luxury", "lv", "ly", "ma", "madrid", "maison", "management", "mango", "market", "marketing",
"mc", "md", "me", "media", "meet", "melbourne", "meme", "memorial", "menu", "mg", "mh", "miami", "mil", "mini", "mk", "ml", "mm", "mn", "mo", "mobi", "moda",
"moe", "monash", "money", "mormon", "mortgage", "moscow", "motorcycles", "mov", "mp", "mq", "mr", "ms", "mt", "mu", "museum", "mv", "mw", "mx", "my", "mz", "na",
"nagoya", "name", "navy", "nc", "ne", "net", "network", "neustar", "new", "nexus", "nf", "ng", "ngo", "nhk", "ni", "ninja", "nl", "no", "np", "nr", "nra", "nrw",
"nu", "nyc", "nz", "okinawa", "om", "ong", "onl", "ooo", "org", "organic", "osaka", "otsuka", "ovh", "pa", "paris", "partners", "parts", "party", "pe", "pf", "pg",
"ph", "pharmacy", "photo", "photography", "photos", "physio", "pics", "pictures", "pink", "pizza", "pk", "pl", "place", "plumbing", "pm", "pn", "pohl", "poker",
"porn", "post", "pr", "praxi", "press", "pro", "prod", "productions", "prof", "properties", "property", "ps", "pt", "pub", "pw", "py", "qa", "qpon", "quebec",
"re", "realtor", "recipes", "red", "rehab", "reise", "reisen", "reit", "ren", "rentals", "repair", "report", "republican", "rest", "restaurant", "reviews",
"rich", "rio", "rip", "ro", "rocks", "rodeo", "rs", "rsvp", "ru", "ruhr", "rw", "ryukyu", "sa", "saarland", "samsung", "sarl", "sb", "sc", "sca", "scb", "schmidt",
"schule", "schwarz", "science", "scot", "sd", "se", "services", "sew", "sexy", "sg", "sh", "shiksha", "shoes", "si", "singles", "sj", "sk", "sky", "sl", "sm", "sn",
"so", "social", "software", "sohu", "solar", "solutions", "soy", "space", "spiegel", "sr", "st", "su", "supplies", "supply", "support", "surf", "surgery",
"suzuki", "sv", "sx", "sy", "sydney", "systems", "sz", "taipei", "tatar", "tattoo", "tax", "tc", "td", "technology", "tel", "tf", "tg", "th", "tienda", "tips",
"tirol", "tj", "tk", "tl", "tm", "tn", "to", "today", "tokyo", "tools", "top", "town", "toys", "tp", "tr", "trade", "training", "travel", "trust", "tt", "tui",
"tv", "tw", "tz", "ua", "ug", "uk", "university", "uno", "uol", "us", "uy", "uz", "va", "vacations", "vc", "ve", "vegas", "ventures", "versicherung", "vet", "vg",
"vi", "viajes", "villas", "vision", "vlaanderen", "vn", "vodka", "vote", "voting", "voto", "voyage", "vu", "wales", "wang", "watch", "webcam", "website",
"wed", "wedding", "wf", "whoswho", "wien", "wiki", "williamhill", "wme", "work", "works", "world", "ws", "wtc", "wtf", "xn--1qqw23a", "xn--3bst00m",
"xn--3ds443g", "xn--3e0b707e", "xn--45brj9c", "xn--45q11c", "xn--4gbrim", "xn--55qw42g", "xn--55qx5d", "xn--6frz82g", "xn--6qq986b3xl", "xn--80adxhks",
"xn--80ao21a", "xn--80asehdb", "xn--80aswg", "xn--90a3ac", "xn--c1avg", "xn--cg4bki", "xn--clchc0ea0b2g2a9gcd", "xn--czr694b", "xn--czrs0t",
"xn--czru2d", "xn--d1acj3b", "xn--d1alf", "xn--fiq228c5hs", "xn--fiq64b", "xn--fiqs8s", "xn--fiqz9s", "xn--flw351e", "xn--fpcrj9c3d", "xn--fzc2c9e2c",
"xn--gecrj9c", "xn--h2brj9c", "xn--hxt814e", "xn--i1b6b1a6a2e", "xn--io0a7i", "xn--j1amh", "xn--j6w193g", "xn--kprw13d", "xn--kpry57d", "xn--kput3i",
"xn--l1acc", "xn--lgbbat1ad8j", "xn--mgb9awbf", "xn--mgba3a4f16a", "xn--mgbaam7a8h", "xn--mgbab2bd", "xn--mgbayh7gpa", "xn--mgbbh1a71e",
"xn--mgbc0a9azcg", "xn--mgberp4a5d4ar", "xn--mgbx4cd0ab", "xn--ngbc5azd", "xn--node", "xn--nqv7f", "xn--nqv7fs00ema", "xn--o3cw4h", "xn--ogbpf8fl",
"xn--p1acf", "xn--p1ai", "xn--pgbs0dh", "xn--q9jyb4c", "xn--qcka1pmc", "xn--rhqv96g", "xn--s9brj9c", "xn--ses554g", "xn--unup4y",
"xn--vermgensberater-ctb", "xn--vermgensberatung-pwb", "xn--vhquv", "xn--wgbh1c", "xn--wgbl6a", "xn--xhq521b", "xn--xkc2al3hye2a",
"xn--xkc2dl3a5ee0h", "xn--yfro4i67o", "xn--ygbi2ammx", "xn--zfr164b", "xxx", "xyz", "yachts", "yandex", "ye", "yoga", "yokohama", "youtube", "yt", "za",
"zip", "zm", "zone", "zw");
}
答案 0 :(得分:1)
没有人回答我的问题所以我改进了我的方法,我会像这样使用:
public function get_pure_domain($p_domain, $ext=1)
{
//remove protocols
$clean = array("http://", "https://", "ftp://", "@", "ftps://", ":");
$domain = str_replace($clean, "", $p_domain);
//remove slashes before dot
$dot_len=strpos($domain,'.');
if ( strpos( substr($domain,0,$dot_len) ,'/')) return false;
if (!$dot_len)
return false;
// if slashes exist remove after
if (strpos($domain, "/"))
$domain=substr($domain, 0, strpos($domain, "/") );
$clean_domain=$domain;
// explode with dot
$items = explode(".", $domain);
//select probable domain without extension
$sub_domain_count=count($items)-2;
$pure_domain=$items[$sub_domain_count];
//if selected domain matched an extension, select provious (go back -1 in array)
$d_count=0;
while(1==1)
{
$d_count++;
if ( $this->validate_ext($pure_domain) )
$pure_domain=$items[--$sub_domain_count];
else
break;
//break infinite loop after 50
if ($d_count==50) break;
}
// if requested, return with extension
if ($ext==1)
{
$extension_start=strpos($clean_domain,$pure_domain)+strlen($pure_domain);
$pure_domain=$pure_domain.substr($clean_domain,$extension_start);
}
//total parts cannot be greater than 3
$items = explode(".", $pure_domain);
if(count($items)>2)
{
$pure_domain = $items[count($items) - 3] . '.' . $items[count($items) - 2] . '.' . $items[count($items) - 1];
//search for last two parts in extensions if not exist return these two parts as domain
if ( !$this->validate_ext($items[count($items) - 2] . '.' . $items[count($items) - 1]) )
{
if ($ext==1)
$pure_domain=$items[count($items) - 2] . '.' . $items[count($items) - 1];
else
$pure_domain=$items[count($items) - 2];
}
}
//if extensions and domain are same algorithm may take subdomain as domain
$items = explode(".", $pure_domain);
if(count($items)>1)
{
if ($items[count($items) - 1] == $items[count($items) - 2])
{
if ($ext==1)
$pure_domain = $items[count($items) - 2] . '.' . $items[count($items) - 1];
else
$pure_domain=$items[count($items) - 2];
}
}
//if domain numeric may be an ip return false
if (is_numeric($pure_domain))
return false;
//return result
return $pure_domain;
}