有没有人在 C# 中使用过公共后缀列表(Public Suffix List,用于获取真实的可注册域名)的解析器?
它原本可以在 code.google.com / p / domainname-parser /(使用时请去掉地址中 p 前后的空格)上获取,但该项目可能已经从 Google Code 上删除,因为我已经无法再访问项目文件夹和源代码了。
答案 0 :(得分:8)
我用 gary 答案中的代码无法通过 Mozilla 的测试用例(test cases),因此根据原始代码和此处(here)描述的算法,把这些修改拼凑在了一起。万一域名解析器的代码从网上消失,希望这段代码能对别人有所帮助。
namespace DomainName.Parser
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
/// <summary>
/// A host name split into its public suffix and registrable domain
/// according to a set of public suffix rules.
/// </summary>
public class DomainName
{
    /// <summary>
    /// Creates a domain name from already-computed parts.
    /// </summary>
    /// <param name="rawDomainName">The domain name the parts were derived from.</param>
    /// <param name="publicSuffix">The public suffix, or null when none was determined.</param>
    /// <param name="registerableDomainName">The registrable domain, or null when none was determined.</param>
    public DomainName(string rawDomainName, string publicSuffix, string registerableDomainName)
    {
        this.RawDomainName = rawDomainName;
        this.PublicSuffix = publicSuffix;
        this.RegisterableDomainName = registerableDomainName;
    }

    /// <summary>
    /// Gets the domain name this instance was built from. <see cref="TryParse"/> stores the
    /// lower-cased input here, except on its early-return path where the input is stored unchanged.
    /// </summary>
    public string RawDomainName { get; private set; }

    /// <summary>Gets the public suffix, or null when none was determined.</summary>
    public string PublicSuffix { get; private set; }

    /// <summary>Gets the public suffix plus one additional label, or null when none was determined.</summary>
    public string RegisterableDomainName { get; private set; }

    /// <summary>
    /// Splits <paramref name="rawDomainName"/> into public suffix and registrable domain.
    /// </summary>
    /// <param name="rawDomainName">The host name to parse.</param>
    /// <param name="ruleCache">The rules to match against. It is only read, never modified.</param>
    /// <param name="domainName">
    /// The parsed result; null when the method returns false. For null/empty names, names without
    /// a dot and names starting with a dot, a result with null suffix and null registrable domain
    /// is produced and the method still returns true.
    /// </param>
    /// <returns>
    /// False when the name has no label left of its public suffix (it is itself a public suffix)
    /// or when any error occurs while matching; otherwise true.
    /// </returns>
    public static bool TryParse(string rawDomainName, PublicSuffixRuleCache ruleCache, out DomainName domainName)
    {
        // Ordinal checks: host names are identifiers, not linguistic text.
        if (string.IsNullOrEmpty(rawDomainName)
            || rawDomainName.IndexOf('.') < 0
            || rawDomainName.StartsWith(".", StringComparison.Ordinal))
        {
            domainName = new DomainName(rawDomainName, null, null);
            return true;
        }

        try
        {
            // Invariant lower-casing keeps the result independent of the current culture
            // (e.g. the Turkish dotless i would otherwise turn "BIZ" into a non-matching label).
            rawDomainName = rawDomainName.ToLowerInvariant();

            // Split our domain into parts (based on the '.')
            // We'll be checking rules from the right-most part of the domain
            var domainLabels = rawDomainName.Trim().Split('.').ToList();
            domainLabels.Reverse();

            // If no rules match, the prevailing rule is "*"
            var prevailingRule = FindMatchingRule(domainLabels, ruleCache) ?? new PublicSuffixRule("*");

            // An exception rule contributes its labels minus the leftmost one. Only the count
            // is needed, so the rule's own label list is never touched: it belongs to the
            // shared cache, and reversing/truncating it in place would break later lookups.
            var suffixLabelCount = prevailingRule.Labels.Count;
            if (prevailingRule.Type == PublicSuffixRule.RuleType.Exception)
            {
                suffixLabelCount--;
            }

            // A registrable domain needs a non-empty suffix plus one more label.
            if (suffixLabelCount < 1 || domainLabels.Count <= suffixLabelCount)
            {
                domainName = null;
                return false;
            }

            // The public suffix is the set of labels from the domain which directly match
            // the labels of the prevailing rule (joined by dots).
            var publicSuffix = string.Empty;
            for (var i = 0; i < suffixLabelCount; i++)
            {
                publicSuffix = string.Format("{0}.{1}", domainLabels[i], publicSuffix).Trim('.');
            }

            // The registered or registrable domain is the public suffix plus one additional label.
            var registrableDomain = string.Format("{0}.{1}", domainLabels[suffixLabelCount], publicSuffix);

            domainName = new DomainName(rawDomainName, publicSuffix, registrableDomain);
            return true;
        }
        catch (Exception)
        {
            // Try-pattern: any failure (null rule cache, conflicting exception rules, ...)
            // is reported as "could not parse" rather than propagated.
            domainName = null;
            return false;
        }
    }

    /// <summary>
    /// Picks the prevailing rule among all rules that apply to the given labels.
    /// </summary>
    /// <param name="domainLabels">Domain labels in reversed order (top-level label first).</param>
    /// <param name="ruleCache">The rules to search.</param>
    /// <returns>The prevailing rule, or null when no rule applies.</returns>
    /// <exception cref="ApplicationException">More than one exception rule applies.</exception>
    private static PublicSuffixRule FindMatchingRule(List<string> domainLabels, PublicSuffixRuleCache ruleCache)
    {
        var ruleMatches = ruleCache.PublicSuffixRules.Where(r => r.AppliesTo(domainLabels)).ToList();

        // If there is only one match, return it.
        if (ruleMatches.Count == 1)
        {
            return ruleMatches[0];
        }

        // If more than one rule matches, the prevailing rule is the one which is an exception rule.
        var exceptionRules = ruleMatches.Where(r => r.Type == PublicSuffixRule.RuleType.Exception).ToList();
        if (exceptionRules.Count == 1)
        {
            return exceptionRules[0];
        }

        if (exceptionRules.Count > 1)
        {
            throw new ApplicationException("Unexpectedly found multiple matching exception rules.");
        }

        // If there is no matching exception rule, the prevailing rule is the one with the most labels.
        return ruleMatches.OrderByDescending(r => r.Labels.Count).FirstOrDefault();
    }
}
/// <summary>
/// A single rule parsed from one line of a public suffix list.
/// </summary>
public class PublicSuffixRule
{
    /// <summary>
    /// Construct a rule based on a single line from the www.publicsuffix.org list
    /// </summary>
    /// <param name="ruleLine">
    /// The rule line. Surrounding whitespace is ignored and the line is only read
    /// up to the first whitespace character.
    /// </param>
    /// <exception cref="ArgumentNullException">The line is null, empty or whitespace only.</exception>
    public PublicSuffixRule(string ruleLine)
    {
        if (string.IsNullOrWhiteSpace(ruleLine))
        {
            throw new ArgumentNullException("ruleLine");
        }

        // A null separator array splits on any whitespace (spaces and tabs alike); the
        // whitespace check above guarantees that at least one token exists.
        var ruleText = ruleLine.Split((char[])null, StringSplitOptions.RemoveEmptyEntries)[0];

        // Parse the rule and set properties accordingly:
        if (ruleText.StartsWith("*", StringComparison.Ordinal))
        {
            this.Type = RuleType.Wildcard;
            this.Name = ruleText;
        }
        else if (ruleText.StartsWith("!", StringComparison.Ordinal))
        {
            // The "!" marker is not part of the rule name.
            this.Type = RuleType.Exception;
            this.Name = ruleText.Substring(1);
        }
        else
        {
            this.Type = RuleType.Normal;
            this.Name = ruleText;
        }

        // Labels are stored right-to-left so index 0 is the top-level label.
        var labels = this.Name.Split('.').ToList();
        labels.Reverse();
        this.Labels = labels;
    }

    /// <summary>Gets the rule text without the leading "!" of an exception rule.</summary>
    public string Name { get; private set; }

    /// <summary>Gets the kind of rule.</summary>
    public RuleType Type { get; private set; }

    /// <summary>Gets the labels of <see cref="Name"/> in reversed order (top-level label first).</summary>
    public List<string> Labels { get; private set; }

    /// <summary>
    /// Determines whether this rule matches the given domain.
    /// </summary>
    /// <param name="domainLabels">Domain labels in reversed order (top-level label first).</param>
    /// <returns>
    /// True when the domain has at least as many labels as the rule and every rule label
    /// either is "*" or equals the domain label at the same position.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="domainLabels"/> is null.</exception>
    public bool AppliesTo(List<string> domainLabels)
    {
        if (domainLabels == null)
        {
            throw new ArgumentNullException("domainLabels");
        }

        if (this.Labels.Count > domainLabels.Count)
        {
            return false;
        }

        for (var position = 0; position < this.Labels.Count; position++)
        {
            var ruleLabel = this.Labels[position];

            // A wildcard stands for exactly one label; the labels after it must still
            // be compared, so keep going instead of declaring a match right away.
            if (ruleLabel == "*")
            {
                continue;
            }

            if (ruleLabel != domainLabels[position])
            {
                return false;
            }
        }

        return true;
    }

    /// <summary>
    /// Rule type
    /// </summary>
    public enum RuleType
    {
        /// <summary>
        /// A normal rule
        /// </summary>
        Normal,

        /// <summary>
        /// A wildcard rule, as defined by www.publicsuffix.org
        /// </summary>
        Wildcard,

        /// <summary>
        /// An exception rule, as defined by www.publicsuffix.org
        /// </summary>
        Exception
    }
}
/// <summary>
/// Holds the list of public suffix rules parsed from a rules file or from a sequence of rule lines.
/// </summary>
public class PublicSuffixRuleCache
{
    /// <summary>
    /// Loads the rules from a UTF-8 text file.
    /// </summary>
    /// <param name="publicSuffixRulesFileLocation">Path of the rules file.</param>
    /// <exception cref="ArgumentNullException">The path is null or empty.</exception>
    /// <remarks>When the file does not exist the cache is left empty; no exception is thrown.</remarks>
    public PublicSuffixRuleCache(string publicSuffixRulesFileLocation)
    {
        if (string.IsNullOrEmpty(publicSuffixRulesFileLocation))
        {
            throw new ArgumentNullException("publicSuffixRulesFileLocation");
        }

        this.PublicSuffixRules = GetRules(publicSuffixRulesFileLocation);
    }

    /// <summary>
    /// Builds the rules from the given lines; blank lines and "//" comment lines are skipped.
    /// </summary>
    /// <param name="publicSuffixRules">Lines in public suffix list format.</param>
    /// <exception cref="ArgumentNullException"><paramref name="publicSuffixRules"/> is null.</exception>
    public PublicSuffixRuleCache(IEnumerable<string> publicSuffixRules)
    {
        if (publicSuffixRules == null)
        {
            throw new ArgumentNullException("publicSuffixRules");
        }

        this.PublicSuffixRules = GetRules(publicSuffixRules);
    }

    /// <summary>Gets the parsed rules.</summary>
    public List<PublicSuffixRule> PublicSuffixRules { get; private set; }

    /// <summary>
    /// Reads the rules file and parses its lines.
    /// </summary>
    /// <param name="publicSuffixRulesFileLocation">Path of the rules file.</param>
    /// <returns>The parsed rules, or an empty list when the file does not exist.</returns>
    private static List<PublicSuffixRule> GetRules(string publicSuffixRulesFileLocation)
    {
        // A missing file deliberately yields an empty rule set instead of an error.
        if (!File.Exists(publicSuffixRulesFileLocation))
        {
            return new List<PublicSuffixRule>();
        }

        // Load the rules from the cached text file
        var ruleStrings = File.ReadAllLines(publicSuffixRulesFileLocation, Encoding.UTF8);
        return GetRules((IEnumerable<string>)ruleStrings);
    }

    /// <summary>
    /// Parses rule lines, skipping null entries, blank lines and comments.
    /// </summary>
    /// <param name="publicSuffixRules">Lines in public suffix list format.</param>
    /// <returns>One rule per remaining line, in input order.</returns>
    private static List<PublicSuffixRule> GetRules(IEnumerable<string> publicSuffixRules)
    {
        // Lines are trimmed BEFORE the comment check so that an indented "//" comment is
        // recognised as a comment, and so that rules never carry leading whitespace.
        return publicSuffixRules
            .Where(ruleString => ruleString != null)
            .Select(ruleString => ruleString.Trim())
            .Where(ruleString => ruleString.Length != 0 && !ruleString.StartsWith("//", StringComparison.Ordinal))
            .Select(ruleString => new PublicSuffixRule(ruleString))
            .ToList();
    }
}
}
单元测试:
namespace DomainName.Library.Tests
{
using FluentAssertions;
using Xunit;
using DomainName.Parser;
/// <summary>
/// Checks the registrable domain computed by DomainName.TryParse against a list of
/// expected values, using the rules file at C:\publicsuffix.txt.
/// </summary>
public class CheckPublicSuffixTest2
{
    /// <summary>
    /// Parses <paramref name="domainName"/> with a freshly loaded rule cache and compares the result.
    /// </summary>
    /// <param name="domainName">The domain name to parse.</param>
    /// <param name="registerableDomain">The expected registrable domain, or null when none is expected.</param>
    /// <returns>
    /// True when parsing succeeded and produced the expected registrable domain, or when
    /// parsing failed and no registrable domain was expected.
    /// </returns>
    public bool CheckRegisterableDomain(string domainName, string registerableDomain)
    {
        DomainName parsed;
        var rules = new PublicSuffixRuleCache(@"C:\publicsuffix.txt");

        // A failed parse only counts as correct when nothing was expected.
        if (!DomainName.TryParse(domainName, rules, out parsed))
        {
            return registerableDomain == null;
        }

        return parsed.RegisterableDomainName == registerableDomain;
    }

    /// <summary>
    /// Runs every expectation in sequence; the first mismatch fails the test.
    /// </summary>
    [Fact]
    public void TestCheckPublicSuffix()
    {
        // Any copyright is dedicated to the Public Domain.
        // http://creativecommons.org/publicdomain/zero/1.0/

        // null input.
        this.Expect(null, null);

        // Mixed case.
        this.Expect("COM", null);
        this.Expect("example.COM", "example.com");
        this.Expect("WwW.example.COM", "example.com");

        // Leading dot.
        this.Expect(".com", null);
        this.Expect(".example", null);
        this.Expect(".example.com", null);
        this.Expect(".example.example", null);

        // Unlisted TLD.
        this.Expect("example", null);
        this.Expect("example.example", "example.example");
        this.Expect("b.example.example", "example.example");
        this.Expect("a.b.example.example", "example.example");

        // TLD with only 1 rule.
        this.Expect("biz", null);
        this.Expect("domain.biz", "domain.biz");
        this.Expect("b.domain.biz", "domain.biz");
        this.Expect("a.b.domain.biz", "domain.biz");

        // TLD with some 2-level rules.
        this.Expect("com", null);
        this.Expect("example.com", "example.com");
        this.Expect("b.example.com", "example.com");
        this.Expect("a.b.example.com", "example.com");
        this.Expect("uk.com", null);
        this.Expect("example.uk.com", "example.uk.com");
        this.Expect("b.example.uk.com", "example.uk.com");
        this.Expect("a.b.example.uk.com", "example.uk.com");
        this.Expect("test.ac", "test.ac");

        // TLD with only 1 (wildcard) rule.
        this.Expect("cy", null);
        this.Expect("c.cy", null);
        this.Expect("b.c.cy", "b.c.cy");
        this.Expect("a.b.c.cy", "b.c.cy");

        // More complex TLD.
        this.Expect("jp", null);
        this.Expect("test.jp", "test.jp");
        this.Expect("www.test.jp", "test.jp");
        this.Expect("ac.jp", null);
        this.Expect("test.ac.jp", "test.ac.jp");
        this.Expect("www.test.ac.jp", "test.ac.jp");
        this.Expect("kyoto.jp", null);
        this.Expect("test.kyoto.jp", "test.kyoto.jp");
        this.Expect("ide.kyoto.jp", null);
        this.Expect("b.ide.kyoto.jp", "b.ide.kyoto.jp");
        this.Expect("a.b.ide.kyoto.jp", "b.ide.kyoto.jp");
        this.Expect("c.kobe.jp", null);
        this.Expect("b.c.kobe.jp", "b.c.kobe.jp");
        this.Expect("a.b.c.kobe.jp", "b.c.kobe.jp");
        this.Expect("city.kobe.jp", "city.kobe.jp");
        this.Expect("www.city.kobe.jp", "city.kobe.jp");

        // TLD with a wildcard rule and exceptions.
        this.Expect("om", null);
        this.Expect("test.om", null);
        this.Expect("b.test.om", "b.test.om");
        this.Expect("a.b.test.om", "b.test.om");
        this.Expect("songfest.om", "songfest.om");
        this.Expect("www.songfest.om", "songfest.om");

        // US K12.
        this.Expect("us", null);
        this.Expect("test.us", "test.us");
        this.Expect("www.test.us", "test.us");
        this.Expect("ak.us", null);
        this.Expect("test.ak.us", "test.ak.us");
        this.Expect("www.test.ak.us", "test.ak.us");
        this.Expect("k12.ak.us", null);
        this.Expect("test.k12.ak.us", "test.k12.ak.us");
        this.Expect("www.test.k12.ak.us", "test.k12.ak.us");
    }

    /// <summary>
    /// Asserts that <see cref="CheckRegisterableDomain"/> accepts the given pair.
    /// </summary>
    private void Expect(string domainName, string registerableDomain)
    {
        this.CheckRegisterableDomain(domainName, registerableDomain).Should().BeTrue();
    }
}
}
答案 1 :(得分:1)
我自己刚才也在找它,它似乎仍然可以获取 -