解析邮件头中的MIME日期(C#但与语言无关)

时间:2016-02-01 13:54:17

标签: c# email parsing mime

我正在开发一个处理电子邮件的小型本地C#应用。我使用S22.Imap来下载邮件并拆分邮件头。当我尝试获取电子邮件的日期时,它返回的是字符串形式的日期,与MIME邮件头中的内容完全一致。

对于大多数日期,DateTime.TryParse都能正常工作,但对某些日期会失败。下面每种情况各举一例,我在RFC中找不到应该如何处理它们:

  • Fri, 15 Jan 2016 20:21:44 -0600 -0700
  • Mon, 3 Jan 2011 20:32:19 +0000 (GMT+00:00)
  • Tue, 12 Jun 2012 19:22:28 0200 (“+”是隐含的吗?)
  • Mon, 11 Jan 2010 17:28:39 3600 (不在RFC中)
  • Thu, 7 Oct 2010 17:31:20 7200
  • Fri, 24 Jul 2009 21:13:28 +0100 (Paris, Madrid)(我必须忽略+0100之后的内容吗?这样做会不会丢失信息?)
  • Thu, 28 May 2015 10:58:24 +0200 (Paris, Madrid (heure d'été))(同样的问题)

有人能告诉我前五个的含义,或者指出一份能解释其他格式的文档吗?

提前感谢您的帮助。

1 个答案:

答案 0 :(得分:0)

我不得不编写自己的类来解析这些日期(如果你感兴趣,我有一个与S22.Imap竞争的库,名为MailKit,它使用我的MimeKit库来解析邮件、日期等)。

我冒昧地把日期解析器从MimeKit中分离出来,使其成为可独立使用的代码,粘贴在下面:
using System;
using System.Text;
using System.Collections.Generic;

namespace DateParserUtils {
    /// <summary>
    /// Per-byte classification flags that are OR'ed together over every byte of a date token.
    /// </summary>
    /// <remarks>
    /// The <c>Non*</c> flags are negative: a token can only be interpreted as a given kind
    /// (number, weekday, month, ...) while the corresponding <c>Non*</c> bit is still clear
    /// after the flags of all of its bytes have been combined. <c>HasColon</c> and
    /// <c>HasSign</c> are positive markers set by ':' and by '+' / '-'.
    /// </remarks>
    [Flags]
    enum DateTokenFlags : byte
    {
        None           = 0,
        NonNumeric     = (1 << 0),
        NonWeekday     = (1 << 1),
        NonMonth       = (1 << 2),
        NonTime        = (1 << 3),
        NonAlphaZone   = (1 << 4),
        NonNumericZone = (1 << 5),
        HasColon       = (1 << 6),
        HasSign        = (1 << 7),
    }

    /// <summary>
    /// A slice of the input buffer (start index + length) together with the combined
    /// <see cref="DateTokenFlags"/> of the bytes it contains.
    /// </summary>
    class DateToken
    {
        public DateTokenFlags Flags { get; private set; }
        public int StartIndex { get; private set; }
        public int Length { get; private set; }

        // True when every byte of the token is a decimal digit.
        public bool IsNumeric {
            get { return (Flags & DateTokenFlags.NonNumeric) == 0; }
        }

        // True when every byte is a letter that occurs in an English weekday name.
        public bool IsWeekday {
            get { return (Flags & DateTokenFlags.NonWeekday) == 0; }
        }

        // True when every byte is a letter that occurs in an English month name.
        public bool IsMonth {
            get { return (Flags & DateTokenFlags.NonMonth) == 0; }
        }

        // True when the token consists only of digits and ':' and contains at least one ':'.
        public bool IsTimeOfDay {
            get { return (Flags & DateTokenFlags.NonTime) == 0 && (Flags & DateTokenFlags.HasColon) != 0; }
        }

        // True when the token consists only of digits, '+' and '-' and contains at least one sign.
        public bool IsNumericZone {
            get { return (Flags & DateTokenFlags.NonNumericZone) == 0 && (Flags & DateTokenFlags.HasSign) != 0; }
        }

        // True when every byte is an upper-case ASCII letter.
        public bool IsAlphaZone {
            get { return (Flags & DateTokenFlags.NonAlphaZone) == 0; }
        }

        public bool IsTimeZone {
            get { return IsNumericZone || IsAlphaZone; }
        }

        public DateToken (DateTokenFlags flags, int startIndex, int length)
        {
            StartIndex = startIndex;
            Length = length;
            Flags = flags;
        }
    }

    /// <summary>
    /// Utility methods to parse rfc822 date strings.
    /// </summary>
    /// <remarks>
    /// The input is split into tokens which are first matched against the standard
    /// "[weekday,] day month year time zone" layout; if that fails, a more lenient
    /// parser that accepts the tokens in any order is tried.
    /// </remarks>
    public static class DateUtils
    {
        internal static readonly DateTime UnixEpoch = new DateTime (1970, 1, 1, 0, 0, 0, 0);
        const string MonthCharacters = "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember";
        const string WeekdayCharacters = "SundayMondayTuesdayWednesdayThursdayFridaySaturday";
        const string AlphaZoneCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        const string NumericZoneCharacters = "+-0123456789";
        const string NumericCharacters = "0123456789";
        const string TimeCharacters = "0123456789:";

        static readonly string[] Months = {
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
        };

        static readonly string[] WeekDays = {
            "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
        };

        // Named zones mapped to their offset encoded as (hours * 100) + minutes.
        static readonly Dictionary<string, int> timezones;

        // Classification flags for each possible byte value (see DateTokenFlags).
        static readonly DateTokenFlags[] datetok;

        static DateUtils ()
        {
            timezones = new Dictionary<string, int> {
                { "UT",       0 }, { "UTC",      0 }, { "GMT",      0 },
                { "EDT",   -400 }, { "EST",   -500 },
                { "CDT",   -500 }, { "CST",   -600 },
                { "MDT",   -600 }, { "MST",   -700 },
                { "PDT",   -700 }, { "PST",   -800 },
                // Note: rfc822 got the signs backwards for the military
                // timezones so some sending clients may mistakenly use the
                // wrong values.
                { "A",      100 }, { "B",      200 }, { "C",      300 },
                { "D",      400 }, { "E",      500 }, { "F",      600 },
                { "G",      700 }, { "H",      800 }, { "I",      900 },
                { "K",     1000 }, { "L",     1100 }, { "M",     1200 },
                { "N",     -100 }, { "O",     -200 }, { "P",     -300 },
                { "Q",     -400 }, { "R",     -500 }, { "S",     -600 },
                { "T",     -700 }, { "U",     -800 }, { "V",     -900 },
                { "W",    -1000 }, { "X",    -1100 }, { "Y",    -1200 },
                { "Z",        0 },
            };

            datetok = new DateTokenFlags[256];
            var any = new char[2];

            for (int c = 0; c < 256; c++) {
                // Build the upper/lower-case pair used for the case-insensitive month
                // and weekday lookups. Non-letters map to themselves so that a pair
                // left over from a previous iteration is never consulted.
                any[0] = any[1] = (char) c;

                if (c >= 0x41 && c <= 0x5a)
                    any[1] = (char) (c + 0x20);
                else if (c >= 0x61 && c <= 0x7a)
                    any[0] = (char) (c - 0x20);

                if (NumericZoneCharacters.IndexOf ((char) c) == -1)
                    datetok[c] |= DateTokenFlags.NonNumericZone;
                if (AlphaZoneCharacters.IndexOf ((char) c) == -1)
                    datetok[c] |= DateTokenFlags.NonAlphaZone;
                if (WeekdayCharacters.IndexOfAny (any) == -1)
                    datetok[c] |= DateTokenFlags.NonWeekday;
                if (NumericCharacters.IndexOf ((char) c) == -1)
                    datetok[c] |= DateTokenFlags.NonNumeric;
                if (MonthCharacters.IndexOfAny (any) == -1)
                    datetok[c] |= DateTokenFlags.NonMonth;
                if (TimeCharacters.IndexOf ((char) c) == -1)
                    datetok[c] |= DateTokenFlags.NonTime;
            }

            datetok[':'] |= DateTokenFlags.HasColon;
            datetok['+'] |= DateTokenFlags.HasSign;
            datetok['-'] |= DateTokenFlags.HasSign;
        }

        // Matches the first three letters of the token (case-insensitively) against
        // the abbreviated weekday names; longer spellings such as "Friday" are accepted.
        static bool TryGetWeekday (DateToken token, byte[] text, out DayOfWeek weekday)
        {
            weekday = DayOfWeek.Sunday;

            if (!token.IsWeekday || token.Length < 3)
                return false;

            var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

            if (name.Length > 3)
                name = name.Substring (0, 3);

            for (int day = 0; day < WeekDays.Length; day++) {
                if (WeekDays[day].Equals (name, StringComparison.OrdinalIgnoreCase)) {
                    weekday = (DayOfWeek) day;
                    return true;
                }
            }

            return false;
        }

        // Parses a run of decimal digits starting at index and advances index past them.
        // Returns false when no digit was consumed or when the value would overflow an int.
        static bool TryParseInt32 (byte[] text, ref int index, int endIndex, out int value)
        {
            int startIndex = index;

            value = 0;

            while (index < endIndex && text[index] >= (byte) '0' && text[index] <= (byte) '9') {
                int digit = text[index] - (byte) '0';

                if (value > int.MaxValue / 10) {
                    // integer overflow
                    return false;
                }

                if (value == int.MaxValue / 10 && digit > int.MaxValue % 10) {
                    // integer overflow
                    return false;
                }

                value = (value * 10) + digit;
                index++;
            }

            return index > startIndex;
        }

        // Accepts an all-digit token whose value lies in the range 1..31.
        static bool TryGetDayOfMonth (DateToken token, byte[] text, out int day)
        {
            int endIndex = token.StartIndex + token.Length;
            int index = token.StartIndex;

            day = 0;

            if (!token.IsNumeric)
                return false;

            if (!TryParseInt32 (text, ref index, endIndex, out day))
                return false;

            if (day <= 0 || day > 31)
                return false;

            return true;
        }

        // Matches the first three letters of the token (case-insensitively) against
        // the abbreviated month names; month is 1-based on success.
        static bool TryGetMonth (DateToken token, byte[] text, out int month)
        {
            month = 0;

            if (!token.IsMonth || token.Length < 3)
                return false;

            var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

            if (name.Length > 3)
                name = name.Substring (0, 3);

            for (int i = 0; i < Months.Length; i++) {
                if (Months[i].Equals (name, StringComparison.OrdinalIgnoreCase)) {
                    month = i + 1;
                    return true;
                }
            }

            return false;
        }

        // Accepts an all-digit token. Values below 100 are treated as two-digit years
        // (0..69 -> 20xx, 70..99 -> 19xx); anything earlier than 1969 is rejected.
        static bool TryGetYear (DateToken token, byte[] text, out int year)
        {
            int endIndex = token.StartIndex + token.Length;
            int index = token.StartIndex;

            year = 0;

            if (!token.IsNumeric)
                return false;

            if (!TryParseInt32 (text, ref index, endIndex, out year))
                return false;

            if (year < 100)
                year += (year < 70) ? 2000 : 1900;

            return year >= 1969;
        }

        // Parses "hh:mm" or "hh:mm:ss" with hh <= 23, mm <= 59 and ss <= 59.
        static bool TryGetTimeOfDay (DateToken token, byte[] text, out int hour, out int minute, out int second)
        {
            int endIndex = token.StartIndex + token.Length;
            int index = token.StartIndex;

            hour = minute = second = 0;

            if (!token.IsTimeOfDay)
                return false;

            if (!TryParseInt32 (text, ref index, endIndex, out hour) || hour > 23)
                return false;

            if (index >= endIndex || text[index++] != (byte) ':')
                return false;

            if (!TryParseInt32 (text, ref index, endIndex, out minute) || minute > 59)
                return false;

            // Allow just hh:mm (i.e. w/o the :ss?)
            if (index >= endIndex || text[index++] != (byte) ':')
                return true;

            if (!TryParseInt32 (text, ref index, endIndex, out second) || second > 59)
                return false;

            return index == endIndex;
        }

        // Extracts a zone offset encoded as (hours * 100) + minutes from one of:
        //   - a signed numeric zone such as "+0100" or "-0600",
        //   - an alphabetic zone of at most 3 upper-case letters found in the timezones table,
        //   - an unsigned all-digit token such as "0200", which is taken as a positive offset.
        // A token that has none of these shapes leaves tzone at 0 and still returns true.
        static bool TryGetTimeZone (DateToken token, byte[] text, out int tzone)
        {
            tzone = 0;

            if (token.IsNumericZone) {
                int endIndex = token.StartIndex + token.Length;
                int index = token.StartIndex;
                int sign;

                if (text[index] == (byte) '-')
                    sign = -1;
                else if (text[index] == (byte) '+')
                    sign = 1;
                else
                    return false;

                index++;

                if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
                    return false;

                tzone *= sign;
            } else if (token.IsAlphaZone) {
                if (token.Length > 3)
                    return false;

                var name = Encoding.ASCII.GetString (text, token.StartIndex, token.Length);

                if (!timezones.TryGetValue (name, out tzone))
                    return false;
            } else if (token.IsNumeric) {
                int endIndex = token.StartIndex + token.Length;
                int index = token.StartIndex;

                // Use the local integer parser: no ParseUtils type exists in this
                // standalone file.
                if (!TryParseInt32 (text, ref index, endIndex, out tzone) || index != endIndex)
                    return false;
            }

            return true;
        }

        static bool IsWhiteSpace (byte c)
        {
            return c == ' ' || c == '\t';
        }

        static bool IsTokenDelimeter (byte c)
        {
            return c == (byte) '-' || c == (byte) '/' || c == (byte) ',' || IsWhiteSpace (c);
        }

        // Advances index past any spaces and tabs; returns true if at least one was skipped.
        static bool SkipWhiteSpace (byte[] text, ref int index, int endIndex)
        {
            int startIndex = index;

            while (index < endIndex && IsWhiteSpace (text[index]))
                index++;

            return index > startIndex;
        }

        // Skips a parenthesized comment. On entry index refers to the opening '(';
        // nested parentheses are tracked and a backslash escapes the byte that follows it.
        // Returns false when the input ends before the comment is closed.
        static bool SkipComment (byte[] text, ref int index, int endIndex)
        {
            bool escaped = false;
            int depth = 1;

            index++;

            while (index < endIndex && depth > 0) {
                if (text[index] == (byte) '\\') {
                    escaped = !escaped;
                } else if (!escaped) {
                    if (text[index] == (byte) '(')
                        depth++;
                    else if (text[index] == (byte) ')')
                        depth--;
                    escaped = false;
                } else {
                    escaped = false;
                }

                index++;
            }

            return depth == 0;
        }

        // Skips any mix of whitespace and comments; returns false on an unterminated comment.
        static bool SkipCommentsAndWhiteSpace (byte[] text, ref int index, int endIndex)
        {
            SkipWhiteSpace (text, ref index, endIndex);

            while (index < endIndex && text[index] == (byte) '(') {
                if (!SkipComment (text, ref index, endIndex))
                    return false;

                SkipWhiteSpace (text, ref index, endIndex);
            }

            return true;
        }

        // Splits the given range of the buffer into tokens separated by '-', '/', ','
        // or whitespace. Comments that appear between tokens are dropped. Tokenizing
        // stops at the first unterminated comment.
        static IEnumerable<DateToken> TokenizeDate (byte[] text, int startIndex, int length)
        {
            int endIndex = startIndex + length;
            int index = startIndex;
            DateTokenFlags mask;
            int start;

            while (index < endIndex) {
                if (!SkipCommentsAndWhiteSpace (text, ref index, endIndex))
                    break;

                if (index >= endIndex)
                    break;

                // get the initial mask for this token
                if ((mask = datetok[text[index]]) != DateTokenFlags.None) {
                    // Note: the first byte is consumed unconditionally, which is what
                    // keeps the leading '-' of a zone such as "-0600" inside the token.
                    start = index++;

                    // find the end of this token
                    while (index < endIndex && !IsTokenDelimeter (text[index]))
                        mask |= datetok[text[index++]];

                    yield return new DateToken (mask, start, index - start);
                }

                // skip over the token delimeter
                index++;
            }

            yield break;
        }

        // Parses "[weekday,] day month year time zone". Tokens after the zone are ignored.
        // An unparsable zone token is treated as offset 0, and offsets outside of
        // -1400..+1400 are wrapped by 2400 until they fall inside that range.
        static bool TryParseStandardDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
        {
            int day, month, year, tzone;
            int hour, minute, second;
            DayOfWeek weekday;
            int n = 0;

            date = new DateTimeOffset ();

            // we need at least 5 tokens, 6 if we have a weekday
            if (tokens.Count < 5)
                return false;

            // Note: the weekday is not required
            if (TryGetWeekday (tokens[n], text, out weekday)) {
                if (tokens.Count < 6)
                    return false;

                n++;
            }

            if (!TryGetDayOfMonth (tokens[n++], text, out day))
                return false;

            if (!TryGetMonth (tokens[n++], text, out month))
                return false;

            if (!TryGetYear (tokens[n++], text, out year))
                return false;

            if (!TryGetTimeOfDay (tokens[n++], text, out hour, out minute, out second))
                return false;

            if (!TryGetTimeZone (tokens[n], text, out tzone))
                tzone = 0;

            while (tzone < -1400)
                tzone += 2400;

            while (tzone > 1400)
                tzone -= 2400;

            // tzone is encoded as (hours * 100) + minutes; both parts keep the sign.
            int minutes = tzone % 100;
            int hours = tzone / 100;

            var offset = new TimeSpan (hours, minutes, 0);

            try {
                date = new DateTimeOffset (year, month, day, hour, minute, second, offset);
            } catch (ArgumentOutOfRangeException) {
                return false;
            }

            return true;
        }

        // Lenient fallback: accepts the tokens in any order and assigns each one to the
        // first still-missing field it can satisfy. Year, month and day are required;
        // the time defaults to 00:00:00 and the offset to zero. Unlike the standard
        // parser, the zone offset is not wrapped into range here, so an out-of-range
        // offset makes the DateTimeOffset constructor throw and the parse fail.
        static bool TryParseUnknownDateFormat (IList<DateToken> tokens, byte[] text, out DateTimeOffset date)
        {
            int? day = null, month = null, year = null, tzone = null;
            int hour = 0, minute = 0, second = 0;
            bool numericMonth = false;
            bool haveWeekday = false;
            bool haveTime = false;
            DayOfWeek weekday;
            TimeSpan offset;

            for (int i = 0; i < tokens.Count; i++) {
                int value;

                if (!haveWeekday && tokens[i].IsWeekday) {
                    if (TryGetWeekday (tokens[i], text, out weekday)) {
                        haveWeekday = true;
                        continue;
                    }
                }

                if ((month == null || numericMonth) && tokens[i].IsMonth) {
                    if (TryGetMonth (tokens[i], text, out value)) {
                        // A month name wins over an earlier numeric guess: the number
                        // that was taken for the month becomes the day instead.
                        if (numericMonth) {
                            numericMonth = false;
                            day = month;
                        }

                        month = value;
                        continue;
                    }
                }

                if (!haveTime && tokens[i].IsTimeOfDay) {
                    if (TryGetTimeOfDay (tokens[i], text, out hour, out minute, out second)) {
                        haveTime = true;
                        continue;
                    }
                }

                if (tzone == null && tokens[i].IsTimeZone) {
                    if (TryGetTimeZone (tokens[i], text, out value)) {
                        tzone = value;
                        continue;
                    }
                }

                if (tokens[i].IsNumeric) {
                    // The first 4-digit number is the year; a later one is an unsigned zone.
                    if (tokens[i].Length == 4) {
                        if (year == null) {
                            if (TryGetYear (tokens[i], text, out value))
                                year = value;
                        } else if (tzone == null) {
                            if (TryGetTimeZone (tokens[i], text, out value))
                                tzone = value;
                        }

                        continue;
                    }

                    if (tokens[i].Length > 2)
                        continue;

                    // Note: we likely have either YYYY[-/]MM[-/]DD or MM[-/]DD[-/]YY
                    int endIndex = tokens[i].StartIndex + tokens[i].Length;
                    int index = tokens[i].StartIndex;

                    // The token is 1 or 2 digits long here, so this cannot fail.
                    TryParseInt32 (text, ref index, endIndex, out value);

                    if (month == null && value > 0 && value <= 12) {
                        numericMonth = true;
                        month = value;
                        continue;
                    }

                    if (day == null && value > 0 && value <= 31) {
                        day = value;
                        continue;
                    }

                    if (year == null && value >= 69) {
                        year = 1900 + value;
                        continue;
                    }
                }

                // Unrecognized token: ignore it.
            }

            if (year == null || month == null || day == null) {
                date = new DateTimeOffset ();
                return false;
            }

            if (!haveTime)
                hour = minute = second = 0;

            if (tzone != null) {
                int minutes = tzone.Value % 100;
                int hours = tzone.Value / 100;

                offset = new TimeSpan (hours, minutes, 0);
            } else {
                offset = new TimeSpan (0);
            }

            try {
                date = new DateTimeOffset (year.Value, month.Value, day.Value, hour, minute, second, offset);
            } catch (ArgumentOutOfRangeException) {
                date = new DateTimeOffset ();
                return false;
            }

            return true;
        }

        // Shared implementation behind the public TryParse overloads: tokenizes the
        // (already validated) range and tries the standard layout before the lenient one.
        static bool TryParseTokens (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
        {
            var tokens = new List<DateToken> (TokenizeDate (buffer, startIndex, length));

            if (TryParseStandardDateFormat (tokens, buffer, out date))
                return true;

            if (TryParseUnknownDateFormat (tokens, buffer, out date))
                return true;

            date = new DateTimeOffset ();

            return false;
        }

        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the supplied buffer starting at the given index
        /// and spanning across the specified number of bytes.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="startIndex">The starting index of the input buffer.</param>
        /// <param name="length">The number of bytes in the input buffer to parse.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="startIndex"/> and <paramref name="length"/> do not specify
        /// a valid range in the byte array.
        /// </exception>
        public static bool TryParse (byte[] buffer, int startIndex, int length, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            if (startIndex < 0 || startIndex > buffer.Length)
                throw new ArgumentOutOfRangeException ("startIndex");

            if (length < 0 || length > (buffer.Length - startIndex))
                throw new ArgumentOutOfRangeException ("length");

            return TryParseTokens (buffer, startIndex, length, out date);
        }

        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the supplied buffer starting at the specified index.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="startIndex">The starting index of the input buffer.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="System.ArgumentOutOfRangeException">
        /// <paramref name="startIndex"/> is not within the range of the byte array.
        /// </exception>
        public static bool TryParse (byte[] buffer, int startIndex, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            if (startIndex < 0 || startIndex > buffer.Length)
                throw new ArgumentOutOfRangeException ("startIndex");

            return TryParseTokens (buffer, startIndex, buffer.Length - startIndex, out date);
        }

        /// <summary>
        /// Tries to parse the given input buffer into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the specified buffer.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="buffer">The input buffer.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="buffer"/> is <c>null</c>.
        /// </exception>
        public static bool TryParse (byte[] buffer, out DateTimeOffset date)
        {
            if (buffer == null)
                throw new ArgumentNullException ("buffer");

            return TryParseTokens (buffer, 0, buffer.Length, out date);
        }

        /// <summary>
        /// Tries to parse the given input text into a new <see cref="System.DateTimeOffset"/> instance.
        /// </summary>
        /// <remarks>
        /// Parses an rfc822 date and time from the specified text. The text is converted
        /// to UTF-8 bytes before it is tokenized.
        /// </remarks>
        /// <returns><c>true</c>, if the date was successfully parsed, <c>false</c> otherwise.</returns>
        /// <param name="text">The input text.</param>
        /// <param name="date">The parsed date.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="text"/> is <c>null</c>.
        /// </exception>
        public static bool TryParse (string text, out DateTimeOffset date)
        {
            if (text == null)
                throw new ArgumentNullException ("text");

            var buffer = Encoding.UTF8.GetBytes (text);

            return TryParseTokens (buffer, 0, buffer.Length, out date);
        }
    }
}