Question

我有一段文本，需要在其中搜索特定的短语。举个例子，我要搜索的短语是 "1 Johannes 1:12"。我使用 String.Contains 方法，但它会给出两个匹配结果，因为我同时还在搜索另一个短语 "1 Johannes 1:1"。结果它并不是先取 "1 Johannes 1:12" 再取 "1 Johannes 1:1"，而是反了过来。这对我来说是不对的，因为我想得到正确的那个短语。

我尝试过不同的选项，包括子字符串。但我在这个问题上需要帮助。

提前感谢所有答案。干杯!

// Text that is searched for each known reference.
string fulltext = "randomtext 1 Johannes 1:12 randomtext";

// References to look for; note that the second one is a prefix of the first.
List<string> sentences = new List<string>
{
    "1 Johannes 1:12",
    "1 Johannes 1:1"
};

for (int index = 0; index < sentences.Count; index++)
{
    string candidate = sentences[index];
    if (!fulltext.Contains(candidate))
    {
        continue;
    }

    // Both entries reach this point, because Contains() is a plain substring test:
    // expected only "1 Johannes 1:12", but "1 Johannes 1:1" matches as well.
    //do operation
}

Answer 1

圣经引用的解析和识别很棘手，尤其是因为存在多种缩写风格，而且数字看起来很相似。您遇到的问题在于 String.Contains() 是一把非常大的锤子，而您需要的是更像一套套筒扳手的工具。换句话说，一个完整而正确的答案所需要的代码量，超出了这种问答格式能够舒适容纳的范围。我写过代码来处理灵修材料和讲道文字稿，并从中提取所有经文引用。代码位于私有存储库中，但我会尽量贴出相关部分。

圣经引用采用以下格式书写：{Book} {Chapter}:{Verse}，经节范围还有一些变体写法。所以第一步是识别书卷。为此，我创建了一个类来表示一卷书及其已知的缩写（我支持两种有文献记载的缩写风格）。Book 类如下：

/// <summary>
/// One of the 66 recognized books, together with its full name, two
/// abbreviation styles and its chapter count. Instances are created only by
/// the static constructor; callers obtain them through
/// <see cref="TryParse"/> or <see cref="List"/>.
/// </summary>
public class Book
{
    // Characters treated as word separators when normalizing a book name.
    private static readonly char[] WordSeparators = { ' ', '\r', '\n', '\t' };

    // The set of books we recognize
    private static readonly List<Book> books;

    // Lower-cased alternative spellings mapped to the book they stand for.
    private static readonly Dictionary<string, Book> commonMisspellings;

    static Book()
    {
        // Initialize the set
        books = new List<Book>{
            // Old Testament
            new Book("Genesis", "Gen.", "Ge", 50), // Gen
            new Book("Exodus", "Ex.", "Ex", 40),  // Exod
            new Book("Leviticus", "Lev.", "Le", 27), // Lev
            new Book("Numbers", "Num.", "Nu", 36), // Num
            new Book("Deuteronomy", "Deut.", "De", 34), // Deut
            new Book("Joshua", "Josh.", "Jos", 24), // Josh
            new Book("Judges", "Judg.", "Jud", 21), // Judg
            new Book("Ruth", "Ruth", "Ru", 4), // Ruth
            new Book("1 Samuel", "1 Sam.", "1 S", 31), // 1Sam
            new Book("2 Samuel", "2 Sam.", "2 S", 24), // 2Sam
            new Book("1 Kings", "1 Kings", "1 K", 22), // 1Kgs
            new Book("2 Kings", "2 Kings", "2 K", 25), // 2Kgs
            new Book("1 Chronicles", "1 Chron.", "1 Chr", 29), // 1Chr
            new Book("2 Chronicles", "2 Chron.", "2 Chr", 36), // 2Chr
            new Book("Ezra", "Ezra", "Ezr", 10), // Ezra
            new Book("Nehemiah", "Neh.", "Ne", 13), // Neh
            new Book("Esther", "Est.", "Est", 10), // Esth
            new Book("Job", "Job", "Jb", 42), // Job
            new Book("Psalms", "Ps.", "Ps", 150), // Ps
            new Book("Proverbs", "Prov.", "Pr", 31), // Prov
            new Book("Ecclesiastes", "Eccl.", "Ec", 12), // Eccl
            new Book("Song of Solomon", "Song", "Song", 8), // Song
            new Book("Isaiah", "Isa.", "Is", 66), // Isa
            new Book("Jeremiah", "Jer.", "Je", 52), // Jer
            new Book("Lamentations", "Lam.", "Lam", 5), // Lam
            new Book("Ezekiel", "Ezek.", "Ez", 48), // Ezek
            new Book("Daniel", "Dan.", "Da", 12), // Dan
            new Book("Hosea", "Hos.", "Ho", 14), // Hos
            new Book("Joel", "Joel", "Joel", 3), // Joel
            new Book("Amos", "Amos", "Am", 9), // Amos
            new Book("Obadiah", "Obad.", "Obad", 1), // Obad
            new Book("Jonah", "Jonah", "Jona", 4), // Jonah
            new Book("Micah", "Mic.", "Mi", 7), // Mic
            new Book("Nahum", "Nah.", "Na", 3), // Nah
            new Book("Habakkuk", "Hab.", "Hab", 3), // Hab
            new Book("Zephaniah", "Zeph.", "Zep", 3), // Zeph
            new Book("Haggai", "Hag.", "Hag", 2), // Hag
            new Book("Zechariah", "Zech.", "Zec", 14), // Zech
            new Book("Malachi", "Mal.", "Mal", 4), // Mal

            // New Testament
            new Book("Matthew", "Matt.", "Mt", 28), // Matt
            new Book("Mark", "Mark", "Mk", 16), // Mark
            new Book("Luke", "Luke", "Lu", 24), // Luke
            new Book("John", "John", "Jn", 21), // John
            new Book("Acts", "Acts", "Ac", 28), // Acts
            new Book("Romans", "Rom.", "Ro", 16), // Rom
            new Book("1 Corinthians", "1 Cor.", "1 Co", 16), // 1Cor
            new Book("2 Corinthians", "2 Cor.", "2 Co", 13), // 2Cor
            new Book("Galatians", "Gal.", "Ga", 6), // Gal
            new Book("Ephesians", "Eph.", "Ep", 6), // Eph
            new Book("Philippians", "Phil.", "Ph", 4), // Phil
            new Book("Colossians", "Col.", "Col", 4), // Col
            new Book("1 Thessalonians", "1 Thes.", "1 Th", 5), // 1Thess
            new Book("2 Thessalonians", "2 Thes.", "2 Th", 3), // 2Thess
            new Book("1 Timothy", "1 Tim.", "1 Ti", 6), // 1Tim
            new Book("2 Timothy", "2 Tim.", "2 Ti", 4), // 2Tim
            new Book("Titus", "Titus", "Tit", 3), // Titus
            new Book("Philemon", "Philem.", "Phm", 1), // Phlm
            new Book("Hebrews", "Heb.", "He", 13), // Heb
            new Book("James", "James", "Ja", 5), // Jas
            new Book("1 Peter", "1 Peter", "1 Pe", 5), // 1Pet
            new Book("2 Peter", "2 Peter", "2 Pe", 3), // 2Pet
            new Book("1 John", "1 John", "1 Jn", 5), // 1John
            new Book("2 John", "2 John", "2 Jn", 1), // 2John
            new Book("3 John", "3 John", "3 Jn", 1), // 3John
            new Book("Jude", "Jude", "Jude", 1), // Jude
            new Book("Revelation", "Rev.", "Re", 22) // Rev
        };

        Debug.Assert(books.Count == 66);

        // These are based on what I found in the set of over 6,000
        // transcripts that people typed. Keys must be lower case because
        // TryParse lower-cases its input before the lookup.
        commonMisspellings = new Dictionary<string, Book>
        {
            { "song of songs", FindByThompson("Song") },
            { "psalm", FindByThompson("Ps") },
            { "like", FindByThompson("Lu") },
            { "jerimiah", FindByThompson("Je") },
            { "galations", FindByThompson("Ga") },

            // Spellings this class used as the full names before they were
            // corrected; kept so input that relied on them still resolves.
            { "obadaiah", FindByThompson("Obad") },
            { "malachai", FindByThompson("Mal") }
        };
    }

    private static int numCreated = 0;

    // Position of this book in creation order (0-based).
    private int order;

    private Book(string fullName, string abbrev, string thompson, int chapters)
    {
        order = numCreated;
        Name = fullName;
        StandardAbreviation = abbrev;
        ThompsonAbreviation = thompson;
        ChapterCount = chapters;
        numCreated++;
    }

    /// <summary>
    /// The unabbreviated name of the book.
    /// </summary>
    public string Name { get; private set; }

    /// <summary>
    /// Standard abbreviations as defined in "The Christian Writer's
    /// Manual of Style", 2004 edition (ISBN: 9780310487715).
    /// </summary>
    public string StandardAbreviation { get; private set; }

    /// <summary>
    /// Thompson Chain references, pulled from the 5th edition.
    /// </summary>
    public string ThompsonAbreviation { get; private set; }

    /// <summary>
    /// The number of chapters in the book.
    /// </summary>
    public int ChapterCount { get; private set; }

    /// <summary>
    /// Tries to recognize <paramref name="inString"/> as a book name. The
    /// input is matched, ignoring case, against the full name, both
    /// abbreviation styles, the standard abbreviation without its trailing
    /// period, and finally the table of common misspellings.
    /// </summary>
    /// <param name="inString">Candidate book name; may be null.</param>
    /// <param name="book">The matching book, or null when nothing matched.</param>
    /// <returns>true when a book was recognized; otherwise false.</returns>
    public static bool TryParse(string inString, out Book book)
    {
        string potentialBook = StandardizeBookOrdinals(inString);

        // Find the first book where the input string now matches one of the recognized formats.
        book = books.FirstOrDefault(
            b => b.ThompsonAbreviation.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase)
                || b.StandardAbreviation.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase)
                || b.Name.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase));

        if (book != null)
        {
            return true;
        }

        // If we didn't find it, check to see if we just missed it because the abbreviation
        // didn't have a period. Compared case-insensitively, like the first pass, so that
        // "gen" is accepted just as "gen." is.
        book = books.FirstOrDefault(
            b => b.StandardAbreviation.TrimEnd('.')
                  .Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase));

        if (book != null)
        {
            return true;
        }

        // Special Case: check for common misspellings
        string lowercase = potentialBook.ToLowerInvariant();
        commonMisspellings.TryGetValue(lowercase, out book);

        return book != null;
    }

    /// <summary>
    /// Normalizes a candidate book name: trims it, collapses every run of
    /// white space to a single space and rewrites a leading spelled-out or
    /// roman ordinal ("first", "ii", ...) as an arabic digit.
    /// </summary>
    private static string StandardizeBookOrdinals(string str)
    {
        // Break up on white space; empty entries are dropped so that repeated
        // separators do not survive as extra spaces after the join below.
        string[] parts = (str ?? "").Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);

        if (parts.Length == 0)
        {
            return "";
        }

        // If the first part is a roman numeral, or spelled ordinal, convert it to arabic
        switch (parts[0].ToLowerInvariant())
        {
            case "first":
            case "i":
                parts[0] = "1";
                break;

            case "second":
            case "ii":
                parts[0] = "2";
                break;

            case "third":
            case "iii":
                parts[0] = "3";
                break;
        }

        // Recompile the parts into one string that only has a single space separating elements
        return string.Join(" ", parts);
    }

    /// <summary>
    /// Returns a snapshot of all recognized books in creation order.
    /// </summary>
    public static IEnumerable<Book> List()
    {
        return books.ToArray();
    }

    // Looks up a book by its exact Thompson abbreviation; null when there is none.
    private static Book FindByThompson(string abbreviation)
    {
        return books.FirstOrDefault(b => b.ThompsonAbreviation == abbreviation);
    }
}

因此，只要把相应的文字传给 TryParse()，就可以识别任何一卷书。我们甚至能处理常见的拼写错误、罗马数字（I、II、III）与阿拉伯数字（1、2、3）的差异，以及多种缩写风格。您可以根据需要随意调整，但一旦能够识别出书卷，其余部分都是一样的。等您看到下一节中处理引用的 Reference 类时，就会更清楚为什么要列出每卷书的章数。

/// <summary>
/// A parsed scripture reference: a book, a chapter and the verses listed
/// for that chapter.
/// </summary>
public class Reference
{
    // Matches anything that looks like an HTML tag.
    private static readonly Regex RemoveHtml = new Regex("<[^>]*>", RegexOptions.Compiled);

    /// <summary>The book the reference points into.</summary>
    public Book Book { get; set; }

    /// <summary>The chapter number within <see cref="Book"/>.</summary>
    public int Chapter { get; set; }

    /// <summary>
    /// The individual verse numbers, with ranges expanded; empty when the
    /// text named only a chapter.
    /// </summary>
    public int[] Verses { get; set; }

    /// <summary>
    /// Tries to parse text such as "1 John 1:12", "John 3:16-18,20" or
    /// "Jude 5" into a reference. Never throws for malformed input.
    /// </summary>
    /// <param name="text">The candidate reference; may be null or empty.</param>
    /// <param name="reference">The parsed reference, or null on failure.</param>
    /// <returns>true when the text was a valid reference; otherwise false.</returns>
    public static bool TryParse(string text, out Reference reference)
    {
        string errorString;
        reference = InternalParse(text, out errorString);

        if (errorString != null)
        {
            // Verse parsing can fail after the reference object was built,
            // so the error string, not the return value, decides success.
            reference = null;
            return false;
        }

        return true;
    }

    /// <summary>
    /// Splits <paramref name="text"/> into book, chapter and verse parts and
    /// validates each. On failure <paramref name="errorString"/> describes
    /// the problem; it is null on success.
    /// </summary>
    private static Reference InternalParse(string text, out string errorString)
    {
        errorString = null;

        if (string.IsNullOrEmpty(text))
        {
            errorString = "There are no chapter or verses, can't be a reference.";
            return null;
        }

        int colon = text.LastIndexOf(':');
        int chapter;
        string chapterSection = "1";
        string verseSection;

        if (colon > 0)
        {
            verseSection = text.Substring(colon + 1);

            // The chapter is at most three digits long, so look at the (up to)
            // three characters before the colon. Clamped at 0 so that very
            // short inputs such as "a:1" cannot produce a negative index.
            chapter = Math.Max(0, colon - 3);

            chapterSection = text.Substring(chapter, colon - chapter);
            while (chapterSection.Length > 0 && !Char.IsDigit(chapterSection[0]))
            {
                chapter++;
                chapterSection = text.Substring(chapter, colon - chapter);
            }
        }
        else
        {
            chapter = 2;  // skip initial numbers for books
            while (chapter < text.Length && !Char.IsDigit(text[chapter]))
            {
                chapter++;
            }

            // ">=" rather than "==": for text shorter than two characters the
            // index starts beyond the end and never equals the length.
            if (chapter >= text.Length)
            {
                errorString = "There are no chapter or verses, can't be a reference.";
                return null;
            }

            verseSection = text.Substring(chapter);
        }

        // Everything before the chapter digits is the book name.
        Book book;
        if (!Book.TryParse(text.Substring(0, chapter), out book))
        {
            errorString = "There is no book, can't be a reference.";
            return null;
        }

        if (!int.TryParse(chapterSection, out chapter))
        {
            errorString = "Bad chapter format";
            return null;
        }

        Reference reference = new Reference
        {
            Book = book,
            Chapter = chapter
        };

        if (colon < 0 && book.ChapterCount > 1)
        {
            // No colon on a multi-chapter book: the number names a chapter
            // ("John 3"), not a verse.
            if (!int.TryParse(verseSection, out chapter))
            {
                errorString = "Bad chapter format.";
                return null;
            }

            // Apply the same upper bound as the colon form, so "John 99" is rejected.
            if (chapter > book.ChapterCount)
            {
                errorString = "Chapter found was too high";
                return null;
            }

            reference.Chapter = chapter;
            reference.Verses = new int[0];
            return reference;
        }

        if (reference.Chapter > book.ChapterCount)
        {
            errorString = "Chapter found was too high";
            return null;
        }

        reference.Verses = ParseRanges(verseSection, out errorString);

        return reference;
    }

    /// <summary>
    /// Expands a comma-separated list of verses and verse ranges
    /// ("1,3-5") into the individual numbers (1, 3, 4, 5). On any malformed
    /// item an empty array is returned and <paramref name="errorString"/> is set.
    /// </summary>
    private static int[] ParseRanges(string section, out string errorString)
    {
        errorString = null;
        List<int> numbers = new List<int>();
        string[] items = section.Split(',');

        foreach (string verse in items)
        {
            string[] ranges = verse.Split('-');

            // Split always yields at least one element, so only "too many
            // dashes" needs to be checked.
            if (ranges.Length > 2)
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            int start;
            if (!int.TryParse(ranges[0], out start))
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            // A single verse is treated as a range of length one.
            int end = start;
            if (ranges.Length > 1 && !int.TryParse(ranges[1], out end))
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            if (end < start)
            {
                errorString = "invalid range specification";
                return new int[0];
            }

            for (int i = start; i <= end; i++)
            {
                numbers.Add(i);
            }
        }

        return numbers.ToArray();
    }
}

有了以上这些准备，我们现在就可以扫描任意文本来提取圣经引用了。下面这个方法同样位于我的 Reference 类中：

    /// <summary>
    /// Scans free text (HTML tags are stripped first) and returns every
    /// reference found in it. For each position where a book name starts,
    /// the longest run of up to five words that parses as a reference wins.
    /// </summary>
    /// <param name="text">The text to scan; null yields an empty collection.</param>
    /// <returns>The references found, in order of appearance.</returns>
    public static ICollection<Reference> Scan(string text)
    {
        List<Reference> references = new List<Reference>();

        if (text == null)
        {
            return references;
        }

        string[] words = RemoveHtml.Replace(text, "").Split(' ', '(', ')', ';', '\r', '\n', '\t');

        for (int i = 0; i < words.Length; i++)
        {
            string one = words[i];

            // If we are starting with a blank entry, just skip this cycle
            if (string.IsNullOrWhiteSpace(one))
            {
                continue;
            }

            // A word that completes a book name begun on the previous word
            // (the "John" of "1 John") must not start a reference of its own;
            // otherwise "1 John 1:12" would also be reported as "John 1:12".
            Book longer;
            if (i > 0
                && !string.IsNullOrWhiteSpace(words[i - 1])
                && Book.TryParse(string.Join(" ", words[i - 1], one), out longer))
            {
                continue;
            }

            // A book name can span one to three words.
            Book book;
            bool match = false;
            for (int count = 1; count <= 3 && !match && i + count <= words.Length; count++)
            {
                match = Book.TryParse(string.Join(" ", words, i, count), out book);
            }

            if (!match)
            {
                continue;
            }

            // Keep the most inclusive version of the reference: try two to
            // five words and remember the longest candidate that parses.
            Reference found = null;
            for (int count = 2; count <= 5 && i + count <= words.Length; count++)
            {
                Reference check;
                if (TryParse(string.Join(" ", words, i, count), out check))
                {
                    found = check;
                }
            }

            if (found != null && !references.Contains(found))
            {
                references.Add(found);
            }
        }

        return references;
    }

这是实现您需求的最稳健的方式，还能处理您没有考虑到的边界情况。另外还有更多代码用于处理排序、相等性比较，以及把一组引用归并为最小集合（在讲稿中，我们通常是逐节讲解一段经文的，因此在扫描完整篇文字稿之后，可以据此生成覆盖整个范围的引用）。

Answer 2

让当前搜索字符串定义如下：

// The reference to look for, written exactly as it appears in the text.
string searchString = "1 Johannes 1:1";

简单的更改将为您提供预期的结果，即在搜索字符串的开头和结尾添加空格：

// Same reference padded with one space on each side, so that Contains()
// cannot match it inside a longer reference such as "1 Johannes 1:12".
string searchString = " 1 Johannes 1:1 ";

Answer 3

您应该把全文和要搜索的字符串中的所有空格都删除：

// Strings are immutable: Replace() returns a new string and leaves the
// original untouched, so the result has to be assigned back.
searchString = searchString.Replace(" ", string.Empty);
fullText = fullText.Replace(" ", string.Empty);

// True when the space-free search string occurs in the space-free text.
bool foundWithoutSpaces = fullText.Contains(searchString);

或者，如果您想要完全匹配，可以使用正则表达式（Regex）：

// Match the search string only when it is delimited by white space or by the
// start/end of the text. Both pattern pieces are verbatim strings ("\s" is
// not a valid escape in a regular string literal), and the search string is
// escaped so that any regex metacharacters in it are matched literally.
bool contains = Regex.IsMatch(fullText, @"(^|\s)" + Regex.Escape(searchString) + @"(\s|$)");

Answer 4

如果您希望列表按照代码所期望的顺序处理，需要先对句子列表调用 Sort()：

string fulltext = "randomtext 1 Johannes 1:12 randomtext";

// References to look for, entered longest first.
List<string> sentences = new List<string>
{
    "1 Johannes 1:12",
    "1 Johannes 1:1"
};

// Sorting puts "1 Johannes 1:1" ahead of "1 Johannes 1:12", so the longer
// reference is the last one handled by the loop below.
sentences.Sort();

for (int index = 0; index < sentences.Count; index++)
{
    string candidate = sentences[index];
    if (!fulltext.Contains(candidate))
    {
        continue;
    }

    //do operation
    // Try it in a console app to see the order in which the matches are produced.
    Console.WriteLine(candidate);
}

// Keep the console window open until a key is pressed.
Console.Read();

Answer 5

好的，这段全文同时包含了您的两个值，所以您总是得到列表中的最后一个值。如果您想得到第一个匹配的值，可以使用类似下面的代码：

string item1 = "1 Johannes 1:12";
string item2 = "1 Johannes 1:1";
string fullText = "randomtext 1 Johannes 1:12 randomtext";

// Text and candidates are compared with all spaces removed.
string comparedValue = fullText.Replace(" ", string.Empty);

// Stays null when none of the candidates occurs in the text.
string result = null;

// Order matters: the longer reference comes first so that it wins over its
// own prefix "1 Johannes 1:1".
List<string> sentences = new List<string>();
sentences.Add(item1.Replace(" ", string.Empty));
sentences.Add(item2.Replace(" ", string.Empty));

foreach (string item in sentences)
{
    if (comparedValue.Contains(item))
    {
        // Stop at the first match instead of letting later entries overwrite it.
        result = item;
        break;
    }
}

现在您可以使用结果

字符串包含。 C＃

5 个答案: