我有一段文本,需要在其中搜索特定的字符串。举例来说,我要搜索的字符串是:"1 Johannes 1:12"。我使用了 String.Contains 方法,但它会返回两个匹配结果,因为我同时还在搜索另一个字符串:"1 Johannes 1:1"。结果是,它没有先匹配 "1 Johannes 1:12" 再匹配 "1 Johannes 1:1",而是反了过来。这对我来说不行,因为我需要的是正确的那一句。
我尝试过不同的办法,包括使用 Substring,但在这个问题上仍然需要帮助。
提前感谢所有回答。干杯!
// Candidate references to look for. Note that the second entry is a prefix of the first.
List<string> sentences = new List<string>
{
    "1 Johannes 1:12",
    "1 Johannes 1:1"
};
string fulltext = "randomtext 1 Johannes 1:12 randomtext";
foreach (string candidate in sentences)
{
    if (!fulltext.Contains(candidate))
    {
        continue;
    }

    // Expected the result to be 1 Johannes 1:12 only, but 1 Johannes 1:1 is reported too,
    // because Contains is a plain substring test and the shorter text is a prefix of the longer.
    // do operation
}
答案 0 :(得分:2)
圣经引用(经文出处)的解析和识别很棘手,尤其是因为存在多种缩写风格,而且数字看起来很相似。你遇到的问题在于,String.Contains() 是一把非常大的锤子,而你需要的是更像一套套筒扳手的工具。换句话说,一个完整而正确的答案所需的代码量,会超出这种问答格式能够轻松容纳的范围。我写过代码来遍历灵修文章和讲道文字记录,并提取其中所有的经文引用。代码位于私有存储库中,但我会尽量把相关部分贴出来。
圣经引用采用以下格式书写:{Book} {Chapter}:{Verse},其中经节范围的写法还有一些变体。所以第一步是识别书卷。为此,我创建了一个类来表示一卷书及其已知的缩写(我支持两种有文献记载的缩写风格)。Book 类看起来像这样:
/// <summary>
/// One book of the Bible together with the names and abbreviations it is recognized by.
/// Instances are created only by the static constructor; obtain them through
/// <see cref="TryParse"/> or <see cref="List"/>.
/// </summary>
public class Book
{
    // The set of books we recognize
    private static readonly List<Book> books;

    // Lower-cased misspelling -> the book that was meant
    private static readonly Dictionary<string, Book> commonMisspellings;

    // Number of books constructed so far; gives each new book its position in the list.
    private static int numCreated = 0;

    // Zero-based creation index. Not read by any member of this class.
    private int order;

    static Book()
    {
        // Initialize the set
        books = new List<Book>{
            // Old Testament
            new Book("Genesis", "Gen.", "Ge", 50),                // Gen
            new Book("Exodus", "Ex.", "Ex", 40),                  // Exod
            new Book("Leviticus", "Lev.", "Le", 27),              // Lev
            new Book("Numbers", "Num.", "Nu", 36),                // Num
            new Book("Deuteronomy", "Deut.", "De", 34),           // Deut
            new Book("Joshua", "Josh.", "Jos", 24),               // Josh
            new Book("Judges", "Judg.", "Jud", 21),               // Judg
            new Book("Ruth", "Ruth", "Ru", 4),                    // Ruth
            new Book("1 Samuel", "1 Sam.", "1 S", 31),            // 1Sam
            new Book("2 Samuel", "2 Sam.", "2 S", 24),            // 2Sam
            new Book("1 Kings", "1 Kings", "1 K", 22),            // 1Kgs
            new Book("2 Kings", "2 Kings", "2 K", 25),            // 2Kgs
            new Book("1 Chronicles", "1 Chron.", "1 Chr", 29),    // 1Chr
            new Book("2 Chronicles", "2 Chron.", "2 Chr", 36),    // 2Chr
            new Book("Ezra", "Ezra", "Ezr", 10),                  // Ezra
            new Book("Nehemiah", "Neh.", "Ne", 13),               // Neh
            new Book("Esther", "Est.", "Est", 10),                // Esth
            new Book("Job", "Job", "Jb", 42),                     // Job
            new Book("Psalms", "Ps.", "Ps", 150),                 // Ps
            new Book("Proverbs", "Prov.", "Pr", 31),              // Prov
            new Book("Ecclesiastes", "Eccl.", "Ec", 12),          // Eccl
            new Book("Song of Solomon", "Song", "Song", 8),       // Song
            new Book("Isaiah", "Isa.", "Is", 66),                 // Isa
            new Book("Jeremiah", "Jer.", "Je", 52),               // Jer
            new Book("Lamentations", "Lam.", "Lam", 5),           // Lam
            new Book("Ezekiel", "Ezek.", "Ez", 48),               // Ezek
            new Book("Daniel", "Dan.", "Da", 12),                 // Dan
            new Book("Hosea", "Hos.", "Ho", 14),                  // Hos
            new Book("Joel", "Joel", "Joel", 3),                  // Joel
            new Book("Amos", "Amos", "Am", 9),                    // Amos
            new Book("Obadiah", "Obad.", "Obad", 1),              // Obad
            new Book("Jonah", "Jonah", "Jona", 4),                // Jonah
            new Book("Micah", "Mic.", "Mi", 7),                   // Mic
            new Book("Nahum", "Nah.", "Na", 3),                   // Nah
            new Book("Habakkuk", "Hab.", "Hab", 3),               // Hab
            new Book("Zephaniah", "Zeph.", "Zep", 3),             // Zeph
            new Book("Haggai", "Hag.", "Hag", 2),                 // Hag
            new Book("Zechariah", "Zech.", "Zec", 14),            // Zech
            new Book("Malachi", "Mal.", "Mal", 4),                // Mal
            // New Testament
            new Book("Matthew", "Matt.", "Mt", 28),               // Matt
            new Book("Mark", "Mark", "Mk", 16),                   // Mark
            new Book("Luke", "Luke", "Lu", 24),                   // Luke
            new Book("John", "John", "Jn", 21),                   // John
            new Book("Acts", "Acts", "Ac", 28),                   // Acts
            new Book("Romans", "Rom.", "Ro", 16),                 // Rom
            new Book("1 Corinthians", "1 Cor.", "1 Co", 16),      // 1Cor
            new Book("2 Corinthians", "2 Cor.", "2 Co", 13),      // 2Cor
            new Book("Galatians", "Gal.", "Ga", 6),               // Gal
            new Book("Ephesians", "Eph.", "Ep", 6),               // Eph
            new Book("Philippians", "Phil.", "Ph", 4),            // Phil
            new Book("Colossians", "Col.", "Col", 4),             // Col
            new Book("1 Thessalonians", "1 Thes.", "1 Th", 5),    // 1Thess
            new Book("2 Thessalonians", "2 Thes.", "2 Th", 3),    // 2Thess
            new Book("1 Timothy", "1 Tim.", "1 Ti", 6),           // 1Tim
            new Book("2 Timothy", "2 Tim.", "2 Ti", 4),           // 2Tim
            new Book("Titus", "Titus", "Tit", 3),                 // Titus
            new Book("Philemon", "Philem.", "Phm", 1),            // Phlm
            new Book("Hebrews", "Heb.", "He", 13),                // Heb
            new Book("James", "James", "Ja", 5),                  // Jas
            new Book("1 Peter", "1 Peter", "1 Pe", 5),            // 1Pet
            new Book("2 Peter", "2 Peter", "2 Pe", 3),            // 2Pet
            new Book("1 John", "1 John", "1 Jn", 5),              // 1John
            new Book("2 John", "2 John", "2 Jn", 1),              // 2John
            new Book("3 John", "3 John", "3 Jn", 1),              // 3John
            new Book("Jude", "Jude", "Jude", 1),                  // Jude
            new Book("Revelation", "Rev.", "Re", 22)              // Rev
        };
        Debug.Assert(books.Count == 66);

        // These are based on what I found in the set of over 6,000
        // transcripts that people typed.
        commonMisspellings = new Dictionary<string, Book>();
        commonMisspellings.Add("song of songs", books.FirstOrDefault(b => b.ThompsonAbreviation == "Song"));
        commonMisspellings.Add("psalm", books.FirstOrDefault(b => b.ThompsonAbreviation == "Ps"));
        commonMisspellings.Add("like", books.FirstOrDefault(b => b.ThompsonAbreviation == "Lu"));
        commonMisspellings.Add("jerimiah", books.FirstOrDefault(b => b.ThompsonAbreviation == "Je"));
        commonMisspellings.Add("galations", books.FirstOrDefault(b => b.ThompsonAbreviation == "Ga"));
        // Earlier versions of this table spelled these two names this way; keep accepting them.
        commonMisspellings.Add("obadaiah", books.FirstOrDefault(b => b.ThompsonAbreviation == "Obad"));
        commonMisspellings.Add("malachai", books.FirstOrDefault(b => b.ThompsonAbreviation == "Mal"));
    }

    private Book(string fullName, string abbrev, string thompson, int chapters)
    {
        order = numCreated;
        Name = fullName;
        StandardAbreviation = abbrev;
        ThompsonAbreviation = thompson;
        ChapterCount = chapters;
        numCreated++;
    }

    /// <summary>
    /// The unabbreviated name of the book.
    /// </summary>
    public string Name { get; private set; }

    /// <summary>
    /// Standard abbreviations as defined in "The Christian Writer's
    /// Manual of Style", 2004 edition (ISBN: 9780310487715).
    /// </summary>
    public string StandardAbreviation { get; private set; }

    /// <summary>
    /// Thompson Chain references, pulled from the 5th edition.
    /// </summary>
    public string ThompsonAbreviation { get; private set; }

    /// <summary>
    /// The number of chapters in the book.
    /// </summary>
    public int ChapterCount { get; private set; }

    /// <summary>
    /// Tries to recognize <paramref name="inString"/> as a book name. Matching ignores case
    /// and accepts the full name, either abbreviation style, the standard abbreviation
    /// without its trailing period, a leading ordinal written as a word or roman numeral
    /// ("First", "II"), and a few known misspellings.
    /// </summary>
    /// <param name="inString">Candidate book name; null is treated as empty.</param>
    /// <param name="book">The recognized book, or null when nothing matched.</param>
    /// <returns>True when a book was recognized.</returns>
    public static bool TryParse(string inString, out Book book)
    {
        string potentialBook = StandardizeBookOrdinals(inString);

        // Find the first book where the input string now matches one of the recognized formats.
        book = books.FirstOrDefault(
            b => b.ThompsonAbreviation.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase)
              || b.StandardAbreviation.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase)
              || b.Name.Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase));
        if (book != null)
        {
            return true;
        }

        // If we didn't find it, check to see if we just missed it because the abbreviation
        // didn't have a period. Compared case-insensitively, like every other form above.
        book = books.FirstOrDefault(
            b => b.StandardAbreviation.TrimEnd('.')
                  .Equals(potentialBook, StringComparison.InvariantCultureIgnoreCase));
        if (book != null)
        {
            return true;
        }

        // Special Case: check for common misspellings
        string lowercase = potentialBook.ToLowerInvariant();
        commonMisspellings.TryGetValue(lowercase, out book);
        return book != null;
    }

    /// <summary>
    /// Normalizes a candidate book name: collapses every run of whitespace to a single
    /// space and rewrites a leading "first"/"i", "second"/"ii" or "third"/"iii" as
    /// "1", "2" or "3". Returns an empty string for null or blank input.
    /// </summary>
    private static string StandardizeBookOrdinals(string str)
    {
        // Break up on white space, dropping the empty entries that repeated separators
        // produce so the re-joined text really has single spaces only.
        string[] parts = (str ?? "").Trim().Split(
            new[] { ' ', '\r', '\n', '\t' }, StringSplitOptions.RemoveEmptyEntries);
        if (parts.Length == 0)
        {
            return "";
        }

        // If the first part is a roman numeral, or spelled ordinal, convert it to arabic
        switch (parts[0].ToLowerInvariant())
        {
            case "first":
            case "i":
                parts[0] = "1";
                break;
            case "second":
            case "ii":
                parts[0] = "2";
                break;
            case "third":
            case "iii":
                parts[0] = "3";
                break;
        }

        // Recompile the parts into one string that only has a single space separating elements
        return string.Join(" ", parts);
    }

    /// <summary>
    /// Returns a copy of the recognized books, in the order they were declared.
    /// </summary>
    public static IEnumerable<Book> List()
    {
        return books.ToArray();
    }
}
因此,只要把文本交给 TryParse(),就可以识别任何书卷。我们还处理了常见的拼写错误、罗马数字(I,II,III)与阿拉伯数字(1,2,3)的互换,以及多种缩写风格。你可以按需随意修改,但一旦能够识别出书卷,其余部分的处理都是一样的。接下来请看处理引用的 Reference 类:
/// <summary>
/// A parsed Bible reference: a book, a chapter and the verses cited within that chapter.
/// </summary>
public class Reference
{
    // Matches an HTML tag: everything from '<' up to the next '>'.
    private static readonly Regex RemoveHtml = new Regex("<[^>]*>", RegexOptions.Compiled);

    // Largest verse number accepted. Psalm 119, the longest chapter, has 176 verses;
    // the cap also keeps an absurd range such as "1-2147483647" from being expanded.
    private const int MaxVerseNumber = 176;

    /// <summary>The book being referenced.</summary>
    public Book Book { get; set; }

    /// <summary>The chapter number within <see cref="Book"/>.</summary>
    public int Chapter { get; set; }

    /// <summary>
    /// The individual verse numbers cited, with ranges expanded. Empty when the
    /// reference names a whole chapter.
    /// </summary>
    public int[] Verses { get; set; }

    /// <summary>
    /// Tries to parse text of the form "{Book} {Chapter}:{Verses}" or "{Book} {Chapter}",
    /// where verses may be a comma separated list of numbers and "a-b" ranges.
    /// </summary>
    /// <param name="text">The text to parse; null or empty never parses.</param>
    /// <param name="reference">The parsed reference, or null on failure.</param>
    /// <returns>True when the text was a valid reference.</returns>
    public static bool TryParse(string text, out Reference reference)
    {
        string errorString;
        reference = InternalParse(text, out errorString);
        if (errorString != null)
        {
            reference = null;
            return false;
        }
        return true;
    }

    /// <summary>
    /// Does the actual parsing. On failure <paramref name="errorString"/> describes the
    /// problem; the return value must then be ignored (it may or may not be null).
    /// </summary>
    private static Reference InternalParse(string text, out string errorString)
    {
        errorString = null;
        if (string.IsNullOrEmpty(text))
        {
            errorString = "There are no chapter or verses, can't be a reference.";
            return null;
        }

        int colon = text.LastIndexOf(':');
        int chapter;
        string chapterSection = "1";
        string verseSection;
        if (colon > 0)
        {
            verseSection = text.Substring(colon + 1);

            // The chapter is the run of (at most three) digits that ends at the colon.
            // Start three characters back, but never before the start of the text.
            chapter = Math.Max(0, colon - 3);
            chapterSection = text.Substring(chapter, colon - chapter);
            while (!string.IsNullOrEmpty(chapterSection) && !Char.IsDigit(chapterSection[0]))
            {
                chapter++;
                chapterSection = text.Substring(chapter, colon - chapter);
            }
        }
        else
        {
            chapter = 2; // skip initial numbers for books
            while (chapter < text.Length && !Char.IsDigit(text[chapter]))
            {
                chapter++;
            }

            // ">=" rather than "==": for text shorter than two characters the index
            // starts out beyond the end of the text.
            if (chapter >= text.Length)
            {
                errorString = "There are no chapter or verses, can't be a reference.";
                return null;
            }
            verseSection = text.Substring(chapter);
        }

        // Everything before the chapter digits has to be a recognizable book.
        Book book;
        if (!Book.TryParse(text.Substring(0, chapter), out book))
        {
            errorString = "There is no book, can't be a reference.";
            return null;
        }
        if (!int.TryParse(chapterSection, out chapter))
        {
            errorString = "Bad chapter format";
            return null;
        }

        Reference reference = new Reference
        {
            Book = book,
            Chapter = chapter
        };

        // No colon on a multi-chapter book: the number is a chapter, not a verse.
        bool chapterOnly = colon < 0 && book.ChapterCount > 1;
        if (chapterOnly)
        {
            if (!int.TryParse(verseSection, out chapter))
            {
                errorString = "Bad chapter format.";
                return null;
            }
            reference.Chapter = chapter;
        }

        // Validate the chapter on both paths (with and without verses).
        if (reference.Chapter > book.ChapterCount)
        {
            errorString = "Chapter found was too high";
            return null;
        }
        if (reference.Chapter < 1)
        {
            errorString = "Chapter found was too low";
            return null;
        }

        if (chapterOnly)
        {
            reference.Verses = new int[0];
            return reference;
        }

        reference.Verses = ParseRanges(verseSection, out errorString);
        return reference;
    }

    /// <summary>
    /// Expands a verse list such as "1,3-5" into { 1, 3, 4, 5 }. On any malformed item,
    /// sets <paramref name="errorString"/> and returns an empty array.
    /// </summary>
    private static int[] ParseRanges(string section, out string errorString)
    {
        errorString = null;
        List<int> numbers = new List<int>();
        foreach (string verse in section.Split(','))
        {
            string[] ranges = verse.Split('-');
            if (ranges.Length > 2 || ranges.Length == 0)
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            int start;
            if (!int.TryParse(ranges[0], out start))
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            int end = start;
            if (ranges.Length > 1 && !int.TryParse(ranges[1], out end))
            {
                errorString = "Invalid range specification";
                return new int[0];
            }

            // Reject reversed ranges and numbers no chapter can contain. Without the
            // upper bound, an end of int.MaxValue would overflow the loop counter below.
            if (end < start || end > MaxVerseNumber)
            {
                errorString = "invalid range specification";
                return new int[0];
            }

            for (int i = start; i <= end; i++)
            {
                numbers.Add(i);
            }
        }
        return numbers.ToArray();
    }
}
有了以上这些准备,我们现在就可以扫描任意文本来提取圣经引用。这个方法同样位于我的 Reference 类中:
/// <summary>
/// Scans free text (HTML tags are stripped first) and returns the Bible references found
/// in it, in order of first appearance. Each reference may span up to five words; the
/// longest phrase that parses wins, and the words it covers are not scanned again, so
/// "1 John 1:12" is not reported a second time as "John 1:12".
/// </summary>
/// <param name="text">The text to scan; null yields an empty collection.</param>
/// <returns>The references found; never null.</returns>
public static ICollection<Reference> Scan(string text)
{
    List<Reference> references = new List<Reference>();
    if (text == null)
    {
        return references;
    }

    // Drop empty entries so repeated separators do not use up slots in the word window.
    string[] words = RemoveHtml.Replace(text, "").Split(
        new[] { ' ', '(', ')', ';', '\r', '\n', '\t' },
        StringSplitOptions.RemoveEmptyEntries);

    for (int i = 0; i < words.Length; i++)
    {
        // If we are starting with a blank entry, just skip this cycle
        if (string.IsNullOrWhiteSpace(words[i]))
        {
            continue;
        }

        // phrases[k] holds the k + 1 words starting at i, joined by single spaces.
        int available = Math.Min(5, words.Length - i);
        string[] phrases = new string[available];
        phrases[0] = words[i];
        for (int k = 1; k < available; k++)
        {
            phrases[k] = phrases[k - 1] + " " + words[i + k];
        }

        // A book name is one to three words long (e.g. "Song of Solomon").
        bool match = false;
        for (int k = 0; k < available && k < 3 && !match; k++)
        {
            Book book;
            match = Book.TryParse(phrases[k], out book);
        }
        if (!match)
        {
            continue;
        }

        // Keep the most inclusive version of the reference. A reference needs at
        // least two words (book + numbers), so start from the two-word phrase.
        Reference found = null;
        int wordsUsed = 0;
        for (int k = 1; k < available; k++)
        {
            Reference check;
            if (TryParse(phrases[k], out check))
            {
                found = check;
                wordsUsed = k + 1;
            }
        }

        if (found != null)
        {
            // NOTE(review): Contains relies on Reference equality; unless Reference
            // defines value equality elsewhere, only the same instance counts as a duplicate.
            if (!references.Contains(found))
            {
                references.Add(found);
            }

            // Continue after the words this reference covered.
            i += wordsUsed - 1;
        }
    }
    return references;
}
这是实现你需求的最稳健的方法,并且能处理你尚未考虑到的边界情况。此外还有更多代码用于处理排序,相等性比较,以及把一组引用归并为最小的集合(在讲道文字记录中,我们通常是逐节讲解一段经文的,因此在扫描完整篇记录之后,这样做可以把这些引用合并成一个完整的范围)。
答案 1 :(得分:0)
让当前搜索字符串定义如下:
// The reference being searched for, exactly as written.
string searchString = "1 Johannes 1:1";
简单的更改将为您提供预期的结果,即在搜索字符串的开头和结尾添加空格:
// Padded with one space on each side so a longer reference such as "1 Johannes 1:12"
// is no longer a match; the text being searched must have spaces around the reference.
string searchString = " 1 Johannes 1:1 ";
答案 2 :(得分:0)
您应该把被搜索的文本和要搜索的字符串中的所有空格都删除:
// Strings are immutable: Replace returns a new string, so the result has to be
// assigned back or the call has no effect.
searchString = searchString.Replace(" ", string.Empty);
fullText = fullText.Replace(" ", string.Empty);
bool containsIgnoringSpaces = fullText.Contains(searchString);
或者,如果您想要完全匹配,可以使用正则表达式(Regex):
// Match searchString only when it is delimited by whitespace or the start/end of the text.
// Both pattern pieces are verbatim strings ("\s" is not a valid escape in a regular string
// literal), and Regex.Escape keeps characters in searchString from being read as regex syntax.
bool contains = Regex.IsMatch(fullText, @"(^|\s)" + Regex.Escape(searchString) + @"(\s|$)");
答案 3 :(得分:0)
如果您希望按照代码中期望的顺序得到结果,需要先对 sentences 列表进行排序(Sort()):
List<string> sentences = new List<string>();
sentences.Add("1 Johannes 1:12");
sentences.Add("1 Johannes 1:1");
string fulltext = "randomtext 1 Johannes 1:12 randomtext";

// Try the longest candidates first. The default Sort() would put "1 Johannes 1:1" ahead
// of "1 Johannes 1:12" (a prefix sorts first), and it would match before the intended text.
sentences.Sort((a, b) => b.Length.CompareTo(a.Length));
foreach (string item in sentences)
{
    if (fulltext.Contains(item))
    {
        // The first hit is the most specific candidate: 1 Johannes 1:12
        // do operation
        Console.WriteLine(item);

        // Stop here so shorter candidates contained in this one are not reported too.
        break;
    }
}
Console.Read(); // keep the console window open until a key is pressed
答案 4 :(得分:0)
好的,这段全文同时包含了你的两个值,所以你总是会得到列表中最后一个匹配的值。如果你想得到第一个匹配的值,可以这样写:
string item1 = "1 Johannes 1:12";
string item2 = "1 Johannes 1:1";
string fullText = "randomtext 1 Johannes 1:12 randomtext";

// All comparisons are done with the spaces removed on both sides.
string comparedValue = fullText.Replace(" ", string.Empty);

// Stays null when none of the candidates is found.
string result = null;

// Order matters: the more specific candidate is listed (and therefore tested) first.
List<string> sentences = new List<string>();
sentences.Add(item1.Replace(" ", string.Empty));
sentences.Add(item2.Replace(" ", string.Empty));

foreach (string item in sentences)
{
    if (comparedValue.Contains(item))
    {
        // First match wins. Note that the value stored is the space-stripped form.
        result = item;
        break;
    }
}
现在您可以使用 result 中的结果了。