我需要用 C# 编写一个字符串解析器。该字符串需要按父子(层级)关系进行解析,字符串示例如下:
Water, Bulgur Wheat (29%), Sweetened Dried Cranberries (5%) (Sugar, Cranberries), Sunflower Seeds (3%), Onion (3%), Green Lentils (2%), Palm Oil, Flavourings (contain Barley), Lemon Juice Powder (<2%) (Maltodextrin, Lemon Juice Concentrate), Ground Spices (<2%) (Paprika, Black Pepper, Cinnamon, Coriander, Cumin, Chilli Powder, Cardamom, Pimento, Ginger), Dried Herbs (<2%) (Coriander, Parsley, Mint), Dried Garlic (<2%), Salt, Maltodextrin, Onion Powder (<2%), Cumin Seeds, Dried Lemon Peel (<2%), Acid (Citric Acid)
我知道可以逐个字符地遍历字符串来完成解析,但有没有更简单的方法来获取这些信息?
预期产出: -
答案 0 :(得分:0)
// Converts a flat ingredient string into line-oriented text:
//   '(' starts a new line followed by a tab,
//   ',' starts a new line (a single space directly after the comma is dropped),
//   ')' starts a new line,
//   every other character is copied unchanged.
// Every parenthesis is treated the same way; groups such as "(29%)" that do
// not denote children are not distinguished.
public static string ParseString(string input)
{
    StringBuilder sb = new StringBuilder();
    bool afterComma = false; // true while looking at the character right after a ','
    foreach (char c in input)
    {
        if (afterComma)
        {
            afterComma = false;
            // Only swallow a separator space; any other character (e.g. the
            // 'b' in "a,b") is real content and must be processed normally.
            if (c == ' ')
            {
                continue;
            }
        }

        switch (c)
        {
            case '(':
                sb.Append("\n\t");
                break;
            case ',':
                sb.Append("\n");
                afterComma = true;
                break;
            case ')':
                sb.Append("\n");
                break;
            default:
                sb.Append(c);
                break;
        }
    }
    return sb.ToString();
}
这段代码可以作为一个起点。注意:它无法区分不表示子项的括号(例如百分比“(29%)”),所有括号都会被当作子项列表来处理。
答案 1 :(得分:0)
查看所给的数据(Water, Bulgur……)之后,首先要解决的问题是如何区分/拆分各个项目:1 Water,2 Bulgur Wheat,3 Sweetened Dried Cranberries……
直接按逗号“,”拆分是行不通的,因为某些括号“()”内部也含有逗号,例如 (Sugar, Cranberries)。这些成分(Sugar、Cranberries)是 Sweetened Dried Cranberries 的子项……所以不能简单地用逗号拆分整个字符串。
根据您提供的数据,我会考虑更改其格式以适应这种情况。一个简单的改变是将子组之间的逗号分隔符更改为其他内容...短划线“ - ”可能有效。
下面的正则表达式代码正是这样做的:它把位于左括号和右括号“()”之间的每个逗号“,”替换为短划线“-”。这样之后就可以按逗号拆分,从而识别出每个顶层项目。
// Returns inString with each comma that the pattern locates between an opening
// "(" and its closing ")" replaced by "-", so that a later Split(',') only
// breaks on commas outside parentheses.
// NOTE(review): nested parentheses are presumably not supported - verify.
private static string ReplaceCommaBetweenParens(string inString) {
    Regex commaInsideParens = new Regex(@"(?<=\([^\)]*)+,(?!\()(?=[^\(]*\))");
    return commaInsideParens.Replace(inString, "-");
}
上面的代码并不优雅;它是我从别处找到的,可惜我找不到原作者来注明出处。欢迎各位正则表达式高手对这个模式提出批评。我不确定如何仅用常规字符串方法(Split / IndexOf)来完成这件事,我想那需要好几个步骤。这是正则表达式在某些情况下很有用的一个好例子:它可能很难看,但速度极快。幸运的是,在这一步之后,就基本不再需要上面这种晦涩的正则代码了。
完成此替换后,按需要缩进输出就是一个相当简单的过程了。下面的代码从 DataTable 读取每一行。每一行可能包含一个或多个用逗号“,”分隔的项目。代码循环遍历每一行,解析字符串中的各个项目。我写了一个简单的类来保存这些项目;不过,如果不需要这个类,代码中被注释掉的 Console 输出语句也可以直接打印出正确的结果。希望这会有所帮助。
用于保存单个项目的简单类
// Holds one parsed top-level item: its sequence number, name, optional
// percentage text and the list of its sub-ingredients.
class Ingredient {
    // Values are readable by callers; they are assigned only by the constructor.
    public int ID { get; private set; }
    public string Name { get; private set; }
    public string Percent { get; private set; }
    public List<string> Ingredients { get; private set; }

    // id:          sequence number of the item.
    // name:        item name.
    // pct:         percentage text (may be empty).
    // ingredients: sub-ingredients; null is treated as "no sub-ingredients".
    public Ingredient(int id, string name, string pct, List<string> ingredients) {
        ID = id;
        Name = name;
        Percent = pct;
        // Never keep a null list, so ToString can always enumerate it.
        Ingredients = ingredients ?? new List<string>();
    }

    // Formats the item as "ID<TAB>Name Percent" on the first line, followed by
    // one line per sub-ingredient indented with two tabs. Every line ends with
    // Environment.NewLine.
    public override string ToString() {
        StringBuilder sb = new StringBuilder();
        sb.Append(ID).Append("\t").Append(Name).Append(" ").Append(Percent).AppendLine();
        foreach (string s in Ingredients) {
            sb.Append("\t\t").Append(s).AppendLine();
        }
        return sb.ToString();
    }
}
使用上述类的代码
// Sample ingredient list; Main passes it to ParseString as the input to parse.
static string ingredients = "Water, Bulgur Wheat(29%), Sweetened Dried Cranberries(5%) (Sugar, Cranberries)," +
" Sunflower Seeds(3%), Onion(3%), Green Lentils(2%), Palm Oil, Flavourings (contain Barley)," +
" Lemon Juice Powder(<2%) (Maltodextrin, Lemon Juice Concentrate)," +
" Ground Spices(<2%) (Paprika, Black Pepper, Cinnamon, Coriander, Cumin, Chilli Powder, Cardamom, Pimento, Ginger)," +
" Dried Herbs(<2%) (Coriander, Parsley, Mint), Dried Garlic(<2%), Salt, Maltodextrin, Onion Powder(<2%)," +
" Cumin Seeds, Dried Lemon Peel(<2%), Acid(Citric Acid)";
// Parsed result; assigned in Main from the return value of ParseString.
static List<Ingredient> allIngredients;
// Entry point: parses the sample ingredient string, writes every parsed item
// to the console, then waits for a line of input before exiting.
static void Main(string[] args) {
    allIngredients = ParseString(ingredients);
    for (int i = 0; i < allIngredients.Count; i++) {
        Console.Write(allIngredients[i].ToString());
    }
    Console.ReadLine();
}
// Parses inString into a list of Ingredient objects.
// Commas inside parentheses are first turned into dashes by
// ReplaceCommaBetweenParens, so splitting on ',' yields one entry per
// top-level item. Items are numbered from 1 in order of appearance.
private static List<Ingredient> ParseString(string inString) {
    List<Ingredient> parsed = new List<Ingredient>();
    // Parse the argument that was passed in, not the static sample field.
    string normalized = ReplaceCommaBetweenParens(inString);
    string[] allItems = normalized.Split(',');
    int count = 1;
    foreach (string curItem in allItems) {
        if (curItem.Contains("(")) {
            // Item carries a percentage and/or a sub-ingredient group.
            parsed.Add(ParseItem(curItem, count));
        }
        else {
            // Plain item: name only, no percentage, no sub-ingredients.
            parsed.Add(new Ingredient(count, curItem.Trim(), "", new List<string>()));
            //Console.WriteLine(count + "\t" + curItem.Trim());
        }
        count++;
    }
    return parsed;
}
// Builds an Ingredient from a single top-level item such as
// "Dried Herbs(<2%) (Coriander- Parsley- Mint)".
// item:  the item text; commas inside parentheses are expected to have been
//        replaced by '-' already (GetLastItems splits on '-').
// count: sequence number stored as the ingredient's ID.
// The name is the text before the first '('. A parenthesised group containing
// '%' becomes the percentage text (parentheses included); every other group
// contributes its entries to the sub-ingredient list.
private static Ingredient ParseItem(string item, int count) {
    string pct = "";
    List<string> items = new List<string>();
    int firstParenIndex = item.IndexOf('(');
    if (firstParenIndex < 0) {
        // No parenthesis at all: the whole text is the name.
        return new Ingredient(count, item.Trim(), pct, items);
    }
    //Console.Write(count + "\t" + item.Substring(0, firstParenIndex).Trim());
    Regex expression = new Regex(@"\((.*?)\)");
    foreach (Match match in expression.Matches(item)) {
        string group = match.ToString().Trim();
        if (group.Contains("%")) {
            //Console.WriteLine(" " + group);
            pct = group;
        }
        else {
            // Accumulate, so a second non-percent group does not discard the first.
            items.AddRange(GetLastItems(group));
        }
    }
    return new Ingredient(count, item.Substring(0, firstParenIndex).Trim(), pct, items);
}
// Splits a parenthesised, dash-separated group such as "(Sugar- Cranberries)"
// into its trimmed entries. All '(' and ')' characters are removed before the
// text is split on '-'.
private static List<string> GetLastItems(string inString) {
    string stripped = inString.Replace("(", "").Replace(")", "");
    string[] parts = stripped.Split('-');
    List<string> result = new List<string>();
    for (int i = 0; i < parts.Length; i++) {
        //Console.WriteLine("\t\t" + parts[i].Trim());
        result.Add(parts[i].Trim());
    }
    return result;
}
// Replaces with "-" each comma that the pattern finds between an opening "("
// and its closing ")", leaving commas outside parentheses untouched so the
// caller can split top-level items on ','.
// NOTE(review): nested parentheses are presumably not supported - verify.
private static string ReplaceCommaBetweenParens(string inString) {
    return Regex.Replace(inString, @"(?<=\([^\)]*)+,(?!\()(?=[^\(]*\))", "-");
}