假设我有string
喜欢这个,左边的部分是单词,右边部分是索引(单个或范围)的集合,用于在我的单词中引用kanjis的假名(语音):
string myString = "子で子にならぬ時鳥,0:こ;2:こ;7-8:ほととぎす"
详细模式:
word,<startIndex>(-<endIndex>):<furigana>
实现这样的事情的最佳方法是什么(在汉字面前留出一个空格来标记哪个部分与[假名]相关联):
子[こ]で 子[こ]にならぬ 時鳥[ほととぎす]
修改 :(感谢您的评论)
这是我到目前为止所写的内容:
static void Main(string[] args)
{
string myString = "ABCDEF,1:test;3:test2";
//Split Kanjis / Indices
string[] tokens = myString.Split(',');
//Extract furigana indices
string[] indices = tokens[1].Split(';');
//Dictionnary to store furigana indices
Dictionary<string, string> furiganaIndices = new Dictionary<string, string>();
//Collect
foreach (string index in indices)
{
string[] splitIndex = index.Split(':');
furiganaIndices.Add(splitIndex[0], splitIndex[1]);
}
//Processing
string result = tokens[0] + ",";
for (int i = 0; i < tokens[0].Length; i++)
{
string currentIndex = i.ToString();
if (furiganaIndices.ContainsKey(currentIndex)) //add [furigana]
{
string currentFurigana = furiganaIndices[currentIndex].ToString();
result = result + " " + tokens[0].ElementAt(i) + string.Format("[{0}]", currentFurigana);
}
else //nothing to add
{
result = result + tokens[0].ElementAt(i);
}
}
File.AppendAllText(@"D:\test.txt", result + Environment.NewLine);
}
结果:
ABCDEF,A B[test]C D[test2]EF
我很难找到处理远程索引的方法:
string myString = "ABCDEF,1:test;2-3:test2";
Result : ABCDEF,A B[test] CD[test2]EF
答案 0 :(得分:1)
这应该做(甚至处理远程索引),基于你输入字符串的格式 -
using System;
using System.Collections.Generic;
public class stringParser
{
private struct IndexElements
{
public int start;
public int end;
public string value;
}
public static void Main()
{
//input string
string myString = "子で子にならぬ時鳥,0:こ;2:こ;7-8:ほととぎす";
int wordIndexSplit = myString.IndexOf(',');
string word = myString.Substring(0,wordIndexSplit);
string indices = myString.Substring(wordIndexSplit + 1);
string[] eachIndex = indices.Split(';');
Dictionary<int,IndexElements> index = new Dictionary<int,IndexElements>();
string[] elements;
IndexElements e;
int dash;
int n = 0;
int last = -1;
string results = "";
foreach (string s in eachIndex)
{
e = new IndexElements();
elements = s.Split(':');
if (elements[0].Contains("-"))
{
dash = elements[0].IndexOf('-');
e.start = int.Parse(elements[0].Substring(0,dash));
e.end = int.Parse(elements[0].Substring(dash + 1));
}
else
{
e.start = int.Parse(elements[0]);
e.end = e.start;
}
e.value = elements[1];
index.Add(n,e);
n++;
}
//this is the part that takes the "setup" from the parts above and forms the result string
//loop through each of the "indices" parsed above
for (int i = 0; i < index.Count; i++)
{
//if this is the first iteration through the loop, and the first "index" does not start
//at position 0, add the beginning characters before its start
if (last == -1 && index[i].start > 0)
{
results += word.Substring(0,index[i].start);
}
//if this is not the first iteration through the loop, and the previous iteration did
//not stop at the position directly before the start of the current iteration, add
//the intermediary chracters
else if (last != -1 && last + 1 != index[i].start)
{
results += word.Substring(last + 1,index[i].start - (last + 1));
}
//add the space before the "index" match, the actual match, and then the formatted "index"
results += " " + word.Substring(index[i].start,(index[i].end - index[i].start) + 1)
+ "[" + index[i].value + "]";
//remember the position of the ending for the next iteration
last = index[i].end;
}
//if the last "index" did not stop at the end of the input string, add the remaining characters
if (index[index.Keys.Count - 1].end + 1 < word.Length)
{
results += word.Substring(index[index.Keys.Count-1].end + 1);
}
//trimming spaces that may be left behind
results = results.Trim();
Console.WriteLine("INPUT - " + myString);
Console.WriteLine("OUTPUT - " + results);
Console.Read();
}
}
input - 子で子にならぬ時鳥,0:こ;2:こ;7-8:ほととぎす
output - 子[こ]で 子[こ]にならぬ 時鳥[ほととぎす]
请注意,如果您想使用英语,这也适用于英文字母 -
input - iliketocodeverymuch,2:A;4-6:B;9-12:CDEFG
output - il i[A]k eto[B]co deve[CDEFG]rymuch
答案 1 :(得分:1)
我没有任何反对本身手动操作字符串的内容。但鉴于您似乎有一个描述输入的常规模式,在我看来,使用正则表达式的解决方案将更易于维护和读取。所以考虑到这一点,这是一个采用这种方法的示例程序:
class Program
{
private const string _kinvalidFormatException = "Invalid format for edit specification";
private static readonly Regex
regex1 = new Regex(@"(?<word>[^,]+),(?<edit>(?:\d+)(?:-(?:\d+))?:(?:[^;]+);?)+", RegexOptions.Compiled),
regex2 = new Regex(@"(?<start>\d+)(?:-(?<end>\d+))?:(?<furigana>[^;]+);?", RegexOptions.Compiled);
static void Main(string[] args)
{
string myString = "子で子にならぬ時鳥,0:こ;2:こ;7-8:ほととぎす";
string result = EditString(myString);
}
private static string EditString(string myString)
{
Match editsMatch = regex1.Match(myString);
if (!editsMatch.Success)
{
throw new ArgumentException(_kinvalidFormatException);
}
int ichCur = 0;
string input = editsMatch.Groups["word"].Value;
StringBuilder text = new StringBuilder();
foreach (Capture capture in editsMatch.Groups["edit"].Captures)
{
Match oneEditMatch = regex2.Match(capture.Value);
if (!oneEditMatch.Success)
{
throw new ArgumentException(_kinvalidFormatException);
}
int start, end;
if (!int.TryParse(oneEditMatch.Groups["start"].Value, out start))
{
throw new ArgumentException(_kinvalidFormatException);
}
Group endGroup = oneEditMatch.Groups["end"];
if (endGroup.Success)
{
if (!int.TryParse(endGroup.Value, out end))
{
throw new ArgumentException(_kinvalidFormatException);
}
}
else
{
end = start;
}
text.Append(input.Substring(ichCur, start - ichCur));
if (text.Length > 0)
{
text.Append(' ');
}
ichCur = end + 1;
text.Append(input.Substring(start, ichCur - start));
text.Append(string.Format("[{0}]", oneEditMatch.Groups["furigana"]));
}
if (ichCur < input.Length)
{
text.Append(input.Substring(ichCur));
}
return text.ToString();
}
}
注意:
Regex
类实际上有Replace()
方法,允许完全自定义。上述内容可以通过这种方式实现,使用Replace()
和MatchEvaluator
来提供替换文本,而不是仅仅将文本附加到StringBuilder
。如果您需要更灵活的实施选项(即,如果结果的确切格式可能有所不同),那么以哪种方式执行此操作主要是首选问题,但MatchEvaluator
可能更受欢迎。StringBuilder
而不是简单地连接到results
变量。对于短字符串来说,这并不重要,但是当你有一个循环逐渐添加到字符串值时,你应该养成总是使用StringBuilder
的习惯,因为对于长字符串使用串联的性能影响可能非常消极。