示例输入是(注意这是一行字符串,我在这里使用引号使其更具可读性):
Level 1/129/1 Murray Ave& 15A& B ERICA AVENUE& 12 HARVEY STREET& 34 VICTORIA STREET& 3/56 ST LEONARDS ST,MOSMAN PARK(约翰)。 10分之78 WELLINGTON ST MOSMAN PARK(兰博)
我目前的输出是:
1/129/1 - Murray - Ave -
15A - - -
B - ERICA - AVENUE -
12 - HARVEY - STREET -
34 - VICTORIA - STREET -
3/56 - ST LEONARDS - ST - MOSMAN PARK
78/10 - WELLINGTON - ST - MOSMAN PARK
所需的输出是:
1/129/1 - Murray - Ave -
15A - ERICA - AVENUE -
15B - ERICA - AVENUE -
12 - HARVEY - STREET -
34 - VICTORIA - STREET -
3/56 - ST LEONARDS - ST - MOSMAN PARK
78/10 - WELLINGTON - ST - MOSMAN PARK
如果第一个属性只包含该数字,它应该继承下一个记录中的信息,如果下一个记录号只包含一个字母,它向后显示继承前一个记录的编号,例如:
15A - Erica - Avenue
15B - Erica - Avenue
这为我提供了所需的上述输出,我该如何归档?
这是我的代码(注意:后缀是List<string>
):
static void Main(string[] args)
{
List<ResultData> result = new List<ResultData>();
string myColumn = "Level 1/129/1 Murray Ave & 15A&B ERICA AVENUE & 12 HARVEY STREET & 34 VICTORIA STREET & 3/56 ST LEONARDS ST, MOSMAN PARK ( John). 78/10 WELLINGTON ST MOSMAN PARK (Rambo)";
// dot replaced with & as they are to be split
myColumn = myColumn.Replace('.', '&');
// I don't need the Level word which means
// each property starts with numbers now
myColumn = myColumn.Replace("Level", "");
// Removes anything in between parentheses and the parentheses
myColumn = RemoveBetween(myColumn, '(', ')');
string[] splitResult = myColumn.Split('&');
foreach (string item in splitResult)
{
string property = item.Trim();
if (property.IndexOf(' ') > 0)
{
string area = string.Empty;
string locationType = string.Empty;
string number = property.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).First();
property = property.Replace(number, "").Trim();
// When comma is present, area is always the last
// and locationType always before it
if (property.IndexOf(',') > 0)
{
area = property.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries).Last().Trim();
property = property.Replace(area, "").Replace(",", "").Trim();
locationType = property.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Last().Trim();
property = property.Replace(" " + locationType, "").Trim();
}
else
{
// When comma is not present I have to check
// if the string contains a given street suffix
// and pick up from there
string found = suffixes.Find(x => property.Trim().Contains(" " + x, StringComparison.OrdinalIgnoreCase));
if (!string.IsNullOrEmpty(found))
found = " " + found;
// need the space otherwise it will delete
// places like ST LEONARD.
locationType = property.Substring(property.ToLower().IndexOf(found.ToLower()), found.Length).Trim();
int total = property.ToLower().IndexOf(found.ToLower()) + found.Length;
if (property.ToLower().IndexOf(found.ToLower()) > 0 && total < property.Length)
area = property.Substring(total, property.Length - total).Trim();
property = property.Replace(",", "").Trim().Replace(locationType, "").Trim();
if (!string.IsNullOrEmpty(area))
property = property.Replace(area, "").Trim();
}
string name = property;
result.Add(new ResultData() { Number = number, Name = name, LocationType = locationType, Area = area });
}
else
{
result.Add(new ResultData() { Number = property });
}
}
string save = string.Empty;
foreach (ResultData item in result)
{
Console.WriteLine(item.Number + " - " + item.Name + " - " + item.LocationType + " - " + item.Area);
save += item.Number + " - " + item.Name + " - " + item.LocationType + " - " + item.Area + Environment.NewLine;
}
System.IO.File.WriteAllLines(@"save.txt", save.Split(new string[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries));
Console.WriteLine(Environment.NewLine + "Press any key to leave...");
Console.ReadKey();
}
/// <summary>
/// Remove from the string the pattern and what is in between it
/// more format double space to single
/// </summary>
static string RemoveBetween(string s, char begin, char end)
{
Regex regex = new Regex(string.Format("\\{0}.*?\\{1}", begin, end));
return new Regex(" +").Replace(regex.Replace(s, string.Empty), " ");
}
public static bool Contains(this string source, string toCheck, StringComparison comp)
{
return source.IndexOf(toCheck, comp) >= 0;
}
如果您知道我可以采取哪些措施来改进上述代码,我也很乐意知道,请写下评论。
答案 0 :(得分:0)
此代码应该完成这项工作:
string prevString = "";
string[] splitResult = myColumn.Split('&');
foreach (string item in splitResult)
{
string property = item.Trim();
if (property.IndexOf(' ') > 0)
{
string area = string.Empty;
string locationType = string.Empty;
string number = property.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).First();
property = property.Replace(number, "").Trim();
// When comma is present, area is always the last
// and locationType always before it
if (property.IndexOf(',') > 0)
{
area = property.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries).Last().Trim();
property = property.Replace(area, "").Replace(",", "").Trim();
locationType = property.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries).Last().Trim();
property = property.Replace(" " + locationType, "").Trim();
}
else
{
// When comma is not present I have to check
// if the string contains a given street suffix
// and pick up from there
string found = suffixes.Find(x => property.Trim().Contains(" " + x, StringComparison.OrdinalIgnoreCase));
if (!string.IsNullOrEmpty(found))
found = " " + found;
// need the space otherwise it will delete
// places like ST LEONARD.
locationType = property.Substring(property.ToLower().IndexOf(found.ToLower()), found.Length).Trim();
int total = property.ToLower().IndexOf(found.ToLower()) + found.Length;
if (property.ToLower().IndexOf(found.ToLower()) > 0 && total < property.Length)
area = property.Substring(total, property.Length - total).Trim();
property = property.Replace(",", "").Trim().Replace(locationType, "").Trim();
if (!string.IsNullOrEmpty(area))
property = property.Replace(area, "").Trim();
}
string name = property;
if (prevString != "")
{
result.Add(new ResultData() { Number = prevString, Name = name, LocationType = locationType, Area = area });
string numbersFromString = new String(number.Where(x => x >= '0' && x <= '9').ToArray());
if (numbersFromString == "")
{
string numbersFromString2 = new String(prevString.Where(x => x >= '0' && x <= '9').ToArray());
result.Add(new ResultData() { Number = (int)numbersFromString2 + number, Name = name, LocationType = locationType, Area = area });
}
else
{
result.Add(new ResultData() { Number = number, Name = name, LocationType = locationType, Area = area });
}
}
else
{
result.Add(new ResultData() { Number = number, Name = name, LocationType = locationType, Area = area });
}
prevString = "";
}
else
{
prevString = property;
}
}
答案 1 :(得分:-1)
看起来每条线都以空格开头,然后是数字。
因此,在进行任何其他处理之前,这可能是一个更简单的首次通过清理。
var myString = "Line: 12 this way 23 that way 34 no way".ToCharArray();
var firstDigitFound = false;
for (int i = 0; i < myString.Length; i++)
{
var isNumber = char.IsNumber(myString[i]);
if (isNumber && i > 0 && !firstDigitFound)
{
firstDigitFound = true;
myString[i - 1] = '|';
}
else { firstDigitFound = false; }
}
var myNewArray = new string(myString).Split('|');