处理一些现有的代码,我遇到了大型正则表达式的问题(不是我的强项)。正则表达式只匹配城市的表达式,只有一个单词(.ie.YORK而不是纽约)。
很抱歉,如果不清楚,正则表达式只匹配城市的第二个名称,而不是整个城市名称。如果不清楚,我道歉,谢谢。
我在玩弄
@"(?<city>[a-zA-Z]+\s*){1,3},\s+(?<state>\w\w)\s+(?<zip>\d+)
对于这个城市但无济于事。
非常感谢任何帮助。
守则
private void ReadPdfFile(string filePath)
{
var res = GetTextFromAllPages(filePath);
Regex typ = new Regex(@"\s\n(?<llctype>[\w\s\(\)]+)?---------[\-\s]+", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
Regex city = new Regex(@"(?<city>\w+?),\s+(?<state>\w\w)\s+(?<zip>\d+)", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
Regex data = new Regex(@"(?<filling>\d{8,})\s+(?<name>[^\n\r]*).*?law\s+:\s+203.*?county\s+:\s*(?<county>[\w]+)\s+(?<fileraddress>[^\n\r]+).*?EFF\..*?:\s(?<effdate>([\d/]+|\s+))\s(?<address2>[^\r\n]*).*?:\s(\s+|\w+)(?<zip>[^\r\n]+)\s+(?<processaddress>[^\r\n]+)\s+(?<processaddress2>[^\r\n]+)\s+(?<processzip>[^\r\n]+)\s", RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);
foreach (var page in res)
{
if (typ.IsMatch(page))
{
var mtchTitle = typ.Match(page);
var title = mtchTitle.Groups["llctype"] != null ? mtchTitle.Groups["llctype"].Value : "";
var matchData = data.Matches(page);
foreach (Match m in matchData)
{
var fileradd2 = m.Groups["address2"] != null ? m.Groups["address2"].Value : "";
var processadd2 = m.Groups["processaddress2"] != null ? m.Groups["processaddress2"].Value : "";
var address = m.Groups["fileraddress"] != null ? m.Groups["fileraddress"].Value : "" + (fileradd2.IndexOf(" ") > -1 ? " " + fileradd2.Split(new string[] { " " }, StringSplitOptions.None)[0] : "");
var processaddress = m.Groups["processaddress"] != null ? m.Groups["processaddress"].Value : "" + (processadd2.IndexOf(" ") > -1 ? " " + processadd2.Split(new string[] { " " }, StringSplitOptions.None)[0] : "");
var filerstreet = (fileradd2.IndexOf(" ") > -1 ? " " + fileradd2.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries)[1] : fileradd2);
var processstreet = (processadd2.IndexOf(" ") > -1 ? " " + processadd2.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries)[1] : processadd2);
var cityMatchFiler = city.Match(m.Groups["zip"] != null ? m.Groups["zip"].Value : "");
var cityMatchProcess = city.Match(m.Groups["processzip"] != null ? m.Groups["processzip"].Value : "");
var d = new DocEntity
{
County = m.Groups["county"] != null ? m.Groups["county"].Value.Trim() : "",
EFFDate = m.Groups["effdate"] != null && !string.IsNullOrWhiteSpace(m.Groups["effdate"].Value) ? DateTime.Parse(m.Groups["effdate"].Value) : DateTime.MinValue,
FILER_ADDRESS = address.Trim(),
FILER_CITY = cityMatchFiler.Groups["city"] != null ? cityMatchFiler.Groups["city"].Value.Trim() : "",
FILER_STATE = cityMatchFiler.Groups["state"] != null ? cityMatchFiler.Groups["state"].Value.Trim() : "",
FILER_STREET = filerstreet.Trim(),
FILER_ZIP = cityMatchFiler.Groups["zip"] != null && !string.IsNullOrWhiteSpace(cityMatchFiler.Groups["zip"].Value) ? int.Parse(cityMatchFiler.Groups["zip"].Value) : 0,
FillingNumber = m.Groups["filling"] != null ? m.Groups["filling"].Value.Trim() : "",
LLC_NAME = m.Groups["name"] != null ? m.Groups["name"].Value.Trim() : "",
LLCType = title.Trim(),
ProcessAddress = processaddress.Trim(),
ProcessCity = cityMatchProcess.Groups["city"] != null ? cityMatchProcess.Groups["city"].Value.Trim() : "",
ProcessState = cityMatchProcess.Groups["state"] != null ? cityMatchProcess.Groups["state"].Value.Trim() : "",
ProcessStreet = processstreet.Trim(),
ProcessZIP = cityMatchProcess.Groups["zip"] != null && !string.IsNullOrWhiteSpace(cityMatchProcess.Groups["zip"].Value) ? int.Parse(cityMatchProcess.Groups["zip"].Value) : 0,
};
results.Add(d);
}
}
}
dataGridView1.DataSource = results;
}
以下是文字示例:
170914000673 FRESH TRESSES LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : QUEE EZ LEGAL PUBLISHING LLC
EFF. DATE: 09/14/2017 244 FIFTH AVENUE SUITE 2503
DUR. DATE: NEW YORK, NY 10001
170914000215 FYATIKTK, LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : KING PABLO G. VELEZ, ESQ.
EFF. DATE: 09/14/2017 VELEZ & CIPRIANO, PLLC 347 5TH AVENUE STE 810
DUR. DATE: NEW YORK, NY 10016
170914000676 GLOBAL PROPERTY GROUP LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : ROCK ALLSTATE CORPORATE SERVICES CORP.
EFF. DATE: 09/14/2017 ONE COMMERCE PLAZA 99 WASHINGTON AVENUE, SUITE 1008
DUR. DATE: ALBANY, NY 12260
170914000075 GNU SECURITIES LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : ALBA INTEGRATED MANAGEMENT
EFF. DATE: 09/14/2017 SOLUTIONS USA LLC 39 BROADWAY, STE 3300
DUR. DATE: NEW YORK, NY 10006
170914000129 HIGH HILL ASSOCIATES LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : WEST JOHN MORELLO
EFF. DATE: 09/14/2017 3 HIGH HILL FARM PL
DUR. DATE: THORNWOOD, NY 10594
170914000151 HOLLY SLEPT OVER, LLC
LAW : 206 LLC *FILER ADDRESS / PROCESS ADDRESS*
COUNTY : ONON GERMAIN & GERMAIN
EFF. DATE: 09/14/2017 314 E FAYETTE STREET
DUR. DATE: SYRACUSE, NY 13202
答案 0 :(得分:1)
尝试(?<city>[^,]+),
这将捕获任何不是逗号的内容并将其保存为命名捕获“city”
你也可以尝试类似于负面前瞻的东西:
(?<city>(?!,\s+).+)
不是c#程序员,但我涉猎正则表达式:D