我正在尝试编写一个正则表达式来从SQL脚本中获取每个插入行。当我在Regex Hero I get my expected 8 matches上使用.NET Regex Tester时。但是,当我将此snippit作为控制台应用程序运行时,它不返回任何匹配项。
const string text =
@"INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(12 , 7 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(15 , 53 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(20 , 216 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(16 , 8 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
INSERT INTO [AdminPrefs] ( [SpayClinic] , [VaxClinic] , [ShelterClinic] , [DateModified] , [Prefix] , [UpdateCounter] , [LockedRecs] , [dbName] , [Timer] , [MedCtrClinic] , [OtherClinic] , [Da2PPPx] , [Da2PPEPx] , [FVRCPPx] , [FVRCPEPx] , [FELVTPx] , [FELVTEPx] , [FELVVPx] , [FELVVEPx] , [HWTPx] , [HWTEPx] , [RabiesPx] , [RabiesEPx] , [FIVTest] , [FIVTestE] , [OnePlusChar] , [XSHWMPx] , [XSHWMEPx] , [SHWMPx] , [SHWMEPx] , [MHWMPx] , [MHWMEPx] , [LHWMPx] , [LHWMEPx] , [DebuggerOn] , [PayThisAmount] , [free6] , [XSHWMPillPx] , [XSHWMPillEPx] , [SHWMPillPx] , [SHWMPillEPx] , [MHWMPillPx] , [MHWMPillEPx] , [LHWMPillPx] , [LHWMPillEPx] , [free7] , [free8] , [free9] , [XSPMPx] , [XSPMEPx] , [SPMPx] , [SPMEPx] , [MPMPx] , [MPMEPx] , [LPMPx] , [LPMEPx] , [ReceiptFooter] , [MonthsUntilBenefits] , [free12] , [XSPMPillPx] , [XSPMPillEPx] , [SPMPillPx] , [SPMPillEPx] , [MPMPillPx] , [MPMPillEPx] , [LPMPillPx] , [LPMPillEPx] , [free14] , [ClinicName] , [ShelterName] , [ShelterAbbr] , [Address1] , [Address2] , [City] , [State] , [ZipCode] , [MainPhone] , [MainFax] , [SplashPict] , [free17] , [free18] , [LicenseNo] , [SerialNo] , [free20] , [free21] , [free22] , [VLogCC] , [SNLogCC] , [free23] , [free24] , [free25] , [AgeAndBDay] , [free26] , [free27] , [free28] , [CurrRouteNum] )
VALUES
(26 , 5 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(18 , 12 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(9 , 10 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0),
(2 , 72 , 0 , '0000/00/00 00:00:00:00' , '' , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '' , '' , '' , '' , '' , '' , '' , '' , '' , '' , X'5443503408' , 0 , 0 , '' , 0 , 0 , 0 , 0 , '' , '' , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0);
";
static void Main(string[] args)
{
string query = @"^\(.*?\)(,|;)$";
var matches = Regex.Matches(text, query, RegexOptions.Singleline | RegexOptions.Multiline);
Console.WriteLine("Expected Matches: 8");
Console.WriteLine("Matches Found: {0}", matches.Count);
Console.ReadLine();
}
对于网站和我的代码(Multiline和Singleline)我的选项是完全一样的,他们应该使用相同的.NET正则表达式引擎,那么是什么造成了两者之间的区别呢?
最终结果:
对于所有那些好奇我的最终正则表达式
@"(?<=^\() # The beginning of a line followed by a (
((('(?<c>.*?)'(?!')(?=[\s\)])) | # Text string in SQL supports line breaks
(?<c>-?[\d\.]+) | # Any numbers
(X'(?<c>[0-9a-f]*)') # Something formatted like X'0123456789abcdef'
)(\s,\s)? # Spaces and commas between the records
)+ # Repeat the pattern at least one time
(?=(?<!'')\)[;,]\r?$) # The End of the line ending with ); or ), and not immediately proceeded by ''";
请注意所有计划将其用于R&amp; D(rip-off和deploy)开发的人,这仅适用于我的SQL,因为它非常规律。如果与我的第三方程序未生成的SQL一起使用,则需要调整以处理许多我不需要处理的边缘情况。
以下是解析器解析代码的完整代码。希望它能帮助那些陷入类似事情的其他人。
foreach (var tableFolder in Directory.GetDirectories(_exportFolder))
{
//Popluate the schema of the DataTable
DataTable table = new DataTable();
using (SqlDataAdapter ada = new SqlDataAdapter(String.Format("Select top 0 * from [{0}]", Path.GetFileName(tableFolder)), conn))
{
ada.Fill(table);
}
//All of the files to import for this table
string[] filePaths = Directory.GetFiles(tableFolder, "*.sql");
foreach (string file in filePaths)
{
string text;
using (var txtRdr = new StreamReader(file))
{
text = txtRdr.ReadToEnd();
}
const string recordRegex =
@"(?<=^\() #The begining of a line followed by a (
((('(?<s>.*?)'(?!')(?=[\s\)])) | # Something formatted like 'some text' supports line breaks
(?<n>-?[\d\.]+) | # Any numbers
(X'(?<h>[0-9a-f]*)') # Something formatted like X'0123456789abcdef'
)(\s,\s)? # Spaces and commas between the records
)+ # Repeat the pattern at least one time
(?=(?<!'')\)[;,]\r?$) # The End of the line ending with ); or ), and not immedatly proceded by ''";
//Creates one match per row in the database
var records = Regex.Matches(text, recordRegex, RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase | RegexOptions.ExplicitCapture);
const string headerRegex = @"^INSERT\sINTO\s\[[\w_\-\s]+\]\s\(\s(?:\[([\w_\-\s]+)\]\s(?:,\s)?)+\)";
var header = Regex.Match(text, headerRegex).Groups[1].Captures.Cast<Capture>().ToArray();
foreach (Match record in records)
{
//Due to how we captured the 3 groups we had to put them back in order in one list.
var columns = record.Groups.Cast<Group>()
.Skip(1) //Groups[0] contins the entire record.
.SelectMany(group => group.Captures.Cast<Capture>()) //Flattens all of the captures in the three groups in to one list
.OrderBy(capture => capture.Index) //Reorder the combined list as the SelectMany will not be outputting the correct order.
.ToArray();
DataRow row = table.NewRow();
for (int i = 0; i < columns.Length; i++)
{
Type columnType = table.Columns[header[i].Value].DataType;
if (columnType == typeof(String))
{
row[header[i].Value] = columns[i].Value;
}
else if (columnType == typeof(Int32))
{
row[header[i].Value] = Convert.ToInt32(columns[i].Value);
}
else if (columnType == typeof(Double))
{
row[header[i].Value] = Convert.ToDouble(columns[i].Value);
}
else if (columnType == typeof(Boolean))
{
if (columns[i].Value == "0")
row[header[i].Value] = false;
else if (columns[i].Value == "1")
row[header[i].Value] = true;
else
throw new InvalidDataException();
}
else if (columnType == typeof(Int16))
{
row[header[i].Value] = Convert.ToInt16(columns[i].Value);
}
else if (columnType == typeof(Byte[]))
{
row[header[i].Value] = StringToByteArray(columns[i].Value);
}
else
{
throw new NotImplementedException();
}
}
table.Rows.Add(row);
}
using (var bulkCopy = new SqlBulkCopy(conn))
{
bulkCopy.DestinationTableName = Path.GetFileName(tableFolder);
bulkCopy.BulkCopyTimeout = 0;
bulkCopy.WriteToServer(table);
}
}
}
更新
通过将caputre组重命名为所有相同的名称,.NET的正则表达式引擎将它们组合起来,简化了
var columns = record.Groups[1].Cast<Group>().Skip(1).SelectMany(group => group.Captures.Cast<Capture>()).OrderBy(capture => capture.Index).ToArray();
到
var columns = record.Groups[1].Captures.Cast<Capture>().ToArray();
答案 0 :(得分:6)
请注意,在Regex Hero页面上切换“CrLf标记行结束”设置会导致8行停止匹配;这是导致问题的原因的线索。
在C#代码中,文字字符串中的换行符被编码为CR / LF对("\r\n"
)。正则表达式中的$
(与多行模式中的行尾匹配)仅匹配\n
字符。因此,正则表达式未考虑的最终逗号(或分号)之间存在额外的\r
字符,并且匹配失败。
您可以通过以下方式解决此问题:
text = text.Replace("\r\n", "\n");
或string query = @"^\(.*?\)(,|;)\r$";