我有一个XML文件,如下所示。我想提取每两个#NEWPAGE#标记之间的所有XML标记,并将它们分别保存到SQL Server数据库。请建议一种方法。
最初,我有一个包含以下内容的txt文件,后来我把文件的每一行都转换成了一个XML标记行。现在,我有一个XML文件,但我无法根据节点值#NEWPAGE#提取其中的部分XML。
XML内容如下:
<?xml version="1.0" encoding="utf-8"?>
<root>
<Line>#HEADINGBEGIN#</Line>
<Line></Line>
<Line>Employee: 16062 Name: MERZLAK,BRIAN Base: MSP Eqpt: E70 Pos: CA</Line>
<Line></Line>
<Line> Daily On Off Daily Daily Jr Accum</Line>
<Line>Date Assign Duty Duty TAFB Block Credit Trip Guarantee Man Credit</Line>
<Line>----- ------ ----- ----- ----- ----- ------ --------------- ---- ------</Line>
<Line>#HEADINGEND#</Line>
<Line>11/01 M2100A 0:01 0:00 4:35 0:00 0:00 0:00 0:00 </Line>
<Line>11/02 " 0:00 7:17 0:00 0:00 0:00 0:00 </Line>
<Line>11/03 " 19:12 67:12 6:51 20:14 0:00 0:00 20:14 </Line>
<Line>#GROUPNOBREAK#</Line>
<Line>#GROUPBEGIN#</Line>
<Line></Line>
<Line> Taxable TAFB 0:00 </Line>
<Line> Non-Taxable TAFB 178:00 </Line>
<Line> Total TAFB 178:00 </Line>
<Line>#GROUPEND#</Line>
<Line>#NEWPAGE#</Line>
<Line>#HEADINGBEGIN#</Line>
<Line></Line>
<Line>Employee: 19814 Name: GRAYSON,MONIQUE Base: LAX Eqpt: E70 Pos: CA</Line>
<Line></Line>
<Line> Daily On Off Daily Daily Jr Accum</Line>
<Line>Date Assign Duty Duty TAFB Block Credit Trip Guarantee Man Credit</Line>
<Line>----- ------ ----- ----- ----- ----- ------ --------------- ---- ------</Line>
<Line>#HEADINGEND#</Line>
<Line>11/01 OFF 0:00 0:00 0:00 0:00 0:00 0:00 </Line>
<Line>11/02 OFF 0:00 0:00 0:00 0:00 0:00 0:00 </Line>
<Line>11/03 L2488 13:30 0:00 7:10 0:00 0:00 0:00 0:00 </Line>
<Line>11/04 " 0:00 4:25 0:00 0:00 0:00 0:00 </Line>
<Line>#GROUPNOBREAK#</Line>
<Line>#GROUPBEGIN#</Line>
<Line></Line>
<Line> Taxable TAFB 0:00 Over Guar: 17:08</Line>
<Line> Non-Taxable TAFB 327:29 </Line>
<Line> Total TAFB 327:29 </Line>
<Line>#GROUPEND#</Line>
<Line>#NEWPAGE#</Line>
<Line>#HEADINGBEGIN#</Line>
<Line></Line>
<Line>Employee: 20730 Name: ZAHN,GEOFFREY Base: SEA Eqpt: E70 Pos: CA</Line>
<Line></Line>
<Line> Daily On Off Daily Daily Jr Accum</Line>
<Line>Date Assign Duty Duty TAFB Block Credit Trip Guarantee Man Credit</Line>
<Line>----- ------ ----- ----- ----- ----- ------ --------------- ---- ------</Line>
<Line>#HEADINGEND#</Line>
<Line>11/01 OFF 0:00 0:00 0:00 0:00 0:00 0:00 </Line>
<Line>11/02 OFF 0:00 0:00 0:00 0:00 0:00 0:00 </Line>
<Line>11/03 S2088 10:02 0:00 6:47 0:00 0:00 0:00 0:00 </Line>
<Line>#GROUPNOBREAK#</Line>
<Line>#GROUPBEGIN#</Line>
<Line></Line>
<Line> Taxable TAFB 9:25 Over Guar: 0:53</Line>
<Line> Non-Taxable TAFB 122:30 </Line>
<Line> Total TAFB 131:55 </Line>
<Line>#GROUPEND#</Line>
</root>
答案 0 :(得分:1)
您可以使用 Linq to Xml 来实现此目标。
// Requires: System.Collections.Generic, System.IO, System.Linq, System.Xml.Linq.
XDocument doc = XDocument.Load(filepath);

// Group the <Line> elements into pages. A #NEWPAGE# marker line closes the
// current page and opens the next one; the marker itself belongs to no page.
var pages = new List<List<XElement>> { new List<XElement>() };
foreach (XElement line in doc.Descendants("Line"))
{
    if (line.Value == "#NEWPAGE#")
    {
        pages.Add(new List<XElement>());
    }
    else
    {
        pages[pages.Count - 1].Add(line);
    }
}

// Write every page to its own file ("<newfile>.1", "<newfile>.2", ...).
// TODO : Provide filename
for (int i = 0; i < pages.Count; i++)
{
    File.WriteAllLines(newfile + "." + (i + 1), pages[i].Select(x => x.ToString()));
}
请查看此Demo
答案 1 :(得分:1)
正如你所说,你也有文本文件。那么你也可以使用下面这个简单的方法:
// Reads the report text file and collects every section that runs from a
// #HEADINGBEGIN# marker through the next #GROUPEND# marker into a list.
static void Main()
{
    string pattern = @"#HEADINGBEGIN#.*?#GROUPEND#";
    string input = File.ReadAllText(@"C:\yourTextFile.txt");

    // Singleline makes '.' match newlines, so a single match can span many lines.
    MatchCollection matches = Regex.Matches(input, pattern, RegexOptions.Singleline);

    List<string> list = new List<string>();
    for (int i = 0; i < matches.Count; i++)
    {
        list.Add(matches[i].Value);
    }

    // The list now holds one string per section; persist it wherever needed.
}
这为所有员工提供了 #HEADINGBEGIN#和 #GROUPEND#之间的数据,用 #NEWPAGE#分隔。
答案 2 :(得分:0)
整个方法看起来很弱......我确信有一个更好的概念来做到这一点。但要回答您的问题:您可以给各行编号,找到 #NEWPAGE# 所在的行,并用这些行号来分割结果集:
注意:这使用了自 SQL Server 2012 起可用的 LEAD 函数
更新:我会将XML作为参数传递给存储过程并在那里进行解析……
-- Splits the <Line> elements of @xml into pages separated by #NEWPAGE# marker
-- lines and returns every non-marker line together with its page number and
-- its row number inside that page.
DECLARE @xml XML='Your XML here';
WITH AllLines AS
(
    -- One numbered row per <Line> element.
    -- NOTE: ORDER BY (SELECT NULL) relies on .nodes() returning the elements in
    -- document order; that is commonly observed behaviour, not a documented guarantee.
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS RowNr
          ,Line.value('.','nvarchar(max)') AS Content
    FROM @xml.nodes('root/Line') AS One(Line)
)
,NewPages AS
(
    SELECT 0 AS NewpageStart --the very first line has no #NEWPAGE#...
    UNION ALL
    SELECT RowNr FROM AllLines WHERE Content='#NEWPAGE#'
    UNION ALL
    SELECT COUNT(*)+1 FROM AllLines --final mark: one past the last line (replaces the fixed 999999)
)
,PageBorders AS
(
    -- Number the pages by marker position so PageNr is deterministic.
    -- The final mark has no successor, so its NewPageEnd is NULL and it joins no lines.
    SELECT ROW_NUMBER() OVER(ORDER BY NewpageStart) AS PageNr
          ,NewpageStart+1 AS NewPageStart
          ,LEAD(NewPageStart) OVER(ORDER BY NewPageStart)-1 AS NewPageEnd
    FROM NewPages
)
SELECT PageNr
      ,ROW_NUMBER() OVER(PARTITION BY PageNr ORDER BY RowNr) AS PageRowNr
      ,AllLines.*
FROM PageBorders
INNER JOIN AllLines ON AllLines.RowNr BETWEEN PageBorders.NewPageStart AND PageBorders.NewPageEnd
ORDER BY PageNr, AllLines.RowNr;
答案 3 :(得分:0)
你可以从这个开始:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Text.RegularExpressions;
namespace ConsoleApplication102
{
/// <summary>
/// Console program that reads a line-oriented employee report stored as
/// <c>&lt;Line&gt;</c> elements in an XML file and turns it into <see cref="Employee"/> objects.
/// </summary>
class Program
{
    /// <summary>States of the line-by-line parser in ParseXml.</summary>
    enum State
    {
        FIND_HEADINGBEGIN,  // waiting for a #HEADINGBEGIN# marker
        HEADINGBEGIN,       // inside the heading; its first non-empty line is parsed as the "Employee:" line
        EMPLOYEE,           // reading assignment rows until #GROUPNOBREAK#
        FIND_GROUPBEGIN,    // waiting for a #GROUPBEGIN# marker
        GROUP               // reading the total lines until #GROUPEND#
    }

    const string FILENAME = @"c:\temp\test.xml";

    static void Main(string[] args)
    {
        List<Employee> employees = ParseXml(FILENAME);
    }

    /// <summary>
    /// Parses the report in <paramref name="filename"/> into one <see cref="Employee"/>
    /// per heading section, including its assignment rows and totals.
    /// </summary>
    /// <param name="filename">Path of the XML file whose <c>Line</c> elements hold the report text.</param>
    /// <returns>The employees in the order they appear in the file.</returns>
    static List<Employee> ParseXml(string filename)
    {
        string employeePattern = @"Employee:\s*(?'employee'\d*)\s+Name:\s*(?'name'[^\s]*)\s*Base:\s+(?'base'[^\s]*)\s*Eqpt:\s+(?'eqpt'[^\s]*)\s+Pos:\s+(?'pos'[^\s]*)";
        List<Employee> employees = new List<Employee>();
        Employee newEmployee = null;
        // Character widths of the fixed-width columns of an assignment row.
        List<int> assmentColumnWidths = new List<int>() { 7, 7, 14, 7, 7, 8, 16, 8, 8 };
        int lineNo = 0;
        State state = State.FIND_HEADINGBEGIN;

        // Load the file the caller asked for (the original always loaded FILENAME).
        XDocument doc = XDocument.Load(filename);

        foreach (XElement xLine in doc.Descendants("Line"))
        {
            string line = ((string)xLine).Trim();
            if (line.Length == 0)
            {
                continue;   // blank lines carry no information in any state
            }

            switch (state)
            {
                case State.FIND_HEADINGBEGIN:
                    if (line.StartsWith("#HEADINGBEGIN#", StringComparison.Ordinal))
                    {
                        state = State.HEADINGBEGIN;
                        lineNo = 0;
                    }
                    break;

                case State.HEADINGBEGIN:
                    if (line.StartsWith("#HEADINGEND#", StringComparison.Ordinal))
                    {
                        state = State.EMPLOYEE;
                    }
                    else if (lineNo++ == 0)
                    {
                        // Only the first non-empty heading line is parsed; the
                        // remaining heading lines are skipped.
                        newEmployee = new Employee();
                        employees.Add(newEmployee);
                        Match expr = Regex.Match(line, employeePattern);
                        newEmployee.id = expr.Groups["employee"].Value;
                        newEmployee.name = expr.Groups["name"].Value;
                        newEmployee._base = expr.Groups["base"].Value;
                        newEmployee.eqpt = expr.Groups["eqpt"].Value;
                        newEmployee.pos = expr.Groups["pos"].Value;
                    }
                    break;

                case State.EMPLOYEE:
                    if (line.StartsWith("#GROUPNOBREAK#", StringComparison.Ordinal))
                    {
                        state = State.FIND_GROUPBEGIN;
                        lineNo = 0;
                    }
                    else
                    {
                        List<string> assignmentData = GetFixedWidth(line, assmentColumnWidths);
                        if (newEmployee.assignments == null)
                        {
                            newEmployee.assignments = new List<Assignment>();
                        }

                        // A lone double quote is a ditto mark: reuse the previous row's name.
                        // On the very first row there is nothing to repeat, so keep the mark as-is.
                        int previousCount = newEmployee.assignments.Count;
                        bool isDitto = assignmentData[1] == "\"" && previousCount > 0;

                        Assignment assignment = new Assignment();
                        assignment.date = assignmentData[0];
                        assignment.name = isDitto
                            ? newEmployee.assignments[previousCount - 1].name
                            : assignmentData[1];
                        assignment.onDuty = assignmentData[2];
                        assignment.offDuty = assignmentData[3];
                        assignment.tafb = assignmentData[4];
                        assignment.dailyBlock = assignmentData[5];
                        assignment.dailyCredit = assignmentData[6];
                        assignment.tripGuarantee = assignmentData[7];
                        assignment.jrMan = assignmentData[8];
                        newEmployee.assignments.Add(assignment);
                    }
                    break;

                case State.FIND_GROUPBEGIN:
                    if (line.StartsWith("#GROUPBEGIN#", StringComparison.Ordinal))
                    {
                        state = State.GROUP;
                        newEmployee.total = new Total();
                    }
                    break;

                case State.GROUP:
                    if (line.StartsWith("#GROUPEND#", StringComparison.Ordinal))
                    {
                        state = State.FIND_HEADINGBEGIN;
                    }
                    else
                    {
                        // The value is the third whitespace-separated token,
                        // e.g. "Taxable TAFB 0:00"; a shorter line yields an empty value.
                        string[] splitLine = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                        string value = splitLine.Length > 2 ? splitLine[2] : string.Empty;
                        switch (++lineNo)
                        {
                            case 1:
                                newEmployee.total.taxable = value;
                                break;
                            case 2:
                                newEmployee.total.nonTaxable = value;
                                break;
                            case 3:
                                newEmployee.total.total = value;
                                break;
                        }
                    }
                    break;
            }
        }
        return employees;
    }

    /// <summary>
    /// Cuts <paramref name="input"/> into consecutive fixed-width fields.
    /// </summary>
    /// <param name="input">The text line to cut.</param>
    /// <param name="columns">Width in characters of each field, left to right.</param>
    /// <returns>
    /// Exactly <c>columns.Count</c> trimmed fields. A field that runs past the end of
    /// <paramref name="input"/> is truncated, and a field that starts past the end is empty.
    /// </returns>
    static List<string> GetFixedWidth(string input, List<int> columns)
    {
        List<string> output = new List<string>(columns.Count);
        int startPos = 0;
        foreach (int width in columns)
        {
            if (startPos >= input.Length)
            {
                output.Add(string.Empty);
            }
            else if (startPos + width <= input.Length)
            {
                output.Add(input.Substring(startPos, width).Trim());
            }
            else
            {
                output.Add(input.Substring(startPos).Trim());
            }

            // Advance by the width of the field just consumed. The original advanced
            // by the following column's width and could index past the end of columns.
            startPos += width;
        }
        return output;
    }
}
/// <summary>
/// One employee section of the report: the fields of the "Employee:" heading
/// line plus the assignment rows and totals that follow it.
/// </summary>
public class Employee
{
// Digits captured after "Employee:" on the heading line.
public string id { get; set; }
// Non-whitespace text captured after "Name:" on the heading line.
public string name { get; set; }
// Non-whitespace text captured after "Base:" on the heading line.
public string _base { get; set; }
// Non-whitespace text captured after "Eqpt:" on the heading line.
public string eqpt { get; set; }
// Non-whitespace text captured after "Pos:" on the heading line.
public string pos { get; set; }
// Assignment rows of this employee; the parser creates the list when it reads
// the first row, so it stays null when the section has no rows.
public List<Assignment> assignments { get; set; }
// Totals block; the parser sets it when it reaches the #GROUPBEGIN# marker.
public Total total { get; set; }
}
/// <summary>
/// One assignment row of an employee section. Every property holds the trimmed
/// text of one fixed-width column of the row, kept as a string.
/// </summary>
public class Assignment
{
// Column 0 of the row.
public string date { get; set; }
// Column 1 of the row; when that column is a lone double quote the parser
// copies the name of an earlier row instead.
public string name { get; set; }
// Column 2 of the row.
public string onDuty { get; set; }
// Column 3 of the row.
public string offDuty { get; set; }
// Column 4 of the row.
public string tafb { get; set; }
// Column 5 of the row.
public string dailyBlock { get; set; }
// Column 6 of the row.
public string dailyCredit { get; set; }
// Column 7 of the row.
public string tripGuarantee { get; set; }
// Column 8 of the row.
public string jrMan { get; set; }
}
/// <summary>
/// Totals block of an employee section. Each property holds the third
/// whitespace-separated token of one non-empty line between #GROUPBEGIN# and #GROUPEND#.
/// </summary>
public class Total
{
// Taken from the first non-empty line of the block.
public string taxable { get; set; }
// Taken from the second non-empty line of the block.
public string nonTaxable { get; set; }
// Taken from the third non-empty line of the block.
public string total { get; set; }
}
}