批量数据提取脚本

时间:2017-10-25 14:41:48

标签: c# xml batch-processing

很抱歉问这样一个新手问题,因为我对编程还很陌生。

我有一个大型XML文件(请参阅下面的链接)。它包含法人实体的识别编码(LEI),每个实体的LEI后面跟着其各自母公司的LEI。

Example from the XML file

黄色是实体LEI编号,绿色是母公司LEI。

我想创建某种批处理脚本或GUI,这样我就可以输入一组实体(黄色)LEI编号,然后输出所有对应的母公司(绿色)LEI编号。

这是任何想知道的人的文件:https://leidata.gleif.org/api/v1/concatenated-files/rr/20171025/zip

我非常缺乏经验,所以我不知道从哪里开始。

非常感谢

2 个答案:

答案 0 :(得分:0)

将整个文件内容复制到剪贴板,然后打开Visual Studio项目,依次选择菜单"编辑"->"选择性粘贴"->"将XML粘贴为类"。这会在当前打开的文件中生成一组类。保存该文件,然后使用下面的代码加载XML文件并返回反序列化后的数据:

/// <summary>
/// Deserializes the XML file at <paramref name="fileName"/> into a
/// <see cref="RelationshipData"/> object graph.
/// </summary>
/// <param name="fileName">Path of the XML file to read.</param>
/// <returns>The deserialized <see cref="RelationshipData"/> instance.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown by <see cref="XmlSerializer.Deserialize(Stream)"/> when the XML cannot be mapped to the type.
/// </exception>
public static RelationshipData LoadFile(string fileName)
{
    // The serializer has to be created for the same type the result is cast to;
    // otherwise the cast below fails at runtime.
    var serializer = new XmlSerializer(typeof(RelationshipData));

    // Open read-only: the file is never written, so read/write access is not needed
    // and would fail on read-only files.
    using (Stream input = File.OpenRead(fileName))
    {
        return (RelationshipData)serializer.Deserialize(input);
    }
}

现在,您应该能够通过返回的data对象的属性访问文件内容。我还没有把这种方法用在带命名空间前缀的XML数据上,但我认为它也能处理这种情况……

使用这些数据,你应该建立一个字典或类似的东西,将一个ID映射到另一个ID。

你确实应该去了解一下这段代码为什么有效,以及它是如何工作的……

未经过测试

答案 1 :(得分:0)

我想我得到了大部分内容。由于存在大量节点,因此需要仔细检查。我添加了一个编写器,所以我可以使用Beyond Compare来检查我是否获得了所有节点。修正了一些错误。还需要一些小调整。

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Serialization;

namespace ConsoleApplication1
{
    /// <summary>
    /// Console entry point: deserializes <see cref="INPUT_FILENAME"/> into the object model
    /// and serializes it back to <see cref="OUTPUT_FILENAME"/> so both files can be compared.
    /// </summary>
    class Program
    {
        const string INPUT_FILENAME = @"c:\temp\test.xml";
        const string OUTPUT_FILENAME = @"c:\temp\test1.xml";

        /// <summary>
        /// Reads the input XML, then writes the deserialized data to the output file with indentation.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            XmlSerializer serializer = new XmlSerializer(typeof(RelationshipDataRelationshipData));

            XmlReaderSettings settings = new XmlReaderSettings();
            settings.CheckCharacters = false;

            RelationshipDataRelationshipData relationshipDataRelationshipData;

            // Dispose the reader as soon as deserialization is done so the input file
            // handle is released even when Deserialize throws.
            using (XmlReader reader = XmlReader.Create(INPUT_FILENAME, settings))
            {
                relationshipDataRelationshipData = (RelationshipDataRelationshipData)serializer.Deserialize(reader);
            }

            XmlWriterSettings wSettings = new XmlWriterSettings();
            wSettings.Indent = true;

            // Emit the same prefixes on output as the two namespaces declared here.
            XmlSerializerNamespaces ns = new XmlSerializerNamespaces();
            ns.Add("rr", "http://www.gleif.org/data/schema/rr/2016");
            ns.Add("gleif", "http://www.gleif.org/concatenated-file/header-extension/2.0");

            // Disposing the writer flushes and closes the output file, also on failure.
            using (XmlWriter writer = XmlWriter.Create(OUTPUT_FILENAME, wSettings))
            {
                serializer.Serialize(writer, relationshipDataRelationshipData, ns);
            }
        }
    }
    /// <summary>
    /// Document root: maps the <c>RelationshipData</c> element of the rr/2016 namespace.
    /// </summary>
    [XmlRoot("RelationshipData", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipDataRelationshipData
    {
        /// <summary>Content of the <c>Header</c> child element.</summary>
        [XmlElement(ElementName = "Header")]
        public Header header { get; set; }

        /// <summary>Content of the <c>RelationshipRecords</c> child element.</summary>
        [XmlElement(ElementName = "RelationshipRecords", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
        public RelationshipRecords relationshipRecords { get; set; }
    }
    /// <summary>
    /// Maps the <c>Header</c> element: content date, file content label, record count
    /// and an <c>Extension</c> child.
    /// </summary>
    [XmlRoot("Header", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class Header
    {
        /// <summary>Value of the <c>ContentDate</c> element.</summary>
        [XmlElement(ElementName = "ContentDate")]
        public DateTime ContentDate { get; set; }

        /// <summary>Value of the <c>FileContent</c> element.</summary>
        [XmlElement(ElementName = "FileContent")]
        public string FileContent { get; set; }

        /// <summary>Value of the <c>RecordCount</c> element.</summary>
        [XmlElement(ElementName = "RecordCount")]
        public int RecordCount { get; set; }

        /// <summary>Content of the <c>Extension</c> child element.</summary>
        [XmlElement(ElementName = "Extension", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
        public Extension extension { get; set; }
    }
    /// <summary>
    /// Maps the <c>Extension</c> element, which wraps a <c>Sources</c> element
    /// from the header-extension/2.0 namespace.
    /// </summary>
    [XmlRoot("Extension", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class Extension
    {
        /// <summary>Content of the <c>Sources</c> child element.</summary>
        [XmlElement("Sources", Namespace = "http://www.gleif.org/concatenated-file/header-extension/2.0")]
        public Sourses sources { get; set; }
    }

    /// <summary>
    /// Maps the <c>Sources</c> element: a sequence of repeated <c>Source</c> children.
    /// </summary>
    [XmlRoot("Sources", Namespace = "http://www.gleif.org/concatenated-file/header-extension/2.0")]
    public class Sourses
    {
        /// <summary>One entry per <c>Source</c> child element.</summary>
        [XmlElement(ElementName = "Source")]
        public List<Source> source { get; set; }
    }

    /// <summary>
    /// Maps a single <c>Source</c> element of the header-extension/2.0 namespace.
    /// </summary>
    [XmlRoot("Source", Namespace = "http://www.gleif.org/concatenated-file/header-extension/2.0")]
    public class Source
    {
        /// <summary>Value of the <c>ContentDate</c> element.</summary>
        [XmlElement(ElementName = "ContentDate")]
        public DateTime ContentDate { get; set; }

        /// <summary>Value of the <c>Originator</c> element.</summary>
        [XmlElement(ElementName = "Originator")]
        public string Originator { get; set; }

        /// <summary>Value of the <c>RecordCount</c> element.</summary>
        [XmlElement(ElementName = "RecordCount")]
        public int RecordCount { get; set; }
    }

    /// <summary>
    /// Maps the <c>RelationshipRecords</c> element: a sequence of repeated
    /// <c>RelationshipRecord</c> children.
    /// </summary>
    [XmlRoot("RelationshipRecords", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipRecords
    {
        /// <summary>One entry per <c>RelationshipRecord</c> child element.</summary>
        [XmlElement(ElementName = "RelationshipRecord")]
        public List<RelationshipRecord> relationshipRecord { get; set; }
    }
    /// <summary>
    /// Maps one <c>RelationshipRecord</c> element with its <c>Relationship</c>,
    /// <c>Registration</c> and <c>Extension</c> children.
    /// </summary>
    [XmlRoot("RelationshipRecord", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipRecord
    {
        /// <summary>Content of the <c>Relationship</c> child element.</summary>
        [XmlElement(ElementName = "Relationship")]
        public Relationship relationship { get; set; }

        /// <summary>Content of the <c>Registration</c> child element.</summary>
        [XmlElement(ElementName = "Registration")]
        public Registration registration { get; set; }

        /// <summary>Content of the <c>Extension</c> child element.</summary>
        [XmlElement(ElementName = "Extension", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
        public Extension extension { get; set; }
    }
    /// <summary>
    /// Maps the <c>Relationship</c> element: the two nodes it connects, its type and status,
    /// and its periods and qualifiers.
    /// </summary>
    // NOTE(review): no member here maps a RelationshipQuantifiers child element — confirm
    // against the input files whether one is needed.
    [XmlRoot("Relationship", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class Relationship
    {
        /// <summary>Content of the <c>StartNode</c> child element.</summary>
        [XmlElement(ElementName = "StartNode")]
        public Node StartNode { get; set; }

        /// <summary>Content of the <c>EndNode</c> child element.</summary>
        [XmlElement(ElementName = "EndNode")]
        public Node EndNode { get; set; }

        /// <summary>Value of the <c>RelationshipType</c> element.</summary>
        [XmlElement(ElementName = "RelationshipType")]
        public string RelationshipType { get; set; }

        /// <summary>Content of the <c>RelationshipPeriods</c> child element.</summary>
        [XmlElement(ElementName = "RelationshipPeriods")]
        public RelationshipPeriods relationshipPeriods { get; set; }

        /// <summary>Value of the <c>RelationshipStatus</c> element.</summary>
        [XmlElement(ElementName = "RelationshipStatus")]
        public string RelationshipStatus { get; set; }

        /// <summary>Content of the <c>RelationshipQualifiers</c> child element.</summary>
        [XmlElement(ElementName = "RelationshipQualifiers")]
        public RelationshipQualifiers relationshipQualifiers { get; set; }
    }
    /// <summary>
    /// Maps a node element (used for both <c>StartNode</c> and <c>EndNode</c>):
    /// an identifier plus the kind of identifier.
    /// </summary>
    [XmlRoot("Node", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class Node
    {
        /// <summary>Value of the <c>NodeID</c> element.</summary>
        [XmlElement(ElementName = "NodeID")]
        public string NodeID { get; set; }

        /// <summary>Value of the <c>NodeIDType</c> element.</summary>
        [XmlElement(ElementName = "NodeIDType")]
        public string NodeIDType { get; set; }
    }
    /// <summary>
    /// Maps the <c>RelationshipQualifiers</c> element, which holds a
    /// <c>RelationshipQualifier</c> child.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipQualifiers", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipQualifiers
    {
        /// <summary>Content of the <c>RelationshipQualifier</c> child element.</summary>
        [XmlElement("RelationshipQualifier")]
        public RelationshipQualifier relationshipQualifier { get; set; }
    }
    /// <summary>
    /// Maps one <c>RelationshipQualifier</c> element: a dimension and a category value.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipQualifier", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipQualifier
    {
        /// <summary>Value of the <c>QualifierDimension</c> element.</summary>
        public string QualifierDimension { get; set; }

        /// <summary>Value of the <c>QualifierCategory</c> element.</summary>
        public string QualifierCategory { get; set; }
    }
    /// <summary>
    /// Maps the <c>Registration</c> element: registration dates, status, managing LOU
    /// and validation details.
    /// </summary>
    [XmlRoot("Registration", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class Registration
    {
        /// <summary>Value of the <c>InitialRegistrationDate</c> element.</summary>
        [XmlElement(ElementName = "InitialRegistrationDate")]
        public DateTime InitialRegistrationDate { get; set; }

        /// <summary>Value of the <c>LastUpdateDate</c> element.</summary>
        [XmlElement(ElementName = "LastUpdateDate")]
        public DateTime LastUpdateDate { get; set; }

        /// <summary>Value of the <c>RegistrationStatus</c> element.</summary>
        [XmlElement(ElementName = "RegistrationStatus")]
        public string RegistrationStatus { get; set; }

        /// <summary>Value of the <c>NextRenewalDate</c> element.</summary>
        [XmlElement(ElementName = "NextRenewalDate")]
        public DateTime NextRenewalDate { get; set; }

        /// <summary>Value of the <c>ManagingLOU</c> element.</summary>
        [XmlElement(ElementName = "ManagingLOU")]
        public string ManagingLOU { get; set; }

        /// <summary>Value of the <c>ValidationSources</c> element.</summary>
        [XmlElement(ElementName = "ValidationSources")]
        public string ValidationSources { get; set; }

        /// <summary>Value of the <c>ValidationDocuments</c> element.</summary>
        [XmlElement(ElementName = "ValidationDocuments")]
        public string ValidationDocuments { get; set; }
    }

    /// <summary>
    /// Maps the <c>RelationshipPeriods</c> element: a sequence of repeated
    /// <c>RelationshipPeriod</c> children.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipPeriods", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipPeriods
    {
        /// <summary>One entry per <c>RelationshipPeriod</c> child element.</summary>
        [XmlElement("RelationshipPeriod")]
        public List<RelationshipPeriod> relationshipPeriod { get; set; }
    }
    /// <summary>
    /// Maps one <c>RelationshipPeriod</c> element: a start date, an end date and a period type.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipPeriod", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipPeriod
    {
        /// <summary>Value of the <c>StartDate</c> element.</summary>
        [XmlElement("StartDate")]
        public DateTime StartDate { get; set; }

        /// <summary>Value of the <c>EndDate</c> element.</summary>
        [XmlElement("EndDate")]
        public DateTime EndDate { get; set; }

        /// <summary>Value of the <c>PeriodType</c> element.</summary>
        [XmlElement("PeriodType")]
        public string PeriodType { get; set; }

    }
    /// <summary>
    /// Maps the <c>RelationshipQuantifiers</c> element: a sequence of repeated
    /// <c>RelationshipQuantifier</c> children.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipQuantifiers", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipQuantifiers
    {
        /// <summary>One entry per <c>RelationshipQuantifier</c> child element.</summary>
        [XmlElement("RelationshipQuantifier")]
        public List<RelationshipQuantifier> relationshipQuantifier { get; set; }
    }
    /// <summary>
    /// Maps one <c>RelationshipQuantifier</c> element: a measurement method, an amount
    /// and the units of that amount.
    /// </summary>
    // The root element name previously carried a stray "class " prefix, which is not a
    // valid XML name; it now matches the element this type represents.
    [XmlRoot(ElementName = "RelationshipQuantifier", Namespace = "http://www.gleif.org/data/schema/rr/2016")]
    public class RelationshipQuantifier
    {
        /// <summary>
        /// Value of the <c>MeasurementMethod</c> element. Kept as text: the element holds a
        /// method code rather than a date, so a <see cref="DateTime"/> property would make
        /// deserialization throw.
        /// </summary>
        [XmlElement("MeasurementMethod")]
        public string MeasurementMethod { get; set; }

        /// <summary>Value of the <c>QuantifierAmount</c> element.</summary>
        [XmlElement("QuantifierAmount")]
        public decimal QuantifierAmount { get; set; }

        /// <summary>Value of the <c>QuantifierUnits</c> element.</summary>
        [XmlElement("QuantifierUnits")]
        public string QuantifierUnits { get; set; }

    }

}