通过非制表符分隔符的非结构化XML?

时间:2018-05-04 11:11:43

标签: c# xml xslt xsd xml-parsing

我们有一个复杂的XML结构,而且确实很大(> 500 MB)。结构的XSD是:This XSD

我们知道这是一个复杂的问题。由于文件太大,而且内容没有用制表符缩进排版,我无法把它转换成更易读的形式。

我想通过C#阅读此文件并搜索药物名称。我的代码出了什么问题?

        // Load the whole XML file into a DataSet and bind its first table to the grid;
        // any exception is shown to the user in a message box.
        try
        {
            XmlReader xmlFile = XmlReader.Create("C:\\Users\\Dr\\Desktop\\full database.xml", new XmlReaderSettings());

            DataSet ds = new DataSet();
            ds.ReadXml(xmlFile);

            DataTable firstTable = ds.Tables[0];
            dataGridView1.DataSource = firstTable;
        }
        catch (Exception ex)
        {
            string details = ex.ToString();
            MessageBox.Show(details);
        }

我的错误如下:

Error Figure

如何在此XML中搜索并获取有关药物名称的信息?

更新:示例XML

The XML Structure

jdweng的答案很好,我们想要提取所有信息。

5 个答案:

答案 0 :(得分:7)

我决定用最费力的办法:手工创建SQL数据库,因为没有一个现成的工具好用。我只建了其中几张表,如果需要,您可以自行添加更多。

步骤1:使用以下查询在SQL Server Management Studio中创建数据库。如果您多次运行,而数据库或表已经存在,则查询可能会出错。

创建数据库之后,运行以下C#项目:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data;
using System.Data.SqlClient;
using System.Xml;
using System.Xml.Linq;
using System.IO;

namespace DrubBank
{
    /// <summary>
    /// Console entry point: constructing <see cref="UploadXml"/> with the XML path
    /// is the only thing it does.
    /// </summary>
    class Program
    {
        // Path of the XML file handed to UploadXml.
        const string FILENAME = @"c:\temp\full database.xml";

        static void Main(string[] args)
        {
            // The UploadXml constructor does all the work; the instance itself is not used afterwards.
            UploadXml uploader = new UploadXml(FILENAME);
        }
    }
    public class UploadXml
    {
        // Parameterised INSERT statements, one per target table in [DrugBank].[dbo].
        // Each constant is the command text of the SqlCommand field with the matching name
        // (see the constructor); every @name placeholder must have a SqlParameter added there.

        // One row per drug.
        // NOTE(review): the column is spelled "[Route of Eelimination]" while the parameter is
        // @Route_of_Elimination - confirm the spelling against the actual table definition.
        const string INSERT_DRUG =
            "INSERT INTO [DrugBank].[dbo].[Drugs] (" +
            "[Type],[Created],[Updated],[ID],[Name],[Description],[Case Number],[Unii],[State]," +
            "[Synthesis Reference],[Indication] ,[Pharmacodynamics] ,[Mechanism of Action], [Toxicity]," +
            "[Metabolism] , [Absorption] ,[Half Life], [Protein Binding]," +
            "[Route of Eelimination], [Volume of Distribution] ,[Clearance])" +
            " VALUES " +
            "(@Type, @Created, @Updated, @ID, @Name, @Description, @Case_Number, @Unii, @State," +
             "@Synthesis_Reference,@Indication ,@Pharmacodynamics ,@Mechanism_of_Action, @Toxicity," +
             "@Metabolism , @Absorption ,@Half_Life, @Protein_Binding," +
             "@Route_of_Elimination, @Volume_of_Distribution ,@Clearance)";

        // One row per <link> element (title + URL).
        const string INSERT_DRUG_LINK =
            "INSERT INTO [DrugBank].[dbo].[Links] (" +
            "[ID],[Title],[URL])" +
            " VALUES " +
            "(@ID,@Title, @URL)";

        // One row per <article> element (PubMed id + citation).
        const string INSERT_DRUG_ARTICLE =
            "INSERT INTO [DrugBank].[dbo].[Articles] (" +
            "[ID],[Pubmed ID],[Citation])" +
            " VALUES " +
            "(@ID,@Pubmed_ID, @Citation)";

        // One row per <drug-interaction> element.
        const string INSERT_DRUG_INTERACTION =
            "INSERT INTO [DrugBank].[dbo].[Interactions] (" +
            "[ID],[Interaction ID],[Description])" +
            " VALUES " +
            "(@ID,@Interaction_ID, @Description)";

        // Maps a drug's primary id to one of its <drugbank-id> values.
        const string INSERT_DRUG_ID =
            "INSERT INTO [DrugBank].[dbo].[IDs] (" +
            "[ID],[ALT ID])" +
            " VALUES " +
            "(@ID, @ALT_ID)";

        // One row per <product> element.
        const string INSERT_DRUG_PRODUCT =
            "INSERT INTO [DrugBank].[dbo].[Products] (" +
            "[ID],[Name],[Labeller], [NDC ID], [NDC Product Code], [DPD ID]," +
            "[EMA Product Code],[EMA MA Number],[Started Marketing On], [Ended Marketing On], [Dosage Form]," +
            "[Strength],[Route],[FDA Application Number],[Generic],[Over the Counter],[Approved],[Country],[Source])" +
            " VALUES " +
            "(@ID,@Name,@Labeller, @NDC_ID, @NDC_Product_Code,@DPD_ID," +
            "@EMA_Product_Code,@EMA_MA_Number,@Started_Marketing_On, @Ended_Marketing_On, @Dosage_Form," +
            "@Strength,@Route,@FDA_Application_Number,@Generic,@Over_the_Counter,@Approved,@Country,@Source)";

        // One row per <mixture> element.
        const string INSERT_DRUG_MIXTURE =
            "INSERT INTO [DrugBank].[dbo].[Mixtures] (" +
            "[ID], [Name] , [ingredients])" +
            " VALUES " +
            "(@ID, @Name, @ingredients)";

        // One row per <packager> element.
        const string INSERT_DRUG_PACKAGER =
             "INSERT INTO [DrugBank].[dbo].[Packagers] (" +
             "[ID], [Name], [URL])" +
             " VALUES " +
             "(@ID, @Name, @URL)";

        // One row per <price> element.
        const string INSERT_DRUG_PRICE = 
             "INSERT INTO [DrugBank].[dbo].[Prices] (" +
             "[ID], [Description], [Cost], [Currency], [Unit])" +
             " VALUES " +
             "(@ID, @Description, @Cost, @Currency, @Unit)";

        // One row per <category> element.
        const string INSERT_DRUG_CATEGORY =
            "INSERT INTO [DrugBank].[dbo].[Categories] (" +
            "[ID], [Category], [Mesh ID])" +
            " VALUES " +
            "(@ID, @Category, @Mesh_ID)";

        // One row per <affected-organism> element.
        const string INSERT_DRUG_ORGANISM =
            "INSERT INTO [DrugBank].[dbo].[Organisms] (" +
            "[ID], [Organism])" +
            " VALUES " +
            "(@ID, @Organism)";

        // Patent rows (number, country, approval/expiry dates, pediatric-extension flag).
        const string INSERT_DRUG_PATENT =
            "INSERT INTO [DrugBank].[dbo].[Patents] (" +
            "[ID], [Number], [Country], [Approved], [Expires], [Pediatric Extension]) " +
            " VALUES " +
            "(@ID, @Number, @Country, @Approved, @Expires, @Pediatric_Extension) ";

        // Sequence rows (format, type, sequence text).
        const string INSERT_DRUG_SEQUENCE =
            "INSERT INTO [DrugBank].[dbo].[Sequences] (" +
            "[ID], [Format], [Type], [Sequence])" +
            " VALUES " +
            "(@ID, @Format, @Type, @Sequence)";

        // Property rows (kind, value, source).
        const string INSERT_DRUG_PROPERTY =
            "INSERT INTO [DrugBank].[dbo].[Properties] (" +
            "[ID], [Kind], [Value], [Source])" +
            " VALUES " +
            "(@ID, @Kind, @Value, @Source)";

        // External identifier rows (resource, identifier).
        const string INSERT_DRUG_IDENTIFIER =
            "INSERT INTO [DrugBank].[dbo].[Identifiers] (" +
            "[ID], [Resource], [Identifier])" +
            " VALUES " +
            "(@ID, @Resource, @Identifier)";

        // Enzyme rows (Uniprot id).
        const string INSERT_DRUG_ENZYM =
            "INSERT INTO [DrugBank].[dbo].[Enzymes] (" +
            "[ID], [UniprotID])" +
            " VALUES " +
            "(@ID, @UniprotID)";

        // Prepared INSERT commands, one per target table. All of them are created in the
        // constructor; reference-type fields start out as null, so no initialiser is needed.
        SqlCommand productCmd;
        SqlCommand interactionCmd;
        SqlCommand articleCmd;
        SqlCommand linkCmd;
        SqlCommand drugCmd;
        SqlCommand idCmd;
        SqlCommand mixtureCmd;
        SqlCommand packagerCmd;
        SqlCommand priceCmd;
        SqlCommand categoryCmd;
        SqlCommand organismCmd;
        SqlCommand patentCmd;
        SqlCommand sequenceCmd;
        SqlCommand propertyCmd;
        SqlCommand identifierCmd;
        SqlCommand enzymCmd;

        /// <summary>
        /// Opens the DrugBank connection, creates one parameterised INSERT command per
        /// target table, then streams <paramref name="filename"/> one <c>drug</c> element
        /// at a time and passes each element to the Add* methods.
        /// </summary>
        /// <param name="filename">Path of the XML file to import.</param>
        /// <remarks>
        /// The XML reader is disposed when the import ends. The connection is closed if the
        /// import throws; on success it is left open, as before, because the command fields
        /// used by the public Add* methods still reference it.
        /// </remarks>
        public UploadXml(string filename)
        {
            string connStr = DrugBank.Properties.Settings.Default.DrugBankConnectionString;
            SqlConnection conn = new SqlConnection(connStr);
            try
            {
                conn.Open();

                drugCmd = new SqlCommand(INSERT_DRUG, conn);

                drugCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);
                drugCmd.Parameters.Add("@Created", SqlDbType.DateTime);
                drugCmd.Parameters.Add("@Updated", SqlDbType.DateTime);
                drugCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                drugCmd.Parameters.Add("@Name", SqlDbType.VarChar, 50);
                drugCmd.Parameters.Add("@Description", SqlDbType.VarChar);
                drugCmd.Parameters.Add("@Case_Number", SqlDbType.VarChar, 20);
                drugCmd.Parameters.Add("@Unii", SqlDbType.VarChar, 20);
                drugCmd.Parameters.Add("@State", SqlDbType.VarChar, 20);

                // Spelled exactly as in INSERT_DRUG so the statement also works on a
                // case-sensitive server collation.
                drugCmd.Parameters.Add("@Synthesis_Reference", SqlDbType.VarChar, 1024);
                drugCmd.Parameters.Add("@Indication", SqlDbType.VarChar);
                drugCmd.Parameters.Add("@Pharmacodynamics", SqlDbType.VarChar, 1024);
                drugCmd.Parameters.Add("@Mechanism_of_Action", SqlDbType.VarChar, 1024);
                drugCmd.Parameters.Add("@Toxicity", SqlDbType.VarChar, 1024);
                drugCmd.Parameters.Add("@Metabolism", SqlDbType.VarChar);
                drugCmd.Parameters.Add("@Absorption", SqlDbType.VarChar, 1024);
                drugCmd.Parameters.Add("@Half_Life", SqlDbType.VarChar, 256);
                drugCmd.Parameters.Add("@Protein_Binding", SqlDbType.VarChar, 64);
                drugCmd.Parameters.Add("@Route_of_Elimination", SqlDbType.VarChar);
                drugCmd.Parameters.Add("@Volume_of_Distribution", SqlDbType.VarChar);
                drugCmd.Parameters.Add("@Clearance", SqlDbType.VarChar);

                idCmd = new SqlCommand(INSERT_DRUG_ID, conn);
                idCmd.Parameters.Add("@ID", SqlDbType.VarChar, 256);
                idCmd.Parameters.Add("@ALT_ID", SqlDbType.VarChar, 20);

                // NOTE(review): @Pubmed_ID is 256 wide and @Citation only 20; the sizes look
                // swapped, but they are kept as-is because the table definition is not visible here.
                articleCmd = new SqlCommand(INSERT_DRUG_ARTICLE, conn);
                articleCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                articleCmd.Parameters.Add("@Pubmed_ID", SqlDbType.VarChar, 256);
                articleCmd.Parameters.Add("@Citation", SqlDbType.VarChar, 20);

                linkCmd = new SqlCommand(INSERT_DRUG_LINK, conn);
                linkCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                linkCmd.Parameters.Add("@Title", SqlDbType.VarChar, 256);
                linkCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);

                interactionCmd = new SqlCommand(INSERT_DRUG_INTERACTION, conn);
                interactionCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                interactionCmd.Parameters.Add("@Interaction_ID", SqlDbType.VarChar, 20);
                interactionCmd.Parameters.Add("@Description", SqlDbType.VarChar, 256);

                productCmd = new SqlCommand(INSERT_DRUG_PRODUCT, conn);
                productCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@Name", SqlDbType.VarChar, 128);
                productCmd.Parameters.Add("@Labeller", SqlDbType.VarChar, 64);
                productCmd.Parameters.Add("@NDC_ID", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@NDC_Product_Code", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@DPD_ID", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@EMA_Product_Code", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@EMA_MA_Number", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@Started_Marketing_On", SqlDbType.DateTime2, 20);
                productCmd.Parameters.Add("@Ended_Marketing_On", SqlDbType.DateTime2, 20);
                productCmd.Parameters.Add("@Dosage_Form", SqlDbType.VarChar, 64);
                productCmd.Parameters.Add("@Strength", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@Route", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@FDA_Application_Number", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@Generic", SqlDbType.Bit);
                productCmd.Parameters.Add("@Over_the_Counter", SqlDbType.Bit);
                productCmd.Parameters.Add("@Approved", SqlDbType.Bit);
                productCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);
                productCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);

                mixtureCmd = new SqlCommand(INSERT_DRUG_MIXTURE, conn);
                mixtureCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                mixtureCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);
                // Lower-case to match the "@ingredients" placeholder in INSERT_DRUG_MIXTURE;
                // parameter lookup by name falls back to a case-insensitive match, so
                // Parameters["@Ingredients"] still resolves.
                mixtureCmd.Parameters.Add("@ingredients", SqlDbType.VarChar, 64);

                packagerCmd = new SqlCommand(INSERT_DRUG_PACKAGER, conn);
                packagerCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                packagerCmd.Parameters.Add("@Name", SqlDbType.VarChar, 64);
                packagerCmd.Parameters.Add("@URL", SqlDbType.VarChar, 64);

                priceCmd = new SqlCommand(INSERT_DRUG_PRICE, conn);
                priceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                priceCmd.Parameters.Add("@Description", SqlDbType.VarChar, 128);
                priceCmd.Parameters.Add("@Cost", SqlDbType.Decimal);
                priceCmd.Parameters.Add("@Currency", SqlDbType.VarChar, 20);
                priceCmd.Parameters.Add("@Unit", SqlDbType.VarChar, 20);

                categoryCmd = new SqlCommand(INSERT_DRUG_CATEGORY, conn);
                categoryCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                categoryCmd.Parameters.Add("@Category", SqlDbType.VarChar, 128);
                categoryCmd.Parameters.Add("@Mesh_ID", SqlDbType.VarChar, 20);

                organismCmd = new SqlCommand(INSERT_DRUG_ORGANISM, conn);
                organismCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                organismCmd.Parameters.Add("@Organism", SqlDbType.VarChar, 128);

                patentCmd = new SqlCommand(INSERT_DRUG_PATENT, conn);
                patentCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                patentCmd.Parameters.Add("@Number", SqlDbType.VarChar, 20);
                patentCmd.Parameters.Add("@Country", SqlDbType.VarChar, 20);
                patentCmd.Parameters.Add("@Approved", SqlDbType.DateTime2);
                patentCmd.Parameters.Add("@Expires", SqlDbType.DateTime2);
                patentCmd.Parameters.Add("@Pediatric_Extension", SqlDbType.Bit);

                sequenceCmd = new SqlCommand(INSERT_DRUG_SEQUENCE, conn);
                sequenceCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                sequenceCmd.Parameters.Add("@Format", SqlDbType.VarChar, 20);
                sequenceCmd.Parameters.Add("@Sequence", SqlDbType.VarChar);
                sequenceCmd.Parameters.Add("@Type", SqlDbType.VarChar, 20);

                propertyCmd = new SqlCommand(INSERT_DRUG_PROPERTY, conn);
                propertyCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                propertyCmd.Parameters.Add("@Kind", SqlDbType.VarChar, 20);
                propertyCmd.Parameters.Add("@Value", SqlDbType.VarChar, 20);
                propertyCmd.Parameters.Add("@Source", SqlDbType.VarChar, 20);

                identifierCmd = new SqlCommand(INSERT_DRUG_IDENTIFIER, conn);
                identifierCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                identifierCmd.Parameters.Add("@Resource", SqlDbType.VarChar, 64);
                identifierCmd.Parameters.Add("@Identifier", SqlDbType.VarChar, 64);

                enzymCmd = new SqlCommand(INSERT_DRUG_ENZYM, conn);
                enzymCmd.Parameters.Add("@ID", SqlDbType.VarChar, 20);
                enzymCmd.Parameters.Add("@UniprotID", SqlDbType.VarChar, 20);

                // Dispose the reader (and its file handle) even when an insert fails.
                using (XmlReader reader = XmlReader.Create(filename))
                {
                    while (!reader.EOF)
                    {
                        // ReadFrom leaves the reader on the node after the element it consumed,
                        // so only advance when that node is not already the next <drug>.
                        if (reader.Name != "drug")
                        {
                            reader.ReadToFollowing("drug");
                        }
                        if (reader.EOF)
                        {
                            break;
                        }

                        XElement drug = (XElement)XElement.ReadFrom(reader);
                        // The primary id is the first <drugbank-id> child carrying a "primary" attribute.
                        string primaryID = (string)drug.Elements().Where(x => (x.Name.LocalName == "drugbank-id") && (x.Attribute("primary") != null)).FirstOrDefault();

                        AddDrug(conn, drug, primaryID);
                        AddArticles(conn, drug, primaryID);
                        AddInteractions(conn, drug, primaryID);
                        AddProducts(conn, drug, primaryID);
                        AddMixtures(conn, drug, primaryID);
                        AddPackagers(conn, drug, primaryID);
                        AddPrices(conn, drug, primaryID);
                        AddCategories(conn, drug, primaryID);
                        AddOrganisms(conn, drug, primaryID);
                        AddPatents(conn, drug, primaryID);
                        AddSequences(conn, drug, primaryID);
                        AddProperties(conn, drug, primaryID);
                        AddIdentifiers(conn, drug, primaryID);
                        AddEnzymes(conn, drug, primaryID);
                    }
                }
            }
            catch
            {
                // A constructor that throws never hands the instance to the caller, so nothing
                // else could ever close this connection.
                conn.Dispose();
                throw;
            }
        }

答案 1 :(得分:3)

这里最简单的方法是使用xsd your.xsd /c,将生成的your.cs添加到项目中,并对任何类型的“root”类型使用XmlSerializer;它看起来像drugbanktype,所以:

// Deserialize the whole document into the xsd.exe-generated root type.
XmlSerializer ser = new XmlSerializer(typeof(drugbanktype));
drugbanktype obj = (drugbanktype)ser.Deserialize(reader);

然后走obj找到你需要的东西;据推测:

foreach(var drug in obj.drug) {
  ...
}

注意:很抱歉,xsd.exe生成的类型名称不太好看;它们只是照搬了xsd中的xml结构。如果您愿意,可以更改它们,但如果之后更新了xsd并需要重新生成C#代码,就会比较麻烦。

处理大量数据时,最好的办法是使用XmlReader跳过不需要的数据,然后使用XmlSerializer配合子树读取器(ReadSubtree)读取需要的数据;这意味着您可以一次处理一个项目,而无需将所有内容反序列化为单个对象模型(那样可能会导致内存问题)。例如:

/// <summary>
/// Streams "my.xml" and prints the name of every <c>product</c> element in the
/// DrugBank namespace, deserializing one element at a time.
/// </summary>
class Program
{
    static void Main()
    {
        using (XmlReader reader = XmlReader.Create("my.xml"))
        {
            XmlSerializer ser = new XmlSerializer(typeof(Product));

            while (reader.Read())
            {
                // Skip everything that is not a <product> start tag in the DrugBank namespace.
                bool isProduct = reader.NodeType == XmlNodeType.Element
                    && reader.Name == "product"
                    && reader.NamespaceURI == "http://www.drugbank.ca";
                if (!isProduct)
                {
                    continue;
                }

                // The sub-reader is scoped to the current element.
                using (XmlReader subReader = reader.ReadSubtree())
                {
                    Product obj = (Product)ser.Deserialize(subReader);
                    Console.WriteLine(obj.Name);
                }
            }
        }
    }
}
/// <summary>
/// Minimal serialization model for a <c>product</c> element in the
/// "http://www.drugbank.ca" namespace; only the <c>name</c> child is mapped.
/// </summary>
[XmlRoot(ElementName = "product", Namespace = "http://www.drugbank.ca")]
public class Product
{
    /// <summary>Text of the <c>name</c> child element.</summary>
    [XmlElement(ElementName = "name", Namespace = "http://www.drugbank.ca")]
    public string Name { get; set; }
}

while(reader.Read())按顺序查看所有节点;当我们检测到<product>(即if检查)时,我们使用ReadSubtree创建一个作用于该元素的子读取器,并反序列化该元素。然后我们继续前进。

答案 2 :(得分:2)

只是添加替代版本,

// Walk the document and materialise each <product> element as an XElement.
// XNode.ReadFrom consumes the element and leaves the reader on the node that follows it,
// so the loop must only call Read() when it did NOT just consume an element; otherwise
// that following node (possibly the next <product>) would be skipped.
while (!reader.EOF)
{
    if (reader.NodeType == XmlNodeType.Element && reader.LocalName == "product")
    {
        // ReadFrom is declared to return XNode; the cast is needed to reach Element(...).
        XElement productElement = (XElement)XNode.ReadFrom(reader);

        // use element
        // Look the child up in the same namespace as its parent; a bare "name" would
        // not match an element that lives in a default namespace.
        XElement nameElement = productElement.Element(productElement.Name.Namespace + "name");
        string productName = nameElement == null ? null : nameElement.Value;
    }
    else
    {
        reader.Read();
    }
}

XElement类来自System.Xml.Linq,我发现它是处理XML的最简单方法(没有对类进行完全反序列化)。

答案 3 :(得分:1)

以下是代码的第2部分:

       /// <summary>
       /// Inserts one row into the IDs table for each primary <c>drugbank-id</c> child of
       /// <paramref name="drug"/>, then one row into the Drugs table built from the drug's
       /// attributes and direct child elements.
       /// </summary>
       /// <param name="conn">Not used by this method; the commands were bound to their connection in the constructor.</param>
       /// <param name="drug">The <c>drug</c> element to import.</param>
       /// <param name="primaryID">Primary DrugBank id used as the key of every inserted row.</param>
       /// <remarks>
       /// Child elements that are absent are written as SQL NULL (<c>state</c> as an empty
       /// string, as before). The <c>created</c>/<c>updated</c> attributes are still
       /// required: the DateTime conversion throws when they are missing.
       /// </remarks>
       public void AddDrug(SqlConnection conn, XElement drug, string primaryID)
        {
            // Text of the first direct child with the given local name, or null when absent.
            Func<string, string> childText = localName =>
                (string)drug.Elements().Where(x => x.Name.LocalName == localName).FirstOrDefault();
            Func<string, string> trimmed = text => text == null ? null : text.Trim();
            // A null SqlParameter.Value means "parameter not supplied" and makes the INSERT
            // fail; DBNull.Value is what actually sends NULL.
            Func<object, object> orDbNull = value => value ?? (object)DBNull.Value;

            string dType = trimmed((string)drug.Attribute("type"));
            DateTime created = (DateTime)drug.Attribute("created");
            DateTime updated = (DateTime)drug.Attribute("updated");

            // NOTE(review): this filter keeps only ids that carry the "primary" attribute, so the
            // ALT ID written below normally equals the primary id - confirm whether every
            // <drugbank-id> was meant to be recorded.
            List<XElement> drugbank_ids = drug.Elements().Where(x => (x.Name.LocalName == "drugbank-id") && (x.Attribute("primary") != null)).ToList();
            foreach (XElement drugbank_id in drugbank_ids)
            {
                idCmd.Parameters["@ID"].Value = orDbNull(primaryID);
                idCmd.Parameters["@ALT_ID"].Value = orDbNull((string)drugbank_id);
                idCmd.ExecuteNonQuery();
            }

            string name = trimmed(childText("name"));
            string description = trimmed(childText("description"));
            // A missing <state> is stored as an empty string rather than NULL.
            string state = trimmed(childText("state")) ?? "";

            drugCmd.Parameters["@Type"].Value = orDbNull(dType);
            drugCmd.Parameters["@Created"].Value = created;
            drugCmd.Parameters["@Updated"].Value = updated;
            drugCmd.Parameters["@ID"].Value = orDbNull(primaryID);
            drugCmd.Parameters["@Name"].Value = orDbNull(name);
            drugCmd.Parameters["@Description"].Value = orDbNull(description);
            drugCmd.Parameters["@Case_Number"].Value = orDbNull(childText("cas-number"));
            drugCmd.Parameters["@Unii"].Value = orDbNull(childText("unii"));
            drugCmd.Parameters["@State"].Value = state;

            drugCmd.Parameters["@Synthesis_Reference"].Value = orDbNull(childText("synthesis-reference"));
            drugCmd.Parameters["@Indication"].Value = orDbNull(childText("indication"));
            drugCmd.Parameters["@Pharmacodynamics"].Value = orDbNull(childText("pharmacodynamics"));
            drugCmd.Parameters["@Mechanism_of_Action"].Value = orDbNull(childText("mechanism-of-action"));
            drugCmd.Parameters["@Toxicity"].Value = orDbNull(childText("toxicity"));
            drugCmd.Parameters["@Metabolism"].Value = orDbNull(childText("metabolism"));
            drugCmd.Parameters["@Absorption"].Value = orDbNull(childText("absorption"));
            drugCmd.Parameters["@Half_Life"].Value = orDbNull(childText("half-life"));
            drugCmd.Parameters["@Protein_Binding"].Value = orDbNull(childText("protein-binding"));
            drugCmd.Parameters["@Route_of_Elimination"].Value = orDbNull(childText("route-of-elimination"));
            drugCmd.Parameters["@Volume_of_Distribution"].Value = orDbNull(childText("volume-of-distribution"));
            drugCmd.Parameters["@Clearance"].Value = orDbNull(childText("clearance"));

            drugCmd.ExecuteNonQuery();
        }

        /// <summary>
        /// Inserts one Articles row per <c>article</c> descendant and one Links row per
        /// <c>link</c> descendant of <paramref name="drug"/>, all keyed by <paramref name="id"/>.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        public void AddArticles(SqlConnection conn, XElement drug, string id)
        {
            IEnumerable<XElement> articles = drug.Descendants().Where(node => node.Name.LocalName == "article");
            foreach (XElement article in articles)
            {
                XElement pubmedNode = article.Elements().FirstOrDefault(child => child.Name.LocalName == "pubmed-id");
                XElement citationNode = article.Elements().FirstOrDefault(child => child.Name.LocalName == "citation");

                SqlParameterCollection args = articleCmd.Parameters;
                args["@ID"].Value = id;
                args["@Pubmed_ID"].Value = (string)pubmedNode;
                args["@Citation"].Value = (string)citationNode;

                articleCmd.ExecuteNonQuery();
            }

            IEnumerable<XElement> links = drug.Descendants().Where(node => node.Name.LocalName == "link");
            foreach (XElement link in links)
            {
                XElement titleNode = link.Elements().FirstOrDefault(child => child.Name.LocalName == "title");
                XElement urlNode = link.Elements().FirstOrDefault(child => child.Name.LocalName == "url");

                SqlParameterCollection args = linkCmd.Parameters;
                args["@ID"].Value = id;
                args["@Title"].Value = (string)titleNode;
                args["@URL"].Value = (string)urlNode;

                linkCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Inserts one Interactions row per <c>drug-interaction</c> descendant of
        /// <paramref name="drug"/>: the trimmed <c>drugbank-id</c> of the interacting drug
        /// and the interaction description.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        public void AddInteractions(SqlConnection conn, XElement drug, string id)
        {
            IEnumerable<XElement> interactions = drug.Descendants().Where(node => node.Name.LocalName == "drug-interaction");
            foreach (XElement interaction in interactions)
            {
                XElement otherIdNode = interaction.Elements().FirstOrDefault(child => child.Name.LocalName == "drugbank-id");
                string otherId = ((string)otherIdNode).Trim();
                XElement descriptionNode = interaction.Elements().FirstOrDefault(child => child.Name.LocalName == "description");

                SqlParameterCollection args = interactionCmd.Parameters;
                args["@ID"].Value = id;
                args["@Interaction_ID"].Value = otherId;
                args["@Description"].Value = (string)descriptionNode;

                interactionCmd.ExecuteNonQuery();
            }
        }

        /// <summary>
        /// Inserts one Products row per <c>product</c> descendant of <paramref name="drug"/>.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        /// <remarks>
        /// Absent text children and empty flag elements are written as SQL NULL. Empty or
        /// absent marketing dates are still written as <c>new DateTime()</c>, as before.
        /// </remarks>
        public void AddProducts(SqlConnection conn, XElement drug, string id)
        {
            // Text of the first direct child of 'parent' with the given local name, or null.
            Func<XElement, string, string> childText = (parent, localName) =>
                (string)parent.Elements().Where(e => e.Name.LocalName == localName).FirstOrDefault();
            Func<string, string> trimmed = text => text == null ? null : text.Trim();
            // A null SqlParameter.Value means "parameter not supplied" and makes the INSERT
            // fail; DBNull.Value is what actually sends NULL.
            Func<object, object> orDbNull = value => value ?? (object)DBNull.Value;
            // "true" -> 1, any other non-empty text -> 0, empty/absent -> NULL.
            Func<string, object> toBit = text =>
                string.IsNullOrEmpty(text) ? (object)DBNull.Value : (object)(text == "true");
            // Parsed with the invariant culture so the result does not depend on the
            // machine's regional settings.
            Func<string, DateTime> toDate = text =>
                string.IsNullOrEmpty(text)
                    ? new DateTime()
                    : DateTime.Parse(text, System.Globalization.CultureInfo.InvariantCulture);

            foreach (XElement product in drug.Descendants().Where(e => e.Name.LocalName == "product"))
            {
                SqlParameterCollection p = productCmd.Parameters;

                p["@ID"].Value = orDbNull(id);
                p["@Name"].Value = orDbNull(trimmed(childText(product, "name")));
                p["@Labeller"].Value = orDbNull(trimmed(childText(product, "labeller")));
                p["@NDC_ID"].Value = orDbNull(childText(product, "ndc-id"));
                p["@NDC_Product_Code"].Value = orDbNull(childText(product, "ndc-product-code"));
                p["@DPD_ID"].Value = orDbNull(childText(product, "dpd-id"));
                p["@EMA_Product_Code"].Value = orDbNull(childText(product, "ema-product-code"));
                p["@EMA_MA_Number"].Value = orDbNull(childText(product, "ema-ma-number"));
                p["@Started_Marketing_On"].Value = toDate(childText(product, "started-marketing-on"));
                p["@Ended_Marketing_On"].Value = toDate(childText(product, "ended-marketing-on"));
                p["@Dosage_Form"].Value = orDbNull(childText(product, "dosage-form"));
                p["@Strength"].Value = orDbNull(childText(product, "strength"));
                p["@Route"].Value = orDbNull(childText(product, "route"));
                p["@FDA_Application_Number"].Value = orDbNull(childText(product, "fda-application-number"));
                p["@Generic"].Value = toBit(childText(product, "generic"));
                p["@Over_the_Counter"].Value = toBit(childText(product, "over-the-counter"));
                p["@Approved"].Value = toBit(childText(product, "approved"));
                p["@Country"].Value = orDbNull(childText(product, "country"));
                p["@Source"].Value = orDbNull(childText(product, "source"));

                productCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Inserts one Mixtures row per <c>mixture</c> descendant of <paramref name="drug"/>,
        /// using the trimmed <c>name</c> and <c>ingredients</c> children.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        public void AddMixtures(SqlConnection conn, XElement drug, string id)
        {
            IEnumerable<XElement> mixtures = drug.Descendants().Where(node => node.Name.LocalName == "mixture");
            foreach (XElement mixture in mixtures)
            {
                XElement nameNode = mixture.Elements().FirstOrDefault(child => child.Name.LocalName == "name");
                string mixtureName = ((string)nameNode).Trim();
                XElement ingredientsNode = mixture.Elements().FirstOrDefault(child => child.Name.LocalName == "ingredients");
                string ingredients = ((string)ingredientsNode).Trim();

                SqlParameterCollection args = mixtureCmd.Parameters;
                args["@ID"].Value = id;
                args["@Name"].Value = mixtureName;
                args["@Ingredients"].Value = ingredients;

                mixtureCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Inserts one Packagers row per <c>packager</c> descendant of <paramref name="drug"/>,
        /// using the trimmed <c>name</c> and <c>url</c> children.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        public void AddPackagers(SqlConnection conn, XElement drug, string id)
        {
            IEnumerable<XElement> packagers = drug.Descendants().Where(node => node.Name.LocalName == "packager");
            foreach (XElement packager in packagers)
            {
                XElement nameNode = packager.Elements().FirstOrDefault(child => child.Name.LocalName == "name");
                string packagerName = ((string)nameNode).Trim();
                XElement urlNode = packager.Elements().FirstOrDefault(child => child.Name.LocalName == "url");
                string packagerUrl = ((string)urlNode).Trim();

                SqlParameterCollection args = packagerCmd.Parameters;
                args["@ID"].Value = id;
                args["@Name"].Value = packagerName;
                args["@URL"].Value = packagerUrl;

                packagerCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Inserts one Prices row per <c>price</c> descendant of <paramref name="drug"/>:
        /// description, cost (with the <c>currency</c> attribute of the <c>cost</c> element)
        /// and unit.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">The <c>drug</c> element being imported.</param>
        /// <param name="id">Primary id of the drug.</param>
        /// <remarks>
        /// Absent children, and an empty or absent cost, are written as SQL NULL. A non-empty
        /// cost that is not a valid number still throws <see cref="FormatException"/>.
        /// </remarks>
        public void AddPrices(SqlConnection conn, XElement drug, string id)
        {
            Func<XElement, string, XElement> child = (parent, localName) =>
                parent.Elements().Where(e => e.Name.LocalName == localName).FirstOrDefault();
            Func<string, string> trimmed = text => text == null ? null : text.Trim();
            // A null SqlParameter.Value means "parameter not supplied" and makes the INSERT
            // fail; DBNull.Value is what actually sends NULL.
            Func<object, object> orDbNull = value => value ?? (object)DBNull.Value;

            foreach (XElement price in drug.Descendants().Where(e => e.Name.LocalName == "price"))
            {
                string description = trimmed((string)child(price, "description"));
                XElement xCost = child(price, "cost");
                string costText = trimmed((string)xCost);
                string currency = xCost == null ? null : (string)xCost.Attribute("currency");
                string unit = trimmed((string)child(price, "unit"));

                // Convert here with the invariant culture instead of handing a string to the
                // Decimal parameter: the provider's own string-to-decimal coercion follows the
                // machine's regional settings, so "12.50" can fail or be misread where ',' is
                // the decimal separator.
                object cost = string.IsNullOrEmpty(costText)
                    ? (object)DBNull.Value
                    : (object)decimal.Parse(costText, System.Globalization.CultureInfo.InvariantCulture);

                priceCmd.Parameters["@ID"].Value = orDbNull(id);
                priceCmd.Parameters["@Description"].Value = orDbNull(description);
                priceCmd.Parameters["@Cost"].Value = cost;
                priceCmd.Parameters["@Currency"].Value = orDbNull(currency);
                priceCmd.Parameters["@Unit"].Value = orDbNull(unit);

                priceCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>categoryCmd</c> once for every "category" child of the first "categories" element
        /// found beneath <paramref name="drug"/>. Does nothing when no "categories" element exists.
        /// </summary>
        /// <param name="conn">Not used by this method; kept so the signature matches the other Add* methods.</param>
        /// <param name="drug">Element whose descendants are searched for the "categories" container.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddCategories(SqlConnection conn, XElement drug, string id)
        {
            XElement categories = drug.Descendants().FirstOrDefault(x => x.Name.LocalName == "categories");
            if (categories == null)
            {
                // FirstOrDefault returns null when the container is absent; there is nothing to insert in that case.
                return;
            }

            foreach (XElement xCategory in categories.Elements().Where(x => x.Name.LocalName == "category"))
            {
                // The category name is stored in a nested child that is itself called "category".
                string category = (string)xCategory.Elements().FirstOrDefault(x => x.Name.LocalName == "category");
                string meshID = (string)xCategory.Elements().FirstOrDefault(x => x.Name.LocalName == "mesh-id");

                categoryCmd.Parameters["@ID"].Value = id;
                // Missing values are sent as SQL NULL instead of throwing on Trim() of a null string.
                categoryCmd.Parameters["@Category"].Value = category == null ? (object)DBNull.Value : category.Trim();
                categoryCmd.Parameters["@Mesh_ID"].Value = meshID == null ? (object)DBNull.Value : meshID.Trim();

                categoryCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>organismCmd</c> once for every "affected-organism" element found beneath <paramref name="drug"/>,
        /// passing the element's trimmed text as @Organism.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">Element whose descendants are scanned.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddOrganisms(SqlConnection conn, XElement drug, string id)
        {
            var affectedOrganisms = drug.Descendants().Where(node => node.Name.LocalName == "affected-organism");

            foreach (XElement organismElement in affectedOrganisms)
            {
                string trimmedText = ((string)organismElement).Trim();

                var parameters = organismCmd.Parameters;
                parameters["@ID"].Value = id;
                parameters["@Organism"].Value = trimmedText;

                organismCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>patentCmd</c> once for every "patent" element found beneath <paramref name="drug"/>.
        /// </summary>
        /// <param name="conn">Not used by this method; kept so the signature matches the other Add* methods.</param>
        /// <param name="drug">Element whose descendants are scanned for "patent" elements.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddPatents(SqlConnection conn, XElement drug, string id)
        {
            foreach (XElement patent in drug.Descendants().Where(x => x.Name.LocalName == "patent"))
            {
                string number = (string)patent.Elements().FirstOrDefault(x => x.Name.LocalName == "number");
                string country = (string)patent.Elements().FirstOrDefault(x => x.Name.LocalName == "country");
                // The nullable conversion returns null for a missing element instead of throwing like the (DateTime) cast does.
                DateTime? approved = (DateTime?)patent.Elements().FirstOrDefault(x => x.Name.LocalName == "approved");
                DateTime? expires = (DateTime?)patent.Elements().FirstOrDefault(x => x.Name.LocalName == "expires");
                string pediatricText = (string)patent.Elements().FirstOrDefault(x => x.Name.LocalName == "pediatric-extension");

                // "true" maps to 1, any other non-empty text to 0, and a missing/empty value stays null.
                byte? pediatricExtension = null;
                if (!string.IsNullOrEmpty(pediatricText))
                {
                    pediatricExtension = pediatricText == "true" ? (byte)1 : (byte)0;
                }

                patentCmd.Parameters["@ID"].Value = id;
                // Every optional value is converted to DBNull.Value, because a null parameter value is rejected
                // by SqlCommand as "parameter not supplied".
                patentCmd.Parameters["@Number"].Value = number == null ? (object)DBNull.Value : number.Trim();
                patentCmd.Parameters["@Country"].Value = country == null ? (object)DBNull.Value : country.Trim();
                patentCmd.Parameters["@Approved"].Value = (object)approved ?? DBNull.Value;
                patentCmd.Parameters["@Expires"].Value = (object)expires ?? DBNull.Value;
                patentCmd.Parameters["@Pediatric_Extension"].Value = (object)pediatricExtension ?? DBNull.Value;

                patentCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>sequenceCmd</c> for every "sequence" element beneath <paramref name="drug"/>, then for the
        /// first "amino-acid-sequence" element and the first "gene-sequence" element when they exist.
        /// </summary>
        /// <param name="conn">Not used by this method; kept so the signature matches the other Add* methods.</param>
        /// <param name="drug">Element whose descendants are scanned for sequence elements.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddSequences(SqlConnection conn, XElement drug, string id)
        {
            // Gather (type label, element) pairs first so one insert loop handles all three kinds identically.
            var rows = new List<KeyValuePair<string, XElement>>();

            foreach (XElement xSequence in drug.Descendants().Where(x => x.Name.LocalName == "sequence"))
            {
                rows.Add(new KeyValuePair<string, XElement>("sequence", xSequence));
            }

            XElement aminoAcidSequence = drug.Descendants().FirstOrDefault(x => x.Name.LocalName == "amino-acid-sequence");
            if (aminoAcidSequence != null)
            {
                rows.Add(new KeyValuePair<string, XElement>("amino-acid-sequence", aminoAcidSequence));
            }

            XElement geneSequence = drug.Descendants().FirstOrDefault(x => x.Name.LocalName == "gene-sequence");
            if (geneSequence != null)
            {
                // The stored type label uses an underscore, unlike the element name; kept unchanged so stored data stays the same.
                rows.Add(new KeyValuePair<string, XElement>("gene_sequence", geneSequence));
            }

            foreach (KeyValuePair<string, XElement> row in rows)
            {
                string format = (string)row.Value.Attribute("format");

                sequenceCmd.Parameters["@ID"].Value = id;
                // A missing format attribute is stored as an empty string for every sequence kind; a null value
                // would make SqlCommand report the parameter as not supplied.
                sequenceCmd.Parameters["@Format"].Value = format ?? "";
                sequenceCmd.Parameters["@Type"].Value = row.Key;
                sequenceCmd.Parameters["@Sequence"].Value = ((string)row.Value).Trim();

                sequenceCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>propertyCmd</c> once for every "property" element found beneath <paramref name="drug"/>.
        /// </summary>
        /// <param name="conn">Not used by this method; kept so the signature matches the other Add* methods.</param>
        /// <param name="drug">Element whose descendants are scanned for "property" elements.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddProperties(SqlConnection conn, XElement drug, string id)
        {
            foreach (XElement property in drug.Descendants().Where(x => x.Name.LocalName == "property"))
            {
                // A missing child element casts to a null string; trimming happens only after the null check below.
                string kind = (string)property.Elements().FirstOrDefault(x => x.Name.LocalName == "kind");
                string value = (string)property.Elements().FirstOrDefault(x => x.Name.LocalName == "value");
                string source = (string)property.Elements().FirstOrDefault(x => x.Name.LocalName == "source");

                propertyCmd.Parameters["@ID"].Value = id;
                // DBNull.Value sends SQL NULL; a plain null would be rejected by SqlCommand as "not supplied".
                propertyCmd.Parameters["@Kind"].Value = kind == null ? (object)DBNull.Value : kind.Trim();
                propertyCmd.Parameters["@Value"].Value = value == null ? (object)DBNull.Value : value.Trim();
                propertyCmd.Parameters["@Source"].Value = source == null ? (object)DBNull.Value : source.Trim();

                propertyCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>identifierCmd</c> once for every "external-identifier" element found beneath <paramref name="drug"/>.
        /// </summary>
        /// <param name="conn">Not used by this method; kept so the signature matches the other Add* methods.</param>
        /// <param name="drug">Element whose descendants are scanned for "external-identifier" elements.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddIdentifiers(SqlConnection conn, XElement drug, string id)
        {
            foreach (XElement xIdentifier in drug.Descendants().Where(x => x.Name.LocalName == "external-identifier"))
            {
                // A missing child element casts to a null string; trimming happens only after the null check below.
                string resource = (string)xIdentifier.Elements().FirstOrDefault(x => x.Name.LocalName == "resource");
                string identifier = (string)xIdentifier.Elements().FirstOrDefault(x => x.Name.LocalName == "identifier");

                identifierCmd.Parameters["@ID"].Value = id;
                // DBNull.Value sends SQL NULL; a plain null would be rejected by SqlCommand as "not supplied".
                identifierCmd.Parameters["@Resource"].Value = resource == null ? (object)DBNull.Value : resource.Trim();
                identifierCmd.Parameters["@Identifier"].Value = identifier == null ? (object)DBNull.Value : identifier.Trim();

                identifierCmd.ExecuteNonQuery();
            }
        }
        /// <summary>
        /// Executes <c>enzymCmd</c> once for every "uniprot-id" element found beneath <paramref name="drug"/>,
        /// passing the element's text (untrimmed) as @UniprotID.
        /// </summary>
        /// <param name="conn">Not used by this method.</param>
        /// <param name="drug">Element whose descendants are scanned.</param>
        /// <param name="id">Value assigned to the @ID parameter of every row.</param>
        public void AddEnzymes(SqlConnection conn, XElement drug, string id)
        {
            var uniprotElements = drug.Descendants().Where(node => node.Name.LocalName == "uniprot-id");

            foreach (XElement uniprotElement in uniprotElements)
            {
                var parameters = enzymCmd.Parameters;
                parameters["@ID"].Value = id;
                parameters["@UniprotID"].Value = (string)uniprotElement;

                enzymCmd.ExecuteNonQuery();
            }
        }

    }

答案 4 :(得分:1)

这是SQL脚本:

-- Create the DrugBank database only when it does not exist yet, so the script can be re-run without error.
IF DB_ID(N'DrugBank') IS NULL
    CREATE DATABASE DrugBank;
GO

-- Switch to the DrugBank database and remove any tables left over from a previous run.
-- Each DROP is guarded with OBJECT_ID so that a first run (no tables yet) raises no errors.
USE DrugBank;
IF OBJECT_ID(N'Drugs', N'U') IS NOT NULL DROP TABLE Drugs;
IF OBJECT_ID(N'IDs', N'U') IS NOT NULL DROP TABLE IDs;
IF OBJECT_ID(N'Articles', N'U') IS NOT NULL DROP TABLE Articles;
IF OBJECT_ID(N'Links', N'U') IS NOT NULL DROP TABLE Links;
IF OBJECT_ID(N'Interactions', N'U') IS NOT NULL DROP TABLE Interactions;
IF OBJECT_ID(N'Products', N'U') IS NOT NULL DROP TABLE Products;
IF OBJECT_ID(N'Mixtures', N'U') IS NOT NULL DROP TABLE Mixtures;
IF OBJECT_ID(N'Packagers', N'U') IS NOT NULL DROP TABLE Packagers;
IF OBJECT_ID(N'Prices', N'U') IS NOT NULL DROP TABLE Prices;
IF OBJECT_ID(N'Categories', N'U') IS NOT NULL DROP TABLE Categories;
IF OBJECT_ID(N'Organisms', N'U') IS NOT NULL DROP TABLE Organisms;
IF OBJECT_ID(N'Patents', N'U') IS NOT NULL DROP TABLE Patents;
IF OBJECT_ID(N'Sequences', N'U') IS NOT NULL DROP TABLE Sequences;
IF OBJECT_ID(N'Properties', N'U') IS NOT NULL DROP TABLE Properties;
IF OBJECT_ID(N'Identifiers', N'U') IS NOT NULL DROP TABLE Identifiers;
IF OBJECT_ID(N'Enzymes', N'U') IS NOT NULL DROP TABLE Enzymes;
GO

-- Drugs: ID is the primary key; every other column is declared NULL.
-- The spelling of [Route of Eelimination] matches the column name used by the C# INSERT statement; keep both in sync.
CREATE TABLE [Drugs]
(
    [type]                   varchar(20)   NULL,
    [Created]                datetime      NULL,
    [Updated]                datetime      NULL,
    [ID]                     varchar(20)   PRIMARY KEY,
    [Name]                   varchar(50)   NULL,
    [Description]            varchar(max)  NULL,
    [Case Number]            varchar(20)   NULL,
    [Unii]                   varchar(20)   NULL,
    [State]                  varchar(20)   NULL,
    [Synthesis Reference]    varchar(1024) NULL,
    [Indication]             varchar(max)  NULL,
    [Pharmacodynamics]       varchar(1024) NULL,
    [Mechanism Of Action]    varchar(1024) NULL,
    [Toxicity]               varchar(1024) NULL,
    [Metabolism]             varchar(max)  NULL,
    [Absorption]             varchar(1024) NULL,
    [Half Life]              varchar(256)  NULL,
    [Protein Binding]        varchar(64)   NULL,
    [Route of Eelimination]  varchar(max)  NULL,
    [Volume of Distribution] varchar(max)  NULL,
    [Clearance]              varchar(max)  NULL
)



-- IDs: two text columns; no key or constraint is declared.
CREATE TABLE [IDs]
(
    [ID]     varchar(20),
    [ALT ID] varchar(20)
)
-- Articles: an ID plus a PubMed identifier and citation text; no key or constraint is declared.
CREATE TABLE [Articles]
(
    [ID]        varchar(20),
    [Pubmed ID] varchar(20)   NULL,
    [Citation]  varchar(1024) NULL
)
-- Links: an ID plus a title and URL; no key or constraint is declared.
-- NOTE(review): [URL] is limited to 64 characters - confirm this is wide enough for the data being loaded.
CREATE TABLE [Links]
(
    [ID]    varchar(20),
    [Title] varchar(256) NULL,
    [URL]   varchar(64)  NULL
)
-- Interactions: an ID plus an interaction ID and description; no key or constraint is declared.
CREATE TABLE [Interactions]
(
    [ID]             varchar(20),
    [Interaction ID] varchar(20)   NULL,
    [Description]    varchar(1024) NULL
)
-- Products: an ID plus product attributes; the three flags are bit columns and the marketing
-- dates are date columns. No key or constraint is declared.
CREATE TABLE [Products]
(
    [ID]                     varchar(20),
    [Name]                   varchar(128) NULL,
    [Labeller]               varchar(64)  NULL,
    [NDC ID]                 varchar(20)  NULL,
    [NDC Product Code]       varchar(20)  NULL,
    [DPD ID]                 varchar(20)  NULL,
    [EMA Product Code]       varchar(20)  NULL,
    [EMA MA Number]          varchar(20)  NULL,
    [Started Marketing On]   date         NULL,
    [Ended Marketing On]     date         NULL,
    [Dosage Form]            varchar(64)  NULL,
    [Strength]               varchar(20)  NULL,
    [Route]                  varchar(20)  NULL,
    [FDA Application Number] varchar(20)  NULL,
    [Generic]                bit          NULL,
    [Over the Counter]       bit          NULL,
    [Approved]               bit          NULL,
    [Country]                varchar(20)  NULL,
    [Source]                 varchar(20)  NULL
)
-- Mixtures: an ID plus a name and an ingredients text; no key or constraint is declared.
CREATE TABLE [Mixtures]
(
    [ID]          varchar(20),
    [Name]        varchar(64),
    [ingredients] varchar(64) NULL
)
-- Packagers: an ID plus a name and URL; no key or constraint is declared.
CREATE TABLE [Packagers]
(
    [ID]   varchar(20),
    [Name] varchar(64),
    [URL]  varchar(64) NULL
)
-- Prices: an ID plus a description, cost, currency and unit; no key or constraint is declared.
CREATE TABLE Prices
(
    [ID]          varchar(20),
    [Description] varchar(128),
    -- A bare "decimal" means decimal(18,0), which rounds away the fractional part of a cost;
    -- an explicit scale keeps the digits after the decimal point.
    [Cost]        decimal(18, 4) NULL,
    [Currency]    varchar(20),
    [Unit]        varchar(20)
)
-- Categories: an ID plus a category text and a MeSH identifier; no key or constraint is declared.
CREATE TABLE [Categories]
(
    [ID]       varchar(20),
    [Category] varchar(128),
    [Mesh ID]  varchar(20)
)
-- Organisms: an ID plus an organism text; no key or constraint is declared.
CREATE TABLE [Organisms]
(
    [ID]       varchar(20),
    [Organism] varchar(128)
)
-- Patents: an ID plus patent number, country, two dates and a bit flag; no key or constraint is declared.
-- Note that [Approved] is datetime2 while [Expires] is datetime.
CREATE TABLE [Patents]
(
    [ID]                  varchar(20),
    [Number]              varchar(20),
    [Country]             varchar(20),
    [Approved]            datetime2,
    [Expires]             datetime,
    [Pediatric Extension] bit
)
-- Sequences: an ID plus a format, a type label and the sequence text (varchar(max)); no key or constraint is declared.
CREATE TABLE [Sequences]
(
    [ID]       varchar(20),
    [Format]   varchar(20),
    [Type]     varchar(20),
    [Sequence] varchar(max)
)
-- Properties: an ID plus kind, value and source texts; no key or constraint is declared.
-- NOTE(review): [Value] is limited to 32 characters - confirm this is wide enough for the data being loaded.
CREATE TABLE [Properties]
(
    [ID]     varchar(20),
    [Kind]   varchar(64),
    [Value]  varchar(32),
    [Source] varchar(128)
)
-- Identifiers: an ID plus a resource name and an identifier text; no key or constraint is declared.
CREATE TABLE [Identifiers]
(
    [ID]         varchar(20),
    [Resource]   varchar(64),
    [identifier] varchar(64)
)
-- Enzymes: an ID plus a UniProt identifier; no key or constraint is declared.
CREATE TABLE [Enzymes]
(
    [ID]        varchar(20),
    [UniprotID] varchar(20)
)