在C#中将JSON转换为XML时避免创建<item type="">节点

时间:2016-10-24 08:26:19

标签: c# .net json xml

我们正尝试使用以下C#代码将JSON转换为XML:

// Script fragment: downloads a payload through the "HTTP" connection and saves it as XML,
// using the JSON reader created by JsonReaderWriterFactory.
Object httpConn = Dts.Connections["HTTP"].AcquireConnection(null);
            HttpClientConnection myConnection = new HttpClientConnection(httpConn);
            // NOTE(review): the format string contains no {0}/{1} placeholders, so "userid" and
            // "password" are not inserted into the URL - confirm how credentials should be passed.
            myConnection.ServerURL = string.Format(("http://xxxx.com/jjjj"),"userid","password");
            byte[] webdata = myConnection.DownloadData();

            // NOTE(review): result_data is not read anywhere in this fragment.
            String result_data = Convert.ToBase64String(webdata);
            XmlDocument xd = new XmlDocument();
            // Wrap the downloaded bytes in a reader that presents the JSON as XML nodes.
            XmlDictionaryReader xr = JsonReaderWriterFactory.CreateJsonReader(webdata, XmlDictionaryReaderQuotas.Max);

            // Advance to the first node, then load that node's outer XML into the document.
            xr.Read();
            xd.LoadXml(xr.ReadOuterXml());
            // NOTE(review): in a regular (non-verbatim) string each "\\" is one backslash, so this
            // path starts with a single backslash - confirm whether a UNC share (\\Server\...) was intended.
            xd.Save("\\Server\\ZZZZ\\Downloads\\Data.xml"); 

示例json是:

[
{
        "id" : 21953,
        "mainReqIdentity" : "xxxx",
        "itemName" : "xxxx",
        "kanbanPhase" : "xxxx",
        "kanbanStatus" : "xxxx",
        "backlogItemType" : "xxxx",
        "identityDomain" : "xxxx",
        "fromDatetime" : "2016-08-05 17:52:34",
        "teams" : [],
        "releases" : [{
                "id" : 1229,
                "release_name" : "xxxx",
                "release_connection_type" : "xxxx"
            }
        ],
        "fpReleases" : [],
        "sources" : [{
                "sourceName" : "xxxx",
                "sourceRecordUrl" : "xxxx",
                "sourceRecordIdentity" : "xxxx"
            }
        ],
        "productNumbers" : [],
        "tags" : [],
        "productComponents" : [],
        "ranPlatforms" : [],
        "subReleases" : [],
        "requirementAreaId" : "xxxx",
        "requirementArea" : "xxxx",
        "toBeHandledAtxxxx" : "xxxx"
    }, {
        "id" : 22014,
        "mainReqIdentity" : "xxxx",
        "itemName" : "xxxx",
        "kanbanPhase" : "xxxx",
        "kanbanStatus" : "xxxx",
        "backlogItemType" : "xxxx",
        "identityDomain" : "xxxx",
        "fromDatetime" : "2016-08-05 17:52:34",
        "teams" : [],
        "releases" : [{
                "id" : "xxxx",
                "release_name" : "xxxx",
                "release_connection_type" : "xxxx"
            }
        ],
        "fpReleases" : [],
        "sources" : [{
                "sourceName" : "xxxx",
                "sourceRecordUrl" : "xxxx",
                "sourceRecordIdentity" : "xxxx"
            }
        ],
        "productNumbers" : [],
        "tags" : [],
        "productComponents" : [],
        "ranPlatforms" : [],
        "subReleases" : [],
        "requirementAreaId" : "xxxx",
        "requirementArea" : "xxxx",
        "f0Date" : "2015-10-01",
        "f1Date" : "2015-10-01",
        "f2Date" : "2016-02-01",
        "f4Date" : "2016-03-31",
        "fgDate" : "2016-04-29",
        "toBeHandledAtxxxx" : "xxxx"
    }
    ]

转换后,XML看起来像:

    <root type="array">
    <item type="object">
    <id type="number">21286</id>
    <mainReqIdentity type="string">xxxxxx</mainReqIdentity>
    <itemName type="string">xxxxxx</itemName>
    <kanbanPhase type="string">xxxxxx</kanbanPhase>
    <kanbanStatus type="string">xxxxxx</kanbanStatus>
    <kanbanNote type="string">xxxxxx</kanbanNote>
    <backlogItemType type="string">xxxxxx</backlogItemType>
    <identityDomain type="string">xxxxxx</identityDomain>
    <fromDatetime type="string">2016-08-23 17:01:52</fromDatetime>
    <teams type="array">
      <item type="object">
        <team_name type="string">xxxxxx</team_name>
        <preliminary type="boolean">xxxxxx</preliminary>
      </item>
    </teams>
    <releases type="array">
      <item type="object">
        <id type="number">xxxxxx</id>
        <release_name type="string">xxxxxx</release_name>
        <release_connection_type type="string">xxxxxx</release_connection_type>
      </item>
    </releases>
    <fpReleases type="array">
    </fpReleases>
    <sources type="array">
      <item type="object">
        <sourceName type="string">xxxxxx</sourceName>
        <sourceRecordUrl type="string">xxxxxx</sourceRecordUrl>
      </item>
    </sources>
    <productNumbers type="array">
    </productNumbers>
    <tags type="array">
    </tags>
    <productComponents type="array">
    </productComponents>
    <ranPlatforms type="array">
    </ranPlatforms>
    <subReleases type="array">
    </subReleases>
    <requirementAreaId type="number">xxxxxx</requirementAreaId>
    <requirementArea type="string">xxxxxx</requirementArea>
    <itemContact type="string">xxxxxx</itemContact>
    <toBeHandledAtxxx type="string">xxxxxx</toBeHandledAtxxx>
  </item>
    <item type="object">
    <id type="number">xxxxxx</id>
    <mainReqIdentity type="string">xxxxxx</mainReqIdentity>
    <itemName type="string">xxxxxx</itemName>
    <kanbanPhase type="string">xxxxxx</kanbanPhase>
    <kanbanStatus type="string">xxxxxx</kanbanStatus>
    <kanbanNote type="string">xxxxxx</kanbanNote>
    <backlogItemType type="string">xxxxxx</backlogItemType>
    <identityDomain type="string">xxxxxx</identityDomain>
    <fromDatetime type="string">2016-08-23 17:01:52</fromDatetime>
    <teams type="array">
      <item type="object">
        <team_name type="string">xxxxxx</team_name>
        <preliminary type="boolean">xxxxxx</preliminary>
      </item>
    </teams>
    <releases type="array">
      <item type="object">
        <id type="number">xxxxxx</id>
        <release_name type="string">xxxxxx</release_name>
        <release_connection_type type="string">xxxxxx</release_connection_type>
      </item>
    </releases>
    <fpReleases type="array">
    </fpReleases>
    <sources type="array">
      <item type="object">
        <sourceName type="string">xxxxxx</sourceName>
        <sourceRecordUrl type="string">xxxxxx</sourceRecordUrl>
      </item>
    </sources>
    <productNumbers type="array">
    </productNumbers>
    <tags type="array">
    </tags>
    <productComponents type="array">
    </productComponents>
    <ranPlatforms type="array">
    </ranPlatforms>
    <subReleases type="array">
    </subReleases>
    <requirementAreaId type="number">xxxxxx</requirementAreaId>
    <requirementArea type="string">xxxxxx</requirementArea>
    <oaResultReference type="string">xxxxxx</oaResultReference>
    <itemContact type="string">xxxxxx</itemContact>
    <f0Date type="string">2014-10-17</f0Date>
    <f1Date type="string">2015-01-16</f1Date>
    <f2Date type="string">2015-02-13</f2Date>
    <f4Date type="string">2015-06-12</f4Date>
    <faDate type="string">2015-06-12</faDate>
    <fgDate type="string">2015-06-12</fgDate>
    <toBeHandledAtxxx type="string">xxxxxx</toBeHandledAtxxx>
  </item>
 </root>

如果您仔细观察,会发现每个节点之前都创建了 <item type="..."> 元素。我的问题是如何避免 C# 类创建它们。

1 个答案:

答案 0 :(得分:2)

"Mapping Between JSON and XML" 文档中描述了 JsonReaderWriterFactory 使用的XML和JSON之间的对应关系。特别是:

  • 由于XML不具有数组原语的概念,因此JSON数组被映射到两个XML级别,其中内部元素的[本地名称]将是 "item"。

  • 将属性 "type" 添加到每个元素。如文档中所述,此属性用于在映射的XML中保留JSON类型(字符串,数字,布尔值,对象,数组或null)。

如果此映射不能满足您的需求,您可以在存储XML之前使用某些适当的API(例如 LINQ to XML 或 XSLT transformation)自定义生成的XML。

例如,LINQ to XML(在 .Net 3.5 中取代 XmlDocument 的API)允许在内存中轻松修改XML。首先,将您的JSON加载到 XDocument 中,如下所示:

XDocument xd;
using (var xr = JsonReaderWriterFactory.CreateJsonReader(webdata, XmlDictionaryReaderQuotas.Max))
{
    xd = XDocument.Load(xr);
}

// Un-nest JSON arrays below the root: each child element of an array container takes the
// container's name and is moved up to sit where the container was.
// ToList() snapshots the matching containers because the tree is modified inside the loop.
foreach (var container in xd.Root.Descendants()
                                 .Where(e => (string)e.Attribute("type") == "array")
                                 .ToList())
{
    // Snapshot every child node, and separately the child elements among them.
    var childNodes = container.Nodes().ToList();
    var childElements = childNodes.OfType<XElement>().ToList();

    // Rename each child element (<item>) to the container's name (<releases>, <sources>, ...).
    foreach (var child in childElements)
        child.Name = container.Name;

    // A container with no child elements is left in place unchanged.
    if (childElements.Count == 0)
        continue;

    // Detach the children, re-insert them right after the container, then drop the container.
    childNodes.Remove();
    container.AddAfterSelf(childNodes);
    container.Remove();
}

现在,如果您想将数组项从其外部容器元素中"取消嵌套"(un-nest),可以使用上面的 foreach 循环来完成——根元素的情况除外。如果 XDocument 的根元素对应于JSON数组,则其数组项无法上移,因为这样做会违反每个XML文档必须具有根元素(root element)的规则。话虽如此,该代码应该有效。

"type"

(对于嵌套的JSON数组,可能需要进一步自定义此代码。)

接下来,如果您要删除 "type" 属性,可以使用以下代码:

foreach (var element in xd.Descendants().Where(e => e.Attribute("type") != null))
    element.Attributes("type").Remove();

xd.Save("\\Server\\ZZZZ\\Downloads\\Data.xml");

最后,要保存文档,请执行以下操作:

<root>
  <item>
    <id>21953</id>
    <mainReqIdentity>xxxx</mainReqIdentity>
    <itemName>xxxx</itemName>
    <kanbanPhase>xxxx</kanbanPhase>
    <kanbanStatus>xxxx</kanbanStatus>
    <backlogItemType>xxxx</backlogItemType>
    <identityDomain>xxxx</identityDomain>
    <fromDatetime>2016-08-05 17:52:34</fromDatetime>
    <teams></teams>
    <releases>
      <id>1229</id>
      <release_name>xxxx</release_name>
      <release_connection_type>xxxx</release_connection_type>
    </releases>
    <fpReleases></fpReleases>
    <sources>
      <sourceName>xxxx</sourceName>
      <sourceRecordUrl>xxxx</sourceRecordUrl>
      <sourceRecordIdentity>xxxx</sourceRecordIdentity>
    </sources>
    <productNumbers></productNumbers>
    <tags></tags>
    <productComponents></productComponents>
    <ranPlatforms></ranPlatforms>
    <subReleases></subReleases>
    <requirementAreaId>xxxx</requirementAreaId>
    <requirementArea>xxxx</requirementArea>
    <toBeHandledAtxxxx>xxxx</toBeHandledAtxxxx>
  </item>
  <item>
    <id>22014</id>
    <mainReqIdentity>xxxx</mainReqIdentity>
    <itemName>xxxx</itemName>
    <kanbanPhase>xxxx</kanbanPhase>
    <kanbanStatus>xxxx</kanbanStatus>
    <backlogItemType>xxxx</backlogItemType>
    <identityDomain>xxxx</identityDomain>
    <fromDatetime>2016-08-05 17:52:34</fromDatetime>
    <teams></teams>
    <releases>
      <id>xxxx</id>
      <release_name>xxxx</release_name>
      <release_connection_type>xxxx</release_connection_type>
    </releases>
    <fpReleases></fpReleases>
    <sources>
      <sourceName>xxxx</sourceName>
      <sourceRecordUrl>xxxx</sourceRecordUrl>
      <sourceRecordIdentity>xxxx</sourceRecordIdentity>
    </sources>
    <productNumbers></productNumbers>
    <tags></tags>
    <productComponents></productComponents>
    <ranPlatforms></ranPlatforms>
    <subReleases></subReleases>
    <requirementAreaId>xxxx</requirementAreaId>
    <requirementArea>xxxx</requirementArea>
    <f0Date>2015-10-01</f0Date>
    <f1Date>2015-10-01</f1Date>
    <f2Date>2016-02-01</f2Date>
    <f4Date>2016-03-31</f4Date>
    <fgDate>2016-04-29</fgDate>
    <toBeHandledAtxxxx>xxxx</toBeHandledAtxxxx>
  </item>
</root>

链接这两个转换的结果是:

import gensim
import nltk
from gensim.models import word2vec
from nltk.corpus import stopwords
from nltk.corpus import wordnet
import logging
import re
import itertools
import glob
from collections import defaultdict
import csv
from nltk.stem.wordnet import WordNetLemmatizer
import os
import os.path

# Script: for every pair of comment files, look up both file names in tiny_stat.csv and
# write the pair of matching numbers as one row of tiny_graph.csv.

# Rebinds the imported `stopwords` name to the English stop-word list.
stopwords = nltk.corpus.stopwords.words('english')

path = "/home/mona/computer_vision/imgur/tiny_comments/*.txt"
files = glob.glob(path)
# NOTE(review): binary mode matches the Python 2 csv module; on Python 3, csv.writer needs a
# text-mode file (conventionally opened with newline="") - confirm the target interpreter.
csv_file_complete = open("tiny_graph.csv", "wb")
stat_csv_file = open("tiny_stat.csv", "r")
csv_reader = csv.reader(stat_csv_file)
lemmatizer = WordNetLemmatizer()
list_of_rows = []

# The `with` block closes the file on exit; no explicit close() is needed.
with open('swear_words_uniq.txt') as swear_words_file:
    swear_words = swear_words_file.read()
    swear_words = re.sub("[^a-zA-Z]", ' ', swear_words).lower().split()

# Sets give constant-time membership tests inside the per-word filters below.
_stopword_set = set(stopwords)
_swear_word_set = set(swear_words)

# Map file name -> number. Each row of tiny_stat.csv is unpacked as (number, file name).
# The mapping does not change between pairs, so it is built once instead of per iteration.
tail_to_numbers = {ftail: fnum for fnum, ftail in csv_reader}
stat_csv_file.seek(0)


def _load_clean_words(filename):
    """Return the lemmatized words of `filename`, minus stop words, swear words and short words.

    Lines starting with http:// or https:// are removed first. The remaining text is reduced
    to lower-case alphabetic tokens, stop words are dropped, each token is lemmatized as a
    verb, and tokens that are swear words or have two characters or fewer are discarded.
    """
    with open(filename) as handle:
        text = handle.read()
    text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    words = re.sub("[^a-zA-Z]", ' ', text).lower().split()
    lemmatized = [str(lemmatizer.lemmatize(w, wordnet.VERB)) for w in words if w not in _stopword_set]
    return [w for w in lemmatized if w not in _swear_word_set and len(w) > 2]


for file1, file2 in itertools.combinations(files, 2):
    # NOTE(review): the cleaned word lists are computed but not used by the rows written
    # below - presumably kept for a later similarity step; confirm before removing.
    cleaned_f1_words = _load_clean_words(file1)
    cleaned_f2_words = _load_clean_words(file2)
    f1_tail = os.path.basename(file1)
    f2_tail = os.path.basename(file2)
    try:
        file1_file_number = tail_to_numbers[f1_tail]
        file2_file_number = tail_to_numbers[f2_tail]
    except KeyError as e:
        # A file name missing from tiny_stat.csv: report it and skip this pair.
        print(e)
        continue
    else:
        row_complete = [file1_file_number.strip(), file2_file_number.strip()]
        list_of_rows.append(row_complete)
        print(len(list_of_rows))

a_complete = csv.writer(csv_file_complete, delimiter=',')
for row in list_of_rows:
    print(row)
    a_complete.writerow(row)

csv_file_complete.close()
stat_csv_file.close()