我是BizTalk新手,需要根据下面的CSV文件创建XSD架构。
如您所见,文件包含标题行和订单。每个订单都有自己的订单头——该行包含商店名称并填写了Period字段,但Quantity、Price、Cost和Currency字段为空。每个订单还有若干明细行,这些行除Period外的所有字段均已填写。每个订单头的ItemNumber = 0。在这种情况下,如何创建正确的平面文件架构?
OrderDate;OrderNumber;ItemNumber;DESCRIPTION_LONG;Quantity;Price;Cost;Period;Currency
30-04-17;9;0;Shop: McDonalds;;;;2017-04;
30-04-17;9;1;Double burger;2;5,99;11,98;;USD
30-04-17;9;2;Coca-Cola;2;2,19;4,38;;USD
30-04-17;10;0;Shop: Hunting and fishing;;;;2017-04;
30-04-17;10;1;Fishing rod;2;10,90;21,80;;USD
30-04-17;10;2;Bait;5;1,00;5,00;;USD
30-04-17;10;3;Hunting gun;1;999,00;999,00;;USD
答案 0 :(得分:3)
我认为最好的做法是先将平面文件原样解析为XML,而不要试图在平面文件架构中“强制”出最终结构,正如Pieter在他的回答中所说。像这样:
<?xml version="1.0" encoding="utf-16"?>
<!--
  BizTalk flat-file schema that parses the CSV line by line without grouping:
  Root is split on CR LF; its first record is Header and every following
  record is an OrderItem. Both record types split their nine fields on ';'.
-->
<xs:schema xmlns:b="http://schemas.microsoft.com/BizTalk/2003"
           xmlns="http://BizTalkMassCopy.FlatFileSchema3"
           targetNamespace="http://BizTalkMassCopy.FlatFileSchema3"
           xmlns:xs="http://www.w3.org/2001/XMLSchema">
  <xs:annotation>
    <xs:appinfo>
      <schemaEditorExtension:schemaInfo namespaceAlias="b"
                                        extensionClass="Microsoft.BizTalk.FlatFileExtension.FlatFileExtension"
                                        standardName="Flat File"
                                        xmlns:schemaEditorExtension="http://schemas.microsoft.com/BizTalk/2003/SchemaEditorExtensions" />
      <b:schemaInfo standard="Flat File"
                    codepage="65001"
                    default_pad_char=" "
                    pad_char_type="char"
                    count_positions_by_byte="false"
                    parser_optimization="speed"
                    lookahead_depth="3"
                    suppress_empty_nodes="false"
                    generate_empty_nodes="true"
                    allow_early_termination="false"
                    early_terminate_optional_fields="false"
                    allow_message_breakup_of_infix_root="false"
                    compile_parse_tables="false"
                    root_reference="Root" />
    </xs:appinfo>
  </xs:annotation>

  <!-- Root: the whole file; child records are separated by CR LF (0xD 0xA), infix. -->
  <xs:element name="Root">
    <xs:annotation>
      <xs:appinfo>
        <b:recordInfo structure="delimited"
                      child_delimiter_type="hex"
                      child_delimiter="0xD 0xA"
                      child_order="infix"
                      sequence_number="1"
                      preserve_delimiter_for_empty_data="true"
                      suppress_trailing_delimiters="false" />
      </xs:appinfo>
    </xs:annotation>
    <xs:complexType>
      <xs:sequence>
        <xs:annotation>
          <xs:appinfo>
            <b:groupInfo sequence_number="0" />
          </xs:appinfo>
        </xs:annotation>

        <!-- Header: the single first record (the column-title line in the sample CSV). -->
        <xs:element name="Header">
          <xs:annotation>
            <xs:appinfo>
              <b:recordInfo structure="delimited"
                            child_delimiter_type="char"
                            child_delimiter=";"
                            child_order="infix"
                            sequence_number="1"
                            preserve_delimiter_for_empty_data="true"
                            suppress_trailing_delimiters="false" />
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:sequence>
              <xs:annotation>
                <xs:appinfo>
                  <b:groupInfo sequence_number="0" />
                </xs:appinfo>
              </xs:annotation>
              <xs:element name="HeaderOrderDate" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="1" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderOrderNumber" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="2" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderItemNumber" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="3" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderDescription" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="4" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderQuantity" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="5" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderPrice" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="6" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderCost" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="7" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderPeriod" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="8" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="HeaderCurrency" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="9" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
            </xs:sequence>
          </xs:complexType>
        </xs:element>

        <!-- OrderItem: every record after Header (repeats without limit); same nine string columns. -->
        <xs:element maxOccurs="unbounded" name="OrderItem">
          <xs:annotation>
            <xs:appinfo>
              <b:recordInfo structure="delimited"
                            child_delimiter_type="char"
                            child_delimiter=";"
                            child_order="infix"
                            sequence_number="2"
                            preserve_delimiter_for_empty_data="true"
                            suppress_trailing_delimiters="false" />
            </xs:appinfo>
          </xs:annotation>
          <xs:complexType>
            <xs:sequence>
              <xs:annotation>
                <xs:appinfo>
                  <b:groupInfo sequence_number="0" />
                </xs:appinfo>
              </xs:annotation>
              <xs:element name="OrderDate" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="1" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="OrderNumber" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="2" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="ItemNumber" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="3" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Description" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="4" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Quantity" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="5" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Price" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="6" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Cost" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="7" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Period" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="8" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
              <xs:element name="Currency" type="xs:string">
                <xs:annotation>
                  <xs:appinfo>
                    <b:fieldInfo justification="left" sequence_number="9" />
                  </xs:appinfo>
                </xs:annotation>
              </xs:element>
            </xs:sequence>
          </xs:complexType>
        </xs:element>
      </xs:sequence>
    </xs:complexType>
  </xs:element>
</xs:schema>
使用类似于此的平面架构:
def custom_getter(my_dict, my_key):
    """Return the value stored under ``my_key``, or ``''`` when the key is absent.

    Args:
        my_dict: Dictionary to read from.
        my_key: Key to look up.

    Returns:
        ``my_dict[my_key]`` if the key is present (whatever its value,
        including ``None``), otherwise an empty string.
    """
    # dict.get does the membership test and the lookup in one step.
    return my_dict.get(my_key, '')
# Column order of the CSV; the same names are the keys read from each JSON record.
_csv_columns = ['blockNumber','blockHash','hash','from','to','gas','gasUsed','gasPrice','input','logs','nonce','value','timestamp']

# The `with` block closes the CSV file even if a line fails to parse
# (the bare open() used before was never closed).
# NOTE(review): 'wb+' is the Python 2 convention for csv; on Python 3 csv.writer
# needs a text-mode file, i.e. open(..., 'w', newline='') - confirm the interpreter.
with open('transactions000000000029.csv', 'wb+') as _csv_file:
    output_file = csv.writer(_csv_file)
    output_file.writerow(_csv_columns)
    # `inpt` is defined outside this snippet; each item is parsed as one JSON document.
    for line in inpt:
        resource = json.loads(line)
        # Keys missing from the record become empty cells (custom_getter returns '').
        output_file.writerow([custom_getter(resource, column) for column in _csv_columns])
然后使用映射(map)获取最终结果,并使用 Muenchian 分组等分组机制。
答案 1 :(得分:1)
您可以使用标记标识符(tag identifier)来实现这一点(示例请参阅 https://blogs.msdn.microsoft.com/biztalknotes/2013/02/05/flat-file-schema-creation-with-tag-identifiers-in-the-input-flat-file-repeating-in-a-random-fashion/ ),但我不能100%确定这能正常工作,因为标识符(ItemNumber = 0)不在行的开头。根据我的经验,平面文件反汇编器的探测在这种情况下成功率有限。
在这种情况下,我对人们的一般建议是不要在平面文件架构中尝试“强制”结构,因为很明显你的平面文件结构没有它。我看到的是每一行都有相同的结构/格式。它只是在功能上不同。
从这个角度来看,我会选择一个与您的CSV文件结构相匹配的平面文件架构(只有一种记录类型,字段以逗号分隔,并以换行符作为行分隔符),然后再从该架构映射到一个在功能上更有意义的架构。
答案 2 :(得分:1)
我的解决方案是,
创建用于解析平面文件的源架构。
为分组结果创建目标架构
创建一个映射(map),对源架构的数据进行分组。
第一个脚本 functoid:
<!-- Index every Order record by its OrderNumber; key('groups', n) returns all records of order n. -->
<xsl:key name="groups"
         match="Order"
         use="OrderNumber" />
第二个脚本 functoid:
<!--
  Muenchian grouping of the flat Order records into one <Order> per OrderNumber.

  Outer loop: visits only the first record of each OrderNumber group
  (generate-id comparison against the 'groups' key) and skips the CSV title
  row, whose OrderDate field holds the literal text 'OrderDate'.
  Order-level values (OrderDate, OrderNumber, Description, Period) are read
  from that first record, which is the ItemNumber = 0 line in the sample data.

  Inner loop: emits an <Item> for every record of the group whose ItemNumber
  is greater than 0.
-->
<xsl:for-each select="Order[generate-id(.) = generate-id(key('groups', OrderNumber)[1])][OrderDate != 'OrderDate']">
  <!-- data-type="number" is required: the default text comparison puts "10" before "9". -->
  <xsl:sort select="OrderNumber" data-type="number" order="ascending" />
  <Order>
    <OrderDate><xsl:value-of select="OrderDate" /></OrderDate>
    <OrderNumber><xsl:value-of select="OrderNumber" /></OrderNumber>
    <Description><xsl:value-of select="Description" /></Description>
    <Period><xsl:value-of select="Period" /></Period>
    <Items>
      <xsl:for-each select="key('groups', OrderNumber)[ItemNumber &gt; 0]">
        <Item>
          <ItemNumber><xsl:value-of select="ItemNumber" /></ItemNumber>
          <Quantity><xsl:value-of select="Quantity" /></Quantity>
          <Price><xsl:value-of select="Price" /></Price>
          <Cost><xsl:value-of select="Cost" /></Cost>
          <Currency><xsl:value-of select="Currency" /></Currency>
        </Item>
      </xsl:for-each>
    </Items>
  </Order>
</xsl:for-each>
<!-- Result for the sample CSV (orders in numeric OrderNumber order): -->
<ns0:Orders xmlns:ns0="http://BizTalkServerTestProject.FlatFileSchema6">
  <Order>
    <OrderDate>30-04-17</OrderDate>
    <OrderNumber>9</OrderNumber>
    <Description>Shop: McDonalds</Description>
    <Period>2017-04</Period>
    <Items>
      <Item>
        <ItemNumber>1</ItemNumber>
        <Quantity>2</Quantity>
        <Price>5,99</Price>
        <Cost>11,98</Cost>
        <Currency>USD</Currency>
      </Item>
      <Item>
        <ItemNumber>2</ItemNumber>
        <Quantity>2</Quantity>
        <Price>2,19</Price>
        <Cost>4,38</Cost>
        <Currency>USD</Currency>
      </Item>
    </Items>
  </Order>
  <Order>
    <OrderDate>30-04-17</OrderDate>
    <OrderNumber>10</OrderNumber>
    <Description>Shop: Hunting and fishing</Description>
    <Period>2017-04</Period>
    <Items>
      <Item>
        <ItemNumber>1</ItemNumber>
        <Quantity>2</Quantity>
        <Price>10,90</Price>
        <Cost>21,80</Cost>
        <Currency>USD</Currency>
      </Item>
      <Item>
        <ItemNumber>2</ItemNumber>
        <Quantity>5</Quantity>
        <Price>1,00</Price>
        <Cost>5,00</Cost>
        <Currency>USD</Currency>
      </Item>
      <Item>
        <ItemNumber>3</ItemNumber>
        <Quantity>1</Quantity>
        <Price>999,00</Price>
        <Cost>999,00</Cost>
        <Currency>USD</Currency>
      </Item>
    </Items>
  </Order>
</ns0:Orders>
您可以在此网址找到架构和映射的完整源代码:http://blog.sarslan.com/group-source-schema-in-a-map/