Azure 数据工厂 - 如何为不定期到达的 blob 构建切片

时间:2016-07-05 13:19:20

标签: azure-data-factory

我的 blob 不按固定的时间表到达,到达时间还会有一些滞后,但其内容需要尽可能及时地加载到 Azure SQL DB 中。

目前 blob 按以下约定命名:logs/{year}/{month}/{day}/{hour}/{minute}/{second}

应该如何配置数据工厂,才能尽快加载这些文件,并且在文件缺失时最好不要产生失败?

我目前的配置如下

输入数据

    {
      "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Table.json",
      "name": "blobs",
      "properties": {
        "type": "AzureBlob",
        "linkedServiceName": "blob",
        "external": true,
        "structure": [
          { "name": "Column0", "type": "Int64" }
        ],
        "typeProperties": {
          "folderPath": "myblobs/{Year}/{Month}/{Day}/{Hour}/{Minute}",
          "format": {
            "type": "TextFormat",
            "rowDelimiter": "\n",
            "columnDelimiter": "\t"
          },
          "partitionedBy": [
            { "name": "Year", "value": { "type": "DateTime", "date": "SliceStart", "format": "yyyy" } },
            { "name": "Month", "value": { "type": "DateTime", "date": "SliceStart", "format": "%M" } },
            { "name": "Day", "value": { "type": "DateTime", "date": "SliceStart", "format": "%d" } },
            { "name": "Hour", "value": { "type": "DateTime", "date": "SliceStart", "format": "%H" } },
            { "name": "Minute", "value": { "type": "DateTime", "date": "SliceStart", "format": "%m" } }
          ]
        },
        "availability": {
          "frequency": "Minute",
          "interval": 15
        },
        "policy": {
          "externalData": {
            "dataDelay": "1:00:00"
          }
        }
      }
    }

管道

{
  "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Pipeline.json",
  "name": "insert",
  "properties": {
    "description": "Insert data from blobs to sql db",
    "start": "2016-01-01T00:00:00Z",
    "end": "2099-05-05T00:00:00Z",
    "activities": [
      {
        "name": "copyblobtosql",
        "type": "Copy",
        "inputs": [
          { "name": "blobs" }
        ],
        "outputs": [
          { "name": "tbl" }
        ],
        "scheduler": {
          "frequency": "Minute",
          "interval": 15
        },
        "policy": {
          "concurrency": 10,
          "executionPriorityOrder": "OldestFirst",
          "retry": 3,
          "timeout": "01:00:00"
        },
        "typeProperties": {
          "source": {
            "type": "BlobSource",
            "recursive": false
          },
          "sink": {
            "type": "SqlSink",
            "writeBatchSize": 0,
            "writeBatchTimeout": "00:00:00"
          },
          "translator": {
            "type": "TabularTranslator",
            "columnMappings": "Column0:id"
          }
        }
      }
    ]
  }
}

输出数据

{
  "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Table.json",
  "name": "tbl",
  "properties": {
    "type": "AzureSqlTable",
    "linkedServiceName": "db",
    "structure": [
      {
        "name": "id",
        "type": "Int32"
      }
    ],
    "typeProperties": {
      "tableName": "tbl"
    },
    "availability": {
      "frequency": "Minute",
      "interval": 15
    }
  }
}

0 个答案:

没有答案