我的 blob 并不按固定的时间表到达,而且到达时间会有一定的滞后,但其内容需要尽可能及时地加载到 Azure SQL DB 中。
目前 blob 的路径使用以下约定:logs/{year}/{month}/{day}/{hour}/{minute}/{second}
应如何配置数据工厂才能尽快加载这些文件,并且在某个时间片的文件缺失时,最好不要产生失败?
{
  "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Table.json",
  "name": "blobs",
  "properties": {
    "type": "AzureBlob",
    "linkedServiceName": "blob",
    "external": true,
    "structure": [
      {
        "name": "Column0",
        "type": "Int64"
      }
    ],
    "typeProperties": {
      "folderPath": "myblobs/{Year}/{Month}/{Day}/{Hour}/{Minute}",
      "partitionedBy": [
        {
          "name": "Year",
          "value": {
            "type": "DateTime",
            "date": "SliceStart",
            "format": "yyyy"
          }
        },
        {
          "name": "Month",
          "value": {
            "type": "DateTime",
            "date": "SliceStart",
            "format": "%M"
          }
        },
        {
          "name": "Day",
          "value": {
            "type": "DateTime",
            "date": "SliceStart",
            "format": "%d"
          }
        },
        {
          "name": "Hour",
          "value": {
            "type": "DateTime",
            "date": "SliceStart",
            "format": "%H"
          }
        },
        {
          "name": "Minute",
          "value": {
            "type": "DateTime",
            "date": "SliceStart",
            "format": "%m"
          }
        }
      ],
      "format": {
        "type": "TextFormat",
        "columnDelimiter": "\t",
        "rowDelimiter": "\n"
      }
    },
    "availability": {
      "frequency": "Minute",
      "interval": 15
    },
    "policy": {
      "externalData": {
        "dataDelay": "1:00:00"
      }
    }
  }
}
{
  "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Pipeline.json",
  "name": "insert",
  "properties": {
    "description": "Insert data from blobs to sql db",
    "start": "2016-01-01T00:00:00Z",
    "end": "2099-05-05T00:00:00Z",
    "activities": [
      {
        "name": "copyblobtosql",
        "type": "Copy",
        "inputs": [
          { "name": "blobs" }
        ],
        "outputs": [
          { "name": "tbl" }
        ],
        "scheduler": {
          "frequency": "Minute",
          "interval": 15
        },
        "policy": {
          "concurrency": 10,
          "executionPriorityOrder": "OldestFirst",
          "retry": 3,
          "timeout": "01:00:00"
        },
        "typeProperties": {
          "source": {
            "type": "BlobSource",
            "recursive": false
          },
          "sink": {
            "type": "SqlSink",
            "writeBatchSize": 0,
            "writeBatchTimeout": "00:00:00"
          },
          "translator": {
            "type": "TabularTranslator",
            "columnMappings": "Column0:id"
          }
        }
      }
    ]
  }
}
{
  "$schema": "http://datafactories.schema.management.azure.com/schemas/2015-09-01/Microsoft.DataFactory.Table.json",
  "name": "tbl",
  "properties": {
    "type": "AzureSqlTable",
    "linkedServiceName": "db",
    "availability": {
      "frequency": "Minute",
      "interval": 15
    },
    "typeProperties": {
      "tableName": "tbl"
    },
    "structure": [
      {
        "name": "id",
        "type": "Int32"
      }
    ]
  }
}