我使用Hive2.1.1和Atlas2.0.0。
我可以看到表级沿袭(lineage),但找不到列级沿袭。
详细信息如下所示
为了调试,我首先尝试了 REST API,但返回结果中没有任何与列沿袭相关的内容。
[root@cent1 bin]# curl -X GET -u admin:admin http://cent1:21000/api/atlas/v2/lineage/27c81b16-b422-4479-84b9-2d643b5dba48
{"baseEntityGuid":"27c81b16-b422-4479-84b9-2d643b5dba48","lineageDirection":"BOTH","lineageDepth":3,"guidEntityMap":{},"relations":[]}
下面是我从 ATLAS_HOOK 主题中打印出的 Kafka 消息:
{
"version":{
"version":"1.0.0",
"versionParts":[
1
]
},
"msgCompressionKind":"NONE",
"msgSplitIdx":1,
"msgSplitCount":1,
"msgSourceIP":"192.168.10.128",
"msgCreatedBy":"root",
"msgCreationTime":1572687183793,
"message":{
"type":"ENTITY_CREATE_V2",
"user":"root",
"entities":{
"referredEntities":{
"-53640556650491":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes.bar@primary",
"name":"bar",
"comment":null,
"position":1,
"type":"string",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes@primary"
}
}
},
"guid":"-53640556650491",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650492":{
"typeName":"hive_table",
"attributes":{
"owner":"root",
"temporary":false,
"lastAccessTime":1572687176000,
"qualifiedName":"atlas1.pokes_create3@primary",
"columns":[
{
"guid":"-53640556650494",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3.foo@primary"
}
},
{
"guid":"-53640556650495",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3.bar@primary"
}
}
],
"tableType":"MANAGED_TABLE",
"sd":{
"guid":"-53640556650493",
"typeName":"hive_storagedesc",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3@primary_storage"
}
},
"createTime":1572687176000,
"name":"pokes_create3",
"comment":null,
"partitionKeys":[
],
"parameters":{
"totalSize":"5812",
"numRows":"500",
"rawDataSize":"5312",
"COLUMN_STATS_ACCURATE":"{\"BASIC_STATS\":\"true\"}",
"numFiles":"1",
"transient_lastDdlTime":"1572687178"
},
"db":{
"guid":"-53640556650487",
"typeName":"hive_db",
"uniqueAttributes":{
"qualifiedName":"atlas1@primary"
}
},
"retention":0
},
"guid":"-53640556650492",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650490":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes.foo@primary",
"name":"foo",
"comment":null,
"position":0,
"type":"int",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes@primary"
}
}
},
"guid":"-53640556650490",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650495":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes_create3.bar@primary",
"name":"bar",
"comment":null,
"position":1,
"type":"string",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3@primary"
}
}
},
"guid":"-53640556650495",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650493":{
"typeName":"hive_storagedesc",
"attributes":{
"qualifiedName":"atlas1.pokes_create3@primary_storage",
"storedAsSubDirectories":false,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db/pokes_create3",
"compressed":false,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3@primary"
}
},
"serdeInfo":{
"typeName":"hive_serde",
"attributes":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name":null,
"parameters":{
"serialization.format":"1"
}
}
},
"numBuckets":-1
},
"guid":"-53640556650493",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650494":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes_create3.foo@primary",
"name":"foo",
"comment":null,
"position":0,
"type":"int",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3@primary"
}
}
},
"guid":"-53640556650494",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650488":{
"typeName":"hive_table",
"attributes":{
"owner":"root",
"temporary":false,
"lastAccessTime":1572663197000,
"qualifiedName":"atlas1.pokes@primary",
"columns":[
{
"guid":"-53640556650490",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes.foo@primary"
}
},
{
"guid":"-53640556650491",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes.bar@primary"
}
}
],
"tableType":"MANAGED_TABLE",
"sd":{
"guid":"-53640556650489",
"typeName":"hive_storagedesc",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes@primary_storage"
}
},
"createTime":1572663197000,
"name":"pokes",
"comment":null,
"partitionKeys":[
],
"parameters":{
"transient_lastDdlTime":"1572663225",
"totalSize":"5812",
"numRows":"0",
"rawDataSize":"0",
"numFiles":"1"
},
"db":{
"guid":"-53640556650487",
"typeName":"hive_db",
"uniqueAttributes":{
"qualifiedName":"atlas1@primary"
}
},
"retention":0
},
"guid":"-53640556650488",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650489":{
"typeName":"hive_storagedesc",
"attributes":{
"qualifiedName":"atlas1.pokes@primary_storage",
"storedAsSubDirectories":false,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db/pokes",
"compressed":false,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes@primary"
}
},
"serdeInfo":{
"typeName":"hive_serde",
"attributes":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name":null,
"parameters":{
"serialization.format":"1"
}
}
},
"numBuckets":-1
},
"guid":"-53640556650489",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650487":{
"typeName":"hive_db",
"attributes":{
"owner":"root",
"ownerType":"USER",
"qualifiedName":"atlas1@primary",
"clusterName":"primary",
"name":"atlas1",
"description":null,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db",
"parameters":{
}
},
"guid":"-53640556650487",
"provenanceType":0,
"version":0,
"proxy":false
}
},
"entities":[
{
"typeName":"hive_process",
"attributes":{
"outputs":[
{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3@primary"
}
}
],
"recentQueries":[
"create table pokes_create3 as select foo,bar from pokes"
],
"qualifiedName":"atlas1.pokes_create3@primary:1572687176000",
"inputs":[
{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes@primary"
}
}
],
"name":"create table pokes_create3 as select foo,bar from pokes",
"queryText":"create table pokes_create3 as select foo,bar from pokes",
"operationType":"CREATETABLE_AS_SELECT",
"startTime":1572686979104,
"queryPlan":"Not Supported",
"endTime":1572687183754,
"userName":"root",
"queryId":"root_20191102172939_5de2901c-49e1-473c-ad2f-7e5f5b3251ee"
},
"guid":"-53640556650496",
"provenanceType":0,
"version":0,
"proxy":false
}
]
}
}
}
列沿袭在哪里?我该如何进一步调试呢?
答案 0 :(得分:0)
似乎您需要同时拥有这两个Hive补丁。
在 Hive 2.1.0 中发布的 HIVE-13112:https://issues.apache.org/jira/browse/HIVE-13112
在 Hive 2.2.0 中发布的 HIVE-14706:https://issues.apache.org/jira/browse/HIVE-14706
据我了解,第一个补丁在 CTAS(CREATE TABLE AS SELECT)情况下公开了沿袭信息,第二个补丁则允许 Atlas 插件从 Hive 钩子(hook)中获取这些信息。
根据Atlas文档:https://atlas.apache.org/#/HookHive