我想实现类似下面这样的功能:取文档中的某个字段,对其进行散列(例如 md5),并用散列结果生成 _id:
PUT index/doc/1?pretty
{
"name": "foo",
"_id": "hash(doc['name'])"
}
答案 0 :(得分:4)
是的,您可以使用 ingest pipeline 来实现。
首先,让我们定义一个带有 script processor 的管道,由它来计算 _id 字段。由于 Painless 没有提供任何散列方法,下面的脚本是 SHA1 的一个 Painless 实现,您也可以将其替换为您选择的任何其他散列算法:
PUT _ingest/pipeline/id-generator
{
"description" : "This pipeline generates an ID based on the SHA1 hash of the name field",
"processors" : [
{
"script": {
"lang": "painless",
"source": """
// Formats a 32-bit int as exactly eight lowercase hexadecimal digits,
// most significant nibble first (negative values show their two's-complement bits).
def hex(int num) {
  String digits = "0123456789abcdef";
  StringBuilder out = new StringBuilder();
  // Step through the eight nibbles from bit 28 down to bit 0.
  for (int shift = 28; shift >= 0; shift -= 4) {
    out.append(digits.charAt((num >> shift) & 15));
  }
  return out.toString();
}
// Converts str into the padded SHA-1 message: an int[] of big-endian 32-bit
// words whose length is a multiple of 16 (one 512-bit block per 16 words).
//
// The string is first encoded as UTF-8 so that the digest matches the SHA-1 of
// the UTF-8 text. Packing raw code points one per byte lane (the previous
// approach) is only correct for ASCII: any character above U+00FF spilled into
// the neighbouring bytes of the word, and the bit length was counted in chars
// instead of bytes. ASCII input produces exactly the same words as before.
//
// Parameters: str - the text to hash (must not be null).
// Returns:    the message words, with the 0x80 terminator appended and the
//             message length in bits stored in the last word.
def str2blks_SHA1(String str){
  // A UTF-16 unit never needs more than 3 UTF-8 bytes (a surrogate pair is
  // 2 units -> 4 bytes), so 3 * length is always a sufficient buffer.
  int[] utf8 = new int[str.length() * 3];
  int len = 0;
  int pos = 0;
  while (pos < str.length()) {
    int cp = str.codePointAt(pos);
    // codePointAt only returns a value above 0xFFFF for a valid surrogate pair,
    // which occupies two chars. An unpaired surrogate is encoded as-is (3 bytes).
    pos += (cp > 65535) ? 2 : 1;
    if (cp < 128) {
      utf8[len] = cp;
      len++;
    } else if (cp < 2048) {
      utf8[len] = 192 | (cp >> 6);
      len++;
      utf8[len] = 128 | (cp & 63);
      len++;
    } else if (cp < 65536) {
      utf8[len] = 224 | (cp >> 12);
      len++;
      utf8[len] = 128 | ((cp >> 6) & 63);
      len++;
      utf8[len] = 128 | (cp & 63);
      len++;
    } else {
      utf8[len] = 240 | (cp >> 18);
      len++;
      utf8[len] = 128 | ((cp >> 12) & 63);
      len++;
      utf8[len] = 128 | ((cp >> 6) & 63);
      len++;
      utf8[len] = 128 | (cp & 63);
      len++;
    }
  }

  // Room for the message, the 0x80 terminator byte and the 8-byte length field.
  int nblk = ((len + 8) >> 6) + 1;
  // New int arrays are zero-filled, so no explicit clearing is needed.
  int[] blks = new int[nblk * 16];
  // Pack four bytes per word, first byte in the most significant position.
  for (int k = 0; k < len; k++) {
    blks[k >> 2] |= utf8[k] << (24 - (k % 4) * 8);
  }
  // Append the mandatory single 1-bit (0x80) right after the message bytes.
  blks[len >> 2] |= 128 << (24 - (len % 4) * 8);
  // Message length in bits goes into the final word of the last block.
  blks[nblk * 16 - 1] = len * 8;
  return blks;
}
// Adds x and y by summing their low and high 16-bit halves separately and
// recombining them; for int operands the result wraps around like 32-bit addition.
def add(def x, def y){
  def low = (x & 0xFFFF) + (y & 0xFFFF);
  // Whatever overflowed out of the low half is carried into the high half.
  def carry = low >> 16;
  def high = (x >> 16) + (y >> 16) + carry;
  return (high << 16) | (low & 0xFFFF);
}
// Rotates num left by cnt bits: the bits shifted out at the top re-enter at
// the bottom (32-bit rotation when num is an int).
def rol(def num, def cnt){
  def wrapped = num >>> (32 - cnt);
  def shifted = num << cnt;
  return shifted | wrapped;
}
// SHA-1 round function for round index t, applied to the state words b, c, d:
//   t < 20        -> choose:   bits of c where b is 1, bits of d where b is 0
//   40 <= t < 60  -> majority: bit is 1 where at least two of b, c, d are 1
//   otherwise     -> parity:   b XOR c XOR d
def ft(def t, def b, def c, def d){
  if (t < 20) {
    return (b & c) | ((~b) & d);
  }
  if (t >= 40 && t < 60) {
    return (b & c) | (b & d) | (c & d);
  }
  return b ^ c ^ d;
}
// Returns the additive round constant for round index t; the constant changes
// every 20 rounds (values are the signed-int forms of 0x5A827999, 0x6ED9EBA1,
// 0x8F1BBCDC and 0xCA62C1D6).
def kt(def t){
  if (t < 20) {
    return 1518500249;
  }
  if (t < 40) {
    return 1859775393;
  }
  if (t < 60) {
    return -1894007588;
  }
  return -899497514;
}
// Computes the SHA-1 digest of str and returns it as a 40-character lowercase
// hexadecimal string (five 32-bit state words, eight hex digits each).
// str is handed to str2blks_SHA1, which declares a String parameter.
def calcSHA1(def str){
// Padded message as 32-bit words, 16 words per 512-bit block.
def x = str2blks_SHA1(str);
// Message schedule: 80 words, rebuilt for every block.
def w = new def[80];
// SHA-1 initial hash state (0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476,
// 0xC3D2E1F0 written as signed ints).
def a = 1732584193;
def b = -271733879;
def c = -1732584194;
def d = 271733878;
def e = -1009589776;
// Process the message one 16-word block at a time.
for(def i = 0; i < x.length; i = i + 16){
// Remember the state at the start of the block; it is added back afterwards.
def olda = a;
def oldb = b;
def oldc = c;
def oldd = d;
def olde = e;
for(def j = 0; j < 80; j++){
if(j < 16) {
// The first 16 schedule words are the block's own words.
w[j] = x[i + j];
} else {
// Later words are derived from four earlier ones, rotated left by one bit.
w[j] = rol(w[j-3] ^ w[j-8] ^ w[j-14] ^ w[j-16], 1);
}
// Round step: t = rol(a, 5) + f(j, b, c, d) + e + w[j] + k(j), via add().
def t = add(add(rol(a, 5), ft(j, b, c, d)), add(add(e, w[j]), kt(j)));
// Shift the working state down by one word; the order of these assignments
// matters because each one reads a value the next one overwrites.
e = d;
d = c;
c = rol(b, 30);
b = a;
a = t;
}
// Fold this block's result into the running hash state.
a = add(a, olda);
b = add(b, oldb);
c = add(c, oldc);
d = add(d, oldd);
e = add(e, olde);
}
// Concatenate the five state words as hex to form the digest.
return hex(a) + hex(b) + hex(c) + hex(d) + hex(e);
}
// Set the document's _id to the SHA-1 hex digest of its "name" field.
// NOTE(review): the value ends up in a String parameter whose length() is
// called, so documents are expected to carry a string-valued "name" — confirm
// how documents without it should be handled.
ctx._id = calcSHA1(ctx.name);
"""
}
}
]
}
然后,您可以通过引用管道来简单地索引您的文档:
POST myindex/_doc?pipeline=id-generator
{
"name": "John Doe"
}
结果:
{
"_index": "myindex",
"_type": "_doc",
"_id": "ae6e4d1209f17b460503904fad297b31e9cf6362",
"_score": 1,
"_source": {
"name": "John Doe"
}
}