Elasticsearch 数据定时导入

时间:2016-02-17 12:10:10

标签: php elasticsearch

我为自己的数据库搭建了 Elasticsearch,库中大约有 15 万条记录。我是 Elasticsearch 的新手,想在不停止服务器的前提下,每 5 小时从数据库导入一次数据。该如何实现呢?

我使用以下代码创建索引:
# Registers the JDBC river definition by PUTting the `_meta` document to
# `_river/jdbc` on the Elasticsearch node at localhost:9200.
#
# Payload overview:
#   - "jdbc" holds the MySQL connection (driver, url, user, password), the
#     import query ("sql"), and the bulk/retry tuning values.
#   - The query reads from profiledetails joined to lawfirms and aliases
#     pd_profileID as _id; the remaining columns come from correlated
#     sub-selects.
#   - Every date filter is hard-coded to the month literal "2016-01" and is
#     applied only when pd_manualflag = 0 (otherwise the CASE yields 1=1).
#   - "type_mapping" declares dynamic templates that add a not_analyzed
#     "raw" sub-field to the listed fields.
#
# The whole "sql" value must stay on ONE physical line: the payload is inside
# shell single quotes, so any line break would be sent verbatim inside the
# JSON string, and JSON does not allow raw control characters in strings.
#
# NOTE(review): "poll" is "24h" — presumably the re-run interval; the stated
# goal in the surrounding text is every 5 hours. Confirm the correct
# scheduling key/value against the installed river plugin version.
# NOTE(review): credentials are embedded in plain text in this request.
curl -XPUT 'localhost:9200/_river/jdbc/_meta' -d '{
    "type" : "jdbc",
    "jdbc": {
   "driver" : "com.mysql.jdbc.Driver",
        "url" : "jdbc:mysql://localhost:3306/dbname",
        "user" : "root",
        "password" : "pwd",
      "sql": "select PD.pd_profileID as _id,PD.pd_profileID,PD.pd_firstName,PD.pd_lawFirmName,PD.pd_secondName,PD.pd_fullName,PD.pd_fullNameTaxonomy, PD.pd_profileLink,PD.pd_description,PD.pd_emailAddress,PD.pd_mobileno,PD.pd_qualification,PD.pd_awards,PD.pd_experience,PD.pd_lawfirmID, lf.LF_searchURL,(SELECT DATE_FORMAT(pla.pla_createTime, \"%M - %Y\")from profile_location_audit as pla where pla.pla_profileID = PD.pd_profileID AND( CASE  PD.pd_manualflag WHEN 0 THEN(date_format(pla.pla_createTime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )group by pla.pla_profileID) as location_ch_time, (SELECT DATE_FORMAT(pja.pja_createTime, \"%M - %Y\")from profile_jobtitle_audit as pja where pja.pja_profileID = PD.pd_profileID AND (CASE  PD.pd_manualflag WHEN 0 THEN (date_format(pja.pja_createTime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )group by pja.pja_profileID) as jobtitle_ch_time, (SELECT DATE_FORMAT(jo.pd_movedTime, \"%M - %Y\")from movers as jo where jo.pd_profileID = PD.pd_profileID AND jo.pd_profilestatus = 2) as joined_on, (SELECT DATE_FORMAT(lv.pd_movedTime, \"%M - %Y\") from movers as lv where lv.pd_profileID = PD.pd_profileID AND lv.pd_profilestatus = 0) as left_on, (SELECT GROUP_CONCAT(concat(jtm_Taxonomy)separator \",\") FROM jobtitle_taxonomymaster WHERE jtm_Rank = (SELECT MAX(jtm_Rank) FROM jobtitle_taxonomymaster WHERE (SELECT GROUP_CONCAT(\",\", concat(ijt.pj_taxonomy1, \",\"), \",\") FROM profile_jobtitle AS ijt WHERE ijt.pj_profileID = PD.pd_profileID AND (CASE  PD.pd_manualflag WHEN 0 THEN (date_format(ijt.pj_createtime, \"%Y-%m\") = \"2016-01\" or date_format(ijt.pj_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )) LIKE CONCAT(\"%,\", jtm_ID, \",%\"))) as jt_tax, (select GROUP_CONCAT(concat(jtm_Taxonomy) separator \",\") from jobtitle_taxonomymaster where jtm_ID IN (SELECT ijt.pj_taxonomy2 FROM profile_jobtitle as ijt where ijt.pj_profileID = PD.pd_profileID AND (CASE  PD.pd_manualflag WHEN 0 THEN (date_format(ijt.pj_createtime, \"%Y-%m\") = \"2016-01\" or date_format(ijt.pj_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END))) as jt_tax2, (SELECT GROUP_CONCAT(concat(jt.jt_jobtitle) separator \", \") from jobtitle as jt inner join profile_jobtitle as ijt ON jt.jt_ID = ijt.pj_jobtitleID where ijt.pj_profileID = PD.pd_profileID AND ( CASE  PD.pd_manualflag WHEN 0 THEN (date_format(ijt.pj_createtime, \"%Y-%m\") = \"2016-01\" or date_format(ijt.pj_updatetime, \"%Y-%m\") = \"2016-01\")ELSE 1=1 END)) as jt_webpage, (SELECT GROUP_CONCAT(concat(etm.etm_Taxonomy) separator \",\") from expertise_taxonomymaster as etm where (SELECT GROUP_CONCAT(\",\", concat(pe.pp_taxonomy1,\",\",ifnull(pe.pp_taxonomy2, 0),\",\",ifnull(pe.pp_taxonomy3, 0)),\",\") FROM profile_expertise as pe where pe.pp_profileID = PD.pd_profileID AND ( CASE  PD.pd_manualflag WHEN 0 THEN (date_format(pe.pp_createtime, \"%Y-%m\") = \"2016-01\" or date_format(pe.pp_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END)) LIKE CONCAT(\"%,\", etm_ID, \",%\")) as et_tax, (SELECT GROUP_CONCAT(concat(et.et_expertise) separator \", \") from expertise as et inner join profile_expertise as iet ON et.et_ID = iet.pp_expertiseID where iet.pp_profileID = PD.pd_profileID AND (CASE  PD.pd_manualflag WHEN 0 THEN (date_format(iet.pp_createtime, \"%Y-%m\") = \"2016-01\" or date_format(iet.pp_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END)) as et_webpage, (SELECT GROUP_CONCAT(distinct concat(lc.city_name)separator \",\") from location_city as lc inner join location_taxonomy as ltx ON lc.city_ID = ltx.ltx_cityID inner join profile_location as pl ON pl.pl_locationID = ltx.ltx_location_ID where pl.pl_profileID = PD.pd_profileID and (CASE  PD.pd_manualflag WHEN 0 THEN (date_format(pl.pl_createtime, \"%Y-%m\") = \"2016-01\" or date_format(pl.pl_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )) as loc_cityname, (SELECT GROUP_CONCAT(distinct concat(ls.ls_name)separator \",\") from location_secondary as ls inner join location_city as lc ON ls.ls_ID = lc.city_secondaryID inner join location_taxonomy as ltx ON lc.city_ID = ltx.ltx_cityID inner join profile_location as pl ON pl.pl_locationID = ltx.ltx_location_ID where pl.pl_profileID = PD.pd_profileID and (CASE PD.pd_manualflag WHEN 0 THEN (date_format(pl.pl_createtime, \"%Y-%m\") = \"2016-01\" or date_format(pl.pl_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )) as location_countryname, (SELECT GROUP_CONCAT(distinct concat(lp.lp_name)separator \",\") from location_primary as lp inner join location_city as lc ON lp.lp_ID = lc.city_primaryID inner join location_taxonomy as ltx ON lc.city_ID = ltx.ltx_cityID inner join profile_location as pl ON pl.pl_locationID = ltx.ltx_location_ID where pl.pl_profileID = PD.pd_profileID and(CASE PD.pd_manualflag WHEN 0 THEN (date_format(pl.pl_createtime, \"%Y-%m\") = \"2016-01\" or date_format(pl.pl_updatetime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END )) as location_primary, (SELECT admitted from solicitors_data as sd inner join geo_solicitor_profiles as gsd ON sd.id = gsd.solicitor_id where sd.admitted is not null and gsd.profile_id = PD.pd_profileID and match_count in (select max(match_count) from geo_solicitor_profiles where profile_id = PD.pd_profileID) GROUP BY gsd.profile_id) as admitted_date,IFNULL((SELECT YEAR(admitted) from solicitors_data as sd inner join geo_solicitor_profiles as gsd ON sd.id = gsd.solicitor_id where sd.admitted is not null and gsd.profile_id = PD.pd_profileID and match_count in (select max(match_count) from geo_solicitor_profiles where profile_id = PD.pd_profileID) GROUP BY gsd.profile_id),0) as admitted_year from profiledetails as PD join lawfirms as lf ON lf.LF_ID = PD.pd_lawfirmID where CASE PD.pd_manualflag WHEN 0 THEN (date_format(pd_createTime, \"%Y-%m\") = \"2016-01\" or date_format(pd_updateTime, \"%Y-%m\") = \"2016-01\") ELSE 1=1 END ",
      "poll": "24h",
      "strategy": "simple",
      "scale": 0,
      "autocommit": true,
      "bulk_size": 7000,
      "max_bulk_requests": 30,
      "bulk_flush_interval": "5s",
      "fetchsize": 100,
      "max_rows": 200000,
      "max_retries": 3,
      "max_retries_wait": "10s",
      "locale": "in",
      "digesting": true,
      "index": "jdbc",
      "type": "jdbc",
      "type_mapping": {
          "jdbc": { "dynamic_templates": [{
          "strings": {
            "match": "pd_fullNameTaxonomy",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
       {
          "strings": {
            "match": "pd_lawFirmName",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
         {
          "strings": {
            "match": "jt_tax",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
         {
          "strings": {
            "match": "et_tax",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
         {
          "strings": {
            "match": "loc_cityname",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
         {
          "strings": {
            "match": "location_countryname",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
        {
          "strings": {
            "match": "location_primary",
            "match_mapping_type": "string",
            "mapping": {
              "type": "string",
              "fields": {
                "raw": {
                  "type": "string",
                  "index": "not_analyzed"
                }
              }
            }
          }
        },
        {
          "strings": {
            "match": "admitted_date",
            "match_mapping_type": "date",
            "mapping": {
              "type": "date",
              "fields": {
                "raw": {
                  "type": "date",
                  "index": "not_analyzed"
                }
              }
            }
          }
        }
           ]}
      }
    }
}'

我希望每 5 小时自动执行一次这段代码,并且在此期间网站不能停机。

1 个答案:

答案 0 :(得分:0)

遇到这种情况时,应当使用 Elasticsearch 的重建索引(reindex)方式。

参考此链接

https://www.elastic.co/blog/changing-mapping-with-zero-downtime