无法从jdbc聚合数据以输出一个单个文档

时间:2019-10-15 23:16:16

标签: elasticsearch logstash logstash-file logstash-jdbc

我正在尝试通过logstash将jdbc中的数据提取到弹性搜索中,但是我认为聚合功能无法正常工作 我的管道代码是

-e:1:in `<main>': undefined local variable or method `“' for main:Object (NameError)

我没有得到合计的结果,我仅得到公司的结果。不包括地理位置和行业,我认为聚合不起作用, 关于如何进行这项工作的任何建议? 收到输出:

input {
      jdbc {
        jdbc_driver_library => "/home/simrat/Downloads/mysql-connector-java-5.1.6.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "xxxxx"
        jdbc_user => "xxxxx"
        jdbc_password => "xxxxx"
        tracking_column => "CompanyId"
        use_column_value=>true
        lowercase_column_names => false
        statement => "select companies.Name, companies.UrlName,
                                companies.Rating, YEAR(companies.StartDate) as Age,
                                companies.CompanyId,companies.TotalEmployees,
                                companies.Logo,
                                companies.ShortName,
                                seo_stats.CompanyReviewsLive, seo_stats.InterviewExperiencesLive,  seo_stats.CompanySalaries, companies.CompanyType
                                from companies, seo_stats 
                                WHERE companies.CompanyId = seo_stats.CompanyId"
       type => "companies"
       tags => ["companies"]
      }


    jdbc {
        jdbc_driver_library => "/home/simrat/Downloads/mysql-connector-java-5.1.6.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string =>"xxxxx"
        jdbc_user => "xxxxx"
        jdbc_password => "xxxxx"
        tracking_column => "CompanyId"
        use_column_value=>true
        lowercase_column_names => false
        statement => "select CompanyId, group_concat(city_list.Name) as locations from seo_stats_job_location inner join city_list on city_list.Id = seo_stats_job_location.JobLocationId where seo_stats_job_location.CompanyReviewsLive>0 group by CompanyId"
        type => "locations"
        tags => ["locations"]
      }

    jdbc {
        jdbc_driver_library => "/home/simrat/Downloads/mysql-connector-java-5.1.6.jar"
        jdbc_driver_class => "com.mysql.jdbc.Driver"
        jdbc_connection_string => "xxxxxx"
        jdbc_user => "xxxxxx"
        jdbc_password => "xxxxx"
        tracking_column => "CompanyId"
        use_column_value=>true
            lowercase_column_names => false
        statement => "select CompanyId, group_concat(company_catalogue.Name) as industries from company_catalogue_mapping inner join company_catalogue on company_catalogue.Id = company_catalogue_mapping.CatalogueId where company_catalogue.`GroupType`='Industry' group by CompanyId"
        type => "industries" 
        tags => ["industries"]
    }

}
filter {

 mutate {
        split => { "industries"=>","}
        split => { "locations" => "," }
    }

 aggregate {
       task_id => "%{CompanyId}"
        code => "
    if (event.get('type') == 'industries')
         map['industries'] ||= event.get('industries').split(',')
    end
        if (event.get('type') == 'locations')
     map['locations'] ||= event.get('locations').split(',')
    end 
    if(event.get('type') == 'companies')
        map['Name'] = event.get('Name')
        map['UrlName'] = event.get('UrlName')
        map['Rating'] = event.get('Rating')
        map['CompanyId'] = event.get('CompanyId')
        map['Logo'] = event.get('Logo')
        map['ShortName'] = event.get('ShortName')
        map['CompanyReviewsLive'] = event.get('CompanyReviewsLive')
        map['InterviewExperiencesLive'] = event.get('InterviewExperiencesLive')
        map['CompanySalaries'] = event.get('CompanySalaries')
        map['CompanyType'] = event.get('CompanyType')
        if(event.get('Age') > 0)
         map['Age'] = Time.now.year - event.get('Age')
        end
        totalEmployees = event.get('TotalEmployees')
        if(totalEmployees.include? '-')
         map['TotalEmployees'] = ((totalEmployees.split('-')[1].split(',')).join('')).to_i 
        elsif(totalEmployees.include? '+')
         map['TotalEmployees'] = (((totalEmployees.split('+')[0].split(',')).join('')).to_i)+1
        elsif(totalEmployees.include? ',')
         map['TotalEmployees'] = ((totalEmployees.split(',')).join('')).to_i
        else
         map['TotalEmployees'] = totalEmployees.to_i 
        end
    end
    event.cancel()
    "
       push_previous_map_as_event => true
       timeout => 300
     }
}
output {
  elasticsearch {
    document_id=> "%{CompanyId}"
    document_type => "companies"
    index => "listing"
    hosts => ["http://localhost:9201"]
  }
  stdout{
  codec => rubydebug
  }
}

预期输出应为

 {
        "_index": "listing",
        "_type": "companies",
        "_id": "1234498",
        "_score": 17.196167,
        "_source": {
          "CompanyId": 1234498,
          "TotalEmployees": 0,
          "Rating": 3.5,
          "UrlName": "aaa-group",
          "ShortName": "aaa Group",
          "CompanyType": "",
          "Name": "aaa Group",
          "Logo": "",
          "@timestamp": "2019-10-15T18:58:38.965Z",
          "CompanyReviewsLive": 4,
          "InterviewExperiencesLive": 0,
          "CompanySalaries": 5,
          "@version": "1"
        }
      }

0 个答案:

没有答案