I have a map and have already plotted some points on it. I want those points to show a colour gradient based on a value I compute for each point. The picture below has a legend, which is essentially what I want to follow. Here is my code:
"""
Author :
Vidya
Modification History :
17-Dec-2019 Vidya Initial Draft
"""
from __future__ import absolute_import

# Import libraries
import argparse
import logging
import re
import traceback
import warnings

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from typing import List, Any

warnings.filterwarnings('ignore')
# Define custom class DataIngestion
class DataIngestion(object):
    """A helper class that loads the file into the BigQuery table."""

    def __init__(self):
        pass

    def parse_method(self, input_string):
        """Translate one CSV line into a dict keyed by column name."""
        # Strip out carriage return, newline and quote characters.
        values = re.split(',',
                          re.sub(r'\r\n', '', re.sub(u'"', '', input_string)))
        row = dict(
            zip(('id', 'name', 'salary'), values)
        )
        return row
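    # Example with a hypothetical input line:
    #   parse_method('1,"Alice",1000\r\n')
    #   -> {'id': '1', 'name': 'Alice', 'salary': '1000'}
    # Note the values remain strings at this point.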
class DataLakeComparison(object):
    """A helper class that compares the base and delta BigQuery tables."""

    def __init__(self):
        pass

    def base_query(self):
        """Return the SQL used to read the base table."""
        base_query = """
        SELECT
          id,
          name,
          salary
        FROM CDC.base
        """
        return base_query

    def delta_query(self):
        """Return the SQL used to read the delta table."""
        delta_query = """
        SELECT
          id,
          name,
          salary
        FROM CDC.delta
        """
        return delta_query
    def process_id(self, id, data):
        """Perform the join of the two datasets for a single id."""
        base_rows = list(data['base'])
        result = list(data['delta'])  # type: List[Any]
        if not base_rows:
            logging.info('id %s is missing in base', id)
            return []
        if not result:
            logging.info('id %s is missing in delta', id)
            return []
        base = {}
        try:
            base = base_rows[0]
        except IndexError as err:
            traceback.print_exc()
            logging.error('id not found: %s', err)
        # Overlay the base row onto every delta row with the same id.
        for delta in result:
            delta.update(base)
        return result
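    # Each element handed to process_id comes from CoGroupByKey and is shaped
    # like (id, {'base': [base_row, ...], 'delta': [delta_row, ...]});
    # a runnable sketch of this join is at the end of the post.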
def run(argv=None):
"""The main function which creates the pipeline and runs it."""
parser = argparse.ArgumentParser()
parser.add_argument(
'--input',
dest='input',
required=False,
help='Input file to read. This can be a local file or '
'a file in a Google Storage Bucket.',
default='gs://input-cobalt/delta1.csv'
)
parser.add_argument(
'--output',
dest='output',
required=False,
help='Output BQ table to load the delta file ',
default='CDC.delta'
)
    parser.add_argument(
        '--output2',
        dest='output2',
        required=False,
        help='Output BQ table to load the base table',
        default='CDC.base'
    )
# Parse arguments from command line.
known_args, pipeline_args = parser.parse_known_args(argv)
data_ingestion = DataIngestion()
# Instantiate pipeline
options = PipelineOptions(pipeline_args)
p = beam.Pipeline(options=options)
(p
| 'Read from a File' >> beam.io.ReadFromText(known_args.input, skip_header_lines=1)
| 'String To BigQuery Row' >>
beam.Map(lambda s: data_ingestion.parse_method(s))
| 'Write to BigQuery' >> beam.io.Write(
beam.io.BigQuerySink(
known_args.output,
schema='id:INTEGER,name:STRING,salary:INTEGER',
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE))
)
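    # Note: the BigQuery reads below run in the same pipeline as the write
    # above; Beam gives no ordering guarantee between independent branches,
    # so the delta read may not see the rows written in this run.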
datalakecomparison = DataLakeComparison()
base_data = datalakecomparison.base_query()
delta_data = datalakecomparison.delta_query()
    base_data = (
        p
        | 'Read Base from BigQuery' >> beam.io.Read(
            beam.io.BigQuerySource(query=base_data, use_standard_sql=True))
        | 'Map id in base' >> beam.Map(
            lambda row: (row['id'], row)))
    delta_data = (
        p
        | 'Read Delta from BigQuery' >> beam.io.Read(
            beam.io.BigQuerySource(query=delta_data, use_standard_sql=True))
        | 'Map id in delta' >> beam.Map(
            lambda row: (row['id'], row)))
result = {'base': base_data, 'delta': delta_data} | beam.CoGroupByKey()
    joined = result | 'Join base and delta' >> beam.FlatMapTuple(
        datalakecomparison.process_id)
joined | 'Write Data to BigQuery' >> beam.io.Write(
beam.io.BigQuerySink(
known_args.output2,
schema='id:INTEGER,name:STRING,salary:INTEGER',
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE))
p.run().wait_until_finish()
# main function
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
run()
Here is my picture.
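For reference, here is a minimal, self-contained sketch of the CoGroupByKey join stage, using made-up in-memory rows (the ids, names and salaries are invented) in place of the BigQuery reads, so it can be run locally with the DirectRunner:

import apache_beam as beam

def join_id(id, data):
    # One CoGroupByKey element: (id, {'base': [...], 'delta': [...]}).
    base_rows = list(data['base'])
    delta_rows = list(data['delta'])
    if not base_rows or not delta_rows:
        return []
    joined = []
    for delta in delta_rows:
        row = dict(delta)        # copy so the input element is not mutated
        row.update(base_rows[0])  # base values win, matching process_id
        joined.append(row)
    return joined

with beam.Pipeline() as p:
    base = p | 'Create base' >> beam.Create(
        [(1, {'id': 1, 'name': 'a', 'salary': 100})])
    delta = p | 'Create delta' >> beam.Create(
        [(1, {'id': 1, 'name': 'a', 'salary': 200}),
         (2, {'id': 2, 'name': 'b', 'salary': 300})])
    ({'base': base, 'delta': delta}
     | beam.CoGroupByKey()
     | beam.FlatMapTuple(join_id)
     | beam.Map(print))

The element shape handed to join_id is the same one process_id receives above; the row with id 2 is dropped because it has no match in base.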