I have a map and have already plotted some points on it. I want those points to show a colour gradient based on a value I compute for each point. The picture below has a legend, which is essentially what I want to follow. Here is my code:
"""
Author :
Vidya
Modification History :
17-Dec-2019 Vidya Initial Draft
"""
from __future__ import absolute_import

# Import libraries
import argparse
import logging
import re
import traceback
import warnings

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
from typing import List, Any

warnings.filterwarnings('ignore')
# Define custom class DataIngestion
class DataIngestion(object):
    """A helper class that loads the file into the BigQuery table."""

    def __init__(self):
        pass

    def parse_method(self, input_string):
        """Translate one CSV line into a dict keyed by column name."""
        # Strip out carriage return, newline and quote characters.
        values = re.split(',',
                          re.sub(r'\r\n', '', re.sub(u'"', '', input_string)))
        row = dict(
            zip(('id', 'name', 'salary'), values)
        )
        return row
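    # Example with a hypothetical input line:
    #   parse_method('1,"Alice",1000\r\n')
    #   -> {'id': '1', 'name': 'Alice', 'salary': '1000'}
    # Note the values remain strings at this point.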
class DataLakeComparison(object):
    """A helper class that compares the base and delta BigQuery tables."""

    def __init__(self):
        pass

    def base_query(self):
        """Return the SQL used to read the base table."""
        base_query = """
        SELECT
          id,
          name,
          salary
        FROM CDC.base
        """
        return base_query

    def delta_query(self):
        """Return the SQL used to read the delta table."""
        delta_query = """
        SELECT
          id,
          name,
          salary
        FROM CDC.delta
        """
        return delta_query
    def process_id(self, id, data):
        """Perform the join of the two datasets for a single id."""
        base_rows = list(data['base'])
        result = list(data['delta'])  # type: List[Any]
        if not base_rows:
            logging.info('id %s is missing in base', id)
            return []
        if not result:
            logging.info('id %s is missing in delta', id)
            return []
        base = {}
        try:
            base = base_rows[0]
        except IndexError as err:
            traceback.print_exc()
            logging.error('id not found: %s', err)
        # Overlay the base row onto every delta row with the same id.
        for delta in result:
            delta.update(base)
        return result
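    # Each element handed to process_id comes from CoGroupByKey and is shaped
    # like (id, {'base': [base_row, ...], 'delta': [delta_row, ...]});
    # a runnable sketch of this join is at the end of the post.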
def run(argv=None):
"""The main function which creates the pipeline and runs it."""
parser = argparse.ArgumentParser()
parser.add_argument(
'--input',
dest='input',
required=False,
help='Input file to read. This can be a local file or '
'a file in a Google Storage Bucket.',
default='gs://input-cobalt/delta1.csv'
)
parser.add_argument(
'--output',
dest='output',
required=False,
help='Output BQ table to load the delta file ',
default='CDC.delta'
)
    parser.add_argument(
        '--output2',
        dest='output2',
        required=False,
        help='Output BQ table to load the base table',
        default='CDC.base'
    )
# Parse arguments from command line.
known_args, pipeline_args = parser.parse_known_args(argv)
data_ingestion = DataIngestion()
# Instantiate pipeline
options = PipelineOptions(pipeline_args)
p = beam.Pipeline(options=options)
(p
| 'Read from a File' >> beam.io.ReadFromText(known_args.input, skip_header_lines=1)
| 'String To BigQuery Row' >>
beam.Map(lambda s: data_ingestion.parse_method(s))
| 'Write to BigQuery' >> beam.io.Write(
beam.io.BigQuerySink(
known_args.output,
schema='id:INTEGER,name:STRING,salary:INTEGER',
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE))
)
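    # Note: the BigQuery reads below run in the same pipeline as the write
    # above; Beam gives no ordering guarantee between independent branches,
    # so the delta read may not see the rows written in this run.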
datalakecomparison = DataLakeComparison()
base_data = datalakecomparison.base_query()
delta_data = datalakecomparison.delta_query()
    base_data = (
        p
        | 'Read Base from BigQuery' >> beam.io.Read(
            beam.io.BigQuerySource(query=base_data, use_standard_sql=True))
        | 'Map id in base' >> beam.Map(
            lambda row: (row['id'], row)))
    delta_data = (
        p
        | 'Read Delta from BigQuery' >> beam.io.Read(
            beam.io.BigQuerySource(query=delta_data, use_standard_sql=True))
        | 'Map id in delta' >> beam.Map(
            lambda row: (row['id'], row)))
result = {'base': base_data, 'delta': delta_data} | beam.CoGroupByKey()
    joined = result | 'Join base and delta' >> beam.FlatMapTuple(
        datalakecomparison.process_id)
joined | 'Write Data to BigQuery' >> beam.io.Write(
beam.io.BigQuerySink(
known_args.output2,
schema='id:INTEGER,name:STRING,salary:INTEGER',
create_disposition=beam.io.BigQueryDisposition.CREATE_IF_NEEDED,
write_disposition=beam.io.BigQueryDisposition.WRITE_TRUNCATE))
p.run().wait_until_finish()
# main function
if __name__ == '__main__':
logging.getLogger().setLevel(logging.INFO)
run()
Here is my picture.
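For reference, here is a minimal, self-contained sketch of the CoGroupByKey join stage, using made-up in-memory rows (the ids, names and salaries are invented) in place of the BigQuery reads, so it can be run locally with the DirectRunner:

import apache_beam as beam

def join_id(id, data):
    # One CoGroupByKey element: (id, {'base': [...], 'delta': [...]}).
    base_rows = list(data['base'])
    delta_rows = list(data['delta'])
    if not base_rows or not delta_rows:
        return []
    joined = []
    for delta in delta_rows:
        row = dict(delta)        # copy so the input element is not mutated
        row.update(base_rows[0])  # base values win, matching process_id
        joined.append(row)
    return joined

with beam.Pipeline() as p:
    base = p | 'Create base' >> beam.Create(
        [(1, {'id': 1, 'name': 'a', 'salary': 100})])
    delta = p | 'Create delta' >> beam.Create(
        [(1, {'id': 1, 'name': 'a', 'salary': 200}),
         (2, {'id': 2, 'name': 'b', 'salary': 300})])
    ({'base': base, 'delta': delta}
     | beam.CoGroupByKey()
     | beam.FlatMapTuple(join_id)
     | beam.Map(print))

The element shape handed to join_id is the same one process_id receives above; the row with id 2 is dropped because it has no match in base.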