从 pandas DataFrame 创建 Plotly Scattermapbox(散点地图)

时间:2019-06-19 14:17:24

标签: python pandas plotly mapbox

我想为印度尼西亚创建一个 Scattermapbox(散点地图),按地区展示各种统计数据(人口、GDP 等)。我使用的是 GitHub 上的一个 geopandas 文件。

Plotly 网站上的示例为每个图层分别创建了一个文件,然后使用 GitHub 链接作为数据源(source)。

# Republican counties: GeoJSON file used as the source of one layer in the Plotly example
source = 'https://raw.githubusercontent.com/plotly/datasets/master/florida-red-data.json'  
# Democrat counties: a separate GeoJSON file used as the source of another layer
source = 'https://raw.githubusercontent.com/plotly/datasets/master/florida-blue-data.json'

因此,我的问题是:如何使用 pandas DataFrame 为每个区域创建图层字典并将其用作数据源(同时还能根据另一个 DataFrame 中的特定值为每个区域着色)?这到底可不可行?是否必须为每个区域创建一个单独的文件?我的尝试(第16-20行)似乎不起作用。

import pandas as pd
import json
import string
import plotly
from plotly.graph_objs import Scattermapbox, Layout
# Load the region GeoJSON straight from GitHub. The 'features' column read
# below is presumably the FeatureCollection's feature list, one feature per
# row -- verify against the downloaded file.
ID_regions = pd.read_json('https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/indonesia-edit.geojson')

# Collect the name of every region. The original loop body read from an
# undefined name (`state`) instead of the loop variable and raised NameError.
region_names = [region['properties']['name'] for region in ID_regions['features']]

print(region_names)

# Writes one JSON file per region.
# NOTE: the original author reported that this approach did not work for them.
def create_region_files():
    """Dump every row of ``ID_regions`` to its own JSON file.

    Each file is named after the matching entry of ``region_names`` and is
    written into the hard-coded ``ID_regions`` directory below.
    """
    for row in range(len(ID_regions)):
        row_series = ID_regions.iloc[row, :]
        target = f'C:\\Users\\nicho\\Desktop\\Waste Management\\Map_Maker\\ID_regions\\{region_names[row]}.json'
        row_series.to_json(target)


def create_Chloropleth():
    """Build a Mapbox figure with two GeoJSON fill layers and write it to HTML.

    The figure contains a single scatter marker plus two ``fill`` layers whose
    GeoJSON sources are loaded from GitHub URLs. The result is written to
    ``Chloropleth_Province_Population.html`` via ``plotly.offline.plot``.
    """
    mapbox_access_token = 'My Access Key'

    # Single placeholder marker; note its coordinates are far away from the
    # map center configured in the layout below.
    data = [
        Scattermapbox(
            lat=['45.5017'],
            lon=['-73.5673'],
            mode='markers',
        )
    ]
    layout = Layout(
        height=900,
        autosize=True,
        showlegend=False,
        hovermode='closest',
        mapbox=dict(
            layers=[
                # All regions, filled green.
                dict(
                    sourcetype='geojson',
                    source='https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/indonesia-edit.geojson',
                    type='fill',
                    color='green'
                ),
                # A single region drawn on top in red. The layer type used to
                # be ' fill' (leading space), which is not a valid layer type.
                dict(
                    sourcetype='geojson',
                    source='https://raw.githubusercontent.com/N1x0/indonesia-geojson/master/west-sulawesi.json',
                    type='fill',
                    color='red',
                )
            ],
            accesstoken=mapbox_access_token,
            bearing=0,
            center=dict(
                lat=0.7893,
                lon=113.9213
            ),
            pitch=0,
            zoom=4.5,
            style='light'
        ),
    )

    fig = dict(data=data, layout=layout)
    plotly.offline.plot(fig, filename='Chloropleth_Province_Population.html')

# Build the figure and write the HTML file when the script runs.
create_Chloropleth()

谢谢您的帮助!

1 个答案:

答案 0 :(得分:0)

好的,我花了一些时间,但终于弄明白了。非常感谢 Medium 上的 Emma Grimaldi 以及 Vince Pota,他们的文章帮我解决了大部分问题。下面按顺序回答我自己的问题:

  1. 不必为每个区域创建一个单独的文件。也就是说,您可以用 pandas DataFrame 来匹配 json 中各区域的名称,这样就能正常工作。

# Load the region boundaries once; make_sources() reads this module-level dict.
# GeoJSON is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default.
with open('indonesia-en.geojson', encoding='utf-8') as f:
    geojson = json.load(f)

def _downsample_ring(ring, step):
    """Return *ring* reduced to every *step*-th position, still a closed ring."""
    thinned = [list(point) for point in ring[::step]]
    # A GeoJSON linear ring must end on its first position; slicing may drop it.
    if thinned and thinned[-1] != thinned[0]:
        thinned.append(list(thinned[0]))
    # Fewer than four positions is no longer a valid ring: keep the original.
    if len(thinned) < 4:
        return ring
    return thinned


def make_sources(downsample=0):
    """Build one single-feature GeoJSON FeatureCollection per feature.

    Reads the module-level ``geojson`` dict and never modifies it.

    Args:
        downsample: When greater than 0, keep only every ``downsample``-th
            position of each ring to shrink the payload. ``0`` (the default)
            keeps the full geometry.

    Returns:
        A list of ``{'type': 'FeatureCollection', 'features': [feature]}``
        dicts, one per feature, in the order of ``geojson['features']``.
    """
    sources = []
    # Deep copy so that downsampling never overwrites the loaded data.
    for feature in copy.deepcopy(geojson['features']):
        if downsample > 0:
            geometry = feature.get('geometry') or {}
            kind = geometry.get('type')
            # Thin every ring of every polygon. The previous version kept only
            # coordinates[0][0], which silently dropped all further polygons of
            # a MultiPolygon and broke on plain Polygon geometries.
            if kind == 'Polygon':
                geometry['coordinates'] = [
                    _downsample_ring(ring, downsample)
                    for ring in geometry['coordinates']
                ]
            elif kind == 'MultiPolygon':
                geometry['coordinates'] = [
                    [_downsample_ring(ring, downsample) for ring in polygon]
                    for polygon in geometry['coordinates']
                ]

        sources.append(dict(type='FeatureCollection', features=[feature]))
    return sources

因此,您只需从geojson中提取坐标并将其附加到字典列表[{}]中即可。

  2. 如何使用此列表动态创建图层:
    MAPBOX_APIKEY = "Your API Key"

    # Scatter trace: one marker per (lat, lon) pair with hover text; the marker
    # color settings also drive the colorbar shown next to the map.
    data = {
        'type': 'scattermapbox',
        'lat': lats,
        'lon': lons,
        'mode': 'markers',
        'text': hover_text,
        'marker': {
            'size': 1,
            'color': scatter_colors,
            'showscale': True,
            'cmin': minpop / 1000000,
            'cmax': maxpop / 1000000,
            'colorscale': colorscale,
            'colorbar': {
                'title': 'Population in Millions',
            },
        },
        'showlegend': False,
        'hoverinfo': 'text',
    }

    # Two layers per region, where n_provinces = len(geojson['features']):
    # first all border lines, then all filled areas.
    layers = (
        [
            {
                'sourcetype': 'geojson',
                'source': sources[idx],
                'below': "water",
                'type': 'line',  # the borders
                'line': {'width': 1},
                'color': 'black',
            }
            for idx in range(n_provinces)
        ]
        + [
            {
                'sourcetype': 'geojson',
                'source': sources[idx],
                'type': 'fill',  # the area inside the borders
                'color': scatter_colors[idx],
                'opacity': 0.8,
            }
            for idx in range(n_provinces)
        ]
    )

因此,这里的解决方案是设置 source = sources[k],其中 sources 是在 make_sources() 中创建的、包含经度/纬度值的字典列表。
  3. 如何相应地为图层着色:color=scatter_colors[k]

使用链接的示例,我使用了3个函数

3.1 scalarmappable

# Sets colors based on min and max values.
def scalarmappable(cmap, cmin, cmax):
    """Return a Matplotlib ``ScalarMappable`` that maps values to colors.

    Args:
        cmap: Colormap name (or colormap object) to use.
        cmin: Value mapped to the low end of the colormap.
        cmax: Largest data value; the normalization upper bound is set
            10 percent above it.

    Returns:
        A ``cm.ScalarMappable`` combining the normalization and the colormap.
    """
    # vmax is raised by 10 percent; per the original author, the most
    # populous region otherwise does not get colored.
    norm = Normalize(vmin=cmin, vmax=cmax + (cmax * 0.10))
    # Hand the colormap straight to ScalarMappable, which resolves names
    # itself. This avoids cm.get_cmap, which was removed in Matplotlib 3.9.
    return cm.ScalarMappable(norm=norm, cmap=cmap)

3.2 scatter_colors

# Uses matplotlib to create colors based on values and sets grey for NaN values.
def get_scatter_colors(sm, df):
    """Return one ``'rgba(r, g, b, a)'`` color string per value in *df*.

    Args:
        sm: Object with a ``to_rgba(value, bytes=True, alpha=1)`` method
            (e.g. the result of ``scalarmappable``).
        df: Iterable of numeric values; NaN entries are colored grey.

    Returns:
        A list of color strings in the same order as *df*.
    """
    grey = 'rgba(128,128,128,1)'
    colors = []
    for value in df:
        if np.isnan(value):
            colors.append(grey)
            continue
        rgba = sm.to_rgba(value, bytes=True, alpha=1)
        # Format the channels explicitly as plain ints: str() of a tuple of
        # NumPy scalars renders as "np.uint8(..)" on NumPy 2, which is not a
        # valid color string.
        colors.append('rgba(' + ', '.join(str(int(channel)) for channel in rgba) + ')')
    return colors

3.3 colorscale

# Defines the horizontal range and corresponding colors for the colorscale.
def get_colorscale(sm, df, cmin, cmax):
    """Return ``[position, 'rgba(r, g, b, a)']`` pairs for a colorscale.

    Args:
        sm: Object with a ``to_rgba(value, bytes=True)`` method
            (e.g. the result of ``scalarmappable``).
        df: Only its length is used; it sets the number of color stops.
        cmin: Value whose color is placed at position 0.
        cmax: Value whose color is placed at position 1.

    Returns:
        A list of ``[position, color]`` pairs with positions evenly spaced
        from 0 to 1 and values evenly spaced from *cmin* to *cmax*.
    """
    positions = np.linspace(0, 1, len(df))
    values = np.linspace(cmin, cmax, len(df))

    colorscale = []
    for position, value in zip(positions, values):
        rgba = sm.to_rgba(value, bytes=True)
        # Format the channels explicitly as plain ints: str() of a tuple of
        # NumPy scalars renders as "np.uint8(..)" on NumPy 2, which is not a
        # valid color string.
        color = 'rgba(' + ', '.join(str(int(channel)) for channel in rgba) + ')'
        colorscale.append([float(position), color])
    return colorscale

然后使用函数设置变量

# Assign the values consumed by the trace/layer definitions.
# Name of the colormap handed to scalarmappable().
colormap = 'nipy_spectral'
# Smallest and largest population; they bound the color range.
minpop = stats['population'].min()
maxpop = stats['population'].max()
# One single-feature FeatureCollection per region, without downsampling.
sources = make_sources(downsample=0)
# get_centers() is not shown in this snippet; presumably it returns the marker
# longitudes and latitudes per region -- verify.
lons, lats = get_centers()

# Color mapper built from the population range.
sm = scalarmappable(colormap, minpop, maxpop)
# One color string per population value (grey for NaN).
scatter_colors = get_scatter_colors(sm, stats['population'])
# The whole `stats` frame is passed here; get_colorscale() only uses its
# length to choose the number of color stops.
colorscale = get_colorscale(sm, stats, minpop, maxpop)
# get_hover_text() is not shown in this snippet; presumably it builds the hover
# labels -- verify.
hover_text = get_hover_text(stats['population'])

如果有人对这个答案有疑问,我很乐意帮助你继续推进 :)