所以问题是:
我可以在Plotly中绘制直方图,将所有大于某个阈值的值分组到一个bin中吗?
所需的输出:
但是使用标准的 Histogram 类,我只能得到以下输出:
import numpy as np
import pandas as pd
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
# Set up Plotly's offline notebook mode before any figure is drawn.
init_notebook_mode()

# Sample data as (value, repetitions) pairs, expanded in order into one column.
value_counts = [
    (1, 10), (2, 9),
    (3.1, 4), (3.6, 4),
    (4, 7), (5, 6), (6, 5), (7, 4), (8, 3),
    (9, 2), (10, 1),
    # <- I want to group them into one bin "> 10"
    (111.2, 2), (222.3, 2), (333.4, 1),
]
test_df = pd.DataFrame(
    {'values': [value for value, count in value_counts for _ in range(count)]}
)

# Standard histogram with explicit unit-width bins from 0 to 11. According to
# the question text, this is the attempt that does NOT give the desired
# single "> 10" bin.
histogram_trace = go.Histogram(
    x=test_df['values'],
    autobinx=False,
    xbins={'start': 0, 'end': 11, 'size': 1},
)
data = [histogram_trace]
layout = go.Layout(title='values')
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='basic histogram')
答案 0 :(得分:1)
因此,花了一些时间后,我自己使用 numpy.histogram
和 Plotly 的 Bar
图表找到了解决方案。
把它留在这里,以备有人遇到同样的问题。
def _bin_with_overflow(series, end):
    """Count *series* into unit-width bins plus one trailing overflow bin.

    Args:
        series: Numeric data exposing ``min()`` and ``max()`` (the file
            passes a pandas Series).
        end: Integer threshold; everything past the last regular edge is
            collected in a single final bin.

    Returns:
        A ``(counts, labels)`` pair: the per-bin counts from
        ``np.histogram`` and one text label per bin.
    """
    size = 1
    # Floor rather than int(): int() truncates toward zero, so a negative
    # fractional minimum (e.g. -0.5) would give a first edge of 0 and those
    # values would fall outside the bins and be left out of the counts.
    start = int(np.floor(series.min()))

    # The final edge must lie beyond `end` so the last bin catches the
    # outliers; when nothing exceeds `end`, use one extra regular-width bin.
    largest_value = series.max()
    overflow_edge = largest_value if largest_value > end else end + size
    # NOTE(review): assumes `end` is an int not below `start`; otherwise
    # range() yields no regular edges - confirm with callers.
    edges = list(range(start, end + size, size)) + [overflow_edge]

    counts, edges = np.histogram(series, bins=edges)

    # Regular bins are labelled "low - high"; the bin whose upper edge lies
    # beyond `end` is labelled "> low".
    labels = [
        '{} - {}'.format(low, high) if high <= end else '> {}'.format(low)
        for low, high in zip(edges[:-1], edges[1:])
    ]
    return counts, labels


def plot_bar_with_outliers(series, name, end):
    """Draw a histogram-like bar chart with all outliers in one bar.

    Values are counted in unit-width bins starting at the floor of the
    smallest value; everything past ``end`` is grouped into a single
    final bar labelled ``> end``.

    Args:
        series: Numeric data exposing ``min()`` and ``max()`` (the file
            passes a pandas Series).
        name: Chart title.
        end: Integer threshold above which values share one bar.
    """
    counts, labels = _bin_with_overflow(series, end)

    # A Bar trace with text labels is used instead of go.Histogram so the
    # overflow bar can have a different width in data terms.
    data = [go.Bar(x=labels, y=counts)]
    layout = go.Layout(title=name)
    fig = go.Figure(data=data, layout=layout)
    iplot(fig, filename='basic histogram')
# Chart the sample column, grouping everything beyond end=11 into one bar.
plot_bar_with_outliers(series=test_df['values'], name='values', end=11)