在使用titanic dataset绘制数字时,我会遇到"不满意的约束"错误。这是我使用名称和性别变量的代码:
import pandas as pd
from bokeh.io import show, output_file
from bokeh.models import (HoverTool, ColumnDataSource,)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
def main():
output_file("frequency_figure.html")
df = pd.read_csv("dataset/titanic/titanic.csv")
print(df.head())
#column_name = "Sex"
column_name = "Name"
get_frequency_figure(column_name, df)
def value_counts_of_series(mySeries):
return mySeries.value_counts().sort_index()
def get_frequency_figure(col_name,df):
try:
counts = value_counts_of_series(df[col_name])
counts = counts.to_dict()
counts = {str(k): v for k, v in counts.items()} # converting the key to string
x_values = []
y_values = []
for key in counts:
x_values.append(key)
y_values.append(counts[key])
source = ColumnDataSource(data=dict(
x=x_values,
y=y_values,
color=Spectral6
))
hover = HoverTool(tooltips=[
(col_name, "@x"),
("frequency value", "@y"),
])
p = figure(x_range=x_values, plot_height=350, title="Frequency",
tools=[hover])
# Draw the vertical bar
p.vbar(x='x', top='y', width=.5, source=source,
line_color='black', color='color')
p.y_range.start = 0
# p.xgrid.grid_line_color = None
p.xaxis.axis_label = col_name
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
show(p)
except Exception as err:
error = str(err)
raise Exception(error)
main()
我注意到由于长类别标签(Name属性的每个数据点的值)导致错误。它与Sex变量名称一起工作正常,因为它非常短("男性"或"女性")。
因此,为了处理像" Name"这样的列名,我添加了get_category_label_for_figure函数。请参阅以下完整代码:
import pandas as pd
from bokeh.io import show, output_file
from bokeh.models import (HoverTool, ColumnDataSource,)
from bokeh.palettes import Spectral6
from bokeh.plotting import figure
def main():
output_file("frequency_figure.html")
df = pd.read_csv("dataset/titanic/titanic.csv")
print(df.head())
#column_name = "Sex"
column_name = "Name"
get_frequency_figure(column_name, df)
def value_counts_of_series(mySeries):
return mySeries.value_counts().sort_index()
#Without this, it gives "unsatisfiable constraints" bokeh error for large labels. So, we need to shorthen them.
#In 0.12.10, it shows the error and doesn't render the figure. After upgrading from 0.12.10 to 0.12.16, it no longer shows the error but still has the same issue with an empty page (do not render the figure plot at all).
def get_category_label_for_figure(str_label):
if len(str_label) > 10:
return str_label[0:10] + "..."
else:
return str_label
def get_frequency_figure(col_name,df):
try:
counts = value_counts_of_series(df[col_name])
counts = counts.to_dict()
counts = {get_category_label_for_figure(str(k)): v for k, v in counts.items()} # converting the key to string
x_values = []
y_values = []
for key in counts:
x_values.append(key)
y_values.append(counts[key])
source = ColumnDataSource(data=dict(
x=x_values,
y=y_values,
color=Spectral6
))
hover = HoverTool(tooltips=[
(col_name, "@x"),
("frequency value", "@y"),
])
p = figure(x_range=x_values, plot_height=350, title="Frequency",
tools=[hover])
# Draw the vertical bar
p.vbar(x='x', top='y', width=.5, source=source,
line_color='black', color='color')
p.y_range.start = 0
# p.xgrid.grid_line_color = None
p.xaxis.axis_label = col_name
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = None
show(p)
except Exception as err:
error = str(err)
raise Exception(error)
main()
添加get_category_label_for_figure函数后,它工作正常。但是,现在我遇到了悬停问题,即它也缩短了悬停文本。看到这个截图(只是忽略丑陋的视图):
我想要的是能够在缩短图形轴的同时悬停完整的标签名称'标签名称(这里已经缩短了,我们因为丑陋的观点而无法看到)。有没有办法做到这一点?