我对这一切都是陌生的,无法弄清楚为什么会有这个KeyError。任何指针表示赞赏! 试图将CMS的一些示例医疗索赔数据与我查看的YouTube教程中收集的示例程序一起使用...想知道错误是否是因为“ HCPCS_CD1”的某些值可能为空?
# packages for data and visual analysis
import numpy as np
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
%matplotlib inline
# begin
claims = pd.read_csv('DE1_0_2008_to_2010_Outpatient_Claims_Sample_1_CCONLY.csv')
print(claims.head())
# plot data
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
# format and preprocess training data - either it's a ECC or not
type_label = np.where(claims['ICD9_DGNS_CD_1']=='1561',0,1)
claim_features = claims.columns.values[1:].tolist()
#claim_features - limit which columns to consider
domain = claims[['ICD9_DGNS_CD_1','HCPCS_CD1','HCPCS_CD2']].values
print(domain)
# fit model
model = svm.SVC(kernel='linear')
model.fit(domain,type_label)
# get separating hyperplane
w = model.coef_[0]
a = -w[0] / w[1]
xx = np.linspace(30,60)
yy = a * xx - (model.intercept_[0]) / w[1]
# plot the parallels to the separating hyperplane that pass through the support vectors
b = model.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
b = model.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])
# plot data
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
plt.plot(xx,yy,linewidth=2,color='black')
plt.plot(xx,yy_down,'k--')
plt.plot(xx,yy_up,'k--')
KeyError Traceback (most recent call last)
<ipython-input-7-ab7422e52d5c> in <module>
12
13 # plot data
---> 14 sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
15
16 # format and preprocess training data - either it's a ECC or not
F:\Users\matt\Anaconda3\lib\site-packages\seaborn\regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, height, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws, size)
549 need_cols = [x, y, hue, col, row, units, x_partial, y_partial]
550 cols = np.unique([a for a in need_cols if a is not None]).tolist()
--> 551 data = data[cols]
552
553 # Initialize the grid
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2680 if isinstance(key, (Series, np.ndarray, Index, list)):
2681 # either boolean or fancy integer index
-> 2682 return self._getitem_array(key)
2683 elif isinstance(key, DataFrame):
2684 return self._getitem_frame(key)
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
2724 return self._take(indexer, axis=0)
2725 else:
-> 2726 indexer = self.loc._convert_to_indexer(key, axis=1)
2727 return self._take(indexer, axis=1)
2728
F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
1325 if mask.any():
1326 raise KeyError('{mask} not in index'
-> 1327 .format(mask=objarr[mask]))
1328
1329 return com._values_from_object(indexer)
KeyError: "['HCPCS_CD1' 'HCPCS_CD2'] not in index"