导入CSV文件后调用sns.lmplot时出现KeyError

时间:2019-02-04 22:31:59

标签: python pandas matplotlib machine-learning seaborn

我刚接触这些内容,弄不清楚为什么会出现这个KeyError,非常感谢任何指点!我正尝试把CMS的一些医疗索赔示例数据,用在我根据YouTube教程整理出来的示例程序上……想知道这个错误是否是因为“HCPCS_CD1”列的某些值可能为空?

# packages for data and visual analysis
import numpy as np
import pandas as pd
from sklearn import svm
import matplotlib.pyplot as plt
import seaborn as sns; sns.set(font_scale=1.2)
%matplotlib inline

# begin
# Load the outpatient claims sample into a DataFrame and preview the first rows.
claims = pd.read_csv('DE1_0_2008_to_2010_Outpatient_Claims_Sample_1_CCONLY.csv')
print(claims.head())

# plot data
# Scatter plot of HCPCS_CD1 (x) against HCPCS_CD2 (y), coloured by
# ICD9_DGNS_CD_1; fit_reg=False suppresses the regression line.
# NOTE(review): lmplot selects the x/y/hue columns from `data` by name, so a
# KeyError "[...] not in index" here means those names are not in the CSV
# header. Empty cell values would not cause it. Compare the names against
# claims.columns -- presumably the file spells them differently (e.g. with an
# extra underscore); verify against the dataset.
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});

# format and preprocess training data - either it's a ECC or not
# Label is 0 where the first diagnosis code equals the string '1561', else 1.
type_label = np.where(claims['ICD9_DGNS_CD_1']=='1561',0,1)
# Every column name except the first; not used again in the code shown here.
claim_features = claims.columns.values[1:].tolist()

#claim_features - limit which columns to consider
# Feature matrix built from three columns.
# NOTE(review): ICD9_DGNS_CD_1 is also the column the label is derived from,
# and the line maths below only uses the first two coefficients -- confirm
# that three features (including the label source) are intended.
# NOTE(review): the comparison above treats the diagnosis code as a string;
# presumably these columns need numeric encoding before SVC.fit -- verify.
domain = claims[['ICD9_DGNS_CD_1','HCPCS_CD1','HCPCS_CD2']].values
print(domain)

# fit model
model = svm.SVC(kernel='linear')
model.fit(domain,type_label)

# get separating hyperplane
# With w[0]*x + w[1]*y + intercept = 0, solve for y:
# slope a = -w[0]/w[1], offset = -intercept/w[1].
w = model.coef_[0]
a = -w[0] / w[1]
# x range over which the lines are drawn (30 to 60).
xx = np.linspace(30,60)
yy = a * xx - (model.intercept_[0]) / w[1]

# plot the parallels to the separating hyperplane that pass through the support vectors
# Same slope `a`, shifted to pass through the first support vector...
b = model.support_vectors_[0]
yy_down = a * xx + (b[1] - a * b[0])
# ...and through the last support vector.
b = model.support_vectors_[-1]
yy_up = a * xx + (b[1] - a * b[0])

# plot data
# Redraw the scatter plot, then overlay the decision line (solid black) and
# the two margin lines (dashed black) on the current axes.
sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
plt.plot(xx,yy,linewidth=2,color='black')
plt.plot(xx,yy_down,'k--')
plt.plot(xx,yy_up,'k--')

   KeyError                                  Traceback (most recent call last)
<ipython-input-7-ab7422e52d5c> in <module>
     12 
     13 # plot data
---> 14 sns.lmplot('HCPCS_CD1','HCPCS_CD2',data=claims,hue='ICD9_DGNS_CD_1',palette='Set1',fit_reg=False,scatter_kws={"s":70});
     15 
     16 # format and preprocess training data - either it's a ECC or not

F:\Users\matt\Anaconda3\lib\site-packages\seaborn\regression.py in lmplot(x, y, data, hue, col, row, palette, col_wrap, height, aspect, markers, sharex, sharey, hue_order, col_order, row_order, legend, legend_out, x_estimator, x_bins, x_ci, scatter, fit_reg, ci, n_boot, units, order, logistic, lowess, robust, logx, x_partial, y_partial, truncate, x_jitter, y_jitter, scatter_kws, line_kws, size)
    549     need_cols = [x, y, hue, col, row, units, x_partial, y_partial]
    550     cols = np.unique([a for a in need_cols if a is not None]).tolist()
--> 551     data = data[cols]
    552 
    553     # Initialize the grid

F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   2680         if isinstance(key, (Series, np.ndarray, Index, list)):
   2681             # either boolean or fancy integer index
-> 2682             return self._getitem_array(key)
   2683         elif isinstance(key, DataFrame):
   2684             return self._getitem_frame(key)

F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\frame.py in _getitem_array(self, key)
   2724             return self._take(indexer, axis=0)
   2725         else:
-> 2726             indexer = self.loc._convert_to_indexer(key, axis=1)
   2727             return self._take(indexer, axis=1)
   2728 

F:\Users\matt\Anaconda3\lib\site-packages\pandas\core\indexing.py in _convert_to_indexer(self, obj, axis, is_setter)
   1325                 if mask.any():
   1326                     raise KeyError('{mask} not in index'
-> 1327                                    .format(mask=objarr[mask]))
   1328 
   1329                 return com._values_from_object(indexer)

KeyError: "['HCPCS_CD1' 'HCPCS_CD2'] not in index"

0 个答案:

没有答案