Python 3 OS Windows 10错误处理列表

时间:2018-03-01 11:59:11

标签: python list integer python-2.x kaggle

我尝试在免费访问here中测试用python 2编写的代码。当我运行代码时遇到了与list

相关的以下错误

非常感谢帮助 简 (我是python中的新手)

Traceback       
    `(C:\Users\myusername\Anaconda3) C:\Users\myusername\Documents\DSSP7\Projet 
        Insurance\kaggle-prudential-master\code>python.exe logRegression.py
        Traceback (most recent call last):
        File "logRegression.py", line 98, in <module>
        y_1, y_2 = train_ohd.iloc[X_1]['Response'], train_ohd.iloc[X_2]['Response']
        File "C:\Users\myusername\Anaconda3\lib\site-
        packages\pandas\core\indexing.py", line 1328, in __getitem__
        return self._getitem_axis(key, axis=0)
        File "C:\Users\myusername\Anaconda3\lib\site-
        packages\pandas\core\indexing.py", line 1738, in _getitem_axis
        return self._get_list_axis(key, axis=axis)
        File "C:\Users\myusername\Anaconda3\lib\site-
        packages\pandas\core\indexing.py", line 1715, in _get_list_axis
        return self.obj.take(key, axis=axis, convert=False)
        File "C:\Users\myusername\Anaconda3\lib\site-
        packages\pandas\core\generic.py", line 1928, in take
        convert=True, verify=True)
        File "C:\Users\myusername
        \Anaconda3\lib\site-packages\pandas\core\internals.py", line 3998, in take
        else np.asanyarray(indexer, dtype='int64'))
        File "C:\Users\myusername
        Anaconda3\lib\site-packages\numpy\core\numeric.py", line 583, in asanyarray
        return array(a, dtype, copy=False, order=order, subok=True)
        TypeError: int() argument must be a string, a bytes-like object or a number, 
        not 'filter' 

logRegression.py

'import pandas as pd 
from sklearn.linear_model import LogisticRegression
import json
from sklearn import metrics
from label_decoders import *

config = json.load(open('settings.json'))
train = pd.read_csv(config['train'])
test = pd.read_csv(config['test'])

# combine train and test
all_data = train.append(test)

# Preprocess data

# create any new variables    
all_data['Product_Info_2_char'] = all_data.Product_Info_2.str[0]
all_data['Product_Info_2_num'] = all_data.Product_Info_2.str[1]

# factorize categorial variables
all_data['Product_Info_2'] = pd.factorize(all_data['Product_Info_2'])[0]
all_data['Product_Info_2_char'] = pd.factorize(all_data['Product_Info_2_char'])[0]
all_data['Product_Info_2_num'] = pd.factorize(all_data['Product_Info_2_num'])[0]

## combine features
# BMI by age
all_data['BMI_Age'] = all_data['BMI'] * all_data['Ins_Age']

## sum features
#  Med keyword sum
med_keyword_columns = all_data.columns[all_data.columns.str.startswith('Medical_Keyword_')]
all_data['Med_Keywords_Count'] = all_data[med_keyword_columns].sum(axis=1)

# handle missing values : eliminate missing values
## Use -1 for NA
all_data.apply(lambda x: sum(x.isnull()),1)
all_data['countna'] = all_data.apply(lambda x: sum(x.isnull()),1)
all_data.fillna(-1, inplace=True)

#fix the dtype of the label column(convert it to integer)
all_data['Response'] = all_data['Response'].astype(int)

# split train and test
train_ohd = all_data[all_data['Response']>0].copy()
test_ohd = all_data[all_data['Response']<1].copy()

# convert data 
features=train_ohd.columns.tolist()
features = [x.replace('=','_') for x in features]
features = [x.replace('_','i') for x in features]
train_ohd.columns = features
features_t=test_ohd.columns.tolist()
features_t = [x.replace('=','i') for x in features_t]
features_t = [x.replace('_','i') for x in features_t]
test_ohd.columns = features_t

features.remove("Id")
features.remove("Response")

train_ohd['lr1'] = [0]*train_ohd.shape[0]
train_ohd['lr2'] = [0]*train_ohd.shape[0]
train_ohd['lr3'] = [0]*train_ohd.shape[0]
train_ohd['lr4'] = [0]*train_ohd.shape[0]
train_ohd['lr5'] = [0]*train_ohd.shape[0]
train_ohd['lr6'] = [0]*train_ohd.shape[0]
train_ohd['lr7'] = [0]*train_ohd.shape[0]
train_ohd['lr8'] = [0]*train_ohd.shape[0]
train_ohd['lr9'] = [0]*train_ohd.shape[0]
train_ohd['lr10'] = [0]*train_ohd.shape[0]
train_ohd['lr11'] = [0]*train_ohd.shape[0]
train_ohd['lr12'] = [0]*train_ohd.shape[0]
train_ohd['lr13'] = [0]*train_ohd.shape[0]


l = train_ohd.shape[0]
ind_list = [(range(0,l//10), filter(lambda x: x not in range(0,l//10), range(0,l))), 
            (range(l//10,l//10*2), filter(lambda x: x not in range(l//10,l//10*2), range(0,l))),
            (range(l//10*2,l//10*3), filter(lambda x: x not in range(l//10*2,l//10*3), range(0,l))),
            (range(l//10*3,l//10*4), filter(lambda x: x not in range(l//10*3,l//10*4), range(0,l))),
            (range(l//10*4,l//10*5), filter(lambda x: x not in range(l//10*4,l//10*5), range(0,l))),
            (range(l//10*5,l//10*6), filter(lambda x: x not in range(l//10*5,l//10*6), range(0,l))),
            (range(l//10*6,l//10*7), filter(lambda x: x not in range(l//10*6,l//10*7), range(0,l))),
            (range(l//10*7,l//10*8), filter(lambda x: x not in range(l//10*7,l//10*8), range(0,l))),
            (range(l//10*8,l//10*9), filter(lambda x: x not in range(l//10*8,l//10*9), range(0,l))),
            (range(l//10*9,l), filter(lambda x: x not in range(l//10*9,l), range(0,l)))]



ld = [labels_decoder1,labels_decoder2,labels_decoder3,labels_decoder4,labels_decoder5,labels_decoder6,labels_decoder7,
      labels_decoder8,labels_decoder9,labels_decoder10,labels_decoder11,labels_decoder12,labels_decoder13]

# train the model
i = 0
for l in ld:
    i = i + 1    

    for j in range(10):

        X_1, X_2 = ind_list[j][1], ind_list[j][0]
        y_1, y_2 = train_ohd.iloc[X_1]['Response'], train_ohd.iloc[X_2]['Response']

# get preds based on train data      
        lr = LogisticRegression(random_state=1)
        lr.fit(train_ohd[features].iloc[X_1],l(y_1))
        train_ohd['lr%s' % (i)].iloc[X_2] = lr.predict_proba(train_ohd[features].iloc[X_2]).T[1]


train_ohd.to_csv(config['train_lr'],index=0)

y = train_ohd['Response']
#print(y)  mon rajout pour afficher en local

# test the model
i = 0
for l in ld:
    i = i + 1    

# Pas de y dans le test data

# get preds based on test data
###1
    lr = LogisticRegression(random_state=1)
    lr.fit(train_ohd[features],l(y)), i
    test_ohd['lr%s' % (i)] = lr.predict_proba(test_ohd[features]).T[1]

test_ohd.to_csv(config['test_lr'],index=0)
#y_pred = test_ohd   A SPEFICIER ce que l'on doit afficher cf kaggle ET A PRINT
#y_pred = test_ohd.to_csv(config['test_lr'],index=0)
`    

1 个答案:

答案 0 :(得分:0)

这里的问题源于generator方法的不同行为。您可以在this question中查看,或在python3python2文档中阅读。

简而言之:在python 2中它会生成列表,因此TypeError: int() argument must be a string, a bytes-like object or a number, not 'filter'都是整数列表。

但是,在python 3中它会生成ind_list[:][1],这就是为什么你得到输出ind_list,因为list(filter(lambda x: x not in range(0,l//10)) 都包含过滤器对象。

您可以将创建conda create -n py27 python=2.7 sklearn pandas numpy 中的过滤器命令的所有输出转换为列表:

activate py27

或使用python2。但我猜你有一个特殊的原因要使用python3

<强>提示:

由于您使用的是anaconda,您只需执行

即可
{{1}}

接着是

{{1}}

在anaconda提示符下,它将为您提供一个使用python2的虚拟环境