Python ValueError中的隔离树算法:尝试获取空序列的argmax

时间:2020-06-17 08:55:14

标签: python python-3.x

我正在尝试构建隔离树,这是下面的DataFrame;

Here is the link to the dataset

这是遵循iTree算法的一些代码行。 DataFrame中的属性'label'具有4类normal。,smirf等。但是,除normal外还有其他类。被视为异常。

kdd = pd.read_csv("kddcup.data.corrected", sep=",", names=cols + ["label"], index_col=None)
df = kdd.drop(['protocol_type', 'service', 'flag'], axis=1)
def select_point(data):
    n_samples,n_columns = data.shape
    return data.iloc[random.choice(list(range(n_samples)))]
select_point(df)
def select_feature(data): 
    return random.choice(data.columns)
def select_value(data,feat):
    mini = data[feat].min()
    maxi = data[feat].max()
    return (maxi-mini)*np.random.random()+mini

select_value(df,select_feature(df))

一些数据拆分

def split_data(data, split_column, split_value):

    data_below = data[data[split_column] <= split_value]
    data_above = data[data[split_column] >  split_value]
    
    return data_below, data_above
a,b =split_data(df,
           select_feature(df),
          select_value(df,select_feature(df)))
def classify_data(data):
    
    label_column = data.values[:, -1]
    unique_classes, counts_unique_classes = np.unique(label_column, return_counts=True)

    index = counts_unique_classes.argmax()
    classification = unique_classes[index]
    
    return classification
classify_data(df)
def isolation_tree(data,counter=0, max_depth=50,random_subspace=False):
    
    # End Loop
    if (counter == max_depth) or data.shape[0]<=1:
        classification = classify_data(data)
        return classification
    
    else:
        # Counter
        counter +=1
        
        # Select feature
        split_column = select_feature(data)
        
        # Select value
        split_value = select_value(data,split_column)

        # Split data
        data_below, data_above = split_data(data,split_column,split_value)
        
        # instantiate sub-tree      
        question = "{} <= {}".format(split_column, split_value)
        sub_tree = {question: []}
        
        # Recursive part
        below_answer = isolation_tree(data_below, counter,max_depth=max_depth)
        above_answer = isolation_tree(data_above, counter,max_depth=max_depth)
        
        if below_answer == above_answer:
            sub_tree = below_answer
        else:
            sub_tree[question].append(below_answer)
            sub_tree[question].append(above_answer)
        
        return sub_tree

返回了以下错误

tree = isolation_tree(df.head(6), max_depth=1)
pprint(tree)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-37-b40475ae634e> in <module>
----> 1 tree = isolation_tree(df.head(6), max_depth=1)
      2 pprint(tree)

<ipython-input-36-d71e3aed4b2a> in isolation_tree(data, counter, max_depth, random_subspace)
     25         # Recursive part
     26         below_answer = isolation_tree(data_below, counter,max_depth=max_depth)
---> 27         above_answer = isolation_tree(data_above, counter,max_depth=max_depth)
     28 
     29         if below_answer == above_answer:

<ipython-input-36-d71e3aed4b2a> in isolation_tree(data, counter, max_depth, random_subspace)
      3     # End Loop
      4     if (counter == max_depth) or data.shape[0]<=1:
----> 5         classification = classify_data(data)
      6         return classification
      7 

<ipython-input-14-26b1d48eb27a> in classify_data(data)
      4     unique_classes, counts_unique_classes = np.unique(label_column, return_counts=True)
      5 
----> 6     index = counts_unique_classes.argmax()
      7     classification = unique_classes[index]
      8 

ValueError: attempt to get argmax of an empty sequence

0 个答案:

没有答案