我正在尝试构建隔离树,这是下面的DataFrame;
Here is the link to the dataset
这是遵循iTree算法的一些代码行。 DataFrame中的属性'label'具有4类normal。,smirf等。但是,除normal外还有其他类。被视为异常。
kdd = pd.read_csv("kddcup.data.corrected", sep=",", names=cols + ["label"], index_col=None)
df = kdd.drop(['protocol_type', 'service', 'flag'], axis=1)
def select_point(data):
n_samples,n_columns = data.shape
return data.iloc[random.choice(list(range(n_samples)))]
select_point(df)
def select_feature(data):
return random.choice(data.columns)
def select_value(data,feat):
mini = data[feat].min()
maxi = data[feat].max()
return (maxi-mini)*np.random.random()+mini
select_value(df,select_feature(df))
一些数据拆分
def split_data(data, split_column, split_value):
data_below = data[data[split_column] <= split_value]
data_above = data[data[split_column] > split_value]
return data_below, data_above
a,b =split_data(df,
select_feature(df),
select_value(df,select_feature(df)))
def classify_data(data):
label_column = data.values[:, -1]
unique_classes, counts_unique_classes = np.unique(label_column, return_counts=True)
index = counts_unique_classes.argmax()
classification = unique_classes[index]
return classification
classify_data(df)
def isolation_tree(data,counter=0, max_depth=50,random_subspace=False):
# End Loop
if (counter == max_depth) or data.shape[0]<=1:
classification = classify_data(data)
return classification
else:
# Counter
counter +=1
# Select feature
split_column = select_feature(data)
# Select value
split_value = select_value(data,split_column)
# Split data
data_below, data_above = split_data(data,split_column,split_value)
# instantiate sub-tree
question = "{} <= {}".format(split_column, split_value)
sub_tree = {question: []}
# Recursive part
below_answer = isolation_tree(data_below, counter,max_depth=max_depth)
above_answer = isolation_tree(data_above, counter,max_depth=max_depth)
if below_answer == above_answer:
sub_tree = below_answer
else:
sub_tree[question].append(below_answer)
sub_tree[question].append(above_answer)
return sub_tree
返回了以下错误
tree = isolation_tree(df.head(6), max_depth=1)
pprint(tree)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-37-b40475ae634e> in <module>
----> 1 tree = isolation_tree(df.head(6), max_depth=1)
2 pprint(tree)
<ipython-input-36-d71e3aed4b2a> in isolation_tree(data, counter, max_depth, random_subspace)
25 # Recursive part
26 below_answer = isolation_tree(data_below, counter,max_depth=max_depth)
---> 27 above_answer = isolation_tree(data_above, counter,max_depth=max_depth)
28
29 if below_answer == above_answer:
<ipython-input-36-d71e3aed4b2a> in isolation_tree(data, counter, max_depth, random_subspace)
3 # End Loop
4 if (counter == max_depth) or data.shape[0]<=1:
----> 5 classification = classify_data(data)
6 return classification
7
<ipython-input-14-26b1d48eb27a> in classify_data(data)
4 unique_classes, counts_unique_classes = np.unique(label_column, return_counts=True)
5
----> 6 index = counts_unique_classes.argmax()
7 classification = unique_classes[index]
8
ValueError: attempt to get argmax of an empty sequence