这是我的示例代码
# Pull the two signal columns out of the source table.
dataset_current = dataset_seq['Motor_Current_Average']
dataset_consistency = dataset_seq['Consistency_Average']

# Non-overlapping sliding windows over the motor-current column.
dataset_slide = dataset_current.tolist()
from window_slider import Slider
import numpy
import pandas as pd  # required by the loop below; harmless if already imported

# Named `current_values` rather than `list` so the builtin is not shadowed.
current_values = numpy.array(dataset_slide)
bucket_size = 336
overlap_count = 0  # zero overlap: consecutive windows share no samples
slider = Slider(bucket_size, overlap_count)
slider.fit(current_values)

# Maps 'df_current<N>' (N counted from 1) to a one-column DataFrame.
empty_dictionary = {}
count = 0
while True:
    count += 1
    window_data = slider.slide()
    # A 1-D array becomes a single column labelled 0; give it the signal name.
    empty_dictionary['df_current%s' % count] = pd.DataFrame(window_data).rename(
        columns={0: 'Motor_Current_Average'}
    )
    if slider.reached_end_of_list():
        break

# At module scope locals() is the globals() dict, so this publishes every
# window as a top-level variable (df_current1, df_current2, ...). Inside a
# function this call would have no reliable effect.
locals().update(empty_dictionary)
# Non-overlapping sliding windows over the consistency column.
import pandas as pd  # required by the loop below; harmless if already imported

dataset_slide_consistency = dataset_consistency.tolist()
# Named `consistency_values` rather than `list` so the builtin is not shadowed.
consistency_values = numpy.array(dataset_slide_consistency)
slider_consistency = Slider(bucket_size, overlap_count)
slider_consistency.fit(consistency_values)

# Maps 'df_consistency<N>' (N counted from 1) to a one-column DataFrame.
empty_dictionary_consistency = {}
count_consistency = 0
while True:
    count_consistency += 1
    window_data_consistency = slider_consistency.slide()
    # A 1-D array becomes a single column labelled 0; give it the signal name.
    empty_dictionary_consistency['df_consistency%s' % count_consistency] = (
        pd.DataFrame(window_data_consistency).rename(
            columns={0: 'Consistency_Average'}
        )
    )
    if slider_consistency.reached_end_of_list():
        break

# At module scope locals() is the globals() dict, so this publishes every
# window as a top-level variable (df_consistency1, df_consistency2, ...).
locals().update(empty_dictionary_consistency)
import pandas as pd
# Join each consistency window with the motor-current window of the same
# index, side by side. The loop is bounded by the number of windows; the
# previous `while True` had no exit and always re-joined the last windows.
output_current = {}
# NOTE(review): both window dicts are presumably the same size because they
# were cut from columns of one dataset; min() guards against a mismatch.
window_total = min(len(empty_dictionary), len(empty_dictionary_consistency))
for increment in range(1, window_total + 1):
    output_current['dataframe%s' % increment] = pd.concat(
        [
            empty_dictionary_consistency['df_consistency%s' % increment],
            empty_dictionary['df_current%s' % increment],
        ],
        axis=1,
    )
# Build one combined DataFrame per window by joining the per-signal windows
# column-wise. The previous `while i < length` loop never advanced `i`, so it
# could not terminate; a bounded `for` replaces it.
output_current = {}
length = len(empty_dictionary_consistency)

# (window dict, key pattern) pairs in the column order of the combined frame.
# NOTE(review): `empty_dictionary_current` and the Timestamp/flowrate/level/
# label dicts are not defined in the visible code -- the motor-current dict
# built above is named `empty_dictionary`. Confirm these names exist.
window_sources = (
    (empty_dictionary_Timestamp, 'df_Timestamp%s'),
    (empty_dictionary_current, 'df_current%s'),
    (empty_dictionary_flowrate, 'df_flowrate%s'),
    (empty_dictionary_level, 'df_level%s'),
    (empty_dictionary_consistency, 'df_consistency%s'),
    (empty_dictionary_label, 'df_label%s'),
)

for increment in range(1, length + 1):
    output_current['windows%s' % increment] = pd.concat(
        [source[pattern % increment] for source, pattern in window_sources],
        axis=1,
    )

# At module scope locals() is the globals() dict, so this publishes every
# combined window as a top-level variable (windows1, windows2, ...).
locals().update(output_current)
我的问题是:我有一个 26296 行的数据集,其标签列中有 24662 个样本为 0(表示机器正在运行),1633 个样本为 1(表示机器停止)。注意:标签是根据机器运行或停止的日期创建的,我们已经为数据集添加了标签。
如何在上述不平衡数据集(24662 个样本为 0,只有 1633 个样本为 1)上使 auc_roc 得分至少达到 85%?另外,如何使用这种滑动窗口技术训练分类器?通过上述操作,我们已经能够从给定的 26296 行数据集中创建 79 个窗口。