我正在尝试使用Python中的PyTorch制作我自己的数据集。但是,每次打开文件时内存占用都会不断增长,像是发生了内存泄漏。
在以下for循环期间发生内存泄漏
for subj, _file in enumerate(filelist):
我在代码中删除了不必要的内容,例如import
# How the per-channel mean/std statistics are accumulated:
#   'whole'   -> one bucket per channel over all subjects
#   'subject' -> one bucket per channel *per subject*
parser.add_argument('--stddev', dest='stddev', default='subject', type=str, help="How to calculate ")
args = parser.parse_args()

# Options
if args.sess_num == 0:
    label_set = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

base_dir = './kriss_proto'
ori_base_dir = '{}/original'.format(base_dir)
filelist = os.listdir(ori_base_dir)  # ['BD001.mat', ...]
filenum = len(filelist)

# Saving data: eeg_data['dataset'][idx] holds one trial; split_data holds split indices.
eeg_data = {}
eeg_data['dataset'] = {}
split_data = {}
split_data['splits'] = []
split_data['splits'].append({})

# Accumulators for mean/std, one list per EEG channel (32 channels).
# Comprehensions replace the original manual append-loops (same structure).
if args.stddev == 'subject':
    # _meanstd[channel][subject] -> list of samples
    _meanstd = [[[] for _ in range(filenum)] for _ in range(32)]
else:
    # _meanstd[channel] -> list of samples
    _meanstd = [[] for _ in range(32)]

idx = 0  # running index over all accepted trials across files
# Convert every subject's .mat (HDF5) file into per-trial tensors.
for subj, _file in enumerate(filelist):
    filepath = os.path.join(ori_base_dir, _file)
    with h5py.File(filepath, 'r') as mat:
        ## Step by step (with object searching)
        bd_eeg = mat.get('BD_EEG')
        vst = bd_eeg.get('VST')
        for run in vst.keys():
            print("Processing {} - {}".format(_file, run))
            run_data = vst.get(run)
            # th.Tensor(...) copies the HDF5 dataset into host memory once,
            # so nothing below depends on the open file handle.
            eeg = th.Tensor(run_data['data3D'])
            eeg = eeg.permute(2, 1, 0)
            label = th.Tensor(run_data['class_labels'])
            label = th.squeeze(label)
            # DATA COPY
            for i in range(int(label.shape[0])):
                if (label[i] in label_set) and (int(eeg[i].shape[0]) > 400):
                    eeg_data['dataset'][idx] = {}
                    # MEMORY-LEAK FIX: eeg[i, :, :32] is a view that shares
                    # storage with the whole run tensor, so storing it kept
                    # every run's full tensor alive for the life of eeg_data.
                    # .clone() copies just this slice and lets the big
                    # tensor be garbage-collected after the run.
                    eeg_data['dataset'][idx]['eeg'] = eeg[i, :, :32].clone()
                    eeg_data['dataset'][idx]['label'] = label[i] - label_set[0]  # label should start from 0
                    # Same leak in the statistics: per-element indexing makes
                    # 0-dim tensor views that also pin the run tensor.
                    # .tolist() yields plain floats (numerically identical)
                    # and replaces the O(rows) Python loop with one C call.
                    if args.stddev == 'whole':
                        for j in range(32):
                            _meanstd[j].extend(eeg[i][:, j].tolist())
                    elif args.stddev == 'subject':
                        for j in range(32):
                            _meanstd[j][subj].extend(eeg[i][:, j].tolist())
                    idx += 1