我想知道如何将不同的数据存储到numpy数组中,以便将其提供给机器学习SVC算法。 我的目标是获得大小的数据框(示例*功能),如下所示:
使用:
我怎样才能在Python中做到这一点?这对sklearn来说是否合适?
以下代码中的当前错误:
ValueError: setting an array element with a sequence.
代码:
# -*- coding: utf-8 -*-
"""----------------------------------------------------------------------------
-------------------------------- Imports --------------------------------------
----------------------------------------------------------------------------"""
import os
import pandas as pd
import numpy as np
from scipy import io as sio
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
"""----------------------------------------------------------------------------
------------------------------ Parameters -------------------------------------
----------------------------------------------------------------------------"""
# Path to the clean EEG .mat files
EEG_path = "data"
# Listing of the .mat files
EEG = list()
for elt in os.listdir(EEG_path):
if os.path.isfile(os.path.join(EEG_path, elt)):
if '.mat' in elt[len(elt)-4:]:
EEG.append(elt)
# Spectrum used
spectrum = ['all', (1,45), (8,12)]
nb_features = 3
"""----------------------------------------------------------------------------
------------------------------ Functions --------------------------------------
----------------------------------------------------------------------------"""
# Function on 1 channel
# Input: All points from one channel, for one epoch
def filter(x, n, fs, fc1, fc2):
b, a = signal.butter(n, [fc1/(fs/2), fc2/(fs/2)], 'bandpass')
y = signal.filtfilt(b, a, x)
return y
def haming(x, L):
# Symetric L-points hamming window
window = signal.hamming(L)
y = x * window.T # Element wise multiplication
return y
# Function on one epoch
# Input is a matrix of size (channel * length)
def amp_mean(x):
size = x.shape
y = list()
for i in range(size[0]):
y.append(np.mean(x[i,:]))
return y
def amp_max(x):
size = x.shape
y = list()
for i in range(size[0]):
y.append(np.max(abs(x[i,:])))
return y
"""----------------------------------------------------------------------------
-------------------------------- Script ---------------------------------------
----------------------------------------------------------------------------"""
# Load data
s_EEG = "{}/{}".format(EEG_path, EEG[4])
data = sio.loadmat(s_EEG)['s_EEG']['data'][0][0].astype(float) # data[i, j ,k]
labels = sio.loadmat(s_EEG)['s_EEG']['labels'][0][0][0] # labels[k]
fs = sio.loadmat(s_EEG)['s_EEG']['sampling_rate'][0][0][0][0] # 500 Hz
size = data.shape
# Creates an empty data frame of size (epoch * features)
df = np.empty(shape = (size[2], nb_features * len(spectrum)))
# Filling the dataframe with features
# for every epoch
for k in range(size[2]):
for freq in spectrum:
data_to_compute = np.empty(shape = size, dtype = float)
# Apply hamming
if freq == 'all':
for i in range(size[0]):
data_to_compute[i,:,k] = haming(data[i,:,k], size[1])
# Apply hamming after filtering
else:
for i in range(size[0]):
data_to_compute[i,:,k] = haming(filter(data[i,:,k],
15, fs, freq[0], freq[1]), size[1])
# data_to_compute is ready to have feature extracted
for n in range(0, df.shape[1], nb_features):
df[k, n] = data_to_compute[:,:,k]
df[k, n+1] = amp_mean(data_to_compute[:,:,k])
df[k, n+2] = amp_max(data_to_compute[:,:,k])
# X signal / Y label
X_train, X_test, Y_train, Y_test = train_test_split(data,
list(labels),
test_size=0.15,
random_state=42)
clf = SVC()
clf.fit(X_train, Y_train)
变量类型:
谢谢!