在我的项目中,我必须在交叉验证中将SVM分类器(classifier)与Pipeline结合使用,并进行特征选择。特征选择方法是基于相关性的特征选择(Correlation-based Feature Selection,我通过python-weka-wrapper调用Weka库)。所以,我编写了下面这个带有 fit() 和 transform() 方法的类:
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from convertArff import arffOutput
import weka.core.jvm as jvm
from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection
from weka.core.converters import Loader
class CorrelationFeatureSelection(BaseEstimator, TransformerMixin):
    """Select feature columns using Weka's ``CfsSubsetEval`` with ``BestFirst`` search.

    The selection is computed in ``fit`` from the ``array``/``names`` given to
    the constructor (not from the ``X`` passed to ``fit``), and ``transform``
    keeps only the selected columns of its input.

    Parameters
    ----------
    names :
        Passed unchanged to ``arffOutput`` together with ``array``
        (presumably the attribute names -- confirm against ``convertArff``).
    array :
        Data passed to ``arffOutput``; the loaded ARFF data uses its last
        attribute as the class.

    Attributes
    ----------
    attributes_selected_ :
        Column indices kept by ``transform``; set by ``fit``.

    Notes
    -----
    ``fit`` starts the JVM only when it is not already running and never
    stops it, because a JVM that has been stopped cannot be started again in
    the same process. When ``fit`` had to start the JVM itself, ``jvm.stop``
    is registered with ``atexit`` so it is still shut down once at exit.
    Applications may instead call ``jvm.start()`` / ``jvm.stop()`` themselves
    in their ``if __name__ == "__main__":`` block.
    """

    def __init__(self, names, array):
        self.names = names
        self.array = array

    def _reset(self):
        """Drop the fitted state, if any; ``__init__`` parameters are not touched."""
        # The attribute name must match the one assigned in fit(); the
        # previous check used a misspelled name and therefore never matched.
        if hasattr(self, 'attributes_selected_'):
            del self.attributes_selected_

    def fit(self, X, y=None):
        """Run Weka attribute selection and store the selected column indices.

        ``X`` and ``y`` are accepted for scikit-learn API compatibility but are
        not read; the data comes from ``self.array`` and ``self.names``.

        Returns
        -------
        self
        """
        self._reset()
        print(type(self.array))
        # Presumably writes "result.arff", which is loaded below -- confirm
        # against convertArff.arffOutput.
        arffOutput("result", self.array, self.names)

        # fit() is called once per fold during cross-validation. Starting and
        # stopping the JVM on every call fails on the second start, so start
        # it at most once and leave it running.
        # NOTE(review): relies on python-weka-wrapper exposing a module-level
        # ``started`` flag in weka.core.jvm -- confirm for the installed version.
        if not getattr(jvm, "started", None):
            import atexit

            jvm.start()
            # We started it, so make sure it is shut down exactly once at exit.
            atexit.register(jvm.stop)
        print("sono dentro")

        loader = Loader(classname="weka.core.converters.ArffLoader")
        data = loader.load_file("result.arff")
        data.class_is_last()
        print(data)

        search = ASSearch(
            classname="weka.attributeSelection.BestFirst",
            options=["-D", "1", "-N", "5"],
        )
        evaluator = ASEvaluation(
            classname="weka.attributeSelection.CfsSubsetEval",
            options=["-P", "1", "-E", "1"],
        )
        attsel = AttributeSelection()
        attsel.search(search)
        attsel.evaluator(evaluator)
        attsel.select_attributes(data)

        print("# attributes: " + str(attsel.number_attributes_selected))
        print("attributes: " + str(attsel.selected_attributes))
        print("result string:\n" + attsel.results_string)

        attributes = attsel.selected_attributes
        print(attributes)
        print(type(attributes))
        # Drop the last entry -- presumably the class attribute index that
        # Weka reports alongside the selected features (confirm).
        self.attributes_selected_ = attributes[:-1]
        print(self.attributes_selected_)
        return self

    def transform(self, X):
        """Return the columns of ``X`` listed in ``attributes_selected_``.

        ``X`` must support 2-D column indexing (``X[:, idx]``). The columns
        are returned in the order stored by ``fit``. An empty selection
        yields an array with zero columns.
        """
        column_indices = np.asarray(self.attributes_selected_, dtype=int)
        return X[:, column_indices]
当我运行我的代码时,我收到以下错误:
RuntimeError: Failed to start Java VM
这个问题有解决办法吗?
答案 0 :(得分:0)
启动和停止JVM应该在启动应用程序的 main 方法中进行(即在 if __name__ == "__main__": 块中),因为很遗憾,JVM在同一个进程中不能被多次启动/停止。
以下代码将在第二次调用 jvm.start() 时失败:
import weka.core.jvm as jvm
# Demonstration: start and stop the JVM, then try to start it a second time
# in the same process.
print("Starting 1")
jvm.start()
print("Stopping 1")
jvm.stop()
print("Starting 2")
# Second start after a stop: per the explanation above, this is the call
# that fails, because the JVM cannot be started more than once.
jvm.start()
print("Stopping 2")
jvm.stop()