sklearn二元分类器的标签值

时间:2017-02-10 21:31:14

标签: python machine-learning scikit-learn classification

我的数据集中的样本被标记为"0"或"1"。

为了能够正确分类,标签是否应该改为"-1"和"1"?

我不确定sklearn分类器最小化的是哪一种损失函数。也许它依赖于标签的取值是"1"还是"-1"?

1 个答案:

答案 0 :(得分:1)

sklearn 中的分类器通常可以使用不同的损失函数或惩罚项。虽然我没能在任何地方找到相关文档,但根据我的经验,sklearn 通常能够聪明地处理您传入的类别标签。实际的求解器使用外部库,因此内部可能会对标签做一些清理(规范化)处理。但一般来说,我发现下面这些用法都是开箱即用的:

例如,sklearn 的 LogisticRegression 既接受 {-1, 1} 标签,也接受 {0, 1} 标签:

>>> from sklearn.linear_model import LogisticRegression
>>> import numpy as np
>>> X = np.random.randint(0,10,(20,5))
>>> y1 = np.random.choice([-1,1], 20)
>>> y2 = np.random.choice([0,1], 20)
>>> y1
array([-1, -1,  1, -1, -1,  1, -1,  1, -1,  1,  1, -1,  1, -1, -1, -1,  1,
        1, -1,  1])
>>> y2
array([0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0])
>>> model1, model2 = LogisticRegression(), LogisticRegression()
>>> model1.fit(X,y1)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
>>> model2.fit(X, y2)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
>>> model1.predict(X)
array([-1,  1,  1, -1,  1, -1, -1,  1, -1, -1,  1,  1,  1, -1, -1, -1, -1,
        1, -1,  1])
>>> model2.predict(X)
array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0])
>>> y1
array([-1, -1,  1, -1, -1,  1, -1,  1, -1,  1,  1, -1,  1, -1, -1, -1,  1,
        1, -1,  1])
>>> y2
array([0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0])

甚至可以使用字符串作为标签(下面的示例仍然使用 LogisticRegression):

>>> y3 = np.random.choice(['a','b'], 20)
>>> model3 = LogisticRegression()
>>> model3.fit(X,y3)
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
>>> model3.classes_
array(['a', 'b'],
      dtype='<U1')
>>> model3.predict(X)
array(['b', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'b', 'b', 'a', 'a', 'a',
       'a', 'b', 'b', 'a', 'a', 'a', 'a'],
      dtype='<U1')

请直接阅读相应分类器的文档;我认为如果它只接受某种特定的标签方案,文档中会有明确说明。