From 2b771ad84298bf9f9ad8e73dbbcb2ab0660f3322 Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Wed, 30 Jan 2019 22:52:04 +0100
Subject: [PATCH 01/10] Add classweights to hp space

---
 hpsklearn/components.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 9a9b2af7..e5b95289 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -224,6 +224,13 @@ def inv_patience_param(x):
 def hp_bool(name):
     return hp.choice(name, [False, True])
 
+def _trees_classweight(name):
+    return hp.choice(name, ['balanced', 'balanced_subsample','None'])
+
+def _classweight(name):
+    return hp.choice(name, ['balanced','None'])
+
+
 def _svm_gamma(name, n_features=1):
     '''Generator of default gamma values for SVMs.
     This setting is based on the following rationales:

From 4d17559248b288c8d2d91daa42b1936e17336fbc Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 07:06:43 +0100
Subject: [PATCH 02/10] Add classweight to tree models and SVC

---
 hpsklearn/components.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index e5b95289..721acb25 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -224,10 +224,10 @@ def inv_patience_param(x):
 def hp_bool(name):
     return hp.choice(name, [False, True])
 
-def _trees_classweight(name):
+def _trees_class_weight(name):
     return hp.choice(name, ['balanced', 'balanced_subsample','None'])
 
-def _classweight(name):
+def _class_weight(name):
     return hp.choice(name, ['balanced','None'])
 
 
@@ -430,6 +430,7 @@ def _svm_hp_space(
     n_features=1,
     C=None,
     gamma=None,
+    class_weight=None,
     coef0=None,
     degree=None,
     shrinking=None,
@@ -479,6 +480,8 @@ def _svm_hp_space(
         tol=_svm_tol(name_func('tol')) if tol is None else tol,
         max_iter=(_svm_max_iter(name_func('maxiter'))
                   if max_iter is None else max_iter),
+        class_weight=(_class_weight(name_func('class_weight'))
+                  if class_weight is None else class_weight),
         verbose=verbose,
         cache_size=cache_size)
     return hp_space
@@ -730,6 +733,7 @@ def _trees_hp_space(
     n_estimators=None,
     max_features=None,
     max_depth=None,
+    class_weight=None,
     min_samples_split=None,
     min_samples_leaf=None,
     bootstrap=None,
@@ -746,6 +750,8 @@ def _trees_hp_space(
                       if max_features is None else max_features),
         max_depth=(_trees_max_depth(name_func('max_depth'))
                    if max_depth is None else max_depth),
+        class_weight=(_trees_class_weight(name_func('class_weight'))
+                   if class_weight is None else class_weight),
         min_samples_split=(_trees_min_samples_split(name_func('min_samples_split'))
                            if min_samples_split is None else min_samples_split),
         min_samples_leaf=(_trees_min_samples_leaf(name_func('min_samples_leaf'))

From 0634197114e28ce9ff8ca6a1b84dac5f096296a0 Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 07:27:42 +0100
Subject: [PATCH 03/10] Move class_weight to classifier hp space

---
 hpsklearn/components.py | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 721acb25..0f8305f6 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -225,11 +225,10 @@ def hp_bool(name):
     return hp.choice(name, [False, True])
 
 def _trees_class_weight(name):
-    return hp.choice(name, ['balanced', 'balanced_subsample','None'])
+    return hp.choice(name, ['balanced', 'balanced_subsample', None])
 
 def _class_weight(name):
-    return hp.choice(name, ['balanced','None'])
-
+    return hp.choice(name, ['balanced', None])
 
 def _svm_gamma(name, n_features=1):
     '''Generator of default gamma values for SVMs.
@@ -417,10 +416,6 @@ def _random_state(name, random_state):
     else:
         return random_state
 
-def _class_weight(name):
-    return hp.choice(name, [None, 'balanced'])
-
-
 ##############################################
 ##==== SVM hyperparameters search space ====##
 ##############################################
@@ -480,8 +475,6 @@ def _svm_hp_space(
         tol=_svm_tol(name_func('tol')) if tol is None else tol,
         max_iter=(_svm_max_iter(name_func('maxiter'))
                   if max_iter is None else max_iter),
-        class_weight=(_class_weight(name_func('class_weight'))
-                  if class_weight is None else class_weight),
         verbose=verbose,
         cache_size=cache_size)
     return hp_space
@@ -492,7 +485,9 @@ def _svc_hp_space(name_func, random_state=None, probability=False):
     '''
     hp_space = dict(
         random_state = _random_state(name_func('rstate'),random_state),
-        probability=probability
+        probability=probability,
+        class_weight=(_class_weight(name_func('class_weight'))
+                      if class_weight is None else class_weight)
     )
     return hp_space
@@ -750,8 +745,6 @@ def _trees_hp_space(
                       if max_features is None else max_features),
         max_depth=(_trees_max_depth(name_func('max_depth'))
                    if max_depth is None else max_depth),
-        class_weight=(_trees_class_weight(name_func('class_weight'))
-                   if class_weight is None else class_weight),
         min_samples_split=(_trees_min_samples_split(name_func('min_samples_split'))
                            if min_samples_split is None else min_samples_split),
         min_samples_leaf=(_trees_min_samples_leaf(name_func('min_samples_leaf'))
@@ -785,6 +778,8 @@ def _name(msg):
     hp_space = _trees_hp_space(_name, **kwargs)
     hp_space['criterion'] = (_trees_criterion(_name('criterion'))
                              if criterion is None else criterion)
+    hp_space['class_weight']=(_trees_class_weight(name_func('class_weight'))
+                              if class_weight is None else class_weight)
     return scope.sklearn_RandomForestClassifier(**hp_space)

From 9bf0940bcd41c2e169308fcda9e863eaf3ed468e Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 07:32:58 +0100
Subject: [PATCH 04/10] Define default classweight

---
 hpsklearn/components.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 0f8305f6..d44ef9ab 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -480,7 +480,7 @@ def _svm_hp_space(
     return hp_space
 
 
-def _svc_hp_space(name_func, random_state=None, probability=False):
+def _svc_hp_space(name_func, random_state=None, class_weight=None, probability=False):
     '''Generate SVC specific hyperparamters
     '''
     hp_space = dict(
@@ -761,7 +761,7 @@ def _trees_hp_space(
 #############################################################
 ##==== Random forest classifier/regressor constructors ====##
 #############################################################
-def random_forest(name, criterion=None, **kwargs):
+def random_forest(name, criterion=None, class_weight=None, **kwargs):
     '''
     Return a pyll graph with hyperparamters that will construct
     a sklearn.ensemble.RandomForestClassifier model.

From 8bb73dde0ab8b13579f55f5f76413f797538bbf1 Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 07:47:50 +0100
Subject: [PATCH 05/10] Fix?

---
 hpsklearn/components.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index d44ef9ab..7d3ce3ed 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -425,7 +425,6 @@ def _svm_hp_space(
     n_features=1,
     C=None,
     gamma=None,
-    class_weight=None,
     coef0=None,
     degree=None,
     shrinking=None,
@@ -480,15 +479,13 @@ def _svm_hp_space(
     return hp_space
 
 
-def _svc_hp_space(name_func, random_state=None, class_weight=None, probability=False):
+def _svc_hp_space(name_func, random_state=None, probability=False):
     '''Generate SVC specific hyperparamters
     '''
     hp_space = dict(
         random_state = _random_state(name_func('rstate'),random_state),
-        probability=probability,
-        class_weight=(_class_weight(name_func('class_weight'))
-                      if class_weight is None else class_weight)
-    )
+        probability=probability )
+
     return hp_space
 
 def _svr_hp_space(name_func, epsilon=None):
@@ -778,7 +775,7 @@ def _name(msg):
     hp_space = _trees_hp_space(_name, **kwargs)
     hp_space['criterion'] = (_trees_criterion(_name('criterion'))
                              if criterion is None else criterion)
-    hp_space['class_weight']=(_trees_class_weight(name_func('class_weight'))
+    hp_space['class_weight']=(_trees_class_weight(_name('class_weight'))
                               if class_weight is None else class_weight)
     return scope.sklearn_RandomForestClassifier(**hp_space)

From 63aa055c6e00a739471c12a3de8c8d28e5389b1e Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 08:45:30 +0100
Subject: [PATCH 06/10] Add scale_pos_weight to hpspace

---
 hpsklearn/components.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 7d3ce3ed..579b3f85 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -956,7 +956,7 @@ def _name(msg):
 ###########################################################
 ##==== Extra trees classifier/regressor constructors ====##
 ###########################################################
-def extra_trees(name, criterion=None, **kwargs):
+def extra_trees(name, criterion=None, class_weight=None, **kwargs):
     '''
     Return a pyll graph with hyperparamters that will construct
     a sklearn.ensemble.ExtraTreesClassifier model.
@@ -974,6 +974,8 @@ def _name(msg):
     hp_space = _trees_hp_space(_name, **kwargs)
     hp_space['criterion'] = (_trees_criterion(_name('criterion'))
                              if criterion is None else criterion)
+    hp_space['class_weight']=(_trees_class_weight(_name('class_weight'))
+                              if class_weight is None else class_weight)
     return scope.sklearn_ExtraTreesClassifier(**hp_space)
@@ -1250,6 +1252,9 @@ def _xgboost_learning_rate(name):
 def _xgboost_n_estimators(name):
     return scope.int(hp.quniform(name, 100, 6000, 200))
 
+def _xgboost_scale_pos_weight(name):
+    return hp.loguniform(name, np.log10(0.001), np.log10(1000))
+
 def _xgboost_gamma(name):
     return hp.loguniform(name, np.log(0.0001), np.log(5)) - 0.0001
@@ -1284,7 +1289,7 @@ def _xgboost_hp_space(
     colsample_bylevel=None,
     reg_alpha=None,
     reg_lambda=None,
-    scale_pos_weight=1,
+    scale_pos_weight=None,
     base_score=0.5,
     random_state=None):
     '''Generate XGBoost hyperparameters search space
@@ -1311,7 +1316,8 @@ def _xgboost_hp_space(
                    if reg_alpha is None else reg_alpha),
         reg_lambda=(_xgboost_reg_lambda(name_func('reg_lambda'))
                     if reg_lambda is None else reg_lambda),
-        scale_pos_weight=scale_pos_weight,
+        scale_pos_weight=(_xgboost_scale_pos_weight(name_func('scale_pos_weight'))
+                          if scale_pos_weight is None else scale_pos_weight),
         base_score=base_score,
         seed=_random_state(name_func('rstate'), random_state)
     )

From a2675ad94530f8aa0b87febdf7074554df9a2d78 Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 12:23:42 +0100
Subject: [PATCH 07/10] Update components.py

---
 hpsklearn/components.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 579b3f85..94f829a8 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -500,7 +500,7 @@ def _svr_hp_space(name_func, epsilon=None):
 #########################################
 ##==== SVM classifier constructors ====##
 #########################################
-def svc_kernel(name, kernel, random_state=None, probability=False, **kwargs):
+def svc_kernel(name, kernel, class_weight = None,random_state=None, probability=False, **kwargs):
     """
     Return a pyll graph with hyperparamters that will construct
     a sklearn.svm.SVC model with a user specified kernel.
@@ -513,6 +513,9 @@ def _name(msg):
     hp_space = _svm_hp_space(_name, kernel=kernel, **kwargs)
     hp_space.update(_svc_hp_space(_name, random_state, probability))
+    hp_space['class_weight']=(_class_weight(_name('class_weight'))
+                              if class_weight is None else class_weight)
+
     return scope.sklearn_SVC(**hp_space)
 
 def svc_linear(name, **kwargs):

From 5a5a07452aaefb6df89b5d0d793ba64d6a27a69c Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 12:43:57 +0100
Subject: [PATCH 08/10] Update components.py

---
 hpsklearn/components.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index 94f829a8..fb35db4e 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -1556,6 +1556,24 @@ def any_classifier(name):
 
     return hp.choice('%s' % name, classifiers)
 
+def simple_classifier(name):
+    classifiers = [
+        svc(name + '.svc'),
+        random_forest(name + '.random_forest'),
+        extra_trees(name + '.extra_trees')
+        ]
+
+    return hp.choice('%s' % name, classifiers)
+
+def only_svc(name):
+    classifiers = [
+        svc(name + '.svc'),
+        ]
+
+    return hp.choice('%s' % name, classifiers)
+
+
+
 
 def any_sparse_classifier(name):
     return hp.choice('%s' % name, [

From 118722c7050c62b157aaf37272b9c6f7666d5e0b Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 13:38:06 +0100
Subject: [PATCH 09/10] Update components.py

---
 hpsklearn/components.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index fb35db4e..d69a9b19 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -1565,16 +1565,6 @@ def simple_classifier(name):
 
     return hp.choice('%s' % name, classifiers)
 
-def only_svc(name):
-    classifiers = [
-        svc(name + '.svc'),
-        ]
-
-    return hp.choice('%s' % name, classifiers)
-
-
-
-
 def any_sparse_classifier(name):
     return hp.choice('%s' % name, [
         liblinear_svc(name + '.linear_svc'),

From 2c50042d36d4fb67144b336ad7d1501d81589982 Mon Sep 17 00:00:00 2001
From: Dries VR
Date: Thu, 31 Jan 2019 13:39:36 +0100
Subject: [PATCH 10/10] Update components.py

---
 hpsklearn/components.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/hpsklearn/components.py b/hpsklearn/components.py
index d69a9b19..a96f5df1 100644
--- a/hpsklearn/components.py
+++ b/hpsklearn/components.py
@@ -1556,15 +1556,6 @@ def any_classifier(name):
 
     return hp.choice('%s' % name, classifiers)
 
-def simple_classifier(name):
-    classifiers = [
-        svc(name + '.svc'),
-        random_forest(name + '.random_forest'),
-        extra_trees(name + '.extra_trees')
-        ]
-
-    return hp.choice('%s' % name, classifiers)
-
 def any_sparse_classifier(name):
     return hp.choice('%s' % name, [
         liblinear_svc(name + '.linear_svc'),
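
For reference, a minimal usage sketch of the search spaces these patches introduce (illustrative only, not part of the patch series). It assumes hpsklearn's HyperoptEstimator interface and a synthetic imbalanced dataset built with scikit-learn; leaving class_weight=None lets hyperopt sample it from the choices added above ('balanced', 'balanced_subsample', None for the tree models), while passing an explicit value pins it.

    # Illustrative sketch only: exercising the class_weight search space added above.
    # Assumes hpsklearn's HyperoptEstimator interface and a synthetic imbalanced dataset.
    from hyperopt import tpe
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    from hpsklearn import HyperoptEstimator, random_forest

    # Imbalanced binary problem (roughly 90/10) where class weighting tends to matter.
    X, y = make_classification(n_samples=2000, n_features=20,
                               weights=[0.9, 0.1], random_state=0)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                        random_state=0)

    # class_weight is left at None, so hyperopt samples it from
    # ['balanced', 'balanced_subsample', None]; random_forest('rf',
    # class_weight='balanced') would pin it instead.
    estim = HyperoptEstimator(classifier=random_forest('rf'),
                              preprocessing=[],
                              algo=tpe.suggest,
                              max_evals=25,
                              trial_timeout=60)
    estim.fit(X_train, y_train)
    print(estim.score(X_test, y_test))
    print(estim.best_model())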