From e233598da0fec7f93cdaf9e5d7530a9c502f2e73 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 09:31:46 +0100 Subject: [PATCH 01/28] ignore /venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ea604a61..55ebb440 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ nosetests.xml .idea notebooks/.ipynb_checkpoints +/venv \ No newline at end of file From ab09f7b5c631fd0dd31b5cf08c061244c2dd2098 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 11:09:59 +0100 Subject: [PATCH 02/28] Remove deprecated algorithm parameter from ada boost --- .../components/ensemble/_weight_boosting.py | 48 ++++++++++--------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/hpsklearn/components/ensemble/_weight_boosting.py b/hpsklearn/components/ensemble/_weight_boosting.py index 95ae6e84..665de07a 100644 --- a/hpsklearn/components/ensemble/_weight_boosting.py +++ b/hpsklearn/components/ensemble/_weight_boosting.py @@ -32,13 +32,6 @@ def _weight_boosting_learning_rate(name: str): return hp.lognormal(name, np.log(0.01), np.log(10.0)) -def _weight_boosting_algorithm(name: str): - """ - Declaration search space 'algorithm' parameter - """ - return hp.choice(name, ["SAMME", "SAMME.R"]) - - def _weight_boosting_loss(name: str): """ Declaration search space 'loss' parameter @@ -53,15 +46,17 @@ def _weight_boosting_random_state(name: str): return hp.randint(name, 5) -@validate(params=["n_estimators", "learning_rate"], - validation_test=lambda param: not isinstance(param, float) or param > 0, - msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.") +@validate( + params=["n_estimators", "learning_rate"], + validation_test=lambda param: not isinstance(param, float) or param > 0, + msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.", +) def _weight_boosting_hp_space( - name_func, - estimator=None, - n_estimators: typing.Union[int, Apply] = None, - learning_rate: typing.Union[float, Apply] = None, - random_state=None + name_func, + estimator=None, + n_estimators: typing.Union[int, Apply] = None, + learning_rate: typing.Union[float, Apply] = None, + random_state=None, ): """ Hyper parameter search space for @@ -70,22 +65,32 @@ def _weight_boosting_hp_space( """ hp_space = dict( estimator=estimator, - n_estimators=_weight_boosting_n_estimators(name_func("n_estimators")) if n_estimators is None else n_estimators, - learning_rate=_weight_boosting_learning_rate(name_func("learning_rate")) - if learning_rate is None else learning_rate, - random_state=_weight_boosting_random_state(name_func("random_state")) if random_state is None else random_state, + n_estimators=( + _weight_boosting_n_estimators(name_func("n_estimators")) + if n_estimators is None + else n_estimators + ), + learning_rate=( + _weight_boosting_learning_rate(name_func("learning_rate")) + if learning_rate is None + else learning_rate + ), + random_state=( + _weight_boosting_random_state(name_func("random_state")) + if random_state is None + else random_state + ), ) return hp_space -def ada_boost_classifier(name: str, algorithm: typing.Union[str, Apply] = None, **kwargs): +def ada_boost_classifier(name: str, **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.ensemble.AdaBoostClassifier model. 
Args: name: name | str - algorithm: choose 'SAMME' or 'SAMME.R' | str See help(hpsklearn.components.ensemble._weight_boosting._weight_boosting_hp_space) for info on additional available AdaBoost arguments. @@ -95,7 +100,6 @@ def _name(msg): return f"{name}.ada_boost_{msg}" hp_space = _weight_boosting_hp_space(_name, **kwargs) - hp_space["algorithm"] = _weight_boosting_algorithm(_name("algorithm")) if algorithm is None else algorithm return scope.sklearn_AdaBoostClassifier(**hp_space) From d6c7c6fb42d3622eda5d950c4113abd381f3aa55 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 11:16:41 +0100 Subject: [PATCH 03/28] Revert formatting --- .../components/ensemble/_weight_boosting.py | 27 +++++-------------- 1 file changed, 7 insertions(+), 20 deletions(-) diff --git a/hpsklearn/components/ensemble/_weight_boosting.py b/hpsklearn/components/ensemble/_weight_boosting.py index 665de07a..08fbeb03 100644 --- a/hpsklearn/components/ensemble/_weight_boosting.py +++ b/hpsklearn/components/ensemble/_weight_boosting.py @@ -46,11 +46,9 @@ def _weight_boosting_random_state(name: str): return hp.randint(name, 5) -@validate( - params=["n_estimators", "learning_rate"], - validation_test=lambda param: not isinstance(param, float) or param > 0, - msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.", -) +@validate(params=["n_estimators", "learning_rate"], + validation_test=lambda param: not isinstance(param, float) or param > 0, + msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.") def _weight_boosting_hp_space( name_func, estimator=None, @@ -65,21 +63,10 @@ def _weight_boosting_hp_space( """ hp_space = dict( estimator=estimator, - n_estimators=( - _weight_boosting_n_estimators(name_func("n_estimators")) - if n_estimators is None - else n_estimators - ), - learning_rate=( - _weight_boosting_learning_rate(name_func("learning_rate")) - if learning_rate is None - else learning_rate - ), - random_state=( - _weight_boosting_random_state(name_func("random_state")) - if random_state is None - else random_state - ), + n_estimators=_weight_boosting_n_estimators(name_func("n_estimators")) if n_estimators is None else n_estimators, + learning_rate=_weight_boosting_learning_rate(name_func("learning_rate")) + if learning_rate is None else learning_rate, + random_state=_weight_boosting_random_state(name_func("random_state")) if random_state is None else random_state, ) return hp_space From 1d17c3808f3505bb5948012cdf0df45fdf1d639e Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 11:24:38 +0100 Subject: [PATCH 04/28] Add max_features parameter to hist_gradient_boosting clf and reg --- .../components/ensemble/_hist_gradient_boosting.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hpsklearn/components/ensemble/_hist_gradient_boosting.py b/hpsklearn/components/ensemble/_hist_gradient_boosting.py index 4e4c761f..2ad8d95d 100644 --- a/hpsklearn/components/ensemble/_hist_gradient_boosting.py +++ b/hpsklearn/components/ensemble/_hist_gradient_boosting.py @@ -73,6 +73,13 @@ def _hist_gradient_boosting_random_state(name: str): return hp.randint(name, 5) +def _hist_gradient_boosting_max_features(name: str): + """ + Declaration search space 'max_features' parameter + """ + return hp.uniform(name + ".frac", 0.5, 1.) 
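+    # note: sklearn's HistGradientBoosting{Classifier,Regressor} interpret
+    # 'max_features' as the proportion of randomly chosen features considered
+    # at each node split, hence a fraction sampled from [0.5, 1.0]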
+ + @validate(params=["max_bins"], validation_test=lambda param: not isinstance(param, int) or 0 < param <= 255, msg="Invalid parameter '%s' with value '%s'. " @@ -89,6 +96,7 @@ def _hist_gradient_boosting_hp_space( max_depth: typing.Union[int, Apply] = "Undefined", min_samples_leaf: typing.Union[int, Apply] = None, l2_regularization: float = 0, + max_features: typing.Union[float, Apply] = None, max_bins: int = 255, categorical_features: npt.ArrayLike = None, monotonic_cst: npt.ArrayLike = None, @@ -123,6 +131,8 @@ def _hist_gradient_boosting_hp_space( min_samples_leaf=_hist_gradient_boosting_min_samples_leaf(name_func("min_samples_leaf")) if min_samples_leaf is None else min_samples_leaf, l2_regularization=l2_regularization, + max_features=_hist_gradient_boosting_max_features(name_func("max_features")) + if max_features is None else max_features, max_bins=max_bins, categorical_features=categorical_features, monotonic_cst=monotonic_cst, From 6635575ef2a267d0f23c9a92d6475db2a3dd53f9 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 19:17:46 +0100 Subject: [PATCH 05/28] rename tfidf to tfidf vectorizer --- hpsklearn/components/__init__.py | 4 +++- hpsklearn/components/feature_extraction/__init__.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/hpsklearn/components/__init__.py b/hpsklearn/components/__init__.py index 9a529195..95c85c17 100644 --- a/hpsklearn/components/__init__.py +++ b/hpsklearn/components/__init__.py @@ -147,7 +147,9 @@ lightgbm_regression from .feature_extraction import \ - tfidf + tfidf_vectorizer, \ + hashing_vectorizer, \ + count_vectorizer from .decomposition import pca diff --git a/hpsklearn/components/feature_extraction/__init__.py b/hpsklearn/components/feature_extraction/__init__.py index 37e57a41..a51c8eba 100644 --- a/hpsklearn/components/feature_extraction/__init__.py +++ b/hpsklearn/components/feature_extraction/__init__.py @@ -1 +1,4 @@ -from .text import tfidf +from .text import \ + tfidf_vectorizer, \ + hashing_vectorizer, \ + count_vectorizer From bac374a84482c28915a3b93cd8f2c57481c86b46 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 19:18:12 +0100 Subject: [PATCH 06/28] Improve tfidf and add hashing vectorizer and count vectorizer --- .../components/feature_extraction/text.py | 207 ++++++++++++++---- 1 file changed, 168 insertions(+), 39 deletions(-) diff --git a/hpsklearn/components/feature_extraction/text.py b/hpsklearn/components/feature_extraction/text.py index c09bb26c..58950473 100644 --- a/hpsklearn/components/feature_extraction/text.py +++ b/hpsklearn/components/feature_extraction/text.py @@ -12,61 +12,190 @@ def sklearn_TfidfVectorizer(*args, **kwargs): return feature_extraction.text.TfidfVectorizer(*args, **kwargs) +@scope.define +def sklearn_HashingVectorizer(*args, **kwargs): + return feature_extraction.text.HashingVectorizer(*args, **kwargs) + + +@scope.define +def sklearn_CountVectorizer(*args, **kwargs): + return feature_extraction.text.CountVectorizer(*args, **kwargs) + + +def _text_analyzer(name: str): + """ + Declaration search space 'analyzer' parameter + """ + return hp.choice(name, ["word", "char", "char_wb"]) + + +def _text_stop_words(name: str): + """ + Declaration search space 'stop_words' parameter + """ + return hp.choice(name, ["english", None]) + + +def _text_lowercase(name: str): + """ + Declaration search space 'lowercase' parameter + """ + return hp.choice(name, [True, False]) 
+ + +def _text_max_df(name: str): + """ + Declaration search space 'max_df' parameter + """ + return hp.uniform(name, 0.7, 1.0) + + +def _text_min_df(name: str): + """ + Declaration search space 'min_df' parameter + """ + return hp.uniform(name, 0.0, 0.2) + + +def _text_max_features(name: str): + """ + Declaration search space 'max_features' parameter + """ + return hp.pchoice(name, [ + (0.4, scope.int(hp.uniform(name + ".int", 1, 5))), + (0.6, None) + ]) + + +def _text_binary(name: str): + """ + Declaration search space 'binary' parameter + """ + return hp.choice(name, [True, False]) + + +def _text_max_ngram(name: str): + """ + Declaration maximum range for 'ngram_range' parameter + """ + return scope.int(hp.quniform(name, 1, 4, 1)) + + +def _text_norm(name: str): + """ + Declaration search space 'norm' parameter + """ + return hp.choice(name, ["l1", "l2"]) + + @validate(params=["analyzer"], validation_test=lambda param: not isinstance(param, str) or param in ["word", "char", "char_wb"], msg="Invalid parameter '%s' with value '%s'. Value must be one of 'word', 'char', 'char_wb'.") +def _text_hp_space( + name_func, + analyzer: typing.Union[str, callable, Apply] = None, + stop_words: typing.Union[str, list, Apply] = None, + lowercase: typing.Union[bool, Apply] = None, + binary: typing.Union[bool, Apply] = None, + ngram_range: tuple = None, + **kwargs, +): + """ + Hyper parameter search space for + tfidf vectorizer + hashing vectorizer + count vectorizer + """ + hp_space = dict( + analyzer=_text_analyzer(name_func("analyzer")) if analyzer is None else analyzer, + stop_words=_text_stop_words(name_func("stop_words")) if stop_words is None else stop_words, + lowercase=_text_lowercase(name_func("lowercase")) if lowercase is None else lowercase, + binary=_text_binary(name_func("binary")) if binary is None else binary, + ngram_range=(1, _text_max_ngram(name_func("ngram_range"))) if ngram_range is None else ngram_range, + **kwargs, + ) + return hp_space + + @validate(params=["norm"], validation_test=lambda param: not isinstance(param, str) or param in ["l1", "l2"], msg="Invalid parameter '%s' with value '%s'. Value must be one of 'l1', 'l2'.") -def tfidf(name: str, - analyzer: typing.Union[str, callable, Apply] = None, - ngram_range: tuple = None, - stop_words: typing.Union[str, list, Apply] = None, - lowercase: typing.Union[bool, Apply] = None, - max_df: typing.Union[float, int, Apply] = 1.0, - min_df: typing.Union[float, int, Apply] = 1, - max_features: int = None, - binary: typing.Union[bool, Apply] = None, - norm: typing.Union[str, Apply] = None, - use_idf: bool = False, - smooth_idf: bool = False, - sublinear_tf: bool = False): +def tfidf_vectorizer( + name: str, + max_df: typing.Union[float, int, Apply] = 1.0, + min_df: typing.Union[float, int, Apply] = 1, + max_features: typing.Union[int, Apply] = None, + norm: typing.Union[str, Apply] = None, + **kwargs, +): """ Return a pyll graph with hyperparameters that will construct a sklearn.feature_extraction.text.TfidfVectorizer transformer. 
     Args:
         name: name | str
-        analyzer: features made of word or char n-grams | str, callable
-        ngram_range: lower and upper boundary of n values | tuple
-        stop_words: stop words | str, list
-        lowercase: convert all characters to lowercase | bool
         max_df: upper bound document frequency | float
         min_df: lower bound document frequency | float
-        max_features: max number of features | int
-        binary: set non-zero term counts to 1 | bool
+        max_features: maximum features to consider | int
         norm: 'l1', 'l2' or None | str
-        use_idf: enable inverse-document-frequency reweighting | bool
-        smooth_idf: smooth idf weights by adding one to document frequencies | bool
-        sublinear_tf: apply sublinear tf scaling | bool
     """
     def _name(msg):
         return f"{name}.tfidf_vectorizer_{msg}"
 
-    max_ngram = scope.int(hp.quniform(_name("max_ngram"), 1, 4, 1))
-
-    rval = scope.sklearn_TfidfVectorizer(
-        analyzer=hp.choice(_name("analyzer"), ["word", "char", "char_wb"]) if analyzer is None else analyzer,
-        stop_words=hp.choice(_name("stop_words"), ["english", None]) if stop_words is None else stop_words,
-        lowercase=hp.choice(_name("lowercase"), [True, False]) if lowercase is None else lowercase,
-        max_df=max_df,
-        min_df=min_df,
-        max_features=max_features,
-        binary=hp.choice(_name("binary"), [True, False]) if binary is None else binary,
-        ngram_range=(1, max_ngram) if ngram_range is None else ngram_range,
-        norm=norm,
-        use_idf=use_idf,
-        smooth_idf=smooth_idf,
-        sublinear_tf=sublinear_tf,
-    )
-    return rval
+    hp_space = _text_hp_space(_name, **kwargs)
+    hp_space["max_df"] = _text_max_df(_name("max_df")) if max_df is None else max_df
+    hp_space["min_df"] = _text_min_df(_name("min_df")) if min_df is None else min_df
+    hp_space["norm"] = _text_norm(_name("norm")) if norm is None else norm
+    hp_space["max_features"] = _text_max_features(_name("max_features")) \
+        if max_features is None else max_features
+
+    return scope.sklearn_TfidfVectorizer(**hp_space)
+
+
+@validate(params=["norm"],
+          validation_test=lambda param: not isinstance(param, str) or param in ["l1", "l2"],
+          msg="Invalid parameter '%s' with value '%s'. Value must be one of 'l1', 'l2'.")
+def hashing_vectorizer(name: str, norm: typing.Union[str, Apply] = None, **kwargs):
+    """
+    Return a pyll graph with hyperparameters that will construct
+    a sklearn.feature_extraction.text.HashingVectorizer transformer.
+
+    Args:
+        name: name | str
+        norm: 'l1', 'l2' or None | str
+    """
+    def _name(msg):
+        return f"{name}.hashing_vectorizer_{msg}"
+
+    hp_space = _text_hp_space(_name, **kwargs)
+    hp_space["norm"] = _text_norm(_name("norm")) if norm is None else norm
+
+    return scope.sklearn_HashingVectorizer(**hp_space)
+
+
+def count_vectorizer(
+        name: str,
+        max_df: typing.Union[float, int, Apply] = 1.0,
+        min_df: typing.Union[float, int, Apply] = 1,
+        max_features: typing.Union[int, Apply] = None,
+        **kwargs,
+):
+    """
+    Return a pyll graph with hyperparameters that will construct
+    a sklearn.feature_extraction.text.CountVectorizer transformer.
+
+    Args:
+        name: name | str
+        max_df: upper bound document frequency | float
+        min_df: lower bound document frequency | float
+        max_features: maximum features to consider | int
+    """
+    def _name(msg):
+        return f"{name}.count_vectorizer_{msg}"
+
+    hp_space = _text_hp_space(_name, **kwargs)
+    hp_space["max_df"] = _text_max_df(_name("max_df")) if max_df is None else max_df
+    hp_space["min_df"] = _text_min_df(_name("min_df")) if min_df is None else min_df
+    hp_space["max_features"] = _text_max_features(_name("max_features")) \
+        if max_features is None else max_features
+
+    return scope.sklearn_CountVectorizer(**hp_space)
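A minimal end-to-end sketch of the new vectorizer components (the toy data and
variable names are hypothetical; the component names come from the imports in
the test patch that follows):

    import numpy as np
    from hpsklearn import HyperoptEstimator, multinomial_nb, count_vectorizer
    from hyperopt import rand

    # four toy documents with made-up binary labels
    X_train = np.array(["free pizza today", "quarterly report attached",
                        "win cash now", "meeting moved to noon"])
    y_train = np.array([1, 0, 1, 0])

    # search CountVectorizer hyperparameters jointly with MultinomialNB
    estim = HyperoptEstimator(
        classifier=multinomial_nb("clf"),
        preprocessing=[count_vectorizer("vec")],
        algo=rand.suggest,
        max_evals=5,
    )
    estim.fit(X_train, y_train)
    print(estim.best_model())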
From 934341b9a9be7bbde8bdc6d15c1cc0ad34750e72 Mon Sep 17 00:00:00 2001
From: mandjevant <38689620+mandjevant@users.noreply.github.com>
Date: Sun, 2 Mar 2025 19:18:50 +0100
Subject: [PATCH 07/28] Add unittests for hashing vectorizer and count
 vectorizer. Use SGD classifier since multinomial_nb does not accept negative
 values in hashing vectorizer

---
 .../test_feature_extraction/test_text.py     | 45 +++++++++++++++++--
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/tests/test_components/test_feature_extraction/test_text.py b/tests/test_components/test_feature_extraction/test_text.py
index c3cdacd1..8646c074 100644
--- a/tests/test_components/test_feature_extraction/test_text.py
+++ b/tests/test_components/test_feature_extraction/test_text.py
@@ -2,8 +2,11 @@
 
 from hpsklearn import \
     HyperoptEstimator, \
-    tfidf, \
-    multinomial_nb
+    multinomial_nb, \
+    sgd_classifier, \
+    tfidf_vectorizer, \
+    hashing_vectorizer, \
+    count_vectorizer
 from tests.utils import TrialsExceptionHandler
 from hyperopt import rand
@@ -36,7 +39,7 @@ def test_tfidf_vectorizer(self):
         """
         model = HyperoptEstimator(
             classifier=multinomial_nb("classifier"),
-            preprocessing=[tfidf("preprocessing")],
+            preprocessing=[tfidf_vectorizer("preprocessing")],
             algo=rand.suggest,
             trial_timeout=10.0,
             max_evals=5,
         )
         model.fit(self.X_test, self.Y_test)
         model.score(self.X_test, self.Y_test)
 
-    test_tfidf_vectorizer.__name__ = f"test_{tfidf.__name__}"
+    @TrialsExceptionHandler
+    def test_hashing_vectorizer(self):
+        """
+        Instantiate sgd_classifier hyperopt_estimator model
+        add HashingVectorizer preprocessor
+        fit model
+        """
+        model = HyperoptEstimator(
+            classifier=sgd_classifier("classifier"),
+            preprocessing=[hashing_vectorizer("preprocessing")],
+            algo=rand.suggest,
+            trial_timeout=10.0,
+            max_evals=5,
+        )
+        model.fit(self.X_test, self.Y_test)
+
+    @TrialsExceptionHandler
+    def test_count_vectorizer(self):
+        """
+        Instantiate multinomial_nb hyperopt_estimator model
+        add CountVectorizer preprocessor
+        fit model
+        """
+        model = HyperoptEstimator(
+            classifier=multinomial_nb("classifier"),
+            preprocessing=[count_vectorizer("preprocessing")],
+            algo=rand.suggest,
+            trial_timeout=10.0,
+            max_evals=5,
+        )
+        model.fit(self.X_test, self.Y_test)
+
+    test_tfidf_vectorizer.__name__ = f"test_{tfidf_vectorizer.__name__}"
+    test_hashing_vectorizer.__name__ = f"test_{hashing_vectorizer.__name__}"
+    test_count_vectorizer.__name__ = f"test_{count_vectorizer.__name__}"
 
 
 if __name__ == "__main__":
From c0c572b602fdb3ea55532593bb6cd0f2d9f9b0c3 Mon Sep 17 00:00:00 2001
From: mandjevant <38689620+mandjevant@users.noreply.github.com>
Date: Sun, 2 Mar 2025 20:09:08 +0100
Subject: [PATCH 08/28] Remove multi_class parameter

---
 hpsklearn/components/linear_model/_logistic.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/hpsklearn/components/linear_model/_logistic.py
b/hpsklearn/components/linear_model/_logistic.py index f3481b20..7e3c5f24 100644 --- a/hpsklearn/components/linear_model/_logistic.py +++ b/hpsklearn/components/linear_model/_logistic.py @@ -89,9 +89,6 @@ def _logistic_random_state(name: str): "sag", "saga"], msg="Invalid parameter '%s' with value '%s'. " "Value must be in ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'].") -@validate(params=["multi_class"], - validation_test=lambda param: not isinstance(param, str) or param in ["auto", "ovr", "multinomial"], - msg="Invalid parameter '%s' with value '%s'. Value must be in ['auto', 'ovr', 'multinomial'].") def _logistic_hp_space( name_func, fit_intercept: bool = True, @@ -104,7 +101,6 @@ def _logistic_hp_space( n_jobs: int = 1, verbose: int = 0, intercept_scaling: float = 1, - multi_class: str = "auto", random_state=None ): if dual is True and not (penalty == "l2" and solver == "liblinear"): @@ -125,7 +121,6 @@ def _logistic_hp_space( n_jobs=n_jobs, verbose=verbose, intercept_scaling=intercept_scaling, - multi_class=multi_class, random_state=_logistic_random_state(name_func("random_state")) if random_state is None else random_state, ) From 24608e878c427bc15a78c2bf0ca9c2e626e66827 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 20:16:31 +0100 Subject: [PATCH 09/28] Remove copy x param from theil_sen. Copy is always made --- hpsklearn/components/linear_model/_theil_sen.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/hpsklearn/components/linear_model/_theil_sen.py b/hpsklearn/components/linear_model/_theil_sen.py index 2a0f0f19..9b13bf57 100644 --- a/hpsklearn/components/linear_model/_theil_sen.py +++ b/hpsklearn/components/linear_model/_theil_sen.py @@ -19,7 +19,6 @@ def sklearn_TheilSenRegressor(*args, **kwargs): def theil_sen_regressor( name: str, fit_intercept: bool = True, - copy_X: bool = True, max_subpopulation: typing.Union[int, Apply] = None, n_subsamples: int = None, max_iter: typing.Union[int, Apply] = None, @@ -34,7 +33,6 @@ def theil_sen_regressor( Args: name: name | str fit_intercept: whether to calculate the intercept | bool - copy_X: whether to copy X | bool max_subpopulation: consider stochastic subpopulation | int n_subsamples: number of samples to calculate parameters | int max_iter: maximum number of iterations | int @@ -49,7 +47,6 @@ def _name(msg): hp_space = dict( fit_intercept=fit_intercept, - copy_X=copy_X, max_subpopulation=scope.int(hp.uniform(_name("max_subpopulation"), 7500, 12500)) if max_subpopulation is None else max_subpopulation, n_subsamples=n_subsamples, From 24a92a23bd689f29801d75296676664e3556da1b Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 2 Mar 2025 20:53:02 +0100 Subject: [PATCH 10/28] Change default on dual linear hp space --- hpsklearn/components/svm/_classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpsklearn/components/svm/_classes.py b/hpsklearn/components/svm/_classes.py index 06c2d411..e05ff41f 100644 --- a/hpsklearn/components/svm/_classes.py +++ b/hpsklearn/components/svm/_classes.py @@ -133,7 +133,7 @@ def _linear_hp_space( C: typing.Union[float, Apply] = None, fit_intercept: bool = True, intercept_scaling: typing.Union[float, Apply] = None, - dual: bool = True, + dual: bool = "auto", verbose: int = 0, random_state=None, max_iter: typing.Union[int, Apply] = None From b80cea8a1f279adf7409d6d16903f8e446093fd7 Mon Sep 17 00:00:00 2001 From: mandjevant 
<38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:31:26 +0100 Subject: [PATCH 11/28] Remove unused hp space params --- hpsklearn/components/lightgbm.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/hpsklearn/components/lightgbm.py b/hpsklearn/components/lightgbm.py index 26f91d3f..d3967bb5 100644 --- a/hpsklearn/components/lightgbm.py +++ b/hpsklearn/components/lightgbm.py @@ -54,13 +54,6 @@ def _lightgbm_n_estimators(name): return scope.int(hp.quniform(name, 100, 6000, 200)) -def _lightgbm_gamma(name): - """ - Declaration search space 'gamma' parameter - """ - return hp.loguniform(name, np.log(0.0001), np.log(5)) - 0.0001 - - def _lightgbm_min_child_weight(name): """ Declaration search space 'min_child_weight' parameter @@ -82,13 +75,6 @@ def _lightgbm_colsample_bytree(name): return hp.uniform(name, 0.5, 1) -def _lightgbm_colsample_bylevel(name): - """ - Declaration search space 'colsample_bylevel' parameter - """ - return hp.uniform(name, 0.5, 1) - - def _lightgbm_reg_alpha(name): """ Declaration search space 'reg_alpha' parameter From e87ea5d791dbb2a88ad2cc8aa6eb8a50cdb88a6d Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:58:59 +0100 Subject: [PATCH 12/28] rem unused use_label_encoder --- hpsklearn/components/xgboost.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hpsklearn/components/xgboost.py b/hpsklearn/components/xgboost.py index f4120ad0..49a2baea 100644 --- a/hpsklearn/components/xgboost.py +++ b/hpsklearn/components/xgboost.py @@ -169,7 +169,6 @@ def _name(msg): hp_space = _xgboost_hp_space(_name, **kwargs) hp_space["objective"] = "binary:logistic" if objective is None else objective - hp_space["use_label_encoder"] = False return scope.sklearn_XGBClassifier(**hp_space) From b30fd710daaf9967611ae6ae4f13507afe3ca5cb Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 19:59:49 +0100 Subject: [PATCH 13/28] Allow more kwargs when using hp space dict --- hpsklearn/components/lightgbm.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hpsklearn/components/lightgbm.py b/hpsklearn/components/lightgbm.py index d3967bb5..1d1009d6 100644 --- a/hpsklearn/components/lightgbm.py +++ b/hpsklearn/components/lightgbm.py @@ -120,7 +120,8 @@ def _lightgbm_hp_space( reg_lambda: typing.Union[float, Apply] = None, boosting_type: typing.Union[str, Apply] = None, scale_pos_weight: float = 1, - random_state=None): + random_state=None, + **kwargs): """ Hyper parameter search space for lightgbm classifier @@ -143,7 +144,8 @@ def _lightgbm_hp_space( reg_lambda=_lightgbm_reg_lambda(name_func("reg_lambda")) if reg_lambda is None else reg_lambda, boosting_type=_lightgbm_boosting_type(name_func("boosting_type")) if boosting_type is None else boosting_type, scale_pos_weight=scale_pos_weight, - seed=_lightgbm_random_state(name_func("random_state")) if random_state is None else random_state + seed=_lightgbm_random_state(name_func("random_state")) if random_state is None else random_state, + **kwargs ) return hp_space From 837239bf27ae1f1e74338be793793d2e7a44c564 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:27:52 +0100 Subject: [PATCH 14/28] Improve imports --- tests/test_estimator/test_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_estimator/test_estimator.py b/tests/test_estimator/test_estimator.py index 
95864647..d0587fd0 100644 --- a/tests/test_estimator/test_estimator.py +++ b/tests/test_estimator/test_estimator.py @@ -5,7 +5,7 @@ from hyperopt.pyll import as_apply from hpsklearn import HyperoptEstimator from hpsklearn import sgd_classifier, any_classifier -from ..utils import RetryOnTrialsException +from tests.utils import RetryOnTrialsException class TestIter(unittest.TestCase): From 0db154e22ed5d6fe3d28604d31fc1aa67f7e7ff0 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:28:17 +0100 Subject: [PATCH 15/28] Add new vectorizers to preprocessors and text preprocessors --- hpsklearn/components/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hpsklearn/components/__init__.py b/hpsklearn/components/__init__.py index 95c85c17..b9c18469 100644 --- a/hpsklearn/components/__init__.py +++ b/hpsklearn/components/__init__.py @@ -255,7 +255,11 @@ def any_text_preprocessing(name): """ Generic pre-processing appropriate for text data """ - return hp.choice(name, [[tfidf(name + '.tfidf')]]) + return hp.choice(name, [ + [tfidf_vectorizer(name + ".tfidf")], + [hashing_vectorizer(name + ".hashing")], + [count_vectorizer(name + ".count")], + ]) # Legacy any pre-processing as proposed in #137 @@ -425,7 +429,9 @@ def all_preprocessing(name): [polynomial_features(name + ".polynomial_features")], [spline_transformer(name + ".spline_transformer")], [k_bins_discretizer(name + ".k_bins_discretizer")], - [tfidf(name + ".tfidf")], + [tfidf_vectorizer(name + ".tfidf")], + [hashing_vectorizer(name + ".hashing")], + [count_vectorizer(name + ".count")], [pca(name + ".pca")], [ts_lagselector(name + ".ts_lagselector")], [colkmeans(name + ".colkmeans")], From 9b0a823556551194b99f03365a708d8837a1fe72 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:52:36 +0100 Subject: [PATCH 16/28] Add **kwargs to all hp spaces --- hpsklearn/components/cluster/_kmeans.py | 6 ++++-- hpsklearn/components/compose/_target.py | 6 ++++-- .../covariance/_elliptic_envelope.py | 6 ++++-- .../components/cross_decomposition/_pls.py | 6 ++++-- hpsklearn/components/discriminant_analysis.py | 6 ++++-- hpsklearn/components/dummy.py | 12 +++++++---- hpsklearn/components/ensemble/_bagging.py | 2 ++ hpsklearn/components/ensemble/_forest.py | 6 ++++-- hpsklearn/components/ensemble/_gb.py | 4 +++- .../ensemble/_hist_gradient_boosting.py | 6 ++++-- hpsklearn/components/ensemble/_iforest.py | 6 ++++-- .../components/ensemble/_weight_boosting.py | 2 ++ hpsklearn/components/gaussian_process/_gpc.py | 6 ++++-- hpsklearn/components/gaussian_process/_gpr.py | 4 +++- hpsklearn/components/kernel_ridge.py | 4 +++- hpsklearn/components/linear_model/_base.py | 6 ++++-- hpsklearn/components/linear_model/_bayes.py | 2 ++ .../linear_model/_coordinate_descent.py | 12 +++++++---- hpsklearn/components/linear_model/_glm.py | 4 +++- hpsklearn/components/linear_model/_huber.py | 6 ++++-- .../components/linear_model/_least_angle.py | 21 ++++++++----------- .../components/linear_model/_logistic.py | 4 +++- hpsklearn/components/linear_model/_omp.py | 14 +++++++------ .../linear_model/_passive_aggressive.py | 6 ++++-- .../components/linear_model/_perceptron.py | 7 ++++--- .../components/linear_model/_quantile.py | 6 ++++-- hpsklearn/components/linear_model/_ransac.py | 6 ++++-- hpsklearn/components/linear_model/_ridge.py | 10 ++++++--- .../linear_model/_stochastic_gradient.py | 9 +++++--- 
.../components/linear_model/_theil_sen.py | 6 ++++-- .../components/mixture/_bayesian_mixture.py | 4 +++- .../components/mixture/_gaussian_mixture.py | 4 +++- hpsklearn/components/multiclass.py | 18 ++++++++++------ hpsklearn/components/naive_bayes.py | 9 +++++--- .../components/neighbors/_nearest_centroid.py | 4 +++- hpsklearn/components/neighbors/_regression.py | 6 ++++-- .../neural_network/_multilayer_perceptron.py | 2 ++ .../semi_supervised/_label_propagation.py | 6 ++++-- hpsklearn/components/svm/_classes.py | 16 +++++++++----- hpsklearn/components/tree/_classes.py | 6 ++++-- hpsklearn/components/xgboost.py | 6 ++++-- 41 files changed, 187 insertions(+), 95 deletions(-) diff --git a/hpsklearn/components/cluster/_kmeans.py b/hpsklearn/components/cluster/_kmeans.py index 1365fdc4..3780c258 100644 --- a/hpsklearn/components/cluster/_kmeans.py +++ b/hpsklearn/components/cluster/_kmeans.py @@ -44,7 +44,8 @@ def _kmeans_hp_space( n_clusters: typing.Union[int, Apply] = None, init: typing.Union[str, callable, npt.ArrayLike, Apply] = None, verbose: int = 0, - random_state=None + random_state=None, + **kwargs ): """ Hyper parameter search space for @@ -55,7 +56,8 @@ def _kmeans_hp_space( n_clusters=_kmeans_n_clusters(name_func("n_clusters")) if n_clusters is None else n_clusters, init=_kmeans_init(name_func("init")) if init is None else init, verbose=verbose, - random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state + random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state, + **kwargs ) return hp_space diff --git a/hpsklearn/components/compose/_target.py b/hpsklearn/components/compose/_target.py index 47122eee..1b7ff15f 100644 --- a/hpsklearn/components/compose/_target.py +++ b/hpsklearn/components/compose/_target.py @@ -13,7 +13,8 @@ def transformed_target_regressor(name: str, transformer: object = None, func: callable = None, inverse_func: callable = None, - check_inverse: bool = True): + check_inverse: bool = True, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.compose.TransformedTargetRegressor model. @@ -36,6 +37,7 @@ def _name(msg): transformer=transformer, func=func, inverse_func=inverse_func, - check_inverse=check_inverse + check_inverse=check_inverse, + **kwargs ) return scope.sklearn_TransformedTargetRegressor(**hp_space) diff --git a/hpsklearn/components/covariance/_elliptic_envelope.py b/hpsklearn/components/covariance/_elliptic_envelope.py index 99d736b8..c1033283 100644 --- a/hpsklearn/components/covariance/_elliptic_envelope.py +++ b/hpsklearn/components/covariance/_elliptic_envelope.py @@ -16,7 +16,8 @@ def elliptic_envelope(name: str, assume_centered: bool = False, support_fraction: typing.Union[float, Apply] = None, contamination: typing.Union[float, Apply] = 0.1, - random_state=None): + random_state=None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.covariance.EllipticEnvelope model. 
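Every hunk in this patch applies the same mechanical change, so a condensed
before/after sketch may be useful (the helper and parameter names here are
illustrative, not taken from the diff):

    from hyperopt import hp

    # before: an unexpected keyword argument raised TypeError at
    # search-space construction time
    def _example_hp_space(name_func, alpha=None):
        return dict(
            alpha=hp.uniform(name_func("alpha"), 0.0, 1.0) if alpha is None else alpha,
        )

    # after: extra keywords are accepted and forwarded verbatim into the
    # dict that is ultimately splatted into the sklearn estimator
    def _example_hp_space(name_func, alpha=None, **kwargs):
        return dict(
            alpha=hp.uniform(name_func("alpha"), 0.0, 1.0) if alpha is None else alpha,
            **kwargs,
        )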
@@ -39,6 +40,7 @@ def _name(msg): support_fraction=hp.uniform(_name("support_fraction"), 0.05, 0.95) if support_fraction is None else support_fraction, contamination=hp.uniform(_name("contamination"), 0.0, 0.3) if contamination is None else contamination, - random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state + random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, + **kwargs ) return scope.sklearn_EllipticEnvelope(**hp_space) diff --git a/hpsklearn/components/cross_decomposition/_pls.py b/hpsklearn/components/cross_decomposition/_pls.py index bdad8618..e4649957 100644 --- a/hpsklearn/components/cross_decomposition/_pls.py +++ b/hpsklearn/components/cross_decomposition/_pls.py @@ -51,7 +51,8 @@ def _pls_hp_space( scale: bool = True, max_iter: typing.Union[int, Apply] = None, tol: typing.Union[float, Apply] = None, - copy: bool = True + copy: bool = True, + **kwargs ): """ Hyper parameter search space for @@ -64,7 +65,8 @@ def _pls_hp_space( scale=scale, max_iter=_pls_max_iter(name_func("max_iter")) if max_iter is None else max_iter, tol=_pls_tol(name_func("tol")) if tol is None else tol, - copy=copy + copy=copy, + **kwargs ) return hp_space diff --git a/hpsklearn/components/discriminant_analysis.py b/hpsklearn/components/discriminant_analysis.py index 0755e630..216f7ca4 100644 --- a/hpsklearn/components/discriminant_analysis.py +++ b/hpsklearn/components/discriminant_analysis.py @@ -30,7 +30,8 @@ def _discriminant_analysis_hp_space( name_func, priors: npt.ArrayLike = None, store_covariance: bool = False, - tol: float = None + tol: float = None, + **kwargs ): """ Common hyper parameter search space @@ -40,7 +41,8 @@ def _discriminant_analysis_hp_space( hp_space = dict( priors=priors, store_covariance=store_covariance, - tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol + tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol, + **kwargs ) return hp_space diff --git a/hpsklearn/components/dummy.py b/hpsklearn/components/dummy.py index 15b7717b..d2a8ab03 100644 --- a/hpsklearn/components/dummy.py +++ b/hpsklearn/components/dummy.py @@ -26,7 +26,8 @@ def sklearn_DummyRegressor(*args, **kwargs): def dummy_classifier(name: str, strategy: typing.Union[str, Apply] = None, random_state=None, - constant: typing.Union[int, str, npt.ArrayLike] = None): + constant: typing.Union[int, str, npt.ArrayLike] = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.dummy.DummyClassifier model. @@ -45,7 +46,8 @@ def _name(msg): strategy=hp.choice(_name("strategy"), ["stratified", "most_frequent", "prior", "uniform"]) if strategy is None else strategy, random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, - constant=constant + constant=constant, + **kwargs ) return scope.sklearn_DummyClassifier(**hp_space) @@ -60,7 +62,8 @@ def _name(msg): def dummy_regressor(name: str, strategy: typing.Union[str, Apply] = None, constant: typing.Union[int, str, npt.ArrayLike] = None, - quantile: float = None): + quantile: float = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.dummy.DummyRegressor model. 
@@ -78,6 +81,7 @@ def _name(msg): hp_space = dict( strategy=hp.choice(_name("strategy"), ["mean", "median"]) if strategy is None else strategy, constant=constant, - quantile=quantile + quantile=quantile, + **kwargs ) return scope.sklearn_DummyRegressor(**hp_space) diff --git a/hpsklearn/components/ensemble/_bagging.py b/hpsklearn/components/ensemble/_bagging.py index 0fc8ff82..319f2b76 100644 --- a/hpsklearn/components/ensemble/_bagging.py +++ b/hpsklearn/components/ensemble/_bagging.py @@ -93,6 +93,7 @@ def _bagging_hp_space( n_jobs: int = 1, random_state=None, verbose: int = False, + **kwargs, ): """ Hyper parameter search space for @@ -112,6 +113,7 @@ def _bagging_hp_space( n_jobs=n_jobs, random_state=_bagging_random_state(name_func("random_state")) if random_state is None else random_state, verbose=verbose, + **kwargs ) return hp_space diff --git a/hpsklearn/components/ensemble/_forest.py b/hpsklearn/components/ensemble/_forest.py index 5349b80b..bb14c74d 100644 --- a/hpsklearn/components/ensemble/_forest.py +++ b/hpsklearn/components/ensemble/_forest.py @@ -190,7 +190,8 @@ def _forest_hp_space( verbose: int = False, warm_start: bool = False, ccp_alpha: float = 0.0, - max_samples: float = None + max_samples: float = None, + **kwargs ): """ Hyper parameter search space for @@ -224,7 +225,8 @@ def _forest_hp_space( verbose=verbose, warm_start=warm_start, ccp_alpha=ccp_alpha, - max_samples=max_samples + max_samples=max_samples, + **kwargs ) return hp_space diff --git a/hpsklearn/components/ensemble/_gb.py b/hpsklearn/components/ensemble/_gb.py index 92697ffc..28069de3 100644 --- a/hpsklearn/components/ensemble/_gb.py +++ b/hpsklearn/components/ensemble/_gb.py @@ -174,7 +174,8 @@ def _gb_hp_space( validation_fraction: float = 0.1, n_iter_no_change: int = None, tol: float = 1e-4, - ccp_alpha: float = 0.0 + ccp_alpha: float = 0.0, + **kwargs ): """ Hyper parameter search space for @@ -206,6 +207,7 @@ def _gb_hp_space( n_iter_no_change=n_iter_no_change, tol=tol, ccp_alpha=ccp_alpha, + **kwargs ) return hp_space diff --git a/hpsklearn/components/ensemble/_hist_gradient_boosting.py b/hpsklearn/components/ensemble/_hist_gradient_boosting.py index 2ad8d95d..ba2aeaa0 100644 --- a/hpsklearn/components/ensemble/_hist_gradient_boosting.py +++ b/hpsklearn/components/ensemble/_hist_gradient_boosting.py @@ -107,7 +107,8 @@ def _hist_gradient_boosting_hp_space( n_iter_no_change: int = 10, tol: float = 1e-7, verbose: int = False, - random_state=None + random_state=None, + **kwargs ): """ Hyper parameter search space for @@ -144,7 +145,8 @@ def _hist_gradient_boosting_hp_space( tol=tol, verbose=verbose, random_state=_hist_gradient_boosting_random_state(name_func("random_state")) - if random_state is None else random_state + if random_state is None else random_state, + **kwargs ) return hp_space diff --git a/hpsklearn/components/ensemble/_iforest.py b/hpsklearn/components/ensemble/_iforest.py index afb60863..f0ea84ef 100644 --- a/hpsklearn/components/ensemble/_iforest.py +++ b/hpsklearn/components/ensemble/_iforest.py @@ -59,7 +59,8 @@ def _iforest_hp_space( n_jobs: int = 1, random_state=None, verbose: int = False, - warm_start: bool = False + warm_start: bool = False, + **kwargs ): """ Hyper parameter search space for @@ -74,7 +75,8 @@ def _iforest_hp_space( n_jobs=n_jobs, random_state=_iforest_random_state(name_func("random_state")) if random_state is None else random_state, verbose=verbose, - warm_start=warm_start + warm_start=warm_start, + **kwargs ) return hp_space diff --git 
a/hpsklearn/components/ensemble/_weight_boosting.py b/hpsklearn/components/ensemble/_weight_boosting.py index 08fbeb03..f411b1d0 100644 --- a/hpsklearn/components/ensemble/_weight_boosting.py +++ b/hpsklearn/components/ensemble/_weight_boosting.py @@ -55,6 +55,7 @@ def _weight_boosting_hp_space( n_estimators: typing.Union[int, Apply] = None, learning_rate: typing.Union[float, Apply] = None, random_state=None, + **kwargs ): """ Hyper parameter search space for @@ -67,6 +68,7 @@ def _weight_boosting_hp_space( learning_rate=_weight_boosting_learning_rate(name_func("learning_rate")) if learning_rate is None else learning_rate, random_state=_weight_boosting_random_state(name_func("random_state")) if random_state is None else random_state, + **kwargs ) return hp_space diff --git a/hpsklearn/components/gaussian_process/_gpc.py b/hpsklearn/components/gaussian_process/_gpc.py index a80cbb59..53d52b2d 100644 --- a/hpsklearn/components/gaussian_process/_gpc.py +++ b/hpsklearn/components/gaussian_process/_gpc.py @@ -27,7 +27,8 @@ def gaussian_process_classifier(name: str, copy_X_train: bool = True, random_state=None, multi_class: typing.Union[str, Apply] = None, - n_jobs: int = 1): + n_jobs: int = 1, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.gaussian_process.GaussianProcessClassifier model. @@ -61,6 +62,7 @@ def _name(msg): random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, multi_class=hp.choice(_name("multi_class"), ["one_vs_rest", "one_vs_one"]) if multi_class is None else multi_class, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return scope.sklearn_GaussianProcessClassifier(**hp_space) diff --git a/hpsklearn/components/gaussian_process/_gpr.py b/hpsklearn/components/gaussian_process/_gpr.py index 880c8d87..8319ed4f 100644 --- a/hpsklearn/components/gaussian_process/_gpr.py +++ b/hpsklearn/components/gaussian_process/_gpr.py @@ -23,7 +23,8 @@ def gaussian_process_regressor(name: str, n_restarts_optimizer: typing.Union[int, Apply] = None, normalize_y: bool = False, copy_X_train: bool = True, - random_state=None): + random_state=None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.gaussian_process.GaussianProcessRegressor model. @@ -52,5 +53,6 @@ def _name(msg): normalize_y=normalize_y, copy_X_train=copy_X_train, random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, + **kwargs ) return scope.sklearn_GaussianProcessRegressor(**hp_space) diff --git a/hpsklearn/components/kernel_ridge.py b/hpsklearn/components/kernel_ridge.py index 55102e13..a1d44ddd 100644 --- a/hpsklearn/components/kernel_ridge.py +++ b/hpsklearn/components/kernel_ridge.py @@ -25,7 +25,8 @@ def hp_sklearn_kernel_ridge(name: str, gamma: typing.Union[float, Apply] = None, degree: typing.Union[float, Apply] = None, coef0: typing.Union[float, Apply] = None, - kernel_params: map = None): + kernel_params: map = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.kernel_ridge.KernelRidge model. 
@@ -51,5 +52,6 @@ def _name(msg): degree=scope.int(hp.uniform(_name("degree"), 1, 7)) if degree is None else degree, coef0=hp.uniform(_name("coef0"), 0.0, 1.0) if coef0 is None else coef0, kernel_params=kernel_params, + **kwargs ) return scope.sklearn_KernelRidge(**hp_space) diff --git a/hpsklearn/components/linear_model/_base.py b/hpsklearn/components/linear_model/_base.py index a94c0a42..17634095 100644 --- a/hpsklearn/components/linear_model/_base.py +++ b/hpsklearn/components/linear_model/_base.py @@ -12,7 +12,8 @@ def linear_regression(name: str, fit_intercept: bool = True, copy_X: bool = True, n_jobs: int = 1, - positive: bool = False): + positive: bool = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.LinearRegression model. @@ -32,7 +33,8 @@ def _name(msg): fit_intercept=fit_intercept, copy_X=copy_X, n_jobs=n_jobs, - positive=positive + positive=positive, + **kwargs ) return scope.sklearn_LinearRegression(**hp_space) diff --git a/hpsklearn/components/linear_model/_bayes.py b/hpsklearn/components/linear_model/_bayes.py index 536daeb3..bdbfaabd 100644 --- a/hpsklearn/components/linear_model/_bayes.py +++ b/hpsklearn/components/linear_model/_bayes.py @@ -58,6 +58,7 @@ def _bayes_hp_space( fit_intercept: bool = True, copy_X: bool = True, verbose: bool = False, + **kwargs ): """ Hyper parameter search space for @@ -75,6 +76,7 @@ def _bayes_hp_space( fit_intercept=fit_intercept, copy_X=copy_X, verbose=verbose, + **kwargs ) return hp_space diff --git a/hpsklearn/components/linear_model/_coordinate_descent.py b/hpsklearn/components/linear_model/_coordinate_descent.py index 319d11af..150fbbce 100644 --- a/hpsklearn/components/linear_model/_coordinate_descent.py +++ b/hpsklearn/components/linear_model/_coordinate_descent.py @@ -138,7 +138,8 @@ def _coordinate_descent_hp_space( tol: typing.Union[float, Apply] = None, warm_start: bool = False, random_state=None, - selection: typing.Union[str, Apply] = None + selection: typing.Union[str, Apply] = None, + **kwargs ): """ Hyper parameter search space for @@ -156,7 +157,8 @@ def _coordinate_descent_hp_space( warm_start=warm_start, random_state=_coordinate_descent_random_state(name_func("random_state")) if random_state is None else random_state, - selection=_coordinate_descent_selection(name_func("selection")) if selection is None else selection + selection=_coordinate_descent_selection(name_func("selection")) if selection is None else selection, + **kwargs ) return hp_space @@ -180,7 +182,8 @@ def _coordinate_descent_cv_hp_space( verbose: int = False, n_jobs: int = 1, random_state=None, - selection: typing.Union[str, Apply] = None + selection: typing.Union[str, Apply] = None, + **kwargs ): """ Hyper parameter search space for @@ -202,7 +205,8 @@ def _coordinate_descent_cv_hp_space( n_jobs=n_jobs, random_state=_coordinate_descent_random_state(name_func("random_state")) if random_state is None else random_state, - selection=_coordinate_descent_selection(name_func("selection")) if selection is None else selection + selection=_coordinate_descent_selection(name_func("selection")) if selection is None else selection, + **kwargs ) return hp_space diff --git a/hpsklearn/components/linear_model/_glm.py b/hpsklearn/components/linear_model/_glm.py index 7accf1da..29ce6ed7 100644 --- a/hpsklearn/components/linear_model/_glm.py +++ b/hpsklearn/components/linear_model/_glm.py @@ -55,6 +55,7 @@ def _glm_hp_space( tol: typing.Union[float, Apply] = 1e-4, warm_start: bool = False, verbose: int = 0, + 
**kwargs ): """ Hyper parameter search space for @@ -68,7 +69,8 @@ def _glm_hp_space( max_iter=_glm_max_iter(name_func("max_iter")) if max_iter is None else max_iter, tol=_glm_tol(name_func("tol")) if tol is None else tol, warm_start=warm_start, - verbose=verbose + verbose=verbose, + **kwargs ) return hp_space diff --git a/hpsklearn/components/linear_model/_huber.py b/hpsklearn/components/linear_model/_huber.py index aff16060..cc8fed75 100644 --- a/hpsklearn/components/linear_model/_huber.py +++ b/hpsklearn/components/linear_model/_huber.py @@ -50,7 +50,8 @@ def huber_regressor(name: str, alpha: typing.Union[float, Apply] = None, warm_start: bool = False, fit_intercept: bool = True, - tol: typing.Union[float, Apply] = None): + tol: typing.Union[float, Apply] = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.HuberRegressor model. @@ -73,7 +74,8 @@ def _name(msg): alpha=_glm_alpha(_name("alpha")) if tol is None else alpha, warm_start=warm_start, fit_intercept=fit_intercept, - tol=_glm_tol(_name("tol")) if tol is None else tol + tol=_glm_tol(_name("tol")) if tol is None else tol, + **kwargs ) return scope.sklearn_HuberRegressor(**hp_space) diff --git a/hpsklearn/components/linear_model/_least_angle.py b/hpsklearn/components/linear_model/_least_angle.py index 5fa14954..527aaf70 100644 --- a/hpsklearn/components/linear_model/_least_angle.py +++ b/hpsklearn/components/linear_model/_least_angle.py @@ -97,6 +97,7 @@ def _least_angle_hp_space( precompute: typing.Union[bool, str, npt.ArrayLike] = "auto", eps: float = np.finfo(float).eps, copy_X: bool = True, + **kwargs, ): """ Hyper parameter of search space of common parameters for @@ -111,7 +112,8 @@ def _least_angle_hp_space( verbose=verbose, precompute=precompute, eps=eps, - copy_X=copy_X + copy_X=copy_X, + **kwargs ) return hp_space @@ -126,7 +128,7 @@ def _least_angle_cv_shared_space( cv: typing.Union[int, callable, typing.Generator, Apply] = None, max_n_alphas: typing.Union[int, Apply] = None, n_jobs: int = None, -): + **kwargs): """ Declaration shared search space parameters for lars cv @@ -148,8 +150,7 @@ def lars(name: str, fit_path: bool = True, jitter: float = None, random_state=None, - **kwargs - ): + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.Lars model. @@ -193,8 +194,7 @@ def lasso_lars(name: str, positive: bool = False, jitter: float = None, random_state=None, - **kwargs - ): + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.LassoLars model. @@ -235,8 +235,7 @@ def lars_cv(name: str, cv: typing.Union[int, callable, typing.Generator, Apply] = None, max_n_alphas: typing.Union[int, Apply] = None, n_jobs: int = None, - **kwargs - ): + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.LarsCV model. @@ -274,8 +273,7 @@ def lasso_lars_cv(name: str, max_n_alphas: typing.Union[int, Apply] = None, n_jobs: int = None, positive: bool = False, - **kwargs - ): + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.LassoLarsCV model. @@ -315,8 +313,7 @@ def lasso_lars_ic(name: str, criterion: typing.Union[str, Apply] = None, max_iter: typing.Union[int, Apply] = None, positive: bool = False, - **kwargs - ): + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.LassoLarsIC model. 
diff --git a/hpsklearn/components/linear_model/_logistic.py b/hpsklearn/components/linear_model/_logistic.py index 7e3c5f24..a351a919 100644 --- a/hpsklearn/components/linear_model/_logistic.py +++ b/hpsklearn/components/linear_model/_logistic.py @@ -101,7 +101,8 @@ def _logistic_hp_space( n_jobs: int = 1, verbose: int = 0, intercept_scaling: float = 1, - random_state=None + random_state=None, + **kwargs ): if dual is True and not (penalty == "l2" and solver == "liblinear"): raise ValueError("Dual formulation (implied by parameter 'dual' = 'True') is only implemented for " @@ -123,6 +124,7 @@ def _logistic_hp_space( intercept_scaling=intercept_scaling, random_state=_logistic_random_state(name_func("random_state")) if random_state is None else random_state, + **kwargs, ) return hp_space diff --git a/hpsklearn/components/linear_model/_omp.py b/hpsklearn/components/linear_model/_omp.py index 5ea2e28c..bccfceae 100644 --- a/hpsklearn/components/linear_model/_omp.py +++ b/hpsklearn/components/linear_model/_omp.py @@ -20,8 +20,8 @@ def orthogonal_matching_pursuit(name: str, n_nonzero_coefs: int = None, tol: typing.Union[float, Apply] = None, fit_intercept: bool = True, - precompute: typing.Union[str, bool] = "auto" - ): + precompute: typing.Union[str, bool] = "auto", + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.OrthogonalMatchingPursuit model. @@ -41,7 +41,8 @@ def _name(msg): n_nonzero_coefs=n_nonzero_coefs, tol=hp.loguniform(_name("tol"), np.log(1e-5), np.log(1e-2)) if tol is None else tol, fit_intercept=fit_intercept, - precompute=precompute + precompute=precompute, + **kwargs ) return scope.sklearn_OrthogonalMatchingPursuit(**hp_space) @@ -53,8 +54,8 @@ def orthogonal_matching_pursuit_cv(name: str, max_iter: typing.Union[int, Apply] = None, cv: typing.Union[int, callable, typing.Generator, Apply] = None, n_jobs: int = 1, - verbose: typing.Union[bool, int] = False - ): + verbose: typing.Union[bool, int] = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.OrthogonalMatchingPursuitCV model. 
@@ -79,7 +80,8 @@ def _name(msg): cv=hp.pchoice(_name("cv"), [(0.0625, 3), (0.175, 4), (0.525, 5), (0.175, 6), (0.0625, 7)]) if cv is None else cv, n_jobs=n_jobs, - verbose=verbose + verbose=verbose, + **kwargs ) return scope.sklearn_OrthogonalMatchingPursuitCV(**hp_space) diff --git a/hpsklearn/components/linear_model/_passive_aggressive.py b/hpsklearn/components/linear_model/_passive_aggressive.py index f8f14a85..38b0fe33 100644 --- a/hpsklearn/components/linear_model/_passive_aggressive.py +++ b/hpsklearn/components/linear_model/_passive_aggressive.py @@ -68,7 +68,8 @@ def _passive_aggressive_hp_space( verbose: int = 0, random_state=None, warm_start: bool = False, - average: typing.Union[bool, int] = False + average: typing.Union[bool, int] = False, + **kwargs ): """ Hyper parameter search space for @@ -89,7 +90,8 @@ def _passive_aggressive_hp_space( random_state=_passive_aggressive_random_state(name_func("random_state")) if random_state is None else random_state, warm_start=warm_start, - average=average + average=average, + **kwargs ) return hp_space diff --git a/hpsklearn/components/linear_model/_perceptron.py b/hpsklearn/components/linear_model/_perceptron.py index fe94ed6e..b311efcf 100644 --- a/hpsklearn/components/linear_model/_perceptron.py +++ b/hpsklearn/components/linear_model/_perceptron.py @@ -32,8 +32,8 @@ def perceptron(name: str, validation_fraction: typing.Union[float, Apply] = None, n_iter_no_change: typing.Union[int, Apply] = 5, class_weight: typing.Union[dict, str] = None, - warm_start: bool = False - ): + warm_start: bool = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.Perceptron model. @@ -78,7 +78,8 @@ def _name(msg): n_iter_no_change=hp.pchoice(_name("n_iter_no_change"), [(0.25, 4), (0.50, 5), (0.25, 6)]) if n_iter_no_change is None else n_iter_no_change, class_weight=class_weight, - warm_start=warm_start + warm_start=warm_start, + **kwargs ) return scope.sklearn_Perceptron(**hp_space) diff --git a/hpsklearn/components/linear_model/_quantile.py b/hpsklearn/components/linear_model/_quantile.py index 00326ed5..7bd26809 100644 --- a/hpsklearn/components/linear_model/_quantile.py +++ b/hpsklearn/components/linear_model/_quantile.py @@ -22,7 +22,8 @@ def quantile_regression(name: str, alpha: typing.Union[float, Apply] = None, fit_intercept: typing.Union[bool, Apply] = None, solver: typing.Union[str, Apply] = None, - solver_options: dict = None): + solver_options: dict = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.QuantileRegression model. 
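With **kwargs in place, estimator arguments that have no dedicated search
space can simply be pinned by the caller. For instance, sklearn's
QuantileRegressor takes a 'quantile' argument that this component does not
model; a sketch (assuming the component is imported from the package top
level, as elsewhere in this series):

    from hpsklearn import quantile_regression

    # 'quantile' is forwarded verbatim to QuantileRegressor, while alpha,
    # fit_intercept and solver remain part of the hyperopt search
    space = quantile_regression("qreg", quantile=0.9)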
@@ -45,7 +46,8 @@ def _name(msg): fit_intercept=hp.choice(_name("fit_intercept"), [True, False]) if fit_intercept is None else fit_intercept, solver=hp.choice(_name("solver"), ["highs-ds", "highs-ipm", "highs", "revised simplex"]) if solver is None else solver, - solver_options=solver_options + solver_options=solver_options, + **kwargs ) return scope.sklearn_QuantileRegressor(**hp_space) diff --git a/hpsklearn/components/linear_model/_ransac.py b/hpsklearn/components/linear_model/_ransac.py index 399726c8..53a63c72 100644 --- a/hpsklearn/components/linear_model/_ransac.py +++ b/hpsklearn/components/linear_model/_ransac.py @@ -28,7 +28,8 @@ def ransac_regression(name: str, stop_score: typing.Union[float, Apply] = None, stop_probability: typing.Union[float, Apply] = None, loss: typing.Union[callable, str, Apply] = None, - random_state=None): + random_state=None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.RANSACRegressor model. @@ -65,7 +66,8 @@ def _name(msg): stop_probability=hp.uniform(_name("stop_probability"), 0.90, 0.99) if stop_probability is None else stop_probability, loss=hp.choice(_name("loss"), ["absolute_error", "squared_error"]) if loss is None else loss, - random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state + random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, + **kwargs ) return scope.sklearn_RANSACRegressor(**hp_space) diff --git a/hpsklearn/components/linear_model/_ridge.py b/hpsklearn/components/linear_model/_ridge.py index 4493f5e9..4e0114b6 100644 --- a/hpsklearn/components/linear_model/_ridge.py +++ b/hpsklearn/components/linear_model/_ridge.py @@ -94,6 +94,7 @@ def _ridge_hp_space( solver: typing.Union[str, Apply] = "auto", positive: bool = False, random_state=None, + **kwargs, ): """ Hyper parameter search space for @@ -108,7 +109,8 @@ def _ridge_hp_space( tol=_ridge_tol(name_func("tol")) if tol is None else tol, solver=solver, positive=positive, - random_state=random_state + random_state=random_state, + **kwargs ) return hp_space @@ -119,7 +121,8 @@ def _ridge_cv_hp_space( fit_intercept: bool = True, scoring: typing.Union[str, callable] = None, cv: typing.Union[int, Iterable, typing.Generator, Apply] = None, - store_cv_values: bool = False + store_cv_values: bool = False, + **kwargs ): """ Hyper parameter search space for @@ -131,7 +134,8 @@ def _ridge_cv_hp_space( fit_intercept=fit_intercept, scoring=scoring, cv=_ridge_cv(name_func("cv")) if cv is None else cv, - store_cv_values=store_cv_values + store_cv_values=store_cv_values, + **kwargs ) return hp_space diff --git a/hpsklearn/components/linear_model/_stochastic_gradient.py b/hpsklearn/components/linear_model/_stochastic_gradient.py index 7f73141d..5c3f8fbb 100644 --- a/hpsklearn/components/linear_model/_stochastic_gradient.py +++ b/hpsklearn/components/linear_model/_stochastic_gradient.py @@ -176,6 +176,7 @@ def _stochastic_gradient_hp_space( n_iter_no_change: typing.Union[int, Apply] = None, warm_start: bool = False, average: typing.Union[bool, int] = False, + **kwargs, ): """ Hyper parameter search space for @@ -212,6 +213,7 @@ def _stochastic_gradient_hp_space( if n_iter_no_change is None else n_iter_no_change, warm_start=warm_start, average=average, + **kwargs, ) return hp_space @@ -297,8 +299,8 @@ def sgd_one_class_svm(name: str, eta0: typing.Union[float, Apply] = None, power_t: typing.Union[float, Apply] = None, warm_start: bool = False, - average: typing.Union[bool, 
int] = False - ): + average: typing.Union[bool, int] = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.SGDOneClassSVM model. @@ -339,6 +341,7 @@ def _name(msg): eta0=_stochastic_gradient_eta0(_name("eta0")) if eta0 is None else eta0, power_t=_stochastic_gradient_power_t(_name("power_t")) if power_t is None else power_t, warm_start=warm_start, - average=average + average=average, + **kwargs ) return scope.sklearn_SGDOneClassSVM(**hp_space) diff --git a/hpsklearn/components/linear_model/_theil_sen.py b/hpsklearn/components/linear_model/_theil_sen.py index 9b13bf57..bd8b8f1a 100644 --- a/hpsklearn/components/linear_model/_theil_sen.py +++ b/hpsklearn/components/linear_model/_theil_sen.py @@ -25,7 +25,8 @@ def theil_sen_regressor( tol: typing.Union[float, Apply] = None, random_state=None, n_jobs: int = 1, - verbose: bool = False): + verbose: bool = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.TheilSenRegressor model. @@ -54,6 +55,7 @@ def _name(msg): tol=hp.loguniform(_name("tol"), np.log(1e-5), np.log(1e-2)) if tol is None else tol, random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, n_jobs=n_jobs, - verbose=verbose + verbose=verbose, + **kwargs ) return scope.sklearn_TheilSenRegressor(**hp_space) diff --git a/hpsklearn/components/mixture/_bayesian_mixture.py b/hpsklearn/components/mixture/_bayesian_mixture.py index d8a123b4..d0b0d0aa 100644 --- a/hpsklearn/components/mixture/_bayesian_mixture.py +++ b/hpsklearn/components/mixture/_bayesian_mixture.py @@ -41,7 +41,8 @@ def bayesian_gaussian_mixture(name: str, random_state=None, warm_start: bool = False, verbose: int = 0, - verbose_interval: int = 10): + verbose_interval: int = 10, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.mixture.BayesianGaussianMixture model. @@ -91,5 +92,6 @@ def _name(msg): warm_start=warm_start, verbose=verbose, verbose_interval=verbose_interval, + **kwargs, ) return scope.sklearn_BayesianGaussianMixture(**hp_space) diff --git a/hpsklearn/components/mixture/_gaussian_mixture.py b/hpsklearn/components/mixture/_gaussian_mixture.py index e5a30d8a..e83d7648 100644 --- a/hpsklearn/components/mixture/_gaussian_mixture.py +++ b/hpsklearn/components/mixture/_gaussian_mixture.py @@ -34,7 +34,8 @@ def gaussian_mixture(name: str, random_state=None, warm_start: bool = False, verbose: int = 0, - verbose_interval: int = 10): + verbose_interval: int = 10, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.mixture.GaussianMixture model. @@ -76,5 +77,6 @@ def _name(msg): warm_start=warm_start, verbose=verbose, verbose_interval=verbose_interval, + **kwargs, ) return scope.sklearn_GaussianMixture(**hp_space) diff --git a/hpsklearn/components/multiclass.py b/hpsklearn/components/multiclass.py index 01dc821c..10d3e8a8 100644 --- a/hpsklearn/components/multiclass.py +++ b/hpsklearn/components/multiclass.py @@ -23,7 +23,8 @@ def sklearn_OutputCodeClassifier(*args, **kwargs): def one_vs_rest_classifier(name: str, estimator: typing.Union[object, Apply] = None, - n_jobs: int = 1): + n_jobs: int = 1, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.multiclass.OneVsRestClassifier model. 
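
For the multiclass wrappers in this file the estimator slot defaults to any_classifier(...), so the base estimator is itself part of the search space; a concrete nested component and fixed wrapper arguments can be combined. A sketch, assuming sgd_classifier is exported alongside one_vs_rest_classifier and that sklearn's OneVsRestClassifier accepts a verbose keyword (true for recent releases; treat both as assumptions):

    # Sketch: nest an SGD search space inside OneVsRest and forward a fixed
    # verbose flag to the wrapper through the new **kwargs handling.
    from hpsklearn import HyperoptEstimator, one_vs_rest_classifier, sgd_classifier

    clf = one_vs_rest_classifier(
        "ovr",
        estimator=sgd_classifier("ovr.base"),  # nested hyperparameter space
        verbose=1,                             # forwarded to OneVsRestClassifier
    )
    estim = HyperoptEstimator(classifier=clf, max_evals=10)
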
@@ -39,14 +40,16 @@ def _name(msg): hp_space = dict( estimator=any_classifier(_name("estimator")) if estimator is None else estimator, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return scope.sklearn_OneVsRestClassifier(**hp_space) def one_vs_one_classifier(name: str, estimator: typing.Union[object, Apply] = None, - n_jobs: int = 1): + n_jobs: int = 1, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.multiclass.OneVsOneClassifier model. @@ -62,7 +65,8 @@ def _name(msg): hp_space = dict( estimator=any_classifier(_name("estimator")) if estimator is None else estimator, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return scope.sklearn_OneVsOneClassifier(**hp_space) @@ -71,7 +75,8 @@ def output_code_classifier(name: str, estimator: typing.Union[object, Apply] = None, code_size: typing.Union[float, Apply] = None, random_state=None, - n_jobs: int = 1): + n_jobs: int = 1, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.multiclass.OutputCodeClassifier model. @@ -91,6 +96,7 @@ def _name(msg): estimator=any_classifier(_name("estimator")) if estimator is None else estimator, code_size=hp.uniform(_name("code_size"), 1, 2) if code_size is None else code_size, n_jobs=n_jobs, - random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state + random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state, + **kwargs ) return scope.sklearn_OutputCodeClassifier(**hp_space) diff --git a/hpsklearn/components/naive_bayes.py b/hpsklearn/components/naive_bayes.py index 180cd2f1..b8a6380c 100644 --- a/hpsklearn/components/naive_bayes.py +++ b/hpsklearn/components/naive_bayes.py @@ -37,6 +37,7 @@ def _nb_hp_space( alpha: typing.Union[float, Apply] = None, fit_prior: typing.Union[bool, Apply] = None, class_prior: npt.ArrayLike = None, + **kwargs ): """ Hyper parameter search space for @@ -48,7 +49,8 @@ def _nb_hp_space( hp_space = dict( alpha=hp.quniform(name_func("alpha"), 0, 1, 0.001) if alpha is None else alpha, fit_prior=hp.choice(name_func("fit_prior"), [True, False]) if fit_prior is None else fit_prior, - class_prior=class_prior + class_prior=class_prior, + **kwargs ) return hp_space @@ -119,7 +121,7 @@ def _name(msg): return scope.sklearn_ComplementNB(**hp_space) -def gaussian_nb(name: str, var_smoothing: float = 1e-9): +def gaussian_nb(name: str, var_smoothing: float = 1e-9, **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.naive_bayes.GaussianNB model. @@ -133,7 +135,8 @@ def _name(msg): return f"{name}.gaussian_nb_{msg}" hp_space = dict( - var_smoothing=var_smoothing + var_smoothing=var_smoothing, + **kwargs ) return scope.sklearn_GaussianNB(**hp_space) diff --git a/hpsklearn/components/neighbors/_nearest_centroid.py b/hpsklearn/components/neighbors/_nearest_centroid.py index 7e84f8e5..e83e49a6 100644 --- a/hpsklearn/components/neighbors/_nearest_centroid.py +++ b/hpsklearn/components/neighbors/_nearest_centroid.py @@ -12,7 +12,8 @@ def sklearn_NearestCentroid(*args, **kwargs): def nearest_centroid(name: str, metric: typing.Union[str, Apply] = None, - shrink_threshold: float = None): + shrink_threshold: float = None, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.neighbors.NearestCentroid model. 
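
gaussian_nb, patched a few hunks above, previously accepted only var_smoothing; with the **kwargs forwarding, other fixed GaussianNB arguments such as priors can now be supplied as well. A sketch; the prior values are illustrative and must match the number of classes and sum to one:

    # Sketch: fix the class priors of GaussianNB via the **kwargs forwarding.
    from hpsklearn import HyperoptEstimator, gaussian_nb

    estim = HyperoptEstimator(
        classifier=gaussian_nb("gnb", priors=[0.3, 0.7]),  # passed to sklearn's GaussianNB
        max_evals=5,
    )
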
@@ -30,5 +31,6 @@ def _name(msg): metric=hp.choice(_name("metric"), ["euclidean", "manhattan"]) if metric is None else metric, shrink_threshold=shrink_threshold, + **kwargs, ) return scope.sklearn_NearestCentroid(**hp_space) diff --git a/hpsklearn/components/neighbors/_regression.py b/hpsklearn/components/neighbors/_regression.py index 5cee1239..303dfae9 100644 --- a/hpsklearn/components/neighbors/_regression.py +++ b/hpsklearn/components/neighbors/_regression.py @@ -67,7 +67,8 @@ def neighbors_hp_space( p: typing.Union[int, Apply] = None, metric: typing.Union[str, callable, Apply] = None, metric_params: dict = None, - n_jobs: int = 1): + n_jobs: int = 1, + **kwargs): """ Hyper parameter search space for k neighbors regressor @@ -80,7 +81,8 @@ def neighbors_hp_space( p=_neighbors_p(name_func("p")) if p is None else p, metric=_neighbors_metric(name_func("metric")) if metric is None else metric, metric_params=metric_params, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return hp_space diff --git a/hpsklearn/components/neural_network/_multilayer_perceptron.py b/hpsklearn/components/neural_network/_multilayer_perceptron.py index 41523505..d9e38bcf 100644 --- a/hpsklearn/components/neural_network/_multilayer_perceptron.py +++ b/hpsklearn/components/neural_network/_multilayer_perceptron.py @@ -177,6 +177,7 @@ def _multilayer_perceptron_hp_space( epsilon: typing.Union[float, Apply] = None, n_iter_no_change: typing.Union[int, Apply] = None, max_fun: typing.Union[int, Apply] = None, + **kwargs, ): """ Hyper parameter search space for @@ -214,6 +215,7 @@ def _multilayer_perceptron_hp_space( n_iter_no_change=_multilayer_perceptron_n_iter_no_change(name_func("n_iter_no_change")) if n_iter_no_change is None else n_iter_no_change, max_fun=_multilayer_perceptron_max_fun(name_func("max_fun")) if max_fun is None else max_fun, + **kwargs, ) return hp_space diff --git a/hpsklearn/components/semi_supervised/_label_propagation.py b/hpsklearn/components/semi_supervised/_label_propagation.py index 83b75e23..17f6c298 100644 --- a/hpsklearn/components/semi_supervised/_label_propagation.py +++ b/hpsklearn/components/semi_supervised/_label_propagation.py @@ -55,7 +55,8 @@ def _label_propagation_hp_space( gamma: typing.Union[float, Apply] = None, n_neighbors: typing.Union[int, Apply] = None, tol: typing.Union[float, Apply] = None, - n_jobs: int = 1 + n_jobs: int = 1, + **kwargs ): """ Hyper parameter search space for @@ -67,7 +68,8 @@ def _label_propagation_hp_space( gamma=_label_propagation_gamma(name_func("gamma")) if gamma is None else gamma, n_neighbors=_label_propagation_n_neighbors(name_func("n_neighbors")) if n_neighbors is None else n_neighbors, tol=_label_propagation_n_tol(name_func("tol")) if tol is None else tol, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return hp_space diff --git a/hpsklearn/components/svm/_classes.py b/hpsklearn/components/svm/_classes.py index e05ff41f..8557c63c 100644 --- a/hpsklearn/components/svm/_classes.py +++ b/hpsklearn/components/svm/_classes.py @@ -136,7 +136,8 @@ def _linear_hp_space( dual: bool = "auto", verbose: int = 0, random_state=None, - max_iter: typing.Union[int, Apply] = None + max_iter: typing.Union[int, Apply] = None, + **kwargs ): """ Hyper parameter search space for @@ -152,7 +153,8 @@ def _linear_hp_space( dual=dual, verbose=verbose, random_state=_svm_random_state(name_func("random_state")) if random_state is None else random_state, - max_iter=_linear_max_iter(name_func("max_iter")) if max_iter is None else max_iter + 
max_iter=_linear_max_iter(name_func("max_iter")) if max_iter is None else max_iter, + **kwargs ) return hp_space @@ -185,7 +187,8 @@ def _svc_hp_space(name_func, max_iter: typing.Union[int, Apply] = None, decision_function_shape: typing.Union[str, Apply] = None, break_ties: bool = False, - random_state=None): + random_state=None, + **kwargs): """ Hyper parameter search space for nu svc @@ -206,7 +209,8 @@ def _svc_hp_space(name_func, decision_function_shape=_svc_decision_function_shape(name_func("decision_function_shape")) if decision_function_shape is None else decision_function_shape, break_ties=break_ties, - random_state=_svm_random_state(name_func("random_state")) if random_state is None else random_state + random_state=_svm_random_state(name_func("random_state")) if random_state is None else random_state, + **kwargs ) return hp_space @@ -220,7 +224,8 @@ def _svr_one_class_hp_space(name_func, shrinking: typing.Union[bool, Apply] = None, cache_size: int = 200, verbose: bool = False, - max_iter: int = None): + max_iter: int = None, + **kwargs): """ Hyper parameter search space for nu svr @@ -237,6 +242,7 @@ def _svr_one_class_hp_space(name_func, cache_size=cache_size, verbose=verbose, max_iter=-1 if max_iter is None else max_iter, + **kwargs, ) return hp_space diff --git a/hpsklearn/components/tree/_classes.py b/hpsklearn/components/tree/_classes.py index d6788e37..d70c7b09 100644 --- a/hpsklearn/components/tree/_classes.py +++ b/hpsklearn/components/tree/_classes.py @@ -144,7 +144,8 @@ def _tree_hp_space( random_state=None, min_impurity_decrease: typing.Union[float, Apply] = 0.0, max_leaf_nodes: typing.Union[int, Apply] = "Undefined", - ccp_alpha: float = 0.0 + ccp_alpha: float = 0.0, + **kwargs ): """ Hyper parameter search space for @@ -168,7 +169,8 @@ def _tree_hp_space( if min_impurity_decrease is None else min_impurity_decrease, max_leaf_nodes=_tree_max_leaf_nodes(name_func("max_leaf_nodes")) if max_leaf_nodes == "Undefined" else max_leaf_nodes, - ccp_alpha=ccp_alpha + ccp_alpha=ccp_alpha, + **kwargs ) return hp_space diff --git a/hpsklearn/components/xgboost.py b/hpsklearn/components/xgboost.py index 49a2baea..6c6b8522 100644 --- a/hpsklearn/components/xgboost.py +++ b/hpsklearn/components/xgboost.py @@ -119,7 +119,8 @@ def _xgboost_hp_space( scale_pos_weight: float = 1, base_score: float = 0.5, random_state=None, - n_jobs: int = -1): + n_jobs: int = -1, + **kwargs): """ Hyper parameter search space for xgboost classifier @@ -143,7 +144,8 @@ def _xgboost_hp_space( scale_pos_weight=scale_pos_weight, base_score=base_score, seed=_xgboost_random_state(name_func("random_state")) if random_state is None else random_state, - n_jobs=n_jobs + n_jobs=n_jobs, + **kwargs ) return hp_space From 4973b96bf449c84cc6c3444770ca2a414e1e4f7c Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 20:58:05 +0100 Subject: [PATCH 17/28] deprecated store_cv_values --- hpsklearn/components/linear_model/_ridge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hpsklearn/components/linear_model/_ridge.py b/hpsklearn/components/linear_model/_ridge.py index 4e0114b6..e5fd01f6 100644 --- a/hpsklearn/components/linear_model/_ridge.py +++ b/hpsklearn/components/linear_model/_ridge.py @@ -121,7 +121,7 @@ def _ridge_cv_hp_space( fit_intercept: bool = True, scoring: typing.Union[str, callable] = None, cv: typing.Union[int, Iterable, typing.Generator, Apply] = None, - store_cv_values: bool = False, + store_cv_results: bool = False, **kwargs 
):
     """
@@ -134,7 +134,7 @@ def _ridge_cv_hp_space(
         fit_intercept=fit_intercept,
         scoring=scoring,
         cv=_ridge_cv(name_func("cv")) if cv is None else cv,
-        store_cv_values=store_cv_values,
+        store_cv_results=store_cv_results,
         **kwargs
     )
     return hp_space

From 00bf9c8fda93af23d4feabcb0b9a5e300bab606d Mon Sep 17 00:00:00 2001
From: mandjevant <38689620+mandjevant@users.noreply.github.com>
Date: Mon, 3 Mar 2025 21:02:15 +0100
Subject: [PATCH 18/28] Requirements versioning update, will be tested with
 tox on pl

---
 requirements.txt     | 10 +++++-----
 requirements_dev.txt | 10 ++++++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index bafd0a5e..bc03ea2e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-hyperopt>=0.2.6,<=0.2.7
-numpy>=1.26.0,<1.26.1
-scikit-learn>=1.3.0,<=1.3.2
-scipy>=1.11.2,<=1.11.3
-pandas>=2.1.0,<=2.1.1
+hyperopt==0.2.7
+numpy>=2.0.0
+scikit-learn>=1.5
+scipy>=1.15.0
+pandas>=2.1.0
diff --git a/requirements_dev.txt b/requirements_dev.txt
index b21c8dbf..5d199b6e 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,5 +1,7 @@
-tox==4.8.0,<=4.11.3
-xgboost==1.6.2
-lightgbm>=4.0.0,<4.1.0
-coverage==7.3.1
+tox>=4.20.0
+xgboost>=2.0.0
+lightgbm==4.6.0
+coverage==7.6.12
+pytest==8.3.5
+pytest-xdist==3.6.1
 -r requirements.txt

From ebbd0c0656db79f1b046ab67adcd02eaafeb8e35 Mon Sep 17 00:00:00 2001
From: mandjevant <38689620+mandjevant@users.noreply.github.com>
Date: Mon, 3 Mar 2025 21:04:28 +0100
Subject: [PATCH 19/28] max version support on sklearn

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index bc03ea2e..0fe47484 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 hyperopt==0.2.7
 numpy>=2.0.0
-scikit-learn>=1.5
+scikit-learn>=1.5,<=1.7
 scipy>=1.15.0
 pandas>=2.1.0

From bd615461615f8b8c1c98a5f8650a7d59941f87b7 Mon Sep 17 00:00:00 2001
From: mandjevant <38689620+mandjevant@users.noreply.github.com>
Date: Mon, 3 Mar 2025 21:20:33 +0100
Subject: [PATCH 20/28] Improve pyproject, setup and tox using pytest

---
 pyproject.toml |  2 +-
 setup.cfg      | 26 +++++++++++++++-----------
 tox.ini        | 12 ++++++------
 3 files changed, 22 insertions(+), 18 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 64ef545a..9d2d345f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["setuptools>=68.2.2", "wheel"]
+requires = ["setuptools>=75.8.2", "wheel"]
 build-backend = "setuptools.build_meta"
diff --git a/setup.cfg b/setup.cfg
index 588952dc..1d5896d8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = hpsklearn
-version = 1.0.3
+version = 1.1.0
 description = Hyperparameter Optimization for sklearn
 long_description = file: README.md
 url = http://hyperopt.github.com/hyperopt-sklearn/
@@ -11,7 +11,7 @@ license_file = LICENSE.txt
 platforms = Linux, OS-X, Windows
 keywords = hyperopt, hyperparameter, sklearn
 classifiers =
-    Development Status :: 1.0.3 - Alpha
+    Development Status :: 1.1.0 - Alpha
     Intended Audience :: Education
     Intended Audience :: Science/Research
     Intended Audience :: Developers
@@ -23,9 +23,9 @@ classifiers =
     Operating System :: Unix
     Programming Language :: Python :: 3
     Programming Language :: Python :: 3 :: Only
-    Programming Language :: Python :: 3.9
-    Programming Language :: Python :: 3.10
     Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
+    Programming Language :: Python :: 3.13
     Topic :: Scientific/Engineering
     Topic :: 
Software Development @@ -33,15 +33,19 @@ classifiers = packages = find: install_requires = - hyperopt>=0.2.6 - numpy>=1.26.0 - scikit-learn>=1.3.0 - scipy>=1.11.2 -python_requires = >=3.9 + hyperopt==0.2.7 + numpy>=2.0.0 + scikit-learn>=1.5,<=1.7 + scipy>=1.15.0 + pandas>=2.1.0 +python_requires = >=3.11 zip_safe = False [options.extras_require] xgboost = xgboost>=2.0.0 -lightgbm = lightgbm>=4.0.0 +lightgbm = lightgbm>=4.6.0 testing = - tox>=4.11.3 + tox>=4.20.0 + coverage==7.6.12 + pytest==8.3.5 + pytest-xdist==3.6.1 diff --git a/tox.ini b/tox.ini index 1ba1ea4f..cd9ef32f 100644 --- a/tox.ini +++ b/tox.ini @@ -1,13 +1,13 @@ [tox] minversion = 3.7.0 -envlist = py39, py310, py311, flake8 +envlist = py311, py312, py313, flake8 isolated_build = true [gh-actions] python = - 3.9: py39 - 3.10: py310 - 3.11: py311, flake8 + 3.11: py311 + 3.12: py312 + 3.13: py313, flake8 [testenv] setenv = @@ -15,10 +15,10 @@ setenv = deps = -r{toxinidir}/requirements_dev.txt commands = - coverage run -m unittest discover + coverage run -m pytest -n auto coverage report -m [testenv:flake8] -basepython = python3.11 +basepython = python3.13 deps = flake8 commands = flake8 --max-line-length=120 --ignore=F401,W504 hpsklearn tests From db5456d819019c3d8d05d5e59e311d4b85c95cbc Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Mon, 3 Mar 2025 21:25:45 +0100 Subject: [PATCH 21/28] Update python versions in workflow --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5727a426..4faf84a6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -10,7 +10,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest] - python-version: ['3.9', '3.10', '3.11'] + python-version: ['3.11', '3.12', '3.13'] steps: - uses: actions/checkout@v2 From d40d4c7d33c8dc7f8eb221c0ed10f126a2f7739e Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Tue, 4 Mar 2025 18:49:55 +0100 Subject: [PATCH 22/28] Add missing setuptools --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0fe47484..dbb1f434 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ numpy>=2.0.0 scikit-learn>=1.5,<=1.7 scipy>=1.15.0 pandas>=2.1.0 +setuptools>=71.0.0 \ No newline at end of file From 040718e7559c32f3dad722aad8e312e8b08bb833 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Tue, 4 Mar 2025 19:54:31 +0100 Subject: [PATCH 23/28] improve store_cv_results param --- hpsklearn/components/linear_model/_ridge.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hpsklearn/components/linear_model/_ridge.py b/hpsklearn/components/linear_model/_ridge.py index e5fd01f6..db2d3f52 100644 --- a/hpsklearn/components/linear_model/_ridge.py +++ b/hpsklearn/components/linear_model/_ridge.py @@ -121,7 +121,6 @@ def _ridge_cv_hp_space( fit_intercept: bool = True, scoring: typing.Union[str, callable] = None, cv: typing.Union[int, Iterable, typing.Generator, Apply] = None, - store_cv_results: bool = False, **kwargs ): """ @@ -134,7 +133,6 @@ def _ridge_cv_hp_space( fit_intercept=fit_intercept, scoring=scoring, cv=_ridge_cv(name_func("cv")) if cv is None else cv, - store_cv_restults=store_cv_results, **kwargs ) return hp_space @@ -166,6 +164,7 @@ def _name(msg): def ridge_cv(name: str, gcv_mode: typing.Union[str, Apply] = "auto", 
alpha_per_target: bool = False, + store_cv_results: bool = False, **kwargs): """ Return a pyll graph with hyperparameters that will construct @@ -186,6 +185,7 @@ def _name(msg): hp_space = _ridge_cv_hp_space(_name, **kwargs) hp_space["gcv_mode"] = gcv_mode hp_space["alpha_per_target"] = alpha_per_target + hp_space["store_cv_results"] = store_cv_results return scope.sklearn_RidgeCV(**hp_space) @@ -218,7 +218,10 @@ def _name(msg): @validate(params=["class_weight"], validation_test=lambda param: not isinstance(param, str) or param == "balanced", msg="Invalid parameter '%s' with value '%s'. Value must be 'balanced'") -def ridge_classifier_cv(name: str, class_weight: typing.Union[dict, str] = None, **kwargs): +def ridge_classifier_cv(name: str, + class_weight: typing.Union[dict, str] = None, + store_cv_results: bool = False, + **kwargs): """ Return a pyll graph with hyperparameters that will construct a sklearn.linear_model.RidgeClassifierCV model. @@ -236,5 +239,6 @@ def _name(msg): hp_space = _ridge_cv_hp_space(_name, **kwargs) hp_space["class_weight"] = class_weight + hp_space["store_cv_results"] = store_cv_results return scope.sklearn_RidgeClassifierCV(**hp_space) From 80b5e69cea9bed7b62e0d2d5122b0ae549638c1d Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Tue, 4 Mar 2025 20:04:45 +0100 Subject: [PATCH 24/28] pytest makes clusters run out of memory. No parallelization of tests, rip --- requirements_dev.txt | 2 -- tox.ini | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 5d199b6e..5cf52abb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,6 +2,4 @@ tox>=4.20.0 xgboost>=2.0.0 lightgbm==4.6.0 coverage==7.6.12 -pytest==8.3.5 -pytest-xdist==3.6.1 -r requirements.txt diff --git a/tox.ini b/tox.ini index cd9ef32f..7127bb32 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,7 @@ setenv = deps = -r{toxinidir}/requirements_dev.txt commands = - coverage run -m pytest -n auto + coverage run -m unittest discover coverage report -m [testenv:flake8] From d7b673585b60a8277167dcabbbc2f2dc87ee027c Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Tue, 4 Mar 2025 20:28:44 +0100 Subject: [PATCH 25/28] Improve flake8 issues --- hpsklearn/components/feature_extraction/text.py | 12 ++++++------ hpsklearn/components/linear_model/_ridge.py | 2 +- .../test_feature_extraction/test_text.py | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hpsklearn/components/feature_extraction/text.py b/hpsklearn/components/feature_extraction/text.py index 58950473..eeeca39e 100644 --- a/hpsklearn/components/feature_extraction/text.py +++ b/hpsklearn/components/feature_extraction/text.py @@ -86,7 +86,7 @@ def _text_norm(name: str): Declaration search space 'norm' parameter """ return hp.choice(name, ["l1", "l2"]) - + @validate(params=["analyzer"], validation_test=lambda param: not isinstance(param, str) or param in ["word", "char", "char_wb"], @@ -147,7 +147,7 @@ def _name(msg): hp_space["min_df"] = _text_min_df(_name("min_df")) if min_df is None else min_df hp_space["norm"] = _text_norm(_name("norm")) if norm is None else norm hp_space["max_features"] = _text_max_features(_name("max_features")) \ - if max_features is not None else max_features + if max_features is not None else max_features return scope.sklearn_TfidfVectorizer(**hp_space) @@ -166,7 +166,7 @@ def hashing_vectorizer(name: str, norm: typing.Union[str, Apply] = None, **kwarg """ 
def _name(msg): return f"{name}.hashing_vectorizer_{msg}" - + hp_space = _text_hp_space(_name, **kwargs) hp_space["norm"] = _text_norm(_name("norm")) if norm is None else norm @@ -191,11 +191,11 @@ def count_vectorizer( """ def _name(msg): return f"{name}.count_vectorizer_{msg}" - + hp_space = _text_hp_space(_name, **kwargs) hp_space["max_df"] = _text_max_df(_name("max_df")) if max_df is None else max_df hp_space["min_df"] = _text_min_df(_name("min_df")) if min_df is None else min_df hp_space["max_features"] = _text_max_features(_name("max_features")) \ - if max_features is not None else max_features - + if max_features is not None else max_features + return scope.sklearn_CountVectorizer(**hp_space) diff --git a/hpsklearn/components/linear_model/_ridge.py b/hpsklearn/components/linear_model/_ridge.py index db2d3f52..570700cf 100644 --- a/hpsklearn/components/linear_model/_ridge.py +++ b/hpsklearn/components/linear_model/_ridge.py @@ -219,7 +219,7 @@ def _name(msg): validation_test=lambda param: not isinstance(param, str) or param == "balanced", msg="Invalid parameter '%s' with value '%s'. Value must be 'balanced'") def ridge_classifier_cv(name: str, - class_weight: typing.Union[dict, str] = None, + class_weight: typing.Union[dict, str] = None, store_cv_results: bool = False, **kwargs): """ diff --git a/tests/test_components/test_feature_extraction/test_text.py b/tests/test_components/test_feature_extraction/test_text.py index 8646c074..6b974b90 100644 --- a/tests/test_components/test_feature_extraction/test_text.py +++ b/tests/test_components/test_feature_extraction/test_text.py @@ -62,7 +62,7 @@ def test_hashing_vectorizer(self): max_evals=5, ) model.fit(self.X_test, self.Y_test) - + @TrialsExceptionHandler def test_count_vectorizer(self): """ From 39b62ab4930ff00f45c923dbb9c88a7fe7d7e982 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 9 Mar 2025 08:47:18 +0100 Subject: [PATCH 26/28] Update setup cfg --- setup.cfg | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/setup.cfg b/setup.cfg index 1d5896d8..de3f0658 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,17 +1,19 @@ [metadata] -name = hpsklearn +name = hyperopt-sklearn version = 1.1.0 description = Hyperparameter Optimization for sklearn long_description = file: README.md -url = http://hyperopt.github.com/hyperopt-sklearn/ +url = https://github.com/hyperopt/hyperopt-sklearn/ author = James Bergstra author_email = anon@anon.com +maintainer = Pim Tholhuijsen +maintainer_email = anon@anon.com license = BSD -license_file = LICENSE.txt +license_files = LICENSE.txt platforms = Linux, OS-X, Windows keywords = hyperopt, hyperparameter, sklearn classifiers = - Development Status :: 1.1.0 - Alpha + Development Status :: 3 - Alpha Intended Audience :: Education Intended Audience :: Science/Research Intended Audience :: Developers @@ -30,22 +32,20 @@ classifiers = Topic :: Software Development [options] -packages = - find: +packages = find: install_requires = hyperopt==0.2.7 numpy>=2.0.0 scikit-learn>=1.5,<=1.7 scipy>=1.15.0 pandas>=2.1.0 + setuptools>=71.0.0 python_requires = >=3.11 zip_safe = False [options.extras_require] xgboost = xgboost>=2.0.0 -lightgbm = lightgbm>=4.6.0 +lightgbm = lightgbm==4.6.0 testing = tox>=4.20.0 - coverage==7.6.12 - pytest==8.3.5 - pytest-xdist==3.6.1 + coverage==7.6.12 \ No newline at end of file From 6872f16ea3f81c433930dbbfaa4c5ba8860010e9 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> 
Date: Sun, 9 Mar 2025 08:48:24 +0100 Subject: [PATCH 27/28] Reduce require setuptools --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9d2d345f..86c57f55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools>=75.8.2", "wheel"] +requires = ["setuptools>=71.0.0", "wheel"] build-backend = "setuptools.build_meta" From 6797eb9daa14aed3f9ebfa704b2f7e38d63a4b78 Mon Sep 17 00:00:00 2001 From: mandjevant <38689620+mandjevant@users.noreply.github.com> Date: Sun, 9 Mar 2025 09:04:58 +0100 Subject: [PATCH 28/28] Fixate max requirements --- requirements.txt | 6 +++--- requirements_dev.txt | 2 +- setup.cfg | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/requirements.txt b/requirements.txt index dbb1f434..39ebfbb0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ hyperopt==0.2.7 -numpy>=2.0.0 +numpy>=2.0.0,<=2.3.0 scikit-learn>=1.5,<=1.7 -scipy>=1.15.0 -pandas>=2.1.0 +scipy>=1.15.0,<=1.15.3 +pandas>=2.1.0,<=2.3.0 setuptools>=71.0.0 \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt index 5cf52abb..2b72fa20 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,5 +1,5 @@ tox>=4.20.0 -xgboost>=2.0.0 +xgboost>=2.0.0,<=2.2.0 lightgbm==4.6.0 coverage==7.6.12 -r requirements.txt diff --git a/setup.cfg b/setup.cfg index de3f0658..be0e8c13 100644 --- a/setup.cfg +++ b/setup.cfg @@ -35,16 +35,16 @@ classifiers = packages = find: install_requires = hyperopt==0.2.7 - numpy>=2.0.0 + numpy>=2.0.0,<=2.3.0 scikit-learn>=1.5,<=1.7 - scipy>=1.15.0 - pandas>=2.1.0 + scipy>=1.15.0,<=1.15.3 + pandas>=2.1.0,<=2.3.0 setuptools>=71.0.0 python_requires = >=3.11 zip_safe = False [options.extras_require] -xgboost = xgboost>=2.0.0 +xgboost = xgboost>=2.0.0,<=2.2.0 lightgbm = lightgbm==4.6.0 testing = tox>=4.20.0
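
With the version ceilings fixed above, a quick sanity check that an installed environment matches the declared ranges can be sketched as follows; the package list is taken from the requirements files in this series, and importlib.metadata is standard library on all supported Python versions:

    # Sketch: print installed versions of the pinned runtime dependencies so
    # they can be compared against the ranges declared in requirements.txt.
    from importlib.metadata import version

    for pkg in ("hyperopt", "numpy", "scikit-learn", "scipy", "pandas", "setuptools"):
        print(f"{pkg}=={version(pkg)}")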