Merged
28 commits
e233598
ignore /venv
mandjevant Mar 2, 2025
ab09f7b
Remove deprecated algorithm parameter from ada boost
mandjevant Mar 2, 2025
d6c7c6f
Revert formatting
mandjevant Mar 2, 2025
1d17c38
Add max_features parameter to hist_gradient_boosting clf and reg
mandjevant Mar 2, 2025
6635575
rename tfidf to tfidf vectorizer
mandjevant Mar 2, 2025
bac374a
Improve tfidf and add hashing vectorizer and count vectorizer
mandjevant Mar 2, 2025
934341b
Add unittests for hashing vectorizer and count vectorizer. Use SGD cl…
mandjevant Mar 2, 2025
c0c572b
Remove multi_class parameter
mandjevant Mar 2, 2025
24608e8
Remove copy x param from theil_sen. Copy is always made
mandjevant Mar 2, 2025
24a92a2
Change default on dual linear hp space
mandjevant Mar 2, 2025
b80cea8
Remove unused hp space params
mandjevant Mar 3, 2025
e87ea5d
rem unused use_label_encoder
mandjevant Mar 3, 2025
b30fd71
Allow more kwargs when using hp space dict
mandjevant Mar 3, 2025
837239b
Improve imports
mandjevant Mar 3, 2025
0db154e
Add new vectorizers to preprocessors and text preprocessors
mandjevant Mar 3, 2025
9b0a823
Add **kwargs to all hp spaces
mandjevant Mar 3, 2025
4973b96
deprecated store_cv_values
mandjevant Mar 3, 2025
00bf9c8
Requirements versioning update, will be tested with tox on pl
mandjevant Mar 3, 2025
ebbd0c0
max version support on sklearn
mandjevant Mar 3, 2025
bd61546
Improve pyproject, setup and tox using pytest
mandjevant Mar 3, 2025
db5456d
Update python versions in workflow
mandjevant Mar 3, 2025
d40d4c7
Add missing setuptools
mandjevant Mar 4, 2025
040718e
improve store_cv_results param
mandjevant Mar 4, 2025
80b5e69
pytest makes clusters run out of memory. No parallelization of tests,…
mandjevant Mar 4, 2025
d7b6735
Improve flake8 issues
mandjevant Mar 4, 2025
39b62ab
Update setup cfg
mandjevant Mar 9, 2025
6872f16
Reduce require setuptools
mandjevant Mar 9, 2025
6797eb9
Fixate max requirements
mandjevant Mar 9, 2025
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -10,7 +10,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.11', '3.12', '3.13']
 
     steps:
       - uses: actions/checkout@v2
1 change: 1 addition & 0 deletions .gitignore
@@ -36,3 +36,4 @@ nosetests.xml
 
 .idea
 notebooks/.ipynb_checkpoints
+/venv
14 changes: 11 additions & 3 deletions hpsklearn/components/__init__.py
@@ -147,7 +147,9 @@
     lightgbm_regression
 
 from .feature_extraction import \
-    tfidf
+    tfidf_vectorizer, \
+    hashing_vectorizer, \
+    count_vectorizer
 
 from .decomposition import pca
 
@@ -253,7 +255,11 @@ def any_text_preprocessing(name):
     """
     Generic pre-processing appropriate for text data
     """
-    return hp.choice(name, [[tfidf(name + '.tfidf')]])
+    return hp.choice(name, [
+        [tfidf_vectorizer(name + ".tfidf")],
+        [hashing_vectorizer(name + ".hashing")],
+        [count_vectorizer(name + ".count")],
+    ])
 
 
 # Legacy any pre-processing as proposed in #137
@@ -423,7 +429,9 @@ def all_preprocessing(name):
         [polynomial_features(name + ".polynomial_features")],
         [spline_transformer(name + ".spline_transformer")],
         [k_bins_discretizer(name + ".k_bins_discretizer")],
-        [tfidf(name + ".tfidf")],
+        [tfidf_vectorizer(name + ".tfidf")],
+        [hashing_vectorizer(name + ".hashing")],
+        [count_vectorizer(name + ".count")],
        [pca(name + ".pca")],
        [ts_lagselector(name + ".ts_lagselector")],
        [colkmeans(name + ".colkmeans")],
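
The widened text search space can be exercised end to end. A minimal sketch, assuming the usual HyperoptEstimator API and hpsklearn's sgd_classifier component; X_train/y_train are hypothetical raw-text data:

from hyperopt import tpe
from hpsklearn import HyperoptEstimator, any_text_preprocessing, sgd_classifier

# Each trial now samples one of [tfidf_vectorizer], [hashing_vectorizer]
# or [count_vectorizer] as the preprocessing step, instead of tfidf only.
estim = HyperoptEstimator(
    classifier=sgd_classifier("clf"),
    preprocessing=any_text_preprocessing("pre"),
    algo=tpe.suggest,
    max_evals=15,
)
estim.fit(X_train, y_train)  # X_train: iterable of raw text documents
print(estim.best_model())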
6 changes: 4 additions & 2 deletions hpsklearn/components/cluster/_kmeans.py
@@ -44,7 +44,8 @@ def _kmeans_hp_space(
         n_clusters: typing.Union[int, Apply] = None,
         init: typing.Union[str, callable, npt.ArrayLike, Apply] = None,
         verbose: int = 0,
-        random_state=None
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -55,7 +56,8 @@
         n_clusters=_kmeans_n_clusters(name_func("n_clusters")) if n_clusters is None else n_clusters,
         init=_kmeans_init(name_func("init")) if init is None else init,
         verbose=verbose,
-        random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state
+        random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
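
The **kwargs passthrough added here (and mirrored across the hp-space files below) lets callers pin any constructor argument that has no dedicated search-space entry. A small sketch, assuming the k_means component wrapper exported by hpsklearn:

from hpsklearn import k_means

# n_clusters is pinned; init and random_state are still searched.
# n_init has no dedicated entry in _kmeans_hp_space, so it travels
# through **kwargs into the sklearn.cluster.KMeans constructor.
space = k_means("km", n_clusters=8, n_init=5)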
6 changes: 4 additions & 2 deletions hpsklearn/components/compose/_target.py
@@ -13,7 +13,8 @@ def transformed_target_regressor(name: str,
                                  transformer: object = None,
                                  func: callable = None,
                                  inverse_func: callable = None,
-                                 check_inverse: bool = True):
+                                 check_inverse: bool = True,
+                                 **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.compose.TransformedTargetRegressor model.
@@ -36,6 +37,7 @@ def _name(msg):
         transformer=transformer,
         func=func,
         inverse_func=inverse_func,
-        check_inverse=check_inverse
+        check_inverse=check_inverse,
+        **kwargs
     )
     return scope.sklearn_TransformedTargetRegressor(**hp_space)
6 changes: 4 additions & 2 deletions hpsklearn/components/covariance/_elliptic_envelope.py
@@ -16,7 +16,8 @@ def elliptic_envelope(name: str,
                       assume_centered: bool = False,
                       support_fraction: typing.Union[float, Apply] = None,
                       contamination: typing.Union[float, Apply] = 0.1,
-                      random_state=None):
+                      random_state=None,
+                      **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.covariance.EllipticEnvelope model.
@@ -39,6 +40,7 @@ def _name(msg):
         support_fraction=hp.uniform(_name("support_fraction"), 0.05, 0.95)
         if support_fraction is None else support_fraction,
         contamination=hp.uniform(_name("contamination"), 0.0, 0.3) if contamination is None else contamination,
-        random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state
+        random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
+        **kwargs
     )
     return scope.sklearn_EllipticEnvelope(**hp_space)
6 changes: 4 additions & 2 deletions hpsklearn/components/cross_decomposition/_pls.py
@@ -51,7 +51,8 @@ def _pls_hp_space(
         scale: bool = True,
         max_iter: typing.Union[int, Apply] = None,
         tol: typing.Union[float, Apply] = None,
-        copy: bool = True
+        copy: bool = True,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -64,7 +65,8 @@
         scale=scale,
         max_iter=_pls_max_iter(name_func("max_iter")) if max_iter is None else max_iter,
         tol=_pls_tol(name_func("tol")) if tol is None else tol,
-        copy=copy
+        copy=copy,
+        **kwargs
     )
     return hp_space
6 changes: 4 additions & 2 deletions hpsklearn/components/discriminant_analysis.py
@@ -30,7 +30,8 @@ def _discriminant_analysis_hp_space(
         name_func,
         priors: npt.ArrayLike = None,
         store_covariance: bool = False,
-        tol: float = None
+        tol: float = None,
+        **kwargs
 ):
     """
     Common hyper parameter search space
@@ -40,7 +41,8 @@
     hp_space = dict(
         priors=priors,
         store_covariance=store_covariance,
-        tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol
+        tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol,
+        **kwargs
     )
     return hp_space
12 changes: 8 additions & 4 deletions hpsklearn/components/dummy.py
@@ -26,7 +26,8 @@ def sklearn_DummyRegressor(*args, **kwargs):
 def dummy_classifier(name: str,
                      strategy: typing.Union[str, Apply] = None,
                      random_state=None,
-                     constant: typing.Union[int, str, npt.ArrayLike] = None):
+                     constant: typing.Union[int, str, npt.ArrayLike] = None,
+                     **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.dummy.DummyClassifier model.
@@ -45,7 +46,8 @@ def _name(msg):
         strategy=hp.choice(_name("strategy"), ["stratified", "most_frequent", "prior", "uniform"])
         if strategy is None else strategy,
         random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
-        constant=constant
+        constant=constant,
+        **kwargs
     )
     return scope.sklearn_DummyClassifier(**hp_space)
 
@@ -60,7 +62,8 @@ def _name(msg):
 def dummy_regressor(name: str,
                     strategy: typing.Union[str, Apply] = None,
                     constant: typing.Union[int, str, npt.ArrayLike] = None,
-                    quantile: float = None):
+                    quantile: float = None,
+                    **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.dummy.DummyRegressor model.
@@ -78,6 +81,7 @@ def _name(msg):
     hp_space = dict(
         strategy=hp.choice(_name("strategy"), ["mean", "median"]) if strategy is None else strategy,
         constant=constant,
-        quantile=quantile
+        quantile=quantile,
+        **kwargs
     )
     return scope.sklearn_DummyRegressor(**hp_space)
2 changes: 2 additions & 0 deletions hpsklearn/components/ensemble/_bagging.py
@@ -93,6 +93,7 @@ def _bagging_hp_space(
         n_jobs: int = 1,
         random_state=None,
         verbose: int = False,
+        **kwargs,
 ):
     """
     Hyper parameter search space for
@@ -112,6 +113,7 @@
         n_jobs=n_jobs,
         random_state=_bagging_random_state(name_func("random_state")) if random_state is None else random_state,
         verbose=verbose,
+        **kwargs
     )
     return hp_space
6 changes: 4 additions & 2 deletions hpsklearn/components/ensemble/_forest.py
@@ -190,7 +190,8 @@ def _forest_hp_space(
         verbose: int = False,
         warm_start: bool = False,
         ccp_alpha: float = 0.0,
-        max_samples: float = None
+        max_samples: float = None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -224,7 +225,8 @@
         verbose=verbose,
         warm_start=warm_start,
         ccp_alpha=ccp_alpha,
-        max_samples=max_samples
+        max_samples=max_samples,
+        **kwargs
     )
     return hp_space
4 changes: 3 additions & 1 deletion hpsklearn/components/ensemble/_gb.py
@@ -174,7 +174,8 @@ def _gb_hp_space(
         validation_fraction: float = 0.1,
         n_iter_no_change: int = None,
         tol: float = 1e-4,
-        ccp_alpha: float = 0.0
+        ccp_alpha: float = 0.0,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -206,6 +207,7 @@
         n_iter_no_change=n_iter_no_change,
         tol=tol,
         ccp_alpha=ccp_alpha,
+        **kwargs
     )
     return hp_space
16 changes: 14 additions & 2 deletions hpsklearn/components/ensemble/_hist_gradient_boosting.py
@@ -73,6 +73,13 @@ def _hist_gradient_boosting_random_state(name: str):
     return hp.randint(name, 5)
 
 
+def _hist_gradient_boosting_max_features(name: str):
+    """
+    Declaration search space 'max_features' parameter
+    """
+    return hp.uniform(name + ".frac", 0.5, 1.)
+
+
 @validate(params=["max_bins"],
           validation_test=lambda param: not isinstance(param, int) or 0 < param <= 255,
           msg="Invalid parameter '%s' with value '%s'. "
@@ -89,6 +96,7 @@ def _hist_gradient_boosting_hp_space(
         max_depth: typing.Union[int, Apply] = "Undefined",
         min_samples_leaf: typing.Union[int, Apply] = None,
         l2_regularization: float = 0,
+        max_features: typing.Union[float, Apply] = None,
         max_bins: int = 255,
         categorical_features: npt.ArrayLike = None,
         monotonic_cst: npt.ArrayLike = None,
@@ -99,7 +107,8 @@
         n_iter_no_change: int = 10,
         tol: float = 1e-7,
         verbose: int = False,
-        random_state=None
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -123,6 +132,8 @@
         min_samples_leaf=_hist_gradient_boosting_min_samples_leaf(name_func("min_samples_leaf"))
         if min_samples_leaf is None else min_samples_leaf,
         l2_regularization=l2_regularization,
+        max_features=_hist_gradient_boosting_max_features(name_func("max_features"))
+        if max_features is None else max_features,
         max_bins=max_bins,
         categorical_features=categorical_features,
         monotonic_cst=monotonic_cst,
@@ -134,7 +145,8 @@
         tol=tol,
         verbose=verbose,
         random_state=_hist_gradient_boosting_random_state(name_func("random_state"))
-        if random_state is None else random_state
+        if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
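
By default the new max_features entry is drawn from hp.uniform(0.5, 1.0); sklearn's HistGradientBoosting estimators read the value as the fraction of features considered at each split. A sketch of both modes, assuming the hist_gradient_boosting_classifier wrapper name:

from hpsklearn import hist_gradient_boosting_classifier

# Default: max_features is sampled uniformly from [0.5, 1.0) per trial.
searched = hist_gradient_boosting_classifier("hgb")

# Pinned: pass a float to bypass the search (1.0 uses all features,
# sklearn's own default).
fixed = hist_gradient_boosting_classifier("hgb_fixed", max_features=1.0)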
6 changes: 4 additions & 2 deletions hpsklearn/components/ensemble/_iforest.py
@@ -59,7 +59,8 @@ def _iforest_hp_space(
         n_jobs: int = 1,
         random_state=None,
         verbose: int = False,
-        warm_start: bool = False
+        warm_start: bool = False,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -74,7 +75,8 @@
         n_jobs=n_jobs,
         random_state=_iforest_random_state(name_func("random_state")) if random_state is None else random_state,
         verbose=verbose,
-        warm_start=warm_start
+        warm_start=warm_start,
+        **kwargs
     )
     return hp_space
23 changes: 8 additions & 15 deletions hpsklearn/components/ensemble/_weight_boosting.py
@@ -32,13 +32,6 @@ def _weight_boosting_learning_rate(name: str):
     return hp.lognormal(name, np.log(0.01), np.log(10.0))
 
 
-def _weight_boosting_algorithm(name: str):
-    """
-    Declaration search space 'algorithm' parameter
-    """
-    return hp.choice(name, ["SAMME", "SAMME.R"])
-
-
 def _weight_boosting_loss(name: str):
     """
     Declaration search space 'loss' parameter
@@ -57,11 +50,12 @@ def _weight_boosting_random_state(name: str):
           validation_test=lambda param: not isinstance(param, float) or param > 0,
           msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.")
 def _weight_boosting_hp_space(
-    name_func,
-    estimator=None,
-    n_estimators: typing.Union[int, Apply] = None,
-    learning_rate: typing.Union[float, Apply] = None,
-    random_state=None
+        name_func,
+        estimator=None,
+        n_estimators: typing.Union[int, Apply] = None,
+        learning_rate: typing.Union[float, Apply] = None,
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -74,18 +68,18 @@
         learning_rate=_weight_boosting_learning_rate(name_func("learning_rate"))
         if learning_rate is None else learning_rate,
         random_state=_weight_boosting_random_state(name_func("random_state")) if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
 
 
-def ada_boost_classifier(name: str, algorithm: typing.Union[str, Apply] = None, **kwargs):
+def ada_boost_classifier(name: str, **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.ensemble.AdaBoostClassifier model.
 
     Args:
         name: name | str
-        algorithm: choose 'SAMME' or 'SAMME.R' | str
 
     See help(hpsklearn.components.ensemble._weight_boosting._weight_boosting_hp_space)
     for info on additional available AdaBoost arguments.
@@ -95,7 +89,6 @@ def _name(msg):
         return f"{name}.ada_boost_{msg}"
 
     hp_space = _weight_boosting_hp_space(_name, **kwargs)
-    hp_space["algorithm"] = _weight_boosting_algorithm(_name("algorithm")) if algorithm is None else algorithm
 
     return scope.sklearn_AdaBoostClassifier(**hp_space)
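
Recent scikit-learn releases deprecated 'SAMME.R' and then the algorithm parameter itself, so tuning it no longer makes sense and the choice node is removed. Existing call sites just drop the argument; a sketch:

from hpsklearn import ada_boost_classifier

# Before this change: ada_boost_classifier("ada", algorithm="SAMME")
# Now the parameter is gone; named hp-space arguments still pass through.
space = ada_boost_classifier("ada", n_estimators=100)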
5 changes: 4 additions & 1 deletion hpsklearn/components/feature_extraction/__init__.py
@@ -1 +1,4 @@
-from .text import tfidf
+from .text import \
+    tfidf_vectorizer, \
+    hashing_vectorizer, \
+    count_vectorizer