Merged
28 commits
e233598
ignore /venv
mandjevant Mar 2, 2025
ab09f7b
Remove deprecated algorithm parameter from ada boost
mandjevant Mar 2, 2025
d6c7c6f
Revert formatting
mandjevant Mar 2, 2025
1d17c38
Add max_features parameter to hist_gradient_boosting clf and reg
mandjevant Mar 2, 2025
6635575
rename tfidf to tfidf vectorizer
mandjevant Mar 2, 2025
bac374a
Improve tfidf and add hashing vectorizer and count vectorizer
mandjevant Mar 2, 2025
934341b
Add unittests for hashing vectorizer and count vectorizer. Use SGD cl…
mandjevant Mar 2, 2025
c0c572b
Remove multi_class parameter
mandjevant Mar 2, 2025
24608e8
Remove copy x param from theil_sen. Copy is always made
mandjevant Mar 2, 2025
24a92a2
Change default on dual linear hp space
mandjevant Mar 2, 2025
b80cea8
Remove unused hp space params
mandjevant Mar 3, 2025
e87ea5d
rem unused use_label_encoder
mandjevant Mar 3, 2025
b30fd71
Allow more kwargs when using hp space dict
mandjevant Mar 3, 2025
837239b
Improve imports
mandjevant Mar 3, 2025
0db154e
Add new vectorizers to preprocessors and text preprocessors
mandjevant Mar 3, 2025
9b0a823
Add **kwargs to all hp spaces
mandjevant Mar 3, 2025
4973b96
deprecated store_cv_values
mandjevant Mar 3, 2025
00bf9c8
Requirements versioning update, will be tested with tox on pl
mandjevant Mar 3, 2025
ebbd0c0
max version support on sklearn
mandjevant Mar 3, 2025
bd61546
Improve pyproject, setup and tox using pytest
mandjevant Mar 3, 2025
db5456d
Update python versions in workflow
mandjevant Mar 3, 2025
d40d4c7
Add missing setuptools
mandjevant Mar 4, 2025
040718e
improve store_cv_results param
mandjevant Mar 4, 2025
80b5e69
pytest makes clusters run out of memory. No parallelization of tests,…
mandjevant Mar 4, 2025
d7b6735
Improve flake8 issues
mandjevant Mar 4, 2025
39b62ab
Update setup cfg
mandjevant Mar 9, 2025
6872f16
Reduce require setuptools
mandjevant Mar 9, 2025
6797eb9
Fixate max requirements
mandjevant Mar 9, 2025
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -10,7 +10,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest, windows-latest]
-        python-version: ['3.9', '3.10', '3.11']
+        python-version: ['3.11', '3.12', '3.13']
 
     steps:
       - uses: actions/checkout@v2
1 change: 1 addition & 0 deletions .gitignore
@@ -36,3 +36,4 @@ nosetests.xml
 
 .idea
 notebooks/.ipynb_checkpoints
+/venv
14 changes: 11 additions & 3 deletions hpsklearn/components/__init__.py
@@ -147,7 +147,9 @@
     lightgbm_regression
 
 from .feature_extraction import \
-    tfidf
+    tfidf_vectorizer, \
+    hashing_vectorizer, \
+    count_vectorizer
 
 from .decomposition import pca
 
@@ -253,7 +255,11 @@ def any_text_preprocessing(name):
     """
     Generic pre-processing appropriate for text data
     """
-    return hp.choice(name, [[tfidf(name + '.tfidf')]])
+    return hp.choice(name, [
+        [tfidf_vectorizer(name + ".tfidf")],
+        [hashing_vectorizer(name + ".hashing")],
+        [count_vectorizer(name + ".count")],
+    ])
 
 
 # Legacy any pre-processing as proposed in #137
@@ -423,7 +429,9 @@ def all_preprocessing(name):
         [polynomial_features(name + ".polynomial_features")],
         [spline_transformer(name + ".spline_transformer")],
         [k_bins_discretizer(name + ".k_bins_discretizer")],
-        [tfidf(name + ".tfidf")],
+        [tfidf_vectorizer(name + ".tfidf")],
+        [hashing_vectorizer(name + ".hashing")],
+        [count_vectorizer(name + ".count")],
        [pca(name + ".pca")],
        [ts_lagselector(name + ".ts_lagselector")],
        [colkmeans(name + ".colkmeans")],
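
The widened text search space can be exercised end to end. A minimal sketch, assuming the usual HyperoptEstimator API and hpsklearn's sgd_classifier component; X_train/y_train are hypothetical raw-text data:

from hyperopt import tpe
from hpsklearn import HyperoptEstimator, any_text_preprocessing, sgd_classifier

# Each trial now samples one of [tfidf_vectorizer], [hashing_vectorizer]
# or [count_vectorizer] as the preprocessing step, instead of tfidf only.
estim = HyperoptEstimator(
    classifier=sgd_classifier("clf"),
    preprocessing=any_text_preprocessing("pre"),
    algo=tpe.suggest,
    max_evals=15,
)
estim.fit(X_train, y_train)  # X_train: iterable of raw text documents
print(estim.best_model())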
6 changes: 4 additions & 2 deletions hpsklearn/components/cluster/_kmeans.py
@@ -44,7 +44,8 @@ def _kmeans_hp_space(
         n_clusters: typing.Union[int, Apply] = None,
         init: typing.Union[str, callable, npt.ArrayLike, Apply] = None,
         verbose: int = 0,
-        random_state=None
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -55,7 +56,8 @@
         n_clusters=_kmeans_n_clusters(name_func("n_clusters")) if n_clusters is None else n_clusters,
         init=_kmeans_init(name_func("init")) if init is None else init,
         verbose=verbose,
-        random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state
+        random_state=_kmeans_random_state(name_func("random_state")) if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
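
The **kwargs passthrough added here (and mirrored across the hp-space files below) lets callers pin any constructor argument that has no dedicated search-space entry. A small sketch, assuming the k_means component wrapper exported by hpsklearn:

from hpsklearn import k_means

# n_clusters is pinned; init and random_state are still searched.
# n_init has no dedicated entry in _kmeans_hp_space, so it travels
# through **kwargs into the sklearn.cluster.KMeans constructor.
space = k_means("km", n_clusters=8, n_init=5)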
6 changes: 4 additions & 2 deletions hpsklearn/components/compose/_target.py
@@ -13,7 +13,8 @@ def transformed_target_regressor(name: str,
                                  transformer: object = None,
                                  func: callable = None,
                                  inverse_func: callable = None,
-                                 check_inverse: bool = True):
+                                 check_inverse: bool = True,
+                                 **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.compose.TransformedTargetRegressor model.
@@ -36,6 +37,7 @@ def _name(msg):
         transformer=transformer,
         func=func,
         inverse_func=inverse_func,
-        check_inverse=check_inverse
+        check_inverse=check_inverse,
+        **kwargs
     )
     return scope.sklearn_TransformedTargetRegressor(**hp_space)
6 changes: 4 additions & 2 deletions hpsklearn/components/covariance/_elliptic_envelope.py
@@ -16,7 +16,8 @@ def elliptic_envelope(name: str,
                       assume_centered: bool = False,
                       support_fraction: typing.Union[float, Apply] = None,
                       contamination: typing.Union[float, Apply] = 0.1,
-                      random_state=None):
+                      random_state=None,
+                      **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.covariance.EllipticEnvelope model.
@@ -39,6 +40,7 @@ def _name(msg):
         support_fraction=hp.uniform(_name("support_fraction"), 0.05, 0.95)
         if support_fraction is None else support_fraction,
         contamination=hp.uniform(_name("contamination"), 0.0, 0.3) if contamination is None else contamination,
-        random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state
+        random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
+        **kwargs
     )
     return scope.sklearn_EllipticEnvelope(**hp_space)
6 changes: 4 additions & 2 deletions hpsklearn/components/cross_decomposition/_pls.py
@@ -51,7 +51,8 @@ def _pls_hp_space(
         scale: bool = True,
         max_iter: typing.Union[int, Apply] = None,
         tol: typing.Union[float, Apply] = None,
-        copy: bool = True
+        copy: bool = True,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -64,7 +65,8 @@
         scale=scale,
         max_iter=_pls_max_iter(name_func("max_iter")) if max_iter is None else max_iter,
         tol=_pls_tol(name_func("tol")) if tol is None else tol,
-        copy=copy
+        copy=copy,
+        **kwargs
     )
     return hp_space
6 changes: 4 additions & 2 deletions hpsklearn/components/discriminant_analysis.py
@@ -30,7 +30,8 @@ def _discriminant_analysis_hp_space(
         name_func,
         priors: npt.ArrayLike = None,
         store_covariance: bool = False,
-        tol: float = None
+        tol: float = None,
+        **kwargs
 ):
     """
     Common hyper parameter search space
@@ -40,7 +41,8 @@
     hp_space = dict(
         priors=priors,
         store_covariance=store_covariance,
-        tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol
+        tol=_discriminant_analysis_tol(name_func("tol")) if tol is None else tol,
+        **kwargs
     )
     return hp_space
12 changes: 8 additions & 4 deletions hpsklearn/components/dummy.py
@@ -26,7 +26,8 @@ def sklearn_DummyRegressor(*args, **kwargs):
 def dummy_classifier(name: str,
                      strategy: typing.Union[str, Apply] = None,
                      random_state=None,
-                     constant: typing.Union[int, str, npt.ArrayLike] = None):
+                     constant: typing.Union[int, str, npt.ArrayLike] = None,
+                     **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.dummy.DummyClassifier model.
@@ -45,7 +46,8 @@ def _name(msg):
         strategy=hp.choice(_name("strategy"), ["stratified", "most_frequent", "prior", "uniform"])
         if strategy is None else strategy,
         random_state=hp.randint(_name("random_state"), 5) if random_state is None else random_state,
-        constant=constant
+        constant=constant,
+        **kwargs
     )
     return scope.sklearn_DummyClassifier(**hp_space)
 
@@ -60,7 +62,8 @@ def _name(msg):
 def dummy_regressor(name: str,
                     strategy: typing.Union[str, Apply] = None,
                     constant: typing.Union[int, str, npt.ArrayLike] = None,
-                    quantile: float = None):
+                    quantile: float = None,
+                    **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.dummy.DummyRegressor model.
@@ -78,6 +81,7 @@ def _name(msg):
     hp_space = dict(
         strategy=hp.choice(_name("strategy"), ["mean", "median"]) if strategy is None else strategy,
         constant=constant,
-        quantile=quantile
+        quantile=quantile,
+        **kwargs
     )
     return scope.sklearn_DummyRegressor(**hp_space)
2 changes: 2 additions & 0 deletions hpsklearn/components/ensemble/_bagging.py
@@ -93,6 +93,7 @@ def _bagging_hp_space(
         n_jobs: int = 1,
         random_state=None,
         verbose: int = False,
+        **kwargs,
 ):
     """
     Hyper parameter search space for
@@ -112,6 +113,7 @@
         n_jobs=n_jobs,
         random_state=_bagging_random_state(name_func("random_state")) if random_state is None else random_state,
         verbose=verbose,
+        **kwargs
     )
     return hp_space
6 changes: 4 additions & 2 deletions hpsklearn/components/ensemble/_forest.py
@@ -190,7 +190,8 @@ def _forest_hp_space(
         verbose: int = False,
         warm_start: bool = False,
         ccp_alpha: float = 0.0,
-        max_samples: float = None
+        max_samples: float = None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -224,7 +225,8 @@
         verbose=verbose,
         warm_start=warm_start,
         ccp_alpha=ccp_alpha,
-        max_samples=max_samples
+        max_samples=max_samples,
+        **kwargs
     )
     return hp_space
4 changes: 3 additions & 1 deletion hpsklearn/components/ensemble/_gb.py
@@ -174,7 +174,8 @@ def _gb_hp_space(
         validation_fraction: float = 0.1,
         n_iter_no_change: int = None,
         tol: float = 1e-4,
-        ccp_alpha: float = 0.0
+        ccp_alpha: float = 0.0,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -206,6 +207,7 @@
         n_iter_no_change=n_iter_no_change,
         tol=tol,
         ccp_alpha=ccp_alpha,
+        **kwargs
     )
     return hp_space
16 changes: 14 additions & 2 deletions hpsklearn/components/ensemble/_hist_gradient_boosting.py
@@ -73,6 +73,13 @@ def _hist_gradient_boosting_random_state(name: str):
     return hp.randint(name, 5)
 
 
+def _hist_gradient_boosting_max_features(name: str):
+    """
+    Declaration search space 'max_features' parameter
+    """
+    return hp.uniform(name + ".frac", 0.5, 1.)
+
+
 @validate(params=["max_bins"],
           validation_test=lambda param: not isinstance(param, int) or 0 < param <= 255,
           msg="Invalid parameter '%s' with value '%s'. "
@@ -89,6 +96,7 @@ def _hist_gradient_boosting_hp_space(
         max_depth: typing.Union[int, Apply] = "Undefined",
         min_samples_leaf: typing.Union[int, Apply] = None,
         l2_regularization: float = 0,
+        max_features: typing.Union[float, Apply] = None,
         max_bins: int = 255,
         categorical_features: npt.ArrayLike = None,
         monotonic_cst: npt.ArrayLike = None,
@@ -99,7 +107,8 @@
         n_iter_no_change: int = 10,
         tol: float = 1e-7,
         verbose: int = False,
-        random_state=None
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -123,6 +132,8 @@
         min_samples_leaf=_hist_gradient_boosting_min_samples_leaf(name_func("min_samples_leaf"))
         if min_samples_leaf is None else min_samples_leaf,
         l2_regularization=l2_regularization,
+        max_features=_hist_gradient_boosting_max_features(name_func("max_features"))
+        if max_features is None else max_features,
         max_bins=max_bins,
         categorical_features=categorical_features,
         monotonic_cst=monotonic_cst,
@@ -134,7 +145,8 @@
         tol=tol,
         verbose=verbose,
         random_state=_hist_gradient_boosting_random_state(name_func("random_state"))
-        if random_state is None else random_state
+        if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
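
By default the new max_features entry is drawn from hp.uniform(0.5, 1.0); sklearn's HistGradientBoosting estimators read the value as the fraction of features considered at each split. A sketch of both modes, assuming the hist_gradient_boosting_classifier wrapper name:

from hpsklearn import hist_gradient_boosting_classifier

# Default: max_features is sampled uniformly from [0.5, 1.0) per trial.
searched = hist_gradient_boosting_classifier("hgb")

# Pinned: pass a float to bypass the search (1.0 uses all features,
# sklearn's own default).
fixed = hist_gradient_boosting_classifier("hgb_fixed", max_features=1.0)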
6 changes: 4 additions & 2 deletions hpsklearn/components/ensemble/_iforest.py
@@ -59,7 +59,8 @@ def _iforest_hp_space(
         n_jobs: int = 1,
         random_state=None,
         verbose: int = False,
-        warm_start: bool = False
+        warm_start: bool = False,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -74,7 +75,8 @@
         n_jobs=n_jobs,
         random_state=_iforest_random_state(name_func("random_state")) if random_state is None else random_state,
         verbose=verbose,
-        warm_start=warm_start
+        warm_start=warm_start,
+        **kwargs
     )
     return hp_space
23 changes: 8 additions & 15 deletions hpsklearn/components/ensemble/_weight_boosting.py
@@ -32,13 +32,6 @@ def _weight_boosting_learning_rate(name: str):
     return hp.lognormal(name, np.log(0.01), np.log(10.0))
 
 
-def _weight_boosting_algorithm(name: str):
-    """
-    Declaration search space 'algorithm' parameter
-    """
-    return hp.choice(name, ["SAMME", "SAMME.R"])
-
-
 def _weight_boosting_loss(name: str):
     """
     Declaration search space 'loss' parameter
@@ -57,11 +50,12 @@ def _weight_boosting_random_state(name: str):
           validation_test=lambda param: not isinstance(param, float) or param > 0,
           msg="Invalid parameter '%s' with value '%s'. Parameter value must be non-negative and greater than 0.")
 def _weight_boosting_hp_space(
-    name_func,
-    estimator=None,
-    n_estimators: typing.Union[int, Apply] = None,
-    learning_rate: typing.Union[float, Apply] = None,
-    random_state=None
+        name_func,
+        estimator=None,
+        n_estimators: typing.Union[int, Apply] = None,
+        learning_rate: typing.Union[float, Apply] = None,
+        random_state=None,
+        **kwargs
 ):
     """
     Hyper parameter search space for
@@ -74,18 +68,18 @@
         learning_rate=_weight_boosting_learning_rate(name_func("learning_rate"))
         if learning_rate is None else learning_rate,
         random_state=_weight_boosting_random_state(name_func("random_state")) if random_state is None else random_state,
+        **kwargs
     )
     return hp_space
 
 
-def ada_boost_classifier(name: str, algorithm: typing.Union[str, Apply] = None, **kwargs):
+def ada_boost_classifier(name: str, **kwargs):
     """
     Return a pyll graph with hyperparameters that will construct
     a sklearn.ensemble.AdaBoostClassifier model.
 
     Args:
         name: name | str
-        algorithm: choose 'SAMME' or 'SAMME.R' | str
 
     See help(hpsklearn.components.ensemble._weight_boosting._weight_boosting_hp_space)
     for info on additional available AdaBoost arguments.
@@ -95,7 +89,6 @@ def _name(msg):
         return f"{name}.ada_boost_{msg}"
 
     hp_space = _weight_boosting_hp_space(_name, **kwargs)
-    hp_space["algorithm"] = _weight_boosting_algorithm(_name("algorithm")) if algorithm is None else algorithm
 
     return scope.sklearn_AdaBoostClassifier(**hp_space)
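
Recent scikit-learn releases deprecated 'SAMME.R' and then the algorithm parameter itself, so tuning it no longer makes sense and the choice node is removed. Existing call sites just drop the argument; a sketch:

from hpsklearn import ada_boost_classifier

# Before this change: ada_boost_classifier("ada", algorithm="SAMME")
# Now the parameter is gone; named hp-space arguments still pass through.
space = ada_boost_classifier("ada", n_estimators=100)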
5 changes: 4 additions & 1 deletion hpsklearn/components/feature_extraction/__init__.py
@@ -1 +1,4 @@
-from .text import tfidf
+from .text import \
+    tfidf_vectorizer, \
+    hashing_vectorizer, \
+    count_vectorizer