diff --git a/CHANGELOG.md b/CHANGELOG.md index a21b5e7172..ab4709c7d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `candidates_exp` argument removed from `SubspaceDiscrete.subset_masks`, `SubspaceDiscrete.sample_subset_masks`, `SearchSpace.subsets`, and `SearchSpace.sample_subsets` +- `SubspaceDiscrete.get_candidates` now returns only the experimental representation + instead of a tuple of experimental and computational representations ### Added - `narwhals` as a hard dependency @@ -50,6 +52,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 access batch-level constraints; filtering constraints are only needed during subspace construction and are thus no longer stored). - `SubspaceDiscrete.constraints_batch` property (use `batch_constraints` instead) +- `SubspaceDiscrete.exp_rep` attribute (use `get_candidates()` instead) +- `SubspaceDiscrete.comp_rep` attribute (use `transform(get_candidates())` instead) ## [0.15.0] - 2026-06-11 ### Breaking Changes diff --git a/baybe/acquisition/acqfs.py b/baybe/acquisition/acqfs.py index 5e3b08b1cf..96110fa66b 100644 --- a/baybe/acquisition/acqfs.py +++ b/baybe/acquisition/acqfs.py @@ -104,7 +104,9 @@ def get_integration_points(self, searchspace: SearchSpace) -> pd.DataFrame: # Discrete part if not searchspace.discrete.is_empty: - candidates_discrete = searchspace.discrete.comp_rep + candidates_discrete = searchspace.discrete.transform( + searchspace.discrete.get_candidates() + ) n_candidates = self.sampling_n_points or math.ceil( self.sampling_fraction * len(candidates_discrete) # type: ignore[operator] ) diff --git a/baybe/campaign.py b/baybe/campaign.py index 5ccf45356c..57fc3594dc 100644 --- a/baybe/campaign.py +++ b/baybe/campaign.py @@ -206,39 +206,7 @@ def _default_recommended_experiments(self) -> pd.DataFrame: @override def __str__(self) -> str: - recommended_count = len(self._recommended_experiments) - exp_rep = self.searchspace.discrete.exp_rep - if self._measurements.empty or exp_rep.empty: - measured_count = 0 - else: - measured_count = len( - fuzzy_row_match(exp_rep, self._measurements, self.parameters) - ) - excluded_count = len(self._excluded_experiments) - n_elements = len(exp_rep) - searchspace_fields = [ - to_string( - "Recommended:", - f"{recommended_count}/{n_elements}", - single_line=True, - ), - to_string( - "Measured:", - f"{measured_count}/{n_elements}", - single_line=True, - ), - to_string( - "Excluded:", - f"{excluded_count}/{n_elements}", - single_line=True, - ), - ] - metadata_fields = [ - to_string("Discrete Subspace Meta Data", *searchspace_fields), - ] - metadata = to_string("Meta Data", *metadata_fields) - fields = [metadata, self.searchspace, self.objective, self.recommender] - + fields = [self.searchspace, self.objective, self.recommender] return to_string(self.__class__.__name__, *fields) @property @@ -460,7 +428,7 @@ def toggle_discrete_candidates( # noqa: DOC501 # * Additional shortcuts might be possible. self.clear_cache() - df = self.searchspace.discrete.exp_rep + df = self.searchspace.discrete.get_candidates() if isinstance(constraints, pd.DataFrame): # Determine the candidate subset to be toggled @@ -561,12 +529,12 @@ def recommend( if self.searchspace.type is SearchSpaceType.DISCRETE: # TODO: This implementation should at some point be hidden behind an # appropriate public interface, like `SubspaceDiscrete.filter()` - exp_rep = self.searchspace.discrete.exp_rep - mask_todrop = pd.Series(False, index=exp_rep.index) + candidates = self.searchspace.discrete.get_candidates() + mask_todrop = pd.Series(False, index=candidates.index) if not self._excluded_experiments.empty: mask_todrop |= ( pd.merge( - exp_rep, + candidates, self._excluded_experiments, indicator=True, how="left", @@ -580,7 +548,7 @@ def recommend( ): mask_todrop |= ( pd.merge( - exp_rep, + candidates, self._recommended_experiments, indicator=True, how="left", @@ -593,7 +561,7 @@ def recommend( and not self._measurements.empty ): measured_idxs = fuzzy_row_match( - exp_rep, self._measurements, self.parameters + candidates, self._measurements, self.parameters ) mask_todrop.loc[measured_idxs] = True if ( @@ -602,7 +570,7 @@ def recommend( ): mask_todrop |= ( pd.merge( - exp_rep, + candidates, pending_experiments, indicator=True, how="left", @@ -613,7 +581,7 @@ def recommend( searchspace = evolve( self.searchspace, discrete=evolve( - self.searchspace.discrete, exp_rep=exp_rep.loc[~mask_todrop] + self.searchspace.discrete, exp_rep=candidates.loc[~mask_todrop] ), ) else: @@ -1100,13 +1068,16 @@ def _structure_campaign(d: dict, cl: type) -> Campaign: # >>>>>>>>>> Deprecation # Post-structure reconstruction from legacy metadata indices - if legacy_recommended_idxs is not None: - rec_df = campaign.searchspace.discrete.exp_rep.loc[legacy_recommended_idxs] - campaign._recommended_experiments = rec_df.reset_index(drop=True) - - if legacy_excluded_idxs is not None: - excl_df = campaign.searchspace.discrete.exp_rep.loc[legacy_excluded_idxs] - campaign._excluded_experiments = excl_df.reset_index(drop=True) + if legacy_recommended_idxs is not None or legacy_excluded_idxs is not None: + candidates = campaign.searchspace.discrete.get_candidates() + if legacy_recommended_idxs is not None: + campaign._recommended_experiments = candidates.loc[ + legacy_recommended_idxs + ].reset_index(drop=True) + if legacy_excluded_idxs is not None: + campaign._excluded_experiments = candidates.loc[ + legacy_excluded_idxs + ].reset_index(drop=True) # Fix schema of empty DataFrames from legacy serialization if campaign._measurements.columns.empty: diff --git a/baybe/recommenders/naive.py b/baybe/recommenders/naive.py index 04800c7db2..4b89ad13da 100644 --- a/baybe/recommenders/naive.py +++ b/baybe/recommenders/naive.py @@ -117,7 +117,7 @@ def recommend( # Get one random discrete point that will be attached when evaluating the # acquisition function in the discrete space. - disc_part = searchspace.discrete.comp_rep.loc[disc_rec.index].sample(1) + disc_part = searchspace.discrete.transform(disc_rec).sample(1) disc_part_tensor = to_tensor(disc_part).unsqueeze(-2) # Setup a fresh acquisition function for the continuous recommender diff --git a/baybe/recommenders/pure/base.py b/baybe/recommenders/pure/base.py index 493a086710..58d62c1868 100644 --- a/baybe/recommenders/pure/base.py +++ b/baybe/recommenders/pure/base.py @@ -271,7 +271,9 @@ def _recommend_with_discrete_parts( # Check if enough candidates are left # TODO [15917]: This check is not perfectly correct. - if (not is_hybrid_space) and (len(searchspace.discrete.exp_rep) < batch_size): + if (not is_hybrid_space) and ( + len(searchspace.discrete.get_candidates()) < batch_size + ): raise NotEnoughPointsLeftError( f"Using the current settings, there are fewer than {batch_size} " f"possible data points left to recommend." diff --git a/baybe/recommenders/pure/bayesian/botorch/discrete.py b/baybe/recommenders/pure/bayesian/botorch/discrete.py index 24551a3fee..eabbc2ad6f 100644 --- a/baybe/recommenders/pure/bayesian/botorch/discrete.py +++ b/baybe/recommenders/pure/bayesian/botorch/discrete.py @@ -43,6 +43,7 @@ def recommend_discrete_with_subsets( """ import torch + candidates = subspace_discrete.get_candidates() masks: Iterable[npt.NDArray[np.bool_]] if subspace_discrete.n_subsets <= recommender.max_n_subsets: masks = subspace_discrete.subset_masks(min_candidates=batch_size) @@ -56,9 +57,7 @@ def make_callable( mask: np.ndarray, ) -> Callable[[], tuple[pd.DataFrame, Tensor]]: def optimize() -> tuple[pd.DataFrame, Tensor]: - subset_subspace = evolve( - subspace_discrete, exp_rep=subspace_discrete.exp_rep.loc[mask] - ) + subset_subspace = evolve(subspace_discrete, exp_rep=candidates.loc[mask]) rec = recommend_discrete_without_subsets( recommender, subset_subspace, batch_size @@ -118,7 +117,8 @@ def recommend_discrete_without_subsets( from botorch.optim import optimize_acqf_discrete # Determine the next set of points to be tested - candidates_comp = subspace_discrete.comp_rep + candidates = subspace_discrete.get_candidates() + candidates_comp = subspace_discrete.transform(candidates) points, _ = optimize_acqf_discrete( recommender._botorch_acqf, batch_size, to_tensor(candidates_comp) ) @@ -137,4 +137,4 @@ def recommend_discrete_without_subsets( )["index"] ) - return subspace_discrete.exp_rep.loc[idxs] + return candidates.loc[idxs] diff --git a/baybe/recommenders/pure/bayesian/botorch/hybrid.py b/baybe/recommenders/pure/bayesian/botorch/hybrid.py index 0c81339b25..1424db9c18 100644 --- a/baybe/recommenders/pure/bayesian/botorch/hybrid.py +++ b/baybe/recommenders/pure/bayesian/botorch/hybrid.py @@ -81,8 +81,8 @@ def recommend_hybrid_without_subsets( from botorch.optim import optimize_acqf_mixed # Transform discrete candidates - # (Create a shallow copy to avoid in-place modifications of the original dataframe) - candidates_comp = searchspace.discrete.comp_rep.copy(deep=False) + candidates = searchspace.discrete.get_candidates() + candidates_comp = searchspace.discrete.transform(candidates) # Calculate the number of samples from the given percentage n_candidates = math.ceil( @@ -144,7 +144,7 @@ def recommend_hybrid_without_subsets( ).set_index("index") # Get experimental representation of discrete part - rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index] + rec_disc_exp = candidates.loc[merged.index] # Combine discrete and continuous parts rec_exp = pd.concat( @@ -186,6 +186,7 @@ def recommend_hybrid_with_subsets( # NOTE: No min_discrete_candidates filtering in hybrid spaces because # optimize_acqf_mixed can produce multiple recommendations from a single # discrete candidate by varying continuous parameters. + candidates = searchspace.discrete.get_candidates() combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]] if searchspace.n_subsets <= recommender.max_n_subsets: combined_masks = searchspace.subsets() @@ -201,7 +202,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]: mod_disc = evolve( searchspace.discrete, - exp_rep=searchspace.discrete.exp_rep.loc[d_mask], + exp_rep=candidates.loc[d_mask], ) mod_cont = ( subspace_c._enforce_cardinality_constraints(c_inactive_params) diff --git a/baybe/recommenders/pure/nonpredictive/clustering.py b/baybe/recommenders/pure/nonpredictive/clustering.py index 5a23c44afd..7e5db5c16d 100644 --- a/baybe/recommenders/pure/nonpredictive/clustering.py +++ b/baybe/recommenders/pure/nonpredictive/clustering.py @@ -107,13 +107,13 @@ def _recommend_discrete( from sklearn.preprocessing import StandardScaler # TODO [Scaling]: scaling should be handled by search space object + candidates = subspace_discrete.get_candidates() + candidates_comp = subspace_discrete.transform(candidates) scaler = StandardScaler() - scaler.fit(subspace_discrete.comp_rep) + scaler.fit(candidates_comp) # Scale candidates - candidates_scaled = np.ascontiguousarray( - scaler.transform(subspace_discrete.comp_rep) - ) + candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp)) # Set model parameters and perform fit model = self._get_model_cls()( @@ -129,7 +129,7 @@ def _recommend_discrete( selection = self._make_selection_default(model, candidates_scaled) # Select rows by positional indices and return the corresponding subset - return subspace_discrete.exp_rep.iloc[selection] + return candidates.iloc[selection] @override def __str__(self) -> str: diff --git a/baybe/recommenders/pure/nonpredictive/sampling.py b/baybe/recommenders/pure/nonpredictive/sampling.py index 534fa1922c..bc17a56a00 100644 --- a/baybe/recommenders/pure/nonpredictive/sampling.py +++ b/baybe/recommenders/pure/nonpredictive/sampling.py @@ -41,7 +41,7 @@ def _recommend_hybrid( if searchspace.type is SearchSpaceType.CONTINUOUS: return cont_random - candidates_exp = searchspace.discrete.exp_rep + candidates_exp = searchspace.discrete.get_candidates() # Restrict to a random subset if subset-generating constraints are present if searchspace.discrete.n_subsets > 0: @@ -152,11 +152,12 @@ def _recommend_discrete( from sklearn.preprocessing import StandardScaler # TODO [Scaling]: scaling should be handled by search space object + candidates = subspace_discrete.get_candidates() + candidates_comp = subspace_discrete.transform(candidates) scaler = StandardScaler() - scaler.fit(subspace_discrete.comp_rep) + scaler.fit(candidates_comp) # Scale and sample - candidates_comp = subspace_discrete.comp_rep candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp)) if active_settings.use_fpsample: @@ -173,7 +174,7 @@ def _recommend_discrete( initialization=self.initialization.value, random_tie_break=self.random_tie_break, ) - return subspace_discrete.exp_rep.iloc[ilocs] + return candidates.iloc[ilocs] @override def __str__(self) -> str: diff --git a/baybe/searchspace/core.py b/baybe/searchspace/core.py index 2ec217da23..e36959eeea 100644 --- a/baybe/searchspace/core.py +++ b/baybe/searchspace/core.py @@ -6,7 +6,7 @@ from collections.abc import Collection, Iterable, Iterator, Sequence from enum import Enum from itertools import product -from typing import TYPE_CHECKING, ClassVar, cast +from typing import TYPE_CHECKING, ClassVar import numpy as np import numpy.typing as npt @@ -263,7 +263,7 @@ def task_idx(self) -> int | None: # appear first in the computational dataframe. # 3. It assumes there exists exactly one task parameter # --> Fix this when refactoring the data - return cast(int, self.discrete.comp_rep.columns.get_loc(task_param.name)) + return self.discrete.comp_rep_columns.index(task_param.name) @property def n_tasks(self) -> int: diff --git a/baybe/searchspace/discrete.py b/baybe/searchspace/discrete.py index c0a21df74a..5521e6caf6 100644 --- a/baybe/searchspace/discrete.py +++ b/baybe/searchspace/discrete.py @@ -6,7 +6,6 @@ import random import warnings from collections.abc import Callable, Collection, Iterator, Sequence -from functools import cached_property from itertools import islice from math import prod from typing import TYPE_CHECKING, Annotated, Any, Literal @@ -117,7 +116,9 @@ class SubspaceDiscrete(SerialMixin): ) """The parameters spanning the subspace.""" - exp_rep: pd.DataFrame = field(validator=instance_of(pd.DataFrame), eq=eq_dataframe) + _exp_rep: pd.DataFrame = field( + alias="exp_rep", validator=instance_of(pd.DataFrame), eq=eq_dataframe + ) """The experimental representation of the subspace.""" _empty_encoding: Annotated[bool, cattrs.override(omit=True)] = field( @@ -193,13 +194,11 @@ def __str__(self) -> str: "Discrete Parameters", pretty_print_df(param_df, max_colwidth=None), ), - to_string("Experimental Representation", pretty_print_df(self.exp_rep)), to_string("Batch Constraints", pretty_print_df(batch_constraints_df)), - to_string("Computational Representation", pretty_print_df(self.comp_rep)), ] return to_string(self.__class__.__name__, *fields) - @exp_rep.validator + @_exp_rep.validator def _validate_exp_rep( # noqa: DOC101, DOC103 self, _: Any, exp_rep: pd.DataFrame ) -> None: @@ -612,20 +611,47 @@ def parameter_names(self) -> tuple[str, ...]: """Return tuple of parameter names.""" return tuple(p.name for p in self.parameters) - @cached_property + # >>>>>>>>>> Deprecation + @property + def exp_rep(self) -> pd.DataFrame: + """Deprecated! Use :meth:`get_candidates` instead.""" + get_candidates = type(self).get_candidates.__name__ + warnings.warn( + f"Accessing 'exp_rep' is deprecated and will be removed in a future " + f"version. Use '{get_candidates}()' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self._exp_rep + + @property def comp_rep(self) -> pd.DataFrame: - """The computational representation of the subspace.""" - return self.transform(self.exp_rep) + """Deprecated! Use :meth:`transform` with :meth:`get_candidates` instead.""" + cls = type(self) + transform = cls.transform.__name__ + get_candidates = cls.get_candidates.__name__ + warnings.warn( + f"Accessing 'comp_rep' is deprecated and will be removed in a future " + f"version. Use '{transform}({get_candidates}())' instead.", + DeprecationWarning, + stacklevel=2, + ) + return self.transform(self._exp_rep) + + # <<<<<<<<<< Deprecation @property def comp_rep_columns(self) -> tuple[str, ...]: """The columns spanning the computational representation.""" - return tuple(self.comp_rep.columns) + return tuple(col for p in self.parameters for col in p.comp_rep_columns) @property def comp_rep_bounds(self) -> pd.DataFrame: """The minimum and maximum values of the computational representation.""" - return pd.DataFrame({"min": self.comp_rep.min(), "max": self.comp_rep.max()}).T + if not self.parameters: + return pd.DataFrame(index=["min", "max"]) + df = pd.concat([p.comp_df for p in self.parameters], axis=1) + return pd.DataFrame({"min": df.min(), "max": df.max()}).T @property def scaling_bounds(self) -> pd.DataFrame: @@ -742,10 +768,10 @@ def subset_masks( per_constraint: list[list[npt.NDArray[np.bool_]]] if not self.batch_constraints: - per_constraint = [[np.ones(len(self.exp_rep), dtype=bool)]] + per_constraint = [[np.ones(len(self.get_candidates()), dtype=bool)]] else: per_constraint = [ - c.subset_masks(self.exp_rep) for c in self.batch_constraints + c.subset_masks(self.get_candidates()) for c in self.batch_constraints ] total = prod(len(masks) for masks in per_constraint) @@ -797,14 +823,9 @@ def sample_subset_masks( ) ) - def get_candidates(self) -> tuple[pd.DataFrame, pd.DataFrame]: - """Return the set of candidate parameter settings that can be tested. - - Returns: - The candidate parameter settings both in experimental and computational - representation. - """ - return self.exp_rep, self.comp_rep + def get_candidates(self) -> pd.DataFrame: + """Return all candidate parameter configurations.""" + return self._exp_rep def transform( self, diff --git a/baybe/simulation/scenarios.py b/baybe/simulation/scenarios.py index dc5763e2eb..ad34406641 100644 --- a/baybe/simulation/scenarios.py +++ b/baybe/simulation/scenarios.py @@ -283,9 +283,13 @@ def _simulate_groupby( # space constructor, the integer-based indexing provides a second safety net. # Hence, the "reset_index" call. if groupby is None: - groups = ((None, campaign.searchspace.discrete.exp_rep.reset_index()),) + groups = ((None, campaign.searchspace.discrete.get_candidates().reset_index()),) else: - groups = campaign.searchspace.discrete.exp_rep.reset_index().groupby(groupby) + groups = ( + campaign.searchspace.discrete.get_candidates() + .reset_index() + .groupby(groupby) + ) # Simulate all subgroups dfs = [] diff --git a/examples/Constraints_Discrete/custom_constraints.py b/examples/Constraints_Discrete/custom_constraints.py index bde644b596..0ede01683b 100644 --- a/examples/Constraints_Discrete/custom_constraints.py +++ b/examples/Constraints_Discrete/custom_constraints.py @@ -115,43 +115,33 @@ def custom_function(df: pd.DataFrame) -> pd.Series: N_ITERATIONS = 3 for kIter in range(N_ITERATIONS): + candidates = campaign.searchspace.discrete.get_candidates() + print(f"\n\n#### ITERATION {kIter + 1} ####") print("## ASSERTS ##") print( "Number of entries with water, temp > 120 and concentration > 5: ", ( - campaign.searchspace.discrete.exp_rep["Concentration"].apply( - lambda x: x > 5 - ) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply( - lambda x: x > 120 - ) - & campaign.searchspace.discrete.exp_rep["Solvent"].eq("water") + candidates["Concentration"].apply(lambda x: x > 5) + & candidates["Temperature"].apply(lambda x: x > 120) + & candidates["Solvent"].eq("water") ).sum(), ) print( "Number of entries with C2, temp > 180 and concentration > 3: ", ( - campaign.searchspace.discrete.exp_rep["Concentration"].apply( - lambda x: x > 3 - ) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply( - lambda x: x > 180 - ) - & campaign.searchspace.discrete.exp_rep["Solvent"].eq("C2") + candidates["Concentration"].apply(lambda x: x > 3) + & candidates["Temperature"].apply(lambda x: x > 180) + & candidates["Solvent"].eq("C2") ).sum(), ) print( "Number of entries with C3, temp > 150 and concentration > 3: ", ( - campaign.searchspace.discrete.exp_rep["Concentration"].apply( - lambda x: x > 3 - ) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply( - lambda x: x > 150 - ) - & campaign.searchspace.discrete.exp_rep["Solvent"].eq("C3") + candidates["Concentration"].apply(lambda x: x > 3) + & candidates["Temperature"].apply(lambda x: x > 150) + & candidates["Solvent"].eq("C3") ).sum(), ) diff --git a/examples/Constraints_Discrete/dependency_constraints.py b/examples/Constraints_Discrete/dependency_constraints.py index 011224e68f..ead9421194 100644 --- a/examples/Constraints_Discrete/dependency_constraints.py +++ b/examples/Constraints_Discrete/dependency_constraints.py @@ -77,39 +77,29 @@ N_ITERATIONS = 2 if SMOKE_TEST else 5 for kIter in range(N_ITERATIONS): + candidates = campaign.searchspace.discrete.get_candidates() + print(f"\n#### ITERATION {kIter + 1} ####") print("## ASSERTS ##") print( f"Number entries with both switches on " f"(expected {RESOLUTION * len(dict_solvent) * 2 * 2}): ", - ( - (campaign.searchspace.discrete.exp_rep["Switch1"] == "on") - & (campaign.searchspace.discrete.exp_rep["Switch2"] == "right") - ).sum(), + ((candidates["Switch1"] == "on") & (candidates["Switch2"] == "right")).sum(), ) print( f"Number entries with Switch1 off (expected {2 * 2}): ", - ( - (campaign.searchspace.discrete.exp_rep["Switch1"] == "off") - & (campaign.searchspace.discrete.exp_rep["Switch2"] == "right") - ).sum(), + ((candidates["Switch1"] == "off") & (candidates["Switch2"] == "right")).sum(), ) print( f"Number entries with Switch2 off " f"(expected {RESOLUTION * len(dict_solvent)}):" f" ", - ( - (campaign.searchspace.discrete.exp_rep["Switch1"] == "on") - & (campaign.searchspace.discrete.exp_rep["Switch2"] == "left") - ).sum(), + ((candidates["Switch1"] == "on") & (candidates["Switch2"] == "left")).sum(), ) print( "Number entries with both switches off (expected 1): ", - ( - (campaign.searchspace.discrete.exp_rep["Switch1"] == "off") - & (campaign.searchspace.discrete.exp_rep["Switch2"] == "left") - ).sum(), + ((candidates["Switch1"] == "off") & (candidates["Switch2"] == "left")).sum(), ) rec = campaign.recommend(batch_size=5) diff --git a/examples/Constraints_Discrete/exclusion_constraints.py b/examples/Constraints_Discrete/exclusion_constraints.py index feba15858d..c801b4ddfb 100644 --- a/examples/Constraints_Discrete/exclusion_constraints.py +++ b/examples/Constraints_Discrete/exclusion_constraints.py @@ -114,32 +114,30 @@ N_ITERATIONS = 3 for kIter in range(N_ITERATIONS): + candidates = campaign.searchspace.discrete.get_candidates() + print(f"\n\n#### ITERATION {kIter + 1} ####") print("## ASSERTS ##") print( "Number of entries with either Solvents C2 or C4 and a temperature above 151: ", ( - campaign.searchspace.discrete.exp_rep["Temp"].apply(lambda x: x > 151) - & campaign.searchspace.discrete.exp_rep["Solv"].apply( - lambda x: x in ["C2", "C4"] - ) + candidates["Temp"].apply(lambda x: x > 151) + & candidates["Solv"].apply(lambda x: x in ["C2", "C4"]) ).sum(), ) print( "Number of entries with either Solvents C5 or C6 and a pressure above 5: ", ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x > 5) - & campaign.searchspace.discrete.exp_rep["Solv"].apply( - lambda x: x in ["C5", "C6"] - ) + candidates["Pressure"].apply(lambda x: x > 5) + & candidates["Solv"].apply(lambda x: x in ["C5", "C6"]) ).sum(), ) print( "Number of entries with pressure below 3 and temperature above 120: ", ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x < 3) - & campaign.searchspace.discrete.exp_rep["Temp"].apply(lambda x: x > 120) + candidates["Pressure"].apply(lambda x: x < 3) + & candidates["Temp"].apply(lambda x: x > 120) ).sum(), ) diff --git a/examples/Constraints_Discrete/prodsum_constraints.py b/examples/Constraints_Discrete/prodsum_constraints.py index ff482cdc49..f62a70eba0 100644 --- a/examples/Constraints_Discrete/prodsum_constraints.py +++ b/examples/Constraints_Discrete/prodsum_constraints.py @@ -109,30 +109,22 @@ N_ITERATIONS = 2 if SMOKE_TEST else 5 for kIter in range(N_ITERATIONS): + candidates = campaign.searchspace.discrete.get_candidates() + print(f"\n\n#### ITERATION {kIter + 1} ####") print("## ASSERTS ##") print( "Number of entries with 1,2-sum above 150: ", - ( - campaign.searchspace.discrete.exp_rep[["NumParam1", "NumParam2"]].sum( - axis=1 - ) - > 150.0 - ).sum(), + (candidates[["NumParam1", "NumParam2"]].sum(axis=1) > 150.0).sum(), ) print( "Number of entries with 3,4-product under 30: ", - ( - campaign.searchspace.discrete.exp_rep[["NumParam3", "NumParam4"]].prod( - axis=1 - ) - < 30 - ).sum(), + (candidates[["NumParam3", "NumParam4"]].prod(axis=1) < 30).sum(), ) print( "Number of entries with 5,6-sum unequal to 100: ", - campaign.searchspace.discrete.exp_rep[["NumParam5", "NumParam6"]] + candidates[["NumParam5", "NumParam6"]] .sum(axis=1) .apply(lambda x: x - 100.0) .abs() diff --git a/examples/Custom_Hooks/campaign_stopping.py b/examples/Custom_Hooks/campaign_stopping.py index 208cbe60f9..1aa35fc046 100644 --- a/examples/Custom_Hooks/campaign_stopping.py +++ b/examples/Custom_Hooks/campaign_stopping.py @@ -138,7 +138,7 @@ def stop_on_PI( f"Currently, only search spaces of type '{SearchSpaceType.DISCRETE}' are " f"accepted." ) - candidates, _ = searchspace.discrete.get_candidates() + candidates = searchspace.discrete.get_candidates() acqf = ProbabilityOfImprovement() pi = self.acquisition_values( candidates, searchspace, objective, measurements, acquisition_function=acqf diff --git a/examples/Custom_Hooks/probability_of_improvement.py b/examples/Custom_Hooks/probability_of_improvement.py index 70511fffeb..4a87aba47f 100644 --- a/examples/Custom_Hooks/probability_of_improvement.py +++ b/examples/Custom_Hooks/probability_of_improvement.py @@ -79,7 +79,7 @@ def extract_pi( f"Currently, only search spaces of type '{SearchSpaceType.DISCRETE}' are " f"accepted." ) - candidates, _ = searchspace.discrete.get_candidates() + candidates = searchspace.discrete.get_candidates() acqf = ProbabilityOfImprovement() pi = self.acquisition_values( candidates, searchspace, objective, measurements, acquisition_function=acqf diff --git a/examples/Custom_Surrogates/custom_pretrained.py b/examples/Custom_Surrogates/custom_pretrained.py index 5e408fd4b4..d3cff830ed 100644 --- a/examples/Custom_Surrogates/custom_pretrained.py +++ b/examples/Custom_Surrogates/custom_pretrained.py @@ -55,7 +55,7 @@ # Its purpose is to show the workflow for using pre-trained surrogates in BayBE. searchspace = SearchSpace.from_product(parameters=parameters, constraints=None) -train_x = to_tensor(searchspace.discrete.comp_rep) +train_x = to_tensor(searchspace.transform(searchspace.discrete.get_candidates())) train_y = torch.rand(train_x.size(dim=0)) # train with a random y vector # Define model and fit diff --git a/examples/Mixtures/slot_based.py b/examples/Mixtures/slot_based.py index ee1cef9a7c..9b5d0290e9 100644 --- a/examples/Mixtures/slot_based.py +++ b/examples/Mixtures/slot_based.py @@ -185,11 +185,12 @@ space = SubspaceDiscrete.from_product(parameters=parameters, constraints=constraints) +candidates = space.get_candidates() print( pretty_print_df( - space.exp_rep, - max_rows=len(space.exp_rep), - max_columns=len(space.exp_rep.columns), + candidates, + max_rows=len(candidates), + max_columns=len(candidates.columns), ) ) @@ -222,9 +223,9 @@ # Let us programmatically assert that all constraints are satisfied: -amounts = space.exp_rep[["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"]] -labels = space.exp_rep[["Slot1_Label", "Slot2_Label", "Slot3_Label"]] -slots = space.exp_rep.apply( +amounts = candidates[["Slot1_Amount", "Slot2_Amount", "Slot3_Amount"]] +labels = candidates[["Slot1_Label", "Slot2_Label", "Slot3_Label"]] +slots = candidates.apply( lambda row: pd.Series( [(row[f"Slot{k}_Label"], row[f"Slot{k}_Amount"]) for k in range(1, 4)] ), diff --git a/tests/constraints/test_cardinality_constraint_discrete.py b/tests/constraints/test_cardinality_constraint_discrete.py index 1da1a92ea8..bc5d56fdca 100644 --- a/tests/constraints/test_cardinality_constraint_discrete.py +++ b/tests/constraints/test_cardinality_constraint_discrete.py @@ -46,7 +46,7 @@ def test_cardinality_constraint_discrete( # Assert that cardinality constraint is fulfilled assert ( - (searchspace.discrete.exp_rep != 0.0) + (searchspace.discrete.get_candidates() != 0.0) .sum(axis=1) .between(min_cardinality, max_cardinality) .all() diff --git a/tests/constraints/test_constraints_discrete.py b/tests/constraints/test_constraints_discrete.py index 9273ae13bf..a0add6ed26 100644 --- a/tests/constraints/test_constraints_discrete.py +++ b/tests/constraints/test_constraints_discrete.py @@ -27,10 +27,11 @@ def fixture_n_grid_points(request): @pytest.mark.parametrize("constraint_names", [["Constraint_1"]]) def test_simple_dependency(campaign, n_grid_points, mock_substances, mock_categories): """Test declaring dependencies by declaring them in a single constraints entry.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number entries with both switches on num_entries = ( - (campaign.searchspace.discrete.exp_rep["Switch_1"] == "on") - & (campaign.searchspace.discrete.exp_rep["Switch_2"] == "right") + (candidates["Switch_1"] == "on") & (candidates["Switch_2"] == "right") ).sum() assert num_entries == n_grid_points * len(mock_substances) * len( mock_categories @@ -38,22 +39,19 @@ def test_simple_dependency(campaign, n_grid_points, mock_substances, mock_catego # Number entries with Switch_1 off num_entries = ( - (campaign.searchspace.discrete.exp_rep["Switch_1"] == "off") - & (campaign.searchspace.discrete.exp_rep["Switch_2"] == "right") + (candidates["Switch_1"] == "off") & (candidates["Switch_2"] == "right") ).sum() assert num_entries == len(mock_categories) * len(mock_categories) # Number entries with both switches on num_entries = ( - (campaign.searchspace.discrete.exp_rep["Switch_1"] == "on") - & (campaign.searchspace.discrete.exp_rep["Switch_2"] == "left") + (candidates["Switch_1"] == "on") & (candidates["Switch_2"] == "left") ).sum() assert num_entries == n_grid_points * len(mock_substances) # Number entries with both switches on num_entries = ( - (campaign.searchspace.discrete.exp_rep["Switch_1"] == "off") - & (campaign.searchspace.discrete.exp_rep["Switch_2"] == "left") + (candidates["Switch_1"] == "off") & (candidates["Switch_2"] == "left") ).sum() assert num_entries == 1 @@ -67,28 +65,26 @@ def test_simple_dependency(campaign, n_grid_points, mock_substances, mock_catego ) def test_exclusion(campaign, mock_substances): """Tests exclusion constraint.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number of entries with either first/second substance and a temperature above 151 num_entries = ( - campaign.searchspace.discrete.exp_rep["Temperature"].apply(lambda x: x > 151) - & campaign.searchspace.discrete.exp_rep["Solvent_1"].apply( - lambda x: x in list(mock_substances)[:2] - ) + candidates["Temperature"].apply(lambda x: x > 151) + & candidates["Solvent_1"].apply(lambda x: x in list(mock_substances)[:2]) ).sum() assert num_entries == 0 # Number of entries with either last / second last substance and a pressure above 5 num_entries = ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x > 5) - & campaign.searchspace.discrete.exp_rep["Solvent_1"].apply( - lambda x: x in list(mock_substances)[-2:] - ) + candidates["Pressure"].apply(lambda x: x > 5) + & candidates["Solvent_1"].apply(lambda x: x in list(mock_substances)[-2:]) ).sum() assert num_entries == 0 # Number of entries with pressure below 3 and temperature above 120 num_entries = ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x < 3) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply(lambda x: x > 120) + candidates["Pressure"].apply(lambda x: x < 3) + & candidates["Temperature"].apply(lambda x: x > 120) ).sum() assert num_entries == 0 @@ -97,11 +93,10 @@ def test_exclusion(campaign, mock_substances): @pytest.mark.parametrize("constraint_names", [["Constraint_8"]]) def test_prodsum1(campaign): """Tests sum constraint.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number of entries with 1,2-sum above 150 - num_entries = ( - campaign.searchspace.discrete.exp_rep[["Fraction_1", "Fraction_2"]].sum(axis=1) - > 150.0 - ).sum() + num_entries = (candidates[["Fraction_1", "Fraction_2"]].sum(axis=1) > 150.0).sum() assert num_entries == 0 @@ -109,11 +104,10 @@ def test_prodsum1(campaign): @pytest.mark.parametrize("constraint_names", [["Constraint_9"]]) def test_prodsum2(campaign): """Tests product constrain.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number of entries with product under 30 - num_entries = ( - campaign.searchspace.discrete.exp_rep[["Fraction_1", "Fraction_2"]].prod(axis=1) - < 30 - ).sum() + num_entries = (candidates[["Fraction_1", "Fraction_2"]].prod(axis=1) < 30).sum() assert num_entries == 0 @@ -121,9 +115,10 @@ def test_prodsum2(campaign): @pytest.mark.parametrize("constraint_names", [["Constraint_10"]]) def test_prodsum3(campaign): """Tests exact sum constraint.""" + candidates = campaign.searchspace.discrete.get_candidates() # Number of entries with sum unequal to 100 num_entries = ( - campaign.searchspace.discrete.exp_rep[["Fraction_1", "Fraction_2"]] + candidates[["Fraction_1", "Fraction_2"]] .sum(axis=1) .apply(lambda x: x - 100.0) .abs() @@ -142,11 +137,11 @@ def test_prodsum3(campaign): ) def test_mixture(campaign, n_grid_points, mock_substances): """Tests various constraints in a mixture use case.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number of searchspace entries where fractions do not sum to 100.0 num_entries = ( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] + candidates[["Fraction_1", "Fraction_2", "Fraction_3"]] .sum(axis=1) .apply(lambda x: x - 100.0) .abs() @@ -157,23 +152,16 @@ def test_mixture(campaign, n_grid_points, mock_substances): # Number of searchspace entries that have duplicate solvent labels num_entries = ( - campaign.searchspace.discrete.exp_rep[["Solvent_1", "Solvent_2", "Solvent_3"]] - .nunique(axis=1) - .ne(3) - .sum() + candidates[["Solvent_1", "Solvent_2", "Solvent_3"]].nunique(axis=1).ne(3).sum() ) assert num_entries == 0 # Number of searchspace entries with permutation-invariant combinations num_entries = ( - campaign.searchspace.discrete.exp_rep[["Solvent_1", "Solvent_2", "Solvent_3"]] + candidates[["Solvent_1", "Solvent_2", "Solvent_3"]] .apply(frozenset, axis=1) .to_frame() - .join( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] - ) + .join(candidates[["Fraction_1", "Fraction_2", "Fraction_3"]]) .duplicated() .sum() ) @@ -181,12 +169,7 @@ def test_mixture(campaign, n_grid_points, mock_substances): # Number of unique 1-solvent entries num_entries = ( - ( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] - == 0.0 - ) + (candidates[["Fraction_1", "Fraction_2", "Fraction_3"]] == 0.0) .sum(axis=1) .eq(2) .sum() @@ -195,12 +178,7 @@ def test_mixture(campaign, n_grid_points, mock_substances): # Number of unique 2-solvent entries num_entries = ( - ( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] - == 0.0 - ) + (candidates[["Fraction_1", "Fraction_2", "Fraction_3"]] == 0.0) .sum(axis=1) .eq(1) .sum() @@ -209,12 +187,7 @@ def test_mixture(campaign, n_grid_points, mock_substances): # Number of unique 3-solvent entries num_entries = ( - ( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] - == 0.0 - ) + (candidates[["Fraction_1", "Fraction_2", "Fraction_3"]] == 0.0) .sum(axis=1) .eq(0) .sum() @@ -234,24 +207,26 @@ def test_mixture(campaign, n_grid_points, mock_substances): @pytest.mark.parametrize("constraint_names", [["Constraint_13"]]) def test_custom(campaign): """Tests custom constraint (uses config from exclude test).""" + candidates = campaign.searchspace.discrete.get_candidates() + num_entries = ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x > 5) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply(lambda x: x > 120) - & campaign.searchspace.discrete.exp_rep["Solvent_1"].eq("water") + candidates["Pressure"].apply(lambda x: x > 5) + & candidates["Temperature"].apply(lambda x: x > 120) + & candidates["Solvent_1"].eq("water") ).sum() assert num_entries == 0 ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x > 3) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply(lambda x: x > 180) - & campaign.searchspace.discrete.exp_rep["Solvent_1"].eq("C2") + candidates["Pressure"].apply(lambda x: x > 3) + & candidates["Temperature"].apply(lambda x: x > 180) + & candidates["Solvent_1"].eq("C2") ).sum() assert num_entries == 0 ( - campaign.searchspace.discrete.exp_rep["Pressure"].apply(lambda x: x > 3) - & campaign.searchspace.discrete.exp_rep["Temperature"].apply(lambda x: x < 150) - & campaign.searchspace.discrete.exp_rep["Solvent_1"].eq("C3") + candidates["Pressure"].apply(lambda x: x > 3) + & candidates["Temperature"].apply(lambda x: x < 150) + & candidates["Solvent_1"].eq("C3") ).sum() assert num_entries == 0 @@ -263,13 +238,12 @@ def test_custom(campaign): @pytest.mark.parametrize("constraint_names", [["Constraint_14"]]) def test_cardinality(campaign): """Test discrete cardinality constraint.""" + candidates = campaign.searchspace.discrete.get_candidates() + # Number of non-zeros - non_zeros = ( - campaign.searchspace.discrete.exp_rep[ - ["Fraction_1", "Fraction_2", "Fraction_3"] - ] - != 0.0 - ).sum(axis=1) + non_zeros = (candidates[["Fraction_1", "Fraction_2", "Fraction_3"]] != 0.0).sum( + axis=1 + ) # number of non-zeros fulfills cardinality min_cardinality = 1 diff --git a/tests/hypothesis_strategies/alternative_creation/test_searchspace.py b/tests/hypothesis_strategies/alternative_creation/test_searchspace.py index a1de0b4a2e..506ef19c29 100644 --- a/tests/hypothesis_strategies/alternative_creation/test_searchspace.py +++ b/tests/hypothesis_strategies/alternative_creation/test_searchspace.py @@ -108,7 +108,7 @@ def test_discrete_searchspace_creation_from_degenerate_dataframe(): """A degenerate dataframe with index but no columns yields an empty space.""" df = pd.DataFrame(index=[0]) subspace = SubspaceDiscrete.from_dataframe(df) - assert_frame_equal(subspace.exp_rep, pd.DataFrame()) + assert_frame_equal(subspace.get_candidates(), pd.DataFrame()) @pytest.mark.parametrize("boundary_only", (False, True)) @@ -139,10 +139,11 @@ def test_discrete_space_creation_from_simplex_inner(parameters, boundary_only): max_sum, parameters, boundary_only=boundary_only, tolerance=tolerance ) + candidates = subspace.get_candidates() if boundary_only: - assert np.allclose(subspace.exp_rep.sum(axis=1), max_sum, atol=tolerance) + assert np.allclose(candidates.sum(axis=1), max_sum, atol=tolerance) else: - assert (subspace.exp_rep.sum(axis=1) <= max_sum + tolerance).all() + assert (candidates.sum(axis=1) <= max_sum + tolerance).all() p_d1 = NumericalDiscreteParameter(name="d1", values=[0.0, 0.5, 1.0]) @@ -169,10 +170,11 @@ def test_discrete_space_creation_from_simplex_mixed( product_parameters=product_parameters, boundary_only=False, ) - assert len(subspace.exp_rep) == n_elements # <-- (# simplex part) x (# task part) - assert not any(subspace.exp_rep.duplicated()) - assert len(subspace.parameters) == len(subspace.exp_rep.columns) - assert all(p.name in subspace.exp_rep.columns for p in subspace.parameters) + candidates = subspace.get_candidates() + assert len(candidates) == n_elements # <-- (# simplex part) x (# task part) + assert not any(candidates.duplicated()) + assert len(subspace.parameters) == len(candidates.columns) + assert all(p.name in candidates.columns for p in subspace.parameters) @pytest.mark.parametrize("boundary_only", (False, True)) @@ -188,10 +190,11 @@ def test_discrete_space_creation_from_simplex_restricted(boundary_only): max_nonzero=4, boundary_only=True, ) - n_nonzero = (subspace.exp_rep > 0.0).sum(axis=1) + candidates = subspace.get_candidates() + n_nonzero = (candidates > 0.0).sum(axis=1) if boundary_only: - assert np.allclose(subspace.exp_rep.sum(axis=1), 1.0) + assert np.allclose(candidates.sum(axis=1), 1.0) assert n_nonzero.min() == 2 assert n_nonzero.max() == 4 - assert len(subspace.parameters) == len(subspace.exp_rep.columns) - assert all(p.name in subspace.exp_rep.columns for p in subspace.parameters) + assert len(subspace.parameters) == len(candidates.columns) + assert all(p.name in candidates.columns for p in subspace.parameters) diff --git a/tests/test_campaign.py b/tests/test_campaign.py index ca53f0755c..1cb15fc460 100644 --- a/tests/test_campaign.py +++ b/tests/test_campaign.py @@ -99,7 +99,7 @@ def test_candidate_toggling(constraints, exclude, complement): ] ) campaign = Campaign(subspace) - all_candidates = campaign.searchspace.discrete.exp_rep + all_candidates = campaign.searchspace.discrete.get_candidates() # Set initial state to the opposite of the targeted value if not exclude: @@ -410,7 +410,7 @@ def test_posterior_stats_invalid_input(ongoing_campaign, stats, error, match): @pytest.mark.parametrize("batch_size", [3], ids=["b3"]) def test_acquisition_value_computation(ongoing_campaign: Campaign): """Acquisition values have the expected shape.""" - df = ongoing_campaign.searchspace.discrete.exp_rep + df = ongoing_campaign.searchspace.discrete.get_candidates() assert not df.empty # Using campaign acquisition function diff --git a/tests/test_deprecations.py b/tests/test_deprecations.py index 5112d73215..e478a25e4f 100644 --- a/tests/test_deprecations.py +++ b/tests/test_deprecations.py @@ -622,7 +622,7 @@ def test_legacy_recommended_metadata_deserialization(ongoing_campaign): del data["excluded_experiments"] # Construct legacy searchspace_metadata with a "recommended" column - exp_rep = ongoing_campaign.searchspace.discrete.exp_rep + exp_rep = ongoing_campaign.searchspace.discrete.get_candidates() metadata = pd.DataFrame(False, index=exp_rep.index, columns=[_RECOMMENDED]) idxs = rec.index[:n_recommended] metadata.loc[idxs, _RECOMMENDED] = True @@ -679,7 +679,7 @@ def test_legacy_measured_metadata_deserialization(): data = campaign.to_dict() metadata = pd.DataFrame( {_MEASURED: [True, False, False]}, - index=campaign.searchspace.discrete.exp_rep.index, + index=campaign.searchspace.discrete.get_candidates().index, ) data["searchspace_metadata"] = converter.unstructure(metadata) @@ -703,7 +703,7 @@ def test_legacy_excluded_metadata_deserialization(): # and no excluded_experiments field data = campaign.to_dict() del data["excluded_experiments"] - exp_rep = campaign.searchspace.discrete.exp_rep + exp_rep = campaign.searchspace.discrete.get_candidates() metadata = pd.DataFrame( {_EXCLUDED: [True, False, True]}, index=exp_rep.index, @@ -878,12 +878,16 @@ def test_deprecated_constraints_argument_from_product(): parameters=[p, q], constraints=[no_dup_c] ) + ss_both_candidates = ss_both.get_candidates() + ss_none_candidates = ss_none.get_candidates() + ss_with_batch_candidates = ss_with_batch.get_candidates() + ss_without_batch_candidates = ss_without_batch.get_candidates() assert ss_both.batch_constraints == ss_with_batch.batch_constraints == (batch_c,) assert ss_without_batch.batch_constraints == ss_none.batch_constraints == () - assert_frame_equal(ss_both.exp_rep, ss_without_batch.exp_rep) - assert_frame_equal(ss_with_batch.exp_rep, ss_none.exp_rep) - assert len(ss_both.exp_rep) == 2 - assert len(ss_none.exp_rep) == 4 + assert_frame_equal(ss_both_candidates, ss_without_batch_candidates) + assert_frame_equal(ss_with_batch_candidates, ss_none_candidates) + assert len(ss_both_candidates) == 2 + assert len(ss_none_candidates) == 4 def test_deprecated_constraints_batch_property(): @@ -900,3 +904,19 @@ def test_deprecated_constraints_batch_property(): result = subspace.constraints_batch assert result == subspace.batch_constraints == (batch_c,) + + +def test_deprecated_exp_rep_property(): + """Accessing ``exp_rep`` on ``SubspaceDiscrete`` emits a deprecation warning.""" + subspace = CategoricalParameter("p", ["a", "b"]).to_subspace() + with pytest.warns(DeprecationWarning, match="Accessing 'exp_rep'"): + result = subspace.exp_rep + assert_frame_equal(result, subspace.get_candidates()) + + +def test_deprecated_comp_rep_property(): + """Accessing ``comp_rep`` on ``SubspaceDiscrete`` emits a deprecation warning.""" + subspace = CategoricalParameter("p", ["a", "b"]).to_subspace() + with pytest.warns(DeprecationWarning, match="Accessing 'comp_rep'"): + result = subspace.comp_rep + assert_frame_equal(result, subspace.transform(subspace.get_candidates())) diff --git a/tests/test_searchspace.py b/tests/test_searchspace.py index d85ec9f96c..660a4a0a6f 100644 --- a/tests/test_searchspace.py +++ b/tests/test_searchspace.py @@ -95,7 +95,7 @@ def test_discrete_searchspace_creation_from_dataframe(): assert searchspace.type == SearchSpaceType.DISCRETE assert searchspace.parameters == all_params - assert df.equals(searchspace.discrete.exp_rep) + assert df.equals(searchspace.discrete.get_candidates()) def test_discrete_from_dataframe_dtype_consistency(): @@ -114,7 +114,7 @@ def test_discrete_from_dataframe_dtype_consistency(): next(p for p in subspace.parameters if p.name == "C"), NumericalDiscreteParameter, ) - assert pd.api.types.is_float_dtype(subspace.exp_rep["C"]) + assert pd.api.types.is_float_dtype(subspace.get_candidates()["C"]) def test_invalid_simplex_creating_with_overlapping_parameters(): @@ -157,11 +157,12 @@ def test_from_simplex_with_degenerate_parameter_count(simplex_parameters, expect product_parameters=product_parameters, ) - assert len(subspace.exp_rep) == expected_len + candidates = subspace.get_candidates() + assert len(candidates) == expected_len if simplex_parameters: simplex_cols = [p.name for p in simplex_parameters] - assert all(subspace.exp_rep[simplex_cols].sum(axis=1) <= 1.0) + assert all(candidates[simplex_cols].sum(axis=1) <= 1.0) def test_continuous_searchspace_creation_from_bounds(): @@ -232,10 +233,10 @@ def test_searchspace_memory_estimate(searchspace: SearchSpace): estimate_exp = estimate.exp_rep_bytes estimate_comp = estimate.comp_rep_bytes - actual_exp = searchspace.discrete.exp_rep.memory_usage(deep=True, index=False).sum() - actual_comp = searchspace.discrete.comp_rep.memory_usage( - deep=True, index=False - ).sum() + candidates = searchspace.discrete.get_candidates() + candidates_comp = searchspace.discrete.transform(candidates) + actual_exp = candidates.memory_usage(deep=True, index=False).sum() + actual_comp = candidates_comp.memory_usage(deep=True, index=False).sum() assert 0.95 <= estimate_exp / actual_exp <= 1.05, ( "Exp: ", @@ -332,8 +333,9 @@ def test_task_parameter_active_values_validation(): searchspace = SearchSpace.from_dataframe( target_df, parameters=[num_param, task_param, cat_param] ) - assert len(searchspace.discrete.exp_rep) == 1 - assert all(searchspace.discrete.exp_rep["task"] == "target") + candidates = searchspace.discrete.get_candidates() + assert len(candidates) == 1 + assert all(candidates["task"] == "target") @pytest.mark.parametrize("parameter_names", [["Conti_finite1", "Conti_finite2"]]) diff --git a/tests/utils/test_dataframe.py b/tests/utils/test_dataframe.py index 9da4a2c268..916c1895d7 100644 --- a/tests/utils/test_dataframe.py +++ b/tests/utils/test_dataframe.py @@ -110,7 +110,7 @@ def test_degenerate_rows_invalid_input(): ) def test_fuzzy_row_match(searchspace, noise, duplicated): """Fuzzy row matching returns expected indices.""" - left_df = searchspace.discrete.exp_rep.copy() + left_df = searchspace.discrete.get_candidates().copy() selected = np.random.choice(left_df.index, 4, replace=False) right_df = left_df.loc[selected].reset_index(drop=True) @@ -155,7 +155,7 @@ def test_fuzzy_row_match(searchspace, noise, duplicated): @pytest.mark.parametrize("invalid", ["left", "right"]) def test_invalid_fuzzy_row_match(searchspace, invalid): """Returns expected errors when dataframes don't contain all expected columns.""" - left_df = searchspace.discrete.exp_rep.copy() + left_df = searchspace.discrete.get_candidates().copy() selected = np.random.choice(left_df.index, 4, replace=False) right_df = left_df.loc[selected].copy() diff --git a/tests/utils/test_sampling_algorithms.py b/tests/utils/test_sampling_algorithms.py index ffe015721f..1a34a64257 100644 --- a/tests/utils/test_sampling_algorithms.py +++ b/tests/utils/test_sampling_algorithms.py @@ -222,7 +222,9 @@ def test_fps_utility_expected_errors(points, n_requested, initialization, match) def test_fps_recommender_utility_initialization_indices(searchspace): """FPS utilities return expected indices when initialization indices are used.""" - points = searchspace.discrete.comp_rep.values + candidates = searchspace.discrete.get_candidates() + candidates_comp = searchspace.discrete.transform(candidates) + points = candidates_comp.values inds1 = farthest_point_sampling(points, 3, initialization=[0]) inds2 = farthest_point_sampling(points, 3, initialization=[1, 2]) @@ -269,7 +271,9 @@ def test_fps_recommender_result_consistency(searchspace): """FPS utilities return consistent results.""" from baybe._optional.fpsample import fps_sampling - points = searchspace.discrete.comp_rep.values + candidates = searchspace.discrete.get_candidates() + candidates_comp = searchspace.discrete.transform(candidates) + points = candidates_comp.values inds1 = fps_sampling(points, 3, start_idx=0).tolist() inds2 = farthest_point_sampling( points, 3, initialization=[0], random_tie_break=False