Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `candidates_exp` argument removed from `SubspaceDiscrete.subset_masks`,
`SubspaceDiscrete.sample_subset_masks`, `SearchSpace.subsets`, and
`SearchSpace.sample_subsets`
- `SubspaceDiscrete.get_candidates` now returns only the experimental representation
instead of a tuple of experimental and computational representations (use
`transform(get_candidates())` to obtain the computational representation)

### Added
- `narwhals` as a hard dependency
Expand Down Expand Up @@ -50,6 +52,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
access batch-level constraints; filtering constraints are only needed during subspace
construction and are thus no longer stored).
- `SubspaceDiscrete.constraints_batch` property (use `batch_constraints` instead)
- `SubspaceDiscrete.exp_rep` attribute (use `get_candidates()` instead)
- `SubspaceDiscrete.comp_rep` attribute (use `transform(get_candidates())` instead)

## [0.15.0] - 2026-06-11
### Breaking Changes
Expand Down
4 changes: 3 additions & 1 deletion baybe/acquisition/acqfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,9 @@ def get_integration_points(self, searchspace: SearchSpace) -> pd.DataFrame:

# Discrete part
if not searchspace.discrete.is_empty:
candidates_discrete = searchspace.discrete.comp_rep
candidates_discrete = searchspace.discrete.transform(
searchspace.discrete.get_candidates()
)
Comment thread
AVHopp marked this conversation as resolved.
n_candidates = self.sampling_n_points or math.ceil(
self.sampling_fraction * len(candidates_discrete) # type: ignore[operator]
)
Expand Down
67 changes: 19 additions & 48 deletions baybe/campaign.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,39 +206,7 @@ def _default_recommended_experiments(self) -> pd.DataFrame:

@override
def __str__(self) -> str:
recommended_count = len(self._recommended_experiments)
exp_rep = self.searchspace.discrete.exp_rep
if self._measurements.empty or exp_rep.empty:
measured_count = 0
else:
measured_count = len(
fuzzy_row_match(exp_rep, self._measurements, self.parameters)
)
excluded_count = len(self._excluded_experiments)
n_elements = len(exp_rep)
searchspace_fields = [
to_string(
"Recommended:",
f"{recommended_count}/{n_elements}",
single_line=True,
),
to_string(
"Measured:",
f"{measured_count}/{n_elements}",
single_line=True,
),
to_string(
"Excluded:",
f"{excluded_count}/{n_elements}",
single_line=True,
),
]
metadata_fields = [
to_string("Discrete Subspace Meta Data", *searchspace_fields),
]
metadata = to_string("Meta Data", *metadata_fields)
fields = [metadata, self.searchspace, self.objective, self.recommender]

fields = [self.searchspace, self.objective, self.recommender]
return to_string(self.__class__.__name__, *fields)

@property
Expand Down Expand Up @@ -460,7 +428,7 @@ def toggle_discrete_candidates( # noqa: DOC501
# * Additional shortcuts might be possible.
self.clear_cache()

df = self.searchspace.discrete.exp_rep
df = self.searchspace.discrete.get_candidates()

if isinstance(constraints, pd.DataFrame):
# Determine the candidate subset to be toggled
Expand Down Expand Up @@ -561,12 +529,12 @@ def recommend(
if self.searchspace.type is SearchSpaceType.DISCRETE:
# TODO: This implementation should at some point be hidden behind an
# appropriate public interface, like `SubspaceDiscrete.filter()`
exp_rep = self.searchspace.discrete.exp_rep
mask_todrop = pd.Series(False, index=exp_rep.index)
candidates = self.searchspace.discrete.get_candidates()
mask_todrop = pd.Series(False, index=candidates.index)
if not self._excluded_experiments.empty:
mask_todrop |= (
pd.merge(
exp_rep,
candidates,
self._excluded_experiments,
indicator=True,
how="left",
Expand All @@ -580,7 +548,7 @@ def recommend(
):
mask_todrop |= (
pd.merge(
exp_rep,
candidates,
self._recommended_experiments,
indicator=True,
how="left",
Expand All @@ -593,7 +561,7 @@ def recommend(
and not self._measurements.empty
):
measured_idxs = fuzzy_row_match(
exp_rep, self._measurements, self.parameters
candidates, self._measurements, self.parameters
)
mask_todrop.loc[measured_idxs] = True
if (
Expand All @@ -602,7 +570,7 @@ def recommend(
):
mask_todrop |= (
pd.merge(
exp_rep,
candidates,
pending_experiments,
indicator=True,
how="left",
Expand All @@ -613,7 +581,7 @@ def recommend(
searchspace = evolve(
self.searchspace,
discrete=evolve(
self.searchspace.discrete, exp_rep=exp_rep.loc[~mask_todrop]
self.searchspace.discrete, exp_rep=candidates.loc[~mask_todrop]
),
)
else:
Expand Down Expand Up @@ -1100,13 +1068,16 @@ def _structure_campaign(d: dict, cl: type) -> Campaign:

# >>>>>>>>>> Deprecation
# Post-structure reconstruction from legacy metadata indices
if legacy_recommended_idxs is not None:
rec_df = campaign.searchspace.discrete.exp_rep.loc[legacy_recommended_idxs]
campaign._recommended_experiments = rec_df.reset_index(drop=True)

if legacy_excluded_idxs is not None:
excl_df = campaign.searchspace.discrete.exp_rep.loc[legacy_excluded_idxs]
campaign._excluded_experiments = excl_df.reset_index(drop=True)
if legacy_recommended_idxs is not None or legacy_excluded_idxs is not None:
candidates = campaign.searchspace.discrete.get_candidates()
if legacy_recommended_idxs is not None:
campaign._recommended_experiments = candidates.loc[
legacy_recommended_idxs
].reset_index(drop=True)
if legacy_excluded_idxs is not None:
campaign._excluded_experiments = candidates.loc[
legacy_excluded_idxs
].reset_index(drop=True)

# Fix schema of empty DataFrames from legacy serialization
if campaign._measurements.columns.empty:
Expand Down
2 changes: 1 addition & 1 deletion baybe/recommenders/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def recommend(

# Get one random discrete point that will be attached when evaluating the
# acquisition function in the discrete space.
disc_part = searchspace.discrete.comp_rep.loc[disc_rec.index].sample(1)
disc_part = searchspace.discrete.transform(disc_rec).sample(1)
disc_part_tensor = to_tensor(disc_part).unsqueeze(-2)

# Setup a fresh acquisition function for the continuous recommender
Expand Down
4 changes: 3 additions & 1 deletion baybe/recommenders/pure/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,9 @@ def _recommend_with_discrete_parts(

# Check if enough candidates are left
# TODO [15917]: This check is not perfectly correct.
if (not is_hybrid_space) and (len(searchspace.discrete.exp_rep) < batch_size):
if (not is_hybrid_space) and (
len(searchspace.discrete.get_candidates()) < batch_size
):
raise NotEnoughPointsLeftError(
f"Using the current settings, there are fewer than {batch_size} "
f"possible data points left to recommend."
Expand Down
9 changes: 4 additions & 5 deletions baybe/recommenders/pure/bayesian/botorch/discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def recommend_discrete_with_subsets(
"""
import torch

candidates = subspace_discrete.get_candidates()
masks: Iterable[npt.NDArray[np.bool_]]
if subspace_discrete.n_subsets <= recommender.max_n_subsets:
masks = subspace_discrete.subset_masks(min_candidates=batch_size)
Expand All @@ -56,9 +57,7 @@ def make_callable(
mask: np.ndarray,
) -> Callable[[], tuple[pd.DataFrame, Tensor]]:
def optimize() -> tuple[pd.DataFrame, Tensor]:
subset_subspace = evolve(
subspace_discrete, exp_rep=subspace_discrete.exp_rep.loc[mask]
)
subset_subspace = evolve(subspace_discrete, exp_rep=candidates.loc[mask])

rec = recommend_discrete_without_subsets(
recommender, subset_subspace, batch_size
Expand Down Expand Up @@ -118,7 +117,7 @@ def recommend_discrete_without_subsets(
from botorch.optim import optimize_acqf_discrete

# Determine the next set of points to be tested
candidates_comp = subspace_discrete.comp_rep
candidates_comp = subspace_discrete.transform(subspace_discrete.get_candidates())
points, _ = optimize_acqf_discrete(
recommender._botorch_acqf, batch_size, to_tensor(candidates_comp)
)
Expand All @@ -137,4 +136,4 @@ def recommend_discrete_without_subsets(
)["index"]
)

return subspace_discrete.exp_rep.loc[idxs]
return subspace_discrete.get_candidates().loc[idxs]
10 changes: 6 additions & 4 deletions baybe/recommenders/pure/bayesian/botorch/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ def recommend_hybrid_without_subsets(
from botorch.optim import optimize_acqf_mixed

# Transform discrete candidates
# (Create a shallow copy to avoid in-place modifications of the original dataframe)
candidates_comp = searchspace.discrete.comp_rep.copy(deep=False)
candidates_comp = searchspace.discrete.transform(
searchspace.discrete.get_candidates()
)

# Calculate the number of samples from the given percentage
n_candidates = math.ceil(
Expand Down Expand Up @@ -144,7 +145,7 @@ def recommend_hybrid_without_subsets(
).set_index("index")

# Get experimental representation of discrete part
rec_disc_exp = searchspace.discrete.exp_rep.loc[merged.index]
rec_disc_exp = searchspace.discrete.get_candidates().loc[merged.index]
Comment thread
AdrianSosic marked this conversation as resolved.
Outdated

# Combine discrete and continuous parts
rec_exp = pd.concat(
Expand Down Expand Up @@ -186,6 +187,7 @@ def recommend_hybrid_with_subsets(
# NOTE: No min_discrete_candidates filtering in hybrid spaces because
# optimize_acqf_mixed can produce multiple recommendations from a single
# discrete candidate by varying continuous parameters.
candidates = searchspace.discrete.get_candidates()
combined_masks: Iterable[tuple[np.ndarray, frozenset[str]]]
if searchspace.n_subsets <= recommender.max_n_subsets:
combined_masks = searchspace.subsets()
Expand All @@ -201,7 +203,7 @@ def optimize() -> tuple[pd.DataFrame, Tensor]:

mod_disc = evolve(
searchspace.discrete,
exp_rep=searchspace.discrete.exp_rep.loc[d_mask],
exp_rep=candidates.loc[d_mask],
)
mod_cont = (
subspace_c._enforce_cardinality_constraints(c_inactive_params)
Expand Down
11 changes: 6 additions & 5 deletions baybe/recommenders/pure/nonpredictive/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,13 +107,14 @@ def _recommend_discrete(
from sklearn.preprocessing import StandardScaler

# TODO [Scaling]: scaling should be handled by search space object
candidates_comp = subspace_discrete.transform(
subspace_discrete.get_candidates()
Comment thread
fabianliebig marked this conversation as resolved.
Outdated
)
scaler = StandardScaler()
scaler.fit(subspace_discrete.comp_rep)
scaler.fit(candidates_comp)

# Scale candidates
candidates_scaled = np.ascontiguousarray(
scaler.transform(subspace_discrete.comp_rep)
)
candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp))

# Set model parameters and perform fit
model = self._get_model_cls()(
Expand All @@ -129,7 +130,7 @@ def _recommend_discrete(
selection = self._make_selection_default(model, candidates_scaled)

# Select rows by positional indices and return the corresponding subset
return subspace_discrete.exp_rep.iloc[selection]
return subspace_discrete.get_candidates().iloc[selection]

@override
def __str__(self) -> str:
Expand Down
10 changes: 6 additions & 4 deletions baybe/recommenders/pure/nonpredictive/sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def _recommend_hybrid(
if searchspace.type is SearchSpaceType.CONTINUOUS:
return cont_random

candidates_exp = searchspace.discrete.exp_rep
candidates_exp = searchspace.discrete.get_candidates()

# Restrict to a random subset if subset-generating constraints are present
if searchspace.discrete.n_subsets > 0:
Expand Down Expand Up @@ -152,11 +152,13 @@ def _recommend_discrete(
from sklearn.preprocessing import StandardScaler

# TODO [Scaling]: scaling should be handled by search space object
candidates_comp = subspace_discrete.transform(
subspace_discrete.get_candidates()
)
scaler = StandardScaler()
scaler.fit(subspace_discrete.comp_rep)
scaler.fit(candidates_comp)

# Scale and sample
candidates_comp = subspace_discrete.comp_rep
candidates_scaled = np.ascontiguousarray(scaler.transform(candidates_comp))

if active_settings.use_fpsample:
Expand All @@ -173,7 +175,7 @@ def _recommend_discrete(
initialization=self.initialization.value,
random_tie_break=self.random_tie_break,
)
return subspace_discrete.exp_rep.iloc[ilocs]
return subspace_discrete.get_candidates().iloc[ilocs]

@override
def __str__(self) -> str:
Expand Down
4 changes: 2 additions & 2 deletions baybe/searchspace/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from collections.abc import Collection, Iterable, Iterator, Sequence
from enum import Enum
from itertools import product
from typing import TYPE_CHECKING, ClassVar, cast
from typing import TYPE_CHECKING, ClassVar

import numpy as np
import numpy.typing as npt
Expand Down Expand Up @@ -263,7 +263,7 @@ def task_idx(self) -> int | None:
# appear first in the computational dataframe.
# 3. It assumes there exists exactly one task parameter
# --> Fix this when refactoring the data
return cast(int, self.discrete.comp_rep.columns.get_loc(task_param.name))
return self.discrete.comp_rep_columns.index(task_param.name)
Comment thread
AdrianSosic marked this conversation as resolved.

@property
def n_tasks(self) -> int:
Expand Down
Loading
Loading