diff --git a/pyomo/contrib/doe/doe.py b/pyomo/contrib/doe/doe.py index a13e6785bf4..36429ff7914 100644 --- a/pyomo/contrib/doe/doe.py +++ b/pyomo/contrib/doe/doe.py @@ -30,6 +30,7 @@ import json import logging import math +import sys from pyomo.common.dependencies import ( numpy as np, @@ -52,11 +53,15 @@ import pyomo.environ as pyo from pyomo.contrib.doe.utils import ( - check_FIM, + check_matrix, compute_FIM_metrics, _SMALL_TOLERANCE_DEFINITENESS, + snake_traversal_grid_sampling, ) +from pyomo.contrib.parmest.utils.model_utils import update_model_from_suffix + + from pyomo.opt import SolverStatus @@ -75,6 +80,11 @@ class FiniteDifferenceStep(Enum): backward = "backward" +class DesignSpaceTraversal(Enum): + snake_traversal = "snake_traversal" + nested_for_loop = "nested_for_loop" + + class DesignOfExperiments: def __init__( self, @@ -748,6 +758,9 @@ def _sequential_FIM(self, model=None): if self.scale_nominal_param_value: scale_factor *= v + # TODO: scale by response values (i.e., measurement values) + # if self.scale_response_values: + # scale_factor /= measurement_vals_np[:, col_1].mean() # Calculate the column of the sensitivity matrix self.seq_jac[:, i] = ( measurement_vals_np[:, col_1] - measurement_vals_np[:, col_2] @@ -1750,7 +1763,8 @@ def check_model_FIM(self, model=None, FIM=None): ) ) - check_FIM(FIM) + # Check FIM is positive definite and symmetric + check_matrix(FIM) self.logger.info( "FIM provided matches expected dimensions from model " @@ -1958,12 +1972,15 @@ def compute_FIM_full_factorial( 2, ), ) - except: + except Exception as err: self.logger.warning( - ":::::::::::Warning: Cannot converge this run.::::::::::::" + "Cannot converge this run during FIM computation: %s", err + ) + self.logger.debug( + "Traceback for failed FIM full-factorial run:", exc_info=True ) failures += 1 - self.logger.warning("failed count:", failures) + self.logger.warning("failed count: %s", failures) self._computed_FIM = np.zeros(self.prior_FIM.shape) @@ -1972,39 +1989,401 @@ def compute_FIM_full_factorial( FIM = self._computed_FIM - ( - det_FIM, - trace_cov, - trace_FIM, - E_vals, - E_vecs, - D_opt, - A_opt, - pseudo_A_opt, - E_opt, - ME_opt, - ) = compute_FIM_metrics(FIM) + fim_metrics = compute_FIM_metrics(FIM) # Append the values for each of the experiment inputs for k, v in model.experiment_inputs.items(): fim_factorial_results[k.name].append(pyo.value(k)) - fim_factorial_results["log10 D-opt"].append(D_opt) - fim_factorial_results["log10 A-opt"].append(A_opt) - fim_factorial_results["log10 pseudo A-opt"].append(pseudo_A_opt) - fim_factorial_results["log10 E-opt"].append(E_opt) - fim_factorial_results["log10 ME-opt"].append(ME_opt) - fim_factorial_results["eigval_min"].append(E_vals.min()) - fim_factorial_results["eigval_max"].append(E_vals.max()) - fim_factorial_results["det_FIM"].append(det_FIM) - fim_factorial_results["trace_cov"].append(trace_cov) - fim_factorial_results["trace_FIM"].append(trace_FIM) + fim_factorial_results["log10 D-opt"].append(fim_metrics.D_opt) + fim_factorial_results["log10 A-opt"].append(fim_metrics.A_opt) + fim_factorial_results["log10 pseudo A-opt"].append(fim_metrics.pseudo_A_opt) + fim_factorial_results["log10 E-opt"].append(fim_metrics.E_opt) + fim_factorial_results["log10 ME-opt"].append(fim_metrics.ME_opt) + fim_factorial_results["eigval_min"].append(fim_metrics.E_vals.min()) + fim_factorial_results["eigval_max"].append(fim_metrics.E_vals.max()) + fim_factorial_results["det_FIM"].append(fim_metrics.det_FIM) + fim_factorial_results["trace_cov"].append(fim_metrics.trace_cov) + fim_factorial_results["trace_FIM"].append(fim_metrics.trace_FIM) fim_factorial_results["solve_time"].append(time_set[-1]) self.fim_factorial_results = fim_factorial_results return self.fim_factorial_results + def compute_FIM_factorial( + self, + model=None, + design_vals: dict = None, + abs_step: list = None, + rel_step: list = None, + n_design_points: int = None, + method="sequential", + return_df=True, + traversal_scheme="snake_traversal", + file_name: str = None, + ): + """Perform a simulation-based factorial exploration of the experimental input + space (a.k.a. a ``grid search`` / ``parameter sweep``) to evaluate how FIM + metrics vary over the design space. + + Exactly one design-space specification mode must be provided: + + 1. ``design_vals``: + Explicit values for selected design variables (component keys). + 2. ``n_design_points``: + Uniform ``np.linspace(lb, ub, n_design_points)`` values for each design + variable using model bounds. + 3. ``abs_step`` and/or ``rel_step``: + Step-based values for each design variable using model bounds and: + ``delta_i = (ub_i - lb_i) * rel_step[i] + abs_step[i]``. + Values are generated from lower to upper bound in steps of ``delta_i``. + + Parameters + ---------- + model : DoE model, optional + Reserved for API compatibility. A fresh labeled model clone is built + internally for the factorial exploration. + design_vals : dict, optional + Mapping of design variable components to 1D array-like values: + ``{component: [values...]}``. Keys should be a subset of + ``model.experiment_inputs`` keys (or ``ComponentUID`` objects that + identify those components on a compatible model). Variables not + included are fixed at their model values. + abs_step : list, optional + Absolute step for each design variable. Used only when + ``design_vals`` and ``n_design_points`` are not provided. Length must + match the number of design variables in ``model.experiment_inputs``. + If provided without ``rel_step``, ``rel_step`` is treated as zeros. + rel_step : list, optional + Relative step for each design variable. Used only when + ``design_vals`` and ``n_design_points`` are not provided. Length must + match the number of design variables in ``model.experiment_inputs``. + If provided without ``abs_step``, ``abs_step`` is treated as zeros. + n_design_points : int, optional + Number of equally-spaced points per design variable generated from + bounds via :func:`np.linspace`. Requires finite lower and upper bounds. + method : str, optional + Method used for FIM computation. Options: ``kaug`` and + ``sequential``. Default: ``sequential``. + return_df : bool, optional + If ``True``, print results as a pandas DataFrame. + traversal_scheme : str, optional + Which scheme to use for initializing the design variables. + Options are ``snake_traversal`` and ``nested_for_loop``. + If ``snake_traversal`` is used, the design variables will be initialized + in a snake-like pattern where only one design variables change at a time. + If ``nested_for_loop`` is used, the design variables will be initialized + using a nested for loop. Default: "snake_traversal" + file_name : str, optional + If provided, save results to ``.json``. + + Returns + ------- + factorial_results: dict + A dictionary containing the results of the factorial design with the + following keys: + - keys of model's experiment_inputs + - "log10 D-opt": list of D-optimality values + - "log10 A-opt": list of A-optimality values + - "log10 pseudo A-opt": list of pseudo A-optimality values + - "log10 E-opt": list of E-optimality values + - "log10 ME-opt": list of ME-optimality values + - "eigval_min": list of minimum eigenvalues + - "eigval_max": list of maximum eigenvalues + - "det_FIM": list of determinants of the FIM + - "trace_cov": list of traces of covariance matrix + - "trace_FIM": list of traces of the FIM + - "solve_time": list of solve times + - "total_points": total number of points in the factorial design + - "success_count": number of successful runs + - "failure_count": number of failed runs + - "FIM_all": list of all FIMs computed for each point in the factorial + """ + + # Start timer + sp_timer = TicTocTimer() + sp_timer.tic(msg=None) + self.logger.info("Beginning Factorial Design.") + + # Make new model for factorial design + self.factorial_model = self.experiment.get_labeled_model( + **self.get_labeled_model_args + ).clone() + model = self.factorial_model + + # Group the mutually exclusive arguments for passing design values + # abs_step, and rel_step can be provided together or separately + num_des_args_provided = ( + (design_vals is not None) + + (n_design_points is not None) + + (abs_step is not None or rel_step is not None) + ) + + # Check the count and raise an error if more than one was provided + if num_des_args_provided > 1: + raise ValueError( + "design_vals, n_design_points, and abs_step/rel_step are " + "mutually exclusive." + ) + + # Check if at least one was provided + if num_des_args_provided == 0: + raise ValueError( + "Missing required argument: specify one of design_vals, " + "n_design_points, or abs_step/rel_step." + ) + + if design_vals is not None: + # Normalize design_vals keys to components on the cloned model. + # Preferred API uses component keys (like experiment_inputs). + design_name_map = {k.name: k for k in model.experiment_inputs.keys()} + normalized_design_vals = pyo.ComponentMap() + invalid_keys = [] + for key, val in design_vals.items(): + if isinstance(key, str): + mapped_key = None + elif isinstance(key, pyo.ComponentUID): + mapped_key = key.find_component_on(model) + else: + try: + mapped_key = pyo.ComponentUID(key).find_component_on(model) + except Exception: + mapped_key = None + + if mapped_key is None or mapped_key not in model.experiment_inputs: + invalid_keys.append(key) + continue + normalized_design_vals[mapped_key] = val + + if invalid_keys: + raise ValueError( + "design_vals keys must identify components in " + f"`model.experiment_inputs`. Invalid keys: {invalid_keys}. " + "Pass experiment input components (or ComponentUIDs), not names. " + "Valid experiment_inputs are: " + f"{list(design_name_map.keys())}." + ) + + # Get the design map keys that match the normalized design_values keys. + design_map_keys = [ + k for k in model.experiment_inputs.keys() if k in normalized_design_vals + ] + # This ensures that the order of the design_values keys matches the order + # of the design_map_keys so that design_point can be constructed correctly + # in the loop. + design_values = [normalized_design_vals[k] for k in design_map_keys] + + # Create a temporary suffix to pass in `update_model_from_suffix` + design_suff = pyo.Suffix(direction=pyo.Suffix.LOCAL) + design_suff.update((k, None) for k in design_map_keys) + + else: + design_keys = [k for k in model.experiment_inputs.keys()] + use_step_changes = (abs_step is not None) or (rel_step is not None) + if use_step_changes: + # Missing absolute/relative inputs default to zero steps. + if abs_step is None: + abs_step = [0.0] * len(design_keys) + if rel_step is None: + rel_step = [0.0] * len(design_keys) + + if len(abs_step) != len(design_keys): + raise ValueError( + "`abs_step` must have the same length of " + f"`{len(design_keys)}` as `design_keys`." + ) + if len(rel_step) != len(design_keys): + raise ValueError( + "`rel_step` must have the same length of " + f"`{len(design_keys)}` as `design_keys`." + ) + + design_values = [] + # loop over design keys and generate design values + for i, comp in enumerate(design_keys): + lb = comp.lb + ub = comp.ub + # Check if the component has finite lower and upper bounds + if lb is None or ub is None: + raise ValueError( + f"{comp.name} does not have a lower or upper bound." + ) + + if not use_step_changes: + des_val = np.linspace(lb, ub, n_design_points) + + # If abs_step and/or rel_step is provided, generate design values + # using the formula: + # step_change = (upper_bound - lower_bound) * rel_step + abs_step + else: + # Calculate the step change in value, delta value + span = ub - lb + del_val = span * rel_step[i] + abs_step[i] + if del_val <= 0: + raise ValueError( + f"Design variable {comp.name} has non-positive step " + "in value - check abs_step and rel_step values." + ) + span = ub - lb + # Build points by count to avoid float drift from iterative + # addition. Keep compatibility with lb > ub behavior. + n_steps = max(0, int(np.floor(span / del_val + 1e-12)) + 1) + des_val = lb + del_val * np.arange(n_steps) + + design_values.append(des_val) + + # generate the factorial points based on the initialization scheme + try: + scheme_enum = DesignSpaceTraversal(traversal_scheme) + except ValueError: + raise ValueError( + f"{traversal_scheme=} is not recognized. " + "Please use one of the following: " + f"{[e.value for e in DesignSpaceTraversal]}" + ) + + if scheme_enum == DesignSpaceTraversal.snake_traversal: + factorial_points = snake_traversal_grid_sampling(*design_values) + elif scheme_enum == DesignSpaceTraversal.nested_for_loop: + factorial_points = product(*design_values) + else: + raise ValueError( + f"{traversal_scheme=} is not recognized. " + "Please use one of the following: " + f"{[e.value for e in DesignSpaceTraversal]}" + ) + + factorial_points_list = list(factorial_points) + + factorial_results = {k.name: [] for k in model.experiment_inputs.keys()} + factorial_results.update( + { + "log10 D-opt": [], + "log10 A-opt": [], + "log10 pseudo A-opt": [], + "log10 E-opt": [], + "log10 ME-opt": [], + "eigval_min": [], + "eigval_max": [], + "det_FIM": [], + "trace_cov": [], + "trace_FIM": [], + "solve_time": [], + } + ) + + success_count = 0 + failure_count = 0 + total_points = len(factorial_points_list) + + # save the FIM for each point in the factorial design + self.n_parameters = len(model.unknown_parameters) + FIM_all = np.zeros((total_points, self.n_parameters, self.n_parameters)) + + time_set = [] + curr_point = 1 # Initial current point + for design_point in factorial_points_list: + if design_vals is not None: + update_model_from_suffix(design_suff, design_point) + else: + update_model_from_suffix(model.experiment_inputs, design_point) + + # Timing and logging objects + self.logger.info(f"=======Iteration Number: {curr_point} =======") + iter_timer = TicTocTimer() + iter_timer.tic(msg=None) + + try: + curr_point = success_count + failure_count + 1 + self.logger.info(f"This is run {curr_point} out of {total_points}.") + self.compute_FIM(model=model, method=method) + success_count += 1 + # iteration time + iter_t = iter_timer.toc(msg=None) + time_set.append(iter_t) + + # More logging + self.logger.info( + f"The code has run for {round(sum(time_set), 2)} seconds." + ) + self.logger.info( + "Estimated remaining time: %s seconds", + round( + sum(time_set) / (curr_point) * (total_points - curr_point + 1), + 2, + ), + ) + except Exception as err: + self.logger.warning( + "Cannot converge this run during FIM computation: %s", err + ) + self.logger.debug( + "Traceback for failed FIM factorial run:", exc_info=True + ) + failure_count += 1 + self.logger.warning("failed count: %s", failure_count) + + self._computed_FIM = np.zeros(self.prior_FIM.shape) + + iter_t = iter_timer.toc(msg=None) + time_set.append(iter_t) + + FIM = self._computed_FIM + + # Save FIM for the current design point + FIM_all[curr_point - 1, :, :] = FIM + + # Compute and record metrics on FIM + fim_metrics = compute_FIM_metrics(FIM) + + for k in model.experiment_inputs.keys(): + factorial_results[k.name].append(pyo.value(k)) + + factorial_results["log10 D-opt"].append(fim_metrics.D_opt) + factorial_results["log10 A-opt"].append(fim_metrics.A_opt) + factorial_results["log10 pseudo A-opt"].append(fim_metrics.pseudo_A_opt) + factorial_results["log10 E-opt"].append(fim_metrics.E_opt) + factorial_results["log10 ME-opt"].append(fim_metrics.ME_opt) + factorial_results["eigval_min"].append(np.min(fim_metrics.E_vals)) + factorial_results["eigval_max"].append(np.max(fim_metrics.E_vals)) + factorial_results["det_FIM"].append(fim_metrics.det_FIM) + factorial_results["trace_cov"].append(fim_metrics.trace_cov) + factorial_results["trace_FIM"].append(fim_metrics.trace_FIM) + factorial_results["solve_time"].append(time_set[-1]) + + factorial_results.update( + { + "total_points": total_points, + "success_count": success_count, + "failure_count": failure_count, + "FIM_all": FIM_all.tolist(), # Save all FIMs + } + ) + self.factorial_results = factorial_results + + # Set pandas DataFrame options + if return_df: + # exclude matrix-like entries, and unchanging results from the DataFrame + exclude_keys = {"total_points", "success_count", "failure_count", "FIM_all"} + dict_for_df = { + k: v for k, v in factorial_results.items() if k not in exclude_keys + } + res_df = pd.DataFrame(dict_for_df) + sys.stdout.write("\n\n=========Factorial results===========\n") + sys.stdout.write(f"Total points: {total_points}\n") + sys.stdout.write(f"Success count: {success_count}\n") + sys.stdout.write(f"Failure count: {failure_count}\n\n") + sys.stdout.write(f"{res_df}\n") + + # Save the results to a json file based on the file_name provided + if file_name is not None: + with open(file_name + ".json", "w") as f: + json.dump(self.factorial_results, f, indent=4) + self.logger.info(f"Results saved to {file_name}.json.") + + return self.factorial_results + # TODO: Overhaul plotting functions to not use strings # TODO: Make the plotting functionalities work for >2 design features def draw_factorial_figure( @@ -2022,20 +2401,26 @@ def draw_factorial_figure( log_scale=True, ): """ - Extract results needed for drawing figures from - the results dictionary provided by the - ``compute_FIM_full_factorial`` function. + Extract and filter factorial results for 1D/2D sensitivity plots. + + Results can come from either ``compute_FIM_full_factorial`` or + ``compute_FIM_factorial``. Design variable names are specified as strings + to align with the factorial APIs. Draw either the 1D sensitivity curve or 2D heatmap. Parameters ---------- - results: dictionary, results dictionary from ``compute_FIM_full_factorial`` - default: None (self.fim_factorial_results) - sensitivity_design_variables: a list, design variable names to draw sensitivity - fixed_design_variables: a dictionary, keys are the design variable names to be - fixed, values are the value of it to be fixed. - full_design_variable_names: a list, all the design variables in the problem. + results: dictionary/pandas.DataFrame + Results dictionary (or DataFrame) from factorial evaluation. + Default: ``self.fim_factorial_results``. + sensitivity_design_variables: iterable of strings + Design variable names used as sensitivity degrees of freedom. + At most two are currently supported. + fixed_design_variables: dict + Dictionary of fixed design variable names and values. + full_design_variable_names: list of strings + All design variable names in the problem. title_text: a string, name for the figure xlabel_text: a string, label for the x-axis of the figure default: last design variable name @@ -2071,7 +2456,9 @@ def draw_factorial_figure( "include all the design variable names." ) - des_names = full_design_variable_names + des_names = list(full_design_variable_names) + if any(not isinstance(name, str) for name in des_names): + raise ValueError("``full_design_variable_names`` entries must be strings.") # Inputs must exist for the function to do anything # TODO: Put in a default value function????? @@ -2081,23 +2468,63 @@ def draw_factorial_figure( if fixed_design_variables is None: raise ValueError("``fixed_design_variables`` must be included.") - # Check that the provided design variables are within the results object - check_des_vars = True - for k, v in fixed_design_variables.items(): - check_des_vars *= k in ([k2 for k2, v2 in results.items()]) - check_sens_vars = True - for k in sensitivity_design_variables: - check_sens_vars *= k in [k2 for k2, v2 in results.items()] + if isinstance(sensitivity_design_variables, str): + sensitivity_design_variables = [sensitivity_design_variables] + else: + try: + sensitivity_design_variables = list(sensitivity_design_variables) + except TypeError: + raise TypeError( + "``sensitivity_design_variables`` must be an iterable of strings." + ) + + if any(not isinstance(name, str) for name in sensitivity_design_variables): + raise ValueError("Sensitivity design variable names must be strings.") + + if len(set(sensitivity_design_variables)) != len(sensitivity_design_variables): + raise ValueError( + "Sensitivity design variables contain duplicates. Please provide " + "unique design variable names." + ) + + if not isinstance(fixed_design_variables, dict): + raise ValueError("``fixed_design_variables`` must be a dictionary.") + if any(not isinstance(name, str) for name in fixed_design_variables.keys()): + raise ValueError("Fixed design variable names must be strings.") - if not check_des_vars: + des_name_set = set(des_names) + missing_from_results = des_name_set - set(results.keys()) + if missing_from_results: + raise ValueError( + "The following design variable names were not found in the results " + f"object: {missing_from_results}." + ) + + invalid_fixed_names = set(fixed_design_variables.keys()) - des_name_set + if invalid_fixed_names: raise ValueError( "Fixed design variables do not all appear " - "in the results object keys." + "in the results object keys. " + f"Invalid entries: {invalid_fixed_names}. " + f"Valid design variable names are: {des_name_set}." ) - if not check_sens_vars: + + invalid_sensitivity_names = set(sensitivity_design_variables) - des_name_set + if invalid_sensitivity_names: raise ValueError( "Sensitivity design variables do not all appear " - "in the results object keys." + "in the results object keys. " + f"Invalid entries: {invalid_sensitivity_names}. " + f"Valid design variable names are: {des_name_set}." + ) + + overlap_names = set(sensitivity_design_variables).intersection( + fixed_design_variables.keys() + ) + if overlap_names: + raise ValueError( + "Design variables cannot be both sensitivity and fixed variables. " + f"Overlapping names: {overlap_names}." ) # TODO: Make it possible to plot pair-wise sensitivities for all variables diff --git a/pyomo/contrib/doe/tests/test_doe_errors.py b/pyomo/contrib/doe/tests/test_doe_errors.py index 10165aa6267..ebd65fce98d 100644 --- a/pyomo/contrib/doe/tests/test_doe_errors.py +++ b/pyomo/contrib/doe/tests/test_doe_errors.py @@ -33,6 +33,7 @@ from pyomo.contrib.doe.examples.rooney_biegler_doe_example import run_rooney_biegler_doe from pyomo.opt import SolverFactory +import pyomo.environ as pyo ipopt_available = SolverFactory("ipopt").available() @@ -219,8 +220,7 @@ def test_reactor_check_bad_prior_negative_eigenvalue(self): with self.assertRaisesRegex( ValueError, - r"FIM provided is not positive definite. It has one or more " - r"negative eigenvalue\(s\) less than -{:.1e}".format( + r"Matrix provided is not positive definite. It has one or more negative eigenvalue\(s\) less than -{:.1e}".format( _SMALL_TOLERANCE_DEFINITENESS ), ): @@ -244,7 +244,7 @@ def test_reactor_check_bad_prior_not_symmetric(self): with self.assertRaisesRegex( ValueError, - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ), ): @@ -781,6 +781,63 @@ def test_invalid_trace_without_cholesky(self): ): doe_obj.create_objective_function() + def test_compute_fim_factorial_component_keys_processed_before_traversal_validation( + self, + ): + fd_method = "central" + obj_used = "pseudo_trace" + + experiment = get_rooney_biegler_experiment_flag() + DoE_args = get_standard_args(experiment, fd_method, obj_used, flag=None) + doe_obj = DesignOfExperiments(**DoE_args) + + model = experiment.get_labeled_model() + design_var = next(iter(model.experiment_inputs.keys())) + # Use a valid component-keyed design map, then force a traversal error to + # confirm design_vals key normalization succeeds before traversal checks. + design_vals = pyo.ComponentMap(((design_var, [1.0, 2.0]),)) + + with self.assertRaisesRegex( + ValueError, "traversal_scheme='bad_traversal' is not recognized." + ): + doe_obj.compute_FIM_factorial( + design_vals=design_vals, traversal_scheme="bad_traversal" + ) + + def test_compute_fim_factorial_design_vals_invalid_component_key(self): + fd_method = "central" + obj_used = "pseudo_trace" + + experiment = get_rooney_biegler_experiment_flag() + DoE_args = get_standard_args(experiment, fd_method, obj_used, flag=None) + doe_obj = DesignOfExperiments(**DoE_args) + + model = experiment.get_labeled_model() + # unknown_parameters are valid model components, but are not experiment + # inputs and should be rejected for design_vals. + invalid_key = next(iter(model.unknown_parameters.keys())) + design_vals = pyo.ComponentMap(((invalid_key, [1.0, 2.0]),)) + + with self.assertRaisesRegex( + ValueError, + "design_vals keys must identify components in " + "`model.experiment_inputs`.", + ): + doe_obj.compute_FIM_factorial(design_vals=design_vals) + + def test_compute_fim_factorial_design_vals_rejects_string_keys(self): + fd_method = "central" + obj_used = "pseudo_trace" + + experiment = get_rooney_biegler_experiment_flag() + DoE_args = get_standard_args(experiment, fd_method, obj_used, flag=None) + doe_obj = DesignOfExperiments(**DoE_args) + + # String names are intentionally rejected; API expects component keys or + # ComponentUIDs that identify experiment_inputs. + with self.assertRaisesRegex(ValueError, "Pass experiment input components"): + doe_obj.compute_FIM_factorial(design_vals={"hour": [1.0, 2.0]}) + if __name__ == "__main__": unittest.main() diff --git a/pyomo/contrib/doe/tests/test_doe_solve.py b/pyomo/contrib/doe/tests/test_doe_solve.py index 9e31accf894..f47a8043f56 100644 --- a/pyomo/contrib/doe/tests/test_doe_solve.py +++ b/pyomo/contrib/doe/tests/test_doe_solve.py @@ -44,7 +44,7 @@ from pyomo.contrib.parmest.examples.rooney_biegler.rooney_biegler import ( RooneyBieglerExperiment, ) -from pyomo.contrib.doe.utils import rescale_FIM +from pyomo.contrib.doe.utils import rescale_FIM, compute_FIM_metrics from pyomo.contrib.doe.examples.rooney_biegler_doe_example import run_rooney_biegler_doe import pyomo.environ as pyo @@ -649,6 +649,170 @@ def test_doe_full_factorial(self): self.assertStructuredAlmostEqual(ff_results["det_FIM"], det_FIM_expected) self.assertStructuredAlmostEqual(ff_results["trace_FIM"], trace_FIM_expected) + def test_compute_fim_factorial_matches_pointwise_compute_fim(self): + fd_method = "central" + obj_used = "pseudo_trace" + + experiment = get_rooney_biegler_experiment() + DoE_args = get_standard_args(experiment, fd_method, obj_used) + DoE_args["logger_level"] = logging.ERROR + doe_obj = DesignOfExperiments(**DoE_args) + + base_model = experiment.get_labeled_model() + design_var = next(iter(base_model.experiment_inputs.keys())) + design_points = [1.0, 5.0, 9.0] + design_vals = pyo.ComponentMap(((design_var, design_points),)) + + factorial_results = doe_obj.compute_FIM_factorial( + design_vals=design_vals, + method="sequential", + return_df=False, + traversal_scheme="nested_for_loop", + ) + + self.assertEqual(factorial_results["total_points"], len(design_points)) + self.assertEqual(factorial_results["success_count"], len(design_points)) + self.assertEqual(factorial_results["failure_count"], 0) + + for idx, design_point in enumerate(design_points): + point_model = experiment.get_labeled_model().clone() + point_var = pyo.ComponentUID(design_var).find_component_on(point_model) + point_var.fix(design_point) + + point_fim = doe_obj.compute_FIM(model=point_model, method="sequential") + factorial_fim = np.array(factorial_results["FIM_all"][idx]) + + self.assertTrue(np.allclose(factorial_fim, point_fim, rtol=1e-7, atol=1e-7)) + + ( + det_FIM, + trace_cov, + trace_FIM, + E_vals, + _, + D_opt, + A_opt, + pseudo_A_opt, + E_opt, + ME_opt, + ) = compute_FIM_metrics(point_fim) + + self.assertTrue( + np.isclose( + factorial_results["log10 D-opt"][idx], + D_opt, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["log10 A-opt"][idx], + A_opt, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["log10 pseudo A-opt"][idx], + pseudo_A_opt, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["log10 E-opt"][idx], + E_opt, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["log10 ME-opt"][idx], + ME_opt, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["eigval_min"][idx], + np.min(E_vals), + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["eigval_max"][idx], + np.max(E_vals), + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["det_FIM"][idx], + det_FIM, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["trace_cov"][idx], + trace_cov, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + self.assertTrue( + np.isclose( + factorial_results["trace_FIM"][idx], + trace_FIM, + rtol=1e-7, + atol=1e-7, + equal_nan=True, + ) + ) + + def test_compute_fim_factorial_step_formula_uses_design_span(self): + fd_method = "central" + obj_used = "pseudo_trace" + + experiment = get_rooney_biegler_experiment() + DoE_args = get_standard_args(experiment, fd_method, obj_used) + DoE_args["logger_level"] = logging.ERROR + doe_obj = DesignOfExperiments(**DoE_args) + + factorial_results = doe_obj.compute_FIM_factorial( + abs_step=[2.0], + rel_step=[0.1], + method="sequential", + return_df=False, + traversal_scheme="nested_for_loop", + ) + + # Rooney-Biegler hour bounds are [0, 10], so: + # delta = (ub - lb) * rel_change + abs_change = 10*0.1 + 2 = 3 + expected_hour_points = [0.0, 3.0, 6.0, 9.0] + self.assertStructuredAlmostEqual( + factorial_results["hour"], expected_hour_points, abstol=1e-8, reltol=1e-8 + ) + self.assertEqual(factorial_results["total_points"], len(expected_hour_points)) + @unittest.skipUnless(pandas_available, "test requires pandas") def test_doe_A_optimality(self): A_opt_value_expected = -2.2364242059539663 diff --git a/pyomo/contrib/doe/tests/test_utils.py b/pyomo/contrib/doe/tests/test_utils.py index a6f6cb38a23..76b721d77ec 100644 --- a/pyomo/contrib/doe/tests/test_utils.py +++ b/pyomo/contrib/doe/tests/test_utils.py @@ -6,13 +6,20 @@ # Solutions of Sandia, LLC, the U.S. Government retains certain rights in this # software. This software is distributed under the 3-clause BSD License. # ____________________________________________________________________________________ -from pyomo.common.dependencies import numpy as np, numpy_available +from pyomo.common.dependencies import ( + numpy as np, + numpy_available, + pandas as pd, + pandas_available, +) import pyomo.common.unittest as unittest from pyomo.contrib.doe.utils import ( - check_FIM, + check_matrix, compute_FIM_metrics, get_FIM_metrics, + snake_traversal_grid_sampling, + compute_correlation_matrix as compcorr, _SMALL_TOLERANCE_DEFINITENESS, _SMALL_TOLERANCE_SYMMETRY, _SMALL_TOLERANCE_IMG, @@ -20,45 +27,49 @@ @unittest.skipIf(not numpy_available, "Numpy is not available") +@unittest.skipIf(not pandas_available, "Pandas is not available") class TestUtilsFIM(unittest.TestCase): - """Test the check_FIM() from utils.py.""" + """Test the check_matrix() from utils.py.""" - def test_check_FIM_valid(self): + # TODO: add tests when `check_pos_def = False` is used in check_matrix() + def test_check_matrix_valid(self): """Test case where the FIM is valid (square, positive definite, symmetric).""" FIM = np.array([[4, 1], [1, 3]]) try: - check_FIM(FIM) + check_matrix(FIM) except ValueError as e: self.fail(f"Unexpected error: {e}") - def test_check_FIM_non_square(self): + def test_check_matrix_non_square(self): """Test case where the FIM is not square.""" FIM = np.array([[4, 1], [1, 3], [2, 1]]) - with self.assertRaisesRegex(ValueError, "FIM must be a square matrix"): - check_FIM(FIM) + with self.assertRaisesRegex( + ValueError, "argument mat must be a 2D square matrix" + ): + check_matrix(FIM) - def test_check_FIM_non_positive_definite(self): + def test_check_matrix_non_positive_definite(self): """Test case where the FIM is not positive definite.""" FIM = np.array([[1, 0], [0, -2]]) with self.assertRaisesRegex( ValueError, - "FIM provided is not positive definite. It has one or more negative " + "Matrix provided is not positive definite. It has one or more negative " + r"eigenvalue\(s\) less than -{:.1e}".format( _SMALL_TOLERANCE_DEFINITENESS ), ): - check_FIM(FIM) + check_matrix(FIM) - def test_check_FIM_non_symmetric(self): + def test_check_matrix_non_symmetric(self): """Test case where the FIM is not symmetric.""" FIM = np.array([[4, 1], [0, 3]]) with self.assertRaisesRegex( ValueError, - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ), ): - check_FIM(FIM) + check_matrix(FIM) """Test the compute_FIM_metrics() from utils.py.""" @@ -153,6 +164,142 @@ def test_get_FIM_metrics(self): fim_metrics["log10(Modified E-Optimality)"], expected['ME_opt'] ) + def test_snake_traversal_grid_sampling_errors(self): + # Test the error handling with lists + list_2d_bad = [[1, 2, 3], [4, 5, 6]] + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(list_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + list_2d_wrong_shape_bad = [[1, 2, 3], [4, 5, 6, 7]] + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(list_2d_wrong_shape_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D array-like." + ) + + # Test the error handling with tuples + tuple_2d_bad = ((1, 2, 3), (4, 5, 6)) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(tuple_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + tuple_2d_wrong_shape_bad = ((1, 2, 3), (4, 5, 6, 7)) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(tuple_2d_wrong_shape_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D array-like." + ) + + # Test the error handling with numpy arrays + array_2d_bad = np.array([[1, 2, 3], [4, 5, 6]]) + with self.assertRaises(ValueError) as cm: + list(snake_traversal_grid_sampling(array_2d_bad)) + self.assertEqual( + str(cm.exception), "Argument at position 0 is not 1D. Got shape (2, 3)." + ) + + def test_snake_traversal_grid_sampling_values(self): + # Test with lists + # Test with a single list + list1 = [1, 2, 3] + result_list1 = list(snake_traversal_grid_sampling(list1)) + expected_list1 = [(1,), (2,), (3,)] + self.assertEqual(result_list1, expected_list1) + + # Test with two lists + list2 = [4, 5, 6] + result_list2 = list(snake_traversal_grid_sampling(list1, list2)) + expected_list2 = [ + (1, 4), + (1, 5), + (1, 6), + (2, 6), + (2, 5), + (2, 4), + (3, 4), + (3, 5), + (3, 6), + ] + self.assertEqual(result_list2, expected_list2) + + # Test with three lists + list3 = [7, 8] + result_list3 = list(snake_traversal_grid_sampling(list1, list2, list3)) + expected_list3 = [ + (1, 4, 7), + (1, 4, 8), + (1, 5, 8), + (1, 5, 7), + (1, 6, 7), + (1, 6, 8), + (2, 6, 8), + (2, 6, 7), + (2, 5, 7), + (2, 5, 8), + (2, 4, 8), + (2, 4, 7), + (3, 4, 7), + (3, 4, 8), + (3, 5, 8), + (3, 5, 7), + (3, 6, 7), + (3, 6, 8), + ] + self.assertEqual(result_list3, expected_list3) + + # Test with tuples + tuple1 = (1, 2, 3) + result_tuple1 = list(snake_traversal_grid_sampling(tuple1)) + tuple2 = (4, 5, 6) + result_tuple2 = list(snake_traversal_grid_sampling(tuple1, tuple2)) + tuple3 = (7, 8) + result_tuple3 = list(snake_traversal_grid_sampling(tuple1, tuple2, tuple3)) + self.assertEqual(result_tuple1, expected_list1) + self.assertEqual(result_tuple2, expected_list2) + self.assertEqual(result_tuple3, expected_list3) + + # Test with numpy arrays + array1 = np.array([1, 2, 3]) + array2 = np.array([4, 5, 6]) + array3 = np.array([7, 8]) + result_array1 = list(snake_traversal_grid_sampling(array1)) + result_array2 = list(snake_traversal_grid_sampling(array1, array2)) + result_array3 = list(snake_traversal_grid_sampling(array1, array2, array3)) + self.assertEqual(result_array1, expected_list1) + self.assertEqual(result_array2, expected_list2) + self.assertEqual(result_array3, expected_list3) + + # Test with mixed types(List, Tuple, numpy array) + result_mixed = list(snake_traversal_grid_sampling(list1, tuple2, array3)) + self.assertEqual(result_mixed, expected_list3) + + # TODO: Add more tests as needed + def test_compute_correlation_matrix(self): + # Create a sample covariance matrix + covariance_matrix = np.array([[4, 2], [2, 3]]) + var_name = ["X1", "X2"] + + # Compute the correlation matrix + correlation_matrix = compcorr(covariance_matrix, var_name, significant_digits=3) + + # Expected correlation matrix + expected_correlation_matrix = pd.DataFrame( + [[1.0, 0.577], [0.577, 1.0]], index=var_name, columns=var_name + ) + + # Check if the computed correlation matrix matches the expected one + pd.testing.assert_frame_equal( + correlation_matrix, + expected_correlation_matrix, + check_exact=False, + atol=1e-6, + ) + if __name__ == "__main__": unittest.main() diff --git a/pyomo/contrib/doe/utils.py b/pyomo/contrib/doe/utils.py index c85fbbe4c0b..0c8b58ec1b1 100644 --- a/pyomo/contrib/doe/utils.py +++ b/pyomo/contrib/doe/utils.py @@ -26,15 +26,39 @@ import pyomo.environ as pyo -from pyomo.common.dependencies import numpy as np, numpy_available +from pyomo.common.dependencies import ( + numpy as np, + numpy_available, + pandas as pd, + pandas_available, +) from pyomo.core.base.param import ParamData from pyomo.core.base.var import VarData import logging +from collections import namedtuple logger = logging.getLogger(__name__) +# Named tuple to store FIM metrics in a structured way. +# This allows for easy access to individual metrics +FIMMetrics = namedtuple( + "FIMMetrics", + [ + "det_FIM", + "trace_cov", + "trace_FIM", + "E_vals", + "E_vecs", + "D_opt", + "A_opt", + "pseudo_A_opt", + "E_opt", + "ME_opt", + ], +) + # This small and positive tolerance is used when checking # if the prior is negative definite or approximately # indefinite. It is defined as a tolerance here to ensure @@ -91,41 +115,43 @@ def rescale_FIM(FIM, param_vals): return scaled_FIM -def check_FIM(FIM): +def check_matrix(mat): """ - Checks that the FIM is square, positive definite, and symmetric. + Checks that the matrix is square, positive definite, and symmetric. Parameters ---------- - FIM: 2D numpy array representing the FIM + mat: 2D numpy array representing the matrix Returns ------- None, but will raise error messages as needed """ - # Check that the FIM is a square matrix - if FIM.shape[0] != FIM.shape[1]: - raise ValueError("FIM must be a square matrix") + # Check that the matrix is a square matrix + if mat.ndim != 2 or mat.shape[0] != mat.shape[1]: + raise ValueError("argument mat must be a 2D square matrix") - # Compute the eigenvalues of the FIM - evals = np.linalg.eigvals(FIM) + # Compute the eigenvalues of the matrix + evals = np.linalg.eigvals(mat) - # Check if the FIM is positive definite + # Check if the matrix is positive definite if np.min(evals) < -_SMALL_TOLERANCE_DEFINITENESS: raise ValueError( - "FIM provided is not positive definite. It has one or more negative " + "Matrix provided is not positive definite. It has one or more negative " + "eigenvalue(s) less than -{:.1e}".format(_SMALL_TOLERANCE_DEFINITENESS) ) - # Check if the FIM is symmetric - if not np.allclose(FIM, FIM.T, atol=_SMALL_TOLERANCE_SYMMETRY): + # Check if the matrix is symmetric + if not np.allclose(mat, mat.T, atol=_SMALL_TOLERANCE_SYMMETRY): raise ValueError( - "FIM provided is not symmetric using absolute tolerance {}".format( + "Matrix provided is not symmetric using absolute tolerance {}".format( _SMALL_TOLERANCE_SYMMETRY ) ) +# TODO: probably can merge compute_FIM_metrics() and get_FIM_metrics() in a single + +# TODO: function with an argument (e.g., return_dict = True) to compute the metrics # Functions to compute FIM metrics def compute_FIM_metrics(FIM): """ @@ -136,7 +162,8 @@ def compute_FIM_metrics(FIM): Returns ------- - Returns the following metrics as a tuple in the order shown below: + FIMMetrics + Named tuple with the following fields: det_FIM : float Determinant of the FIM. @@ -161,7 +188,7 @@ def compute_FIM_metrics(FIM): """ # Check whether the FIM is square, positive definite, and symmetric - check_FIM(FIM) + check_matrix(FIM) # Compute FIM metrics det_FIM = np.linalg.det(FIM) @@ -192,17 +219,17 @@ def compute_FIM_metrics(FIM): ME_opt = np.log10(np.linalg.cond(FIM)) - return ( - det_FIM, - trace_cov, - trace_FIM, - E_vals, - E_vecs, - D_opt, - A_opt, - pseudo_A_opt, - E_opt, - ME_opt, + return FIMMetrics( + det_FIM=det_FIM, + trace_cov=trace_cov, + trace_FIM=trace_FIM, + E_vals=E_vals, + E_vecs=E_vecs, + D_opt=D_opt, + A_opt=A_opt, + pseudo_A_opt=pseudo_A_opt, + E_opt=E_opt, + ME_opt=ME_opt, ) @@ -266,3 +293,155 @@ def get_FIM_metrics(FIM): "log10(E-Optimality)": E_opt, "log10(Modified E-Optimality)": ME_opt, } + + +def snake_traversal_grid_sampling(*array_like_args): + """ + Generates a multi-dimensional pattern for an arbitrary number of 1D array-like arguments. + This pattern is useful for generating patterns for sensitivity analysis when we want + to change one variable at a time. This function uses recursion and acts as a generator. + + Parameters + ---------- + *array_like_args: A variable number of 1D array-like objects as arguments. + + Yields + ------ + A tuple representing points in the snake pattern. + """ + # throw an error if the array_like_args are not array-like or all 1D + for i, arg in enumerate(array_like_args): + try: + arr_np = np.asarray(arg) + except Exception: + raise ValueError(f"Argument at position {i} is not 1D array-like.") + + if arr_np.ndim != 1: + raise ValueError( + f"Argument at position {i} is not 1D. Got shape {arr_np.shape}." + ) + + # The main logic is in a nested recursive helper function. + def _generate_recursive(depth, index_sum): + # Base case: If we've processed all lists, we're at the end of a path. + if depth == len(array_like_args): + yield () + return + + current_list = array_like_args[depth] + + # Determine the iteration direction based on the sum of parent indices. + # This is the mathematical rule for the zigzag. + is_forward = index_sum % 2 == 0 + iterable = current_list if is_forward else reversed(current_list) + + # Enumerate to get the index `i` for the *next* recursive call's sum. + for i, value in enumerate(iterable): + # Recur for the next list, updating the index_sum. + for sub_pattern in _generate_recursive(depth + 1, index_sum + i): + # Prepend the current value to the results from deeper levels. + yield (value,) + sub_pattern + + # Start the recursion at the first list (depth 0) with an initial sum of 0. + yield from _generate_recursive(depth=0, index_sum=0) + + +def compute_correlation_matrix( + covariance_matrix, var_name: list = None, significant_digits=None +): + """ + Computes the correlation matrix from a covariance matrix. + + Parameters + ---------- + covariance_matrix : numpy.ndarray or pandas.DataFrame + 2D square covariance matrix. If a DataFrame is provided and ``var_name`` + is not given, DataFrame column names are used for both rows and columns + in the output. Column names must be unique. + var_name : list, optional + Variable names used for both rows and columns in the output correlation + matrix. Length must match matrix dimension and names must be unique. + If omitted and ``covariance_matrix`` is a DataFrame, DataFrame column names + are used. If omitted and ``covariance_matrix`` is a NumPy array, an unlabeled + NumPy array is returned. + significant_digits : int, optional + Number of significant digits for rounding entries of the correlation matrix. + Default: None (no rounding). + + Returns + ------- + pandas.DataFrame/numpy.ndarray + If `var_name` is provided, returns a pandas DataFrame with the correlation matrix + and the specified variable names as both index and columns. If `var_name` is not + provided, returns: + - a pandas DataFrame when the covariance input is a DataFrame, + - a numpy array when the covariance input is a NumPy array. + """ + # Support square pandas.DataFrame input while preserving label information. + # Use duck typing to avoid hard dependency on pandas type checks here. + is_dataframe_like = ( + hasattr(covariance_matrix, "to_numpy") + and hasattr(covariance_matrix, "index") + and hasattr(covariance_matrix, "columns") + ) + + if is_dataframe_like: + cov_columns = list(covariance_matrix.columns) + if len(set(cov_columns)) != len(cov_columns): + raise ValueError( + "For DataFrame covariance_matrix input, column labels must be unique." + ) + covariance_matrix_np = covariance_matrix.to_numpy() + else: + covariance_matrix_np = np.asarray(covariance_matrix) + cov_columns = None + + # Check if covariance matrix is symmetric and square + check_matrix(covariance_matrix_np) + + if var_name is not None: + if isinstance(var_name, str): + raise ValueError( + "`var_name` must be a list-like collection of names, not a string." + ) + var_name = list(var_name) + if len(var_name) != covariance_matrix_np.shape[0]: + raise ValueError( + "Length of var_name must match the number of rows/columns in the " + "covariance matrix." + ) + if len(set(var_name)) != len(var_name): + raise ValueError("Entries in `var_name` must be unique.") + elif is_dataframe_like: + var_name = cov_columns + + if not np.all(np.isfinite(covariance_matrix_np)): + raise ValueError("Covariance matrix contains non-finite values.") + + std_dev = np.sqrt(np.diag(covariance_matrix_np)) + if np.any(std_dev == 0): + raise ValueError( + "Covariance matrix contains one or more zero variances; correlation " + "entries are undefined." + ) + + std_dev_matrix = np.outer(std_dev, std_dev) + correlation_matrix = covariance_matrix_np / std_dev_matrix + + # Build labeled DataFrame output only when names are available. + if var_name is not None: + if not pandas_available: + raise RuntimeError( + "pandas is required to return labeled correlation matrices." + ) + corr_output = pd.DataFrame(correlation_matrix, index=var_name, columns=var_name) + else: + corr_output = correlation_matrix + + if significant_digits is None: + return corr_output + + if not isinstance(significant_digits, int) or significant_digits < 0: + raise ValueError("`significant_digits` must be a non-negative integer or None.") + + return corr_output.round(significant_digits)