Skip to content

Optimization API Reference

Algorithm

Genetic Algorithm

seapopym_optimization.algorithm.genetic_algorithm.genetic_algorithm

This module contains the main genetic algorithm functions that can be used to optimize the model.

GeneticAlgorithmParameters dataclass

The structure used to store the genetic algorithm parameters.

Can generate the toolbox with default parameters.

Parameters

MUTPB: float Represents the probability of mutating an individual. It is recommended to use a value between 0.001 and 0.1. ETA: float Crowding degree of the mutation. A high eta will produce a mutant resembling its parent, while a small eta will produce a solution much more different. It is recommended to use a value between 1 and 20. INDPB: float Represents the individual probability of mutation for each attribute of the individual. It is recommended to use a value between 0.0 and 0.1. If you have a lot of parameters, you can use a 1/len(parameters) value. CXPB: float Represents the probability of mating two individuals. It is recommended to use a value between 0.5 and 1.0. NGEN: int Represents the number of generations. POP_SIZE: int Represents the size of the population. TOURNSIZE: int = 3 Represents the tournament size used by the selection operator. cost_function_weight: tuple | float = (-1.0,) The weight of the cost function. The default value is (-1.0,) to minimize the cost function.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
@dataclass
class GeneticAlgorithmParameters:
    """The structure used to store the genetic algorithm parameters.

    Can generate the toolbox with default parameters.

    Parameters
    ----------
    MUTPB: float
        Represents the probability of mutating an individual. It is recommended to use a value between 0.001 and 0.1.
    ETA: float
        Crowding degree of the mutation. A high eta will produce a mutant resembling its parent, while a small eta will
        produce a solution much more different. It is recommended to use a value between 1 and 20.
    INDPB: float
        Represents the individual probability of mutation for each attribute of the individual. It is recommended to use
        a value between 0.0 and 0.1. If you have a lot of parameters, you can use a 1/len(parameters) value.
    CXPB: float
        Represents the probability of mating two individuals. It is recommended to use a value between 0.5 and 1.0.
    NGEN: int
        Represents the number of generations.
    POP_SIZE: int
        Represents the size of the population.
    TOURNSIZE: int = 3
        The number of individuals participating in each selection tournament.
    cost_function_weight: tuple[Number, ...] = (-1.0,)
        The weight of each objective of the cost function. The default value is (-1.0,) to minimize a single
        objective. Weights are normalized in __post_init__ so their absolute values sum to 1.

    """

    ETA: float
    INDPB: float
    CXPB: float
    MUTPB: float
    NGEN: int
    POP_SIZE: int
    TOURNSIZE: int = field(default=3)
    cost_function_weight: tuple[Number, ...] = (-1.0,)

    def __post_init__(self: GeneticAlgorithmParameters) -> None:
        """Check parameters and set default functions for selection, mating, mutation and variation."""
        # Default DEAP operators; they can be overridden after construction.
        self.select = tools.selTournament
        self.mate = tools.cxTwoPoint  # NOTE(Jules): We should test this method `tools.cxSimulatedBinary``
        self.mutate = tools.mutPolynomialBounded
        self.variation = algorithms.varAnd
        # Normalize the weights so the sum of their absolute values is 1.
        # NOTE(review): a bare float weight (0-d array) would make tuple() raise here —
        # confirm callers always pass a sequence.
        self.cost_function_weight = tuple(
            np.asarray(self.cost_function_weight) / np.sum(np.absolute(self.cost_function_weight))
        )

    def generate_toolbox(self: GeneticAlgorithmParameters, parameters: Sequence[Parameter]) -> base.Toolbox:
        """Generate a DEAP toolbox with the necessary functions for the genetic algorithm."""
        toolbox = base.Toolbox()
        Individual = individual_creator(self.cost_function_weight)  # noqa: N806
        toolbox.register("Individual", Individual)

        # One bounded initializer per optimized parameter.
        for param in parameters:
            toolbox.register(param.name, param.init_method, param.lower_bound, param.upper_bound)

        def individual() -> list:
            # Draw a fresh individual by sampling every parameter within its bounds.
            return Individual([param.init_method(param.lower_bound, param.upper_bound) for param in parameters])

        toolbox.register("population", tools.initRepeat, list, individual)
        # Note: Evaluation is now handled by evaluation strategies, not the toolbox
        toolbox.register("mate", self.mate)
        low_boundaries = [param.lower_bound for param in parameters]
        up_boundaries = [param.upper_bound for param in parameters]
        toolbox.register("mutate", self.mutate, eta=self.ETA, indpb=self.INDPB, low=low_boundaries, up=up_boundaries)
        toolbox.register("select", self.select, tournsize=self.TOURNSIZE)
        return toolbox

__post_init__()

Check parameters and set default functions for selection, mating, mutation and variation.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def __post_init__(self: GeneticAlgorithmParameters) -> None:
    """Install the default DEAP operators and normalize the objective weights."""
    # Default operators for selection, crossover, mutation and the variation scheme.
    self.select = tools.selTournament
    self.mate = tools.cxTwoPoint  # NOTE(Jules): We should test this method `tools.cxSimulatedBinary``
    self.mutate = tools.mutPolynomialBounded
    self.variation = algorithms.varAnd
    # Rescale the weights so that their absolute values sum to one.
    weights = np.asarray(self.cost_function_weight)
    self.cost_function_weight = tuple(weights / np.absolute(weights).sum())

generate_toolbox(parameters)

Generate a DEAP toolbox with the necessary functions for the genetic algorithm.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def generate_toolbox(self: GeneticAlgorithmParameters, parameters: Sequence[Parameter]) -> base.Toolbox:
    """Build a DEAP toolbox wired with this configuration's operators.

    Registers the individual and population factories plus the mate, mutate and
    select operators for the given parameter definitions.
    """
    toolbox = base.Toolbox()
    Individual = individual_creator(self.cost_function_weight)  # noqa: N806
    toolbox.register("Individual", Individual)

    # One bounded initializer per optimized parameter.
    for definition in parameters:
        toolbox.register(definition.name, definition.init_method, definition.lower_bound, definition.upper_bound)

    def individual() -> list:
        # Sample every parameter within its bounds to build a fresh individual.
        samples = [definition.init_method(definition.lower_bound, definition.upper_bound) for definition in parameters]
        return Individual(samples)

    toolbox.register("population", tools.initRepeat, list, individual)
    # Note: Evaluation is now handled by evaluation strategies, not the toolbox
    toolbox.register("mate", self.mate)
    lower = [definition.lower_bound for definition in parameters]
    upper = [definition.upper_bound for definition in parameters]
    toolbox.register("mutate", self.mutate, eta=self.ETA, indpb=self.INDPB, low=lower, up=upper)
    toolbox.register("select", self.select, tournsize=self.TOURNSIZE)
    return toolbox

GeneticAlgorithm dataclass

Genetic algorithm for optimizing SeapoPym models.

By default, the process order is SCM: Select, Cross, Mutate.

Uses the Strategy pattern for individual evaluation, allowing easy switching between sequential and hybrid modes as needed.

Examples

from seapopym_optimization.algorithm.genetic_algorithm import GeneticAlgorithmFactory ga = GeneticAlgorithmFactory.create_sequential(meta_params, cost_function) results = ga.optimize()

Attributes

meta_parameter: GeneticAlgorithmParameters The parameters of the genetic algorithm. cost_function: CostFunctionProtocol The cost function to optimize. evaluation_strategy: AbstractEvaluationStrategy Strategy pattern for evaluating individuals. constraint: Sequence[ConstraintProtocol] | None The constraints to apply to the individuals. If None, no constraints are applied. save: PathLike | None The path to save the logbook (in Parquet format). If None, the logbook is not saved.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
@dataclass
class GeneticAlgorithm:
    """Genetic algorithm for optimizing SeapoPym models.

    By default, the process order is SCM: Select, Cross, Mutate.

    Uses the Strategy pattern for individual evaluation, allowing
    easy switching between sequential and hybrid modes as needed.

    Examples
    --------
    >>> from seapopym_optimization.algorithm.genetic_algorithm import GeneticAlgorithmFactory
    >>> ga = GeneticAlgorithmFactory.create_sequential(meta_params, cost_function)
    >>> results = ga.optimize()

    Attributes
    ----------
    meta_parameter: GeneticAlgorithmParameters
        The parameters of the genetic algorithm.
    cost_function: CostFunctionProtocol
        The cost function to optimize.
    evaluation_strategy: AbstractEvaluationStrategy
        Strategy pattern for evaluating individuals.
    constraint: Sequence[ConstraintProtocol] | None
        The constraints to apply to the individuals. If None, no constraints are applied.
    save: PathLike | None
        The path to save the logbook (in Parquet format). If None, the logbook is not saved.

    """

    meta_parameter: GeneticAlgorithmParameters
    cost_function: CostFunctionProtocol
    evaluation_strategy: AbstractEvaluationStrategy
    constraint: Sequence[ConstraintProtocol] | None = None

    # Runtime state: output path, accumulated history and the generated DEAP toolbox.
    save: FilePath | WriteBuffer[bytes] | None = None
    logbook: Logbook | None = field(default=None, repr=False)
    toolbox: base.Toolbox | None = field(default=None, init=False, repr=False)

    def __post_init__(self: GeneticAlgorithm) -> None:
        """Check parameters and initialize the evaluation strategy."""
        # Logbook configuration: wrap a raw object into a Logbook if needed.
        if self.logbook is not None and not isinstance(self.logbook, Logbook):
            self.logbook = Logbook(self.logbook)
        if self.save is not None:
            self.save = Path(self.save)
            if self.save.exists():
                warning_msg = f"Logbook file {self.save} already exists. It will be overwritten."
                logger.warning(warning_msg)

        # Toolbox generation
        ordered_parameters = self.cost_function.functional_groups.unique_functional_groups_parameters_ordered()
        self.toolbox = self.meta_parameter.generate_toolbox(ordered_parameters.values())

        # Apply constraints as decorators on the toolbox "evaluate" alias.
        # NOTE(review): generate_toolbox never registers an "evaluate" function —
        # confirm Toolbox.decorate tolerates decorating an unregistered alias.
        if self.constraint is not None:
            for constraint in self.constraint:
                self.toolbox.decorate("evaluate", constraint.generate(list(ordered_parameters.keys())))

        # Validate the weights: one weight per observation is required.
        if len(self.meta_parameter.cost_function_weight) != len(self.cost_function.observations):
            msg = (
                "The cost function weight must have the same length as the number of observations. "
                f"Got {len(self.meta_parameter.cost_function_weight)} and {len(self.cost_function.observations)}."
            )
            raise ValueError(msg)

    def update_logbook(self: GeneticAlgorithm, logbook: Logbook) -> None:
        """Update the logbook with the new data and save to disk if a path is provided."""
        if not isinstance(logbook, Logbook):
            logbook = Logbook(logbook)

        if self.logbook is None:
            self.logbook = logbook
        else:
            # Append new generation using Pandas concat
            self.logbook = self.logbook.append_new_generation(logbook)

        # Persist the whole history after every update so a crash loses at most one generation.
        if self.save is not None:
            self.logbook.to_parquet(self.save)

    def _evaluate(self: GeneticAlgorithm, individuals: Sequence, generation: int) -> Logbook:
        """Evaluate individuals by delegating to the evaluation strategy.

        Creates and returns a Logbook for the current generation.
        """

        def update_fitness(individuals: list) -> list:
            # known[i] is True when individual i already carried a valid fitness
            # (i.e. it survived unchanged from a previous generation).
            known = [ind.fitness.valid for ind in individuals]
            invalid_ind = [ind for ind in individuals if not ind.fitness.valid]

            if invalid_ind:
                # Delegation to the evaluation strategy
                fitnesses = self.evaluation_strategy.evaluate(invalid_ind)

                for ind, fit in zip(invalid_ind, fitnesses, strict=True):
                    ind.fitness.values = fit

            return known

        known = update_fitness(individuals)

        # Extract parameter names and fitness names
        parameter_names = list(
            self.cost_function.functional_groups.unique_functional_groups_parameters_ordered().keys()
        )
        fitness_names = list(self.cost_function.observations.keys())

        # Create Logbook from individuals (fitness is automatically extracted)
        return Logbook.from_individual(
            generation=generation,
            is_from_previous_generation=known,
            individual=individuals,
            parameter_names=parameter_names,
            fitness_names=fitness_names,
        )

    def _initialization(self: GeneticAlgorithm) -> tuple[int, list[list]]:
        """Initialize the genetic algorithm. If a logbook is provided, it will load the last generation."""

        def create_first_generation() -> tuple[Literal[1], list[list]]:
            """Create the first generation (i.e. generation `0`) of individuals."""
            new_generation = 0
            population = self.toolbox.population(n=self.meta_parameter.POP_SIZE)
            logbook = self._evaluate(individuals=population, generation=new_generation)
            self.update_logbook(logbook)
            next_generation = new_generation + 1
            return next_generation, population

        def create_population_from_logbook(population_unprocessed: pd.DataFrame) -> list[list]:
            """Create a population from the logbook DataFrame."""
            individuals = population_unprocessed.loc[:, [LogbookCategory.PARAMETER]].to_numpy()
            fitness = list(population_unprocessed.loc[:, [LogbookCategory.FITNESS]].itertuples(index=False, name=None))
            # A NaN fitness means "not evaluated": an empty tuple leaves the fitness invalid.
            fitness = [() if any(np.isnan(fit)) else fit for fit in fitness]
            return [
                self.toolbox.Individual(iterator=iterator, values=values)
                for iterator, values in zip(individuals, fitness, strict=True)
            ]

        if self.logbook is None:
            return create_first_generation()

        logger.info("Logbook found. Loading last generation.")

        last_computed_generation = self.logbook.index.get_level_values(LogbookIndex.GENERATION).max()
        population_unprocessed = self.logbook.loc[last_computed_generation]

        population = create_population_from_logbook(population_unprocessed)

        if population_unprocessed.loc[:, LogbookCategory.FITNESS].isna().any(axis=None):
            logger.warning("Some individuals in the logbook have no fitness values. Re-evaluating the population.")
            logbook = self._evaluate(population, last_computed_generation)
            # NOTE(review): clearing self.logbook here discards all earlier generations and keeps
            # only the re-evaluated one — confirm this loss of history is intended.
            self.logbook = None
            self.update_logbook(logbook)

        return last_computed_generation + 1, population

    def optimize(self: GeneticAlgorithm) -> Logbook:
        """This is the main function. Use it to optimize your model."""
        generation_start, population = self._initialization()

        for gen in range(generation_start, self.meta_parameter.NGEN):
            log_message = f"Generation {gen} / {self.meta_parameter.NGEN}."
            logger.info(log_message)
            # SCM order: select first, then cross and mutate via the variation operator.
            offspring = self.toolbox.select(population, self.meta_parameter.POP_SIZE)
            offspring = self.meta_parameter.variation(
                offspring, self.toolbox, self.meta_parameter.CXPB, self.meta_parameter.MUTPB
            )
            logbook = self._evaluate(offspring, gen)

            self.update_logbook(logbook)
            population[:] = offspring

        return self.logbook.copy()

__post_init__()

Check parameters and initialize the evaluation strategy.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def __post_init__(self: GeneticAlgorithm) -> None:
    """Check parameters and initialize the evaluation strategy."""
    # Wrap a raw logbook object into a Logbook if needed.
    if self.logbook is not None and not isinstance(self.logbook, Logbook):
        self.logbook = Logbook(self.logbook)

    if self.save is not None:
        self.save = Path(self.save)
        if self.save.exists():
            logger.warning(f"Logbook file {self.save} already exists. It will be overwritten.")

    # Build the DEAP toolbox from the ordered unique parameters.
    ordered_parameters = self.cost_function.functional_groups.unique_functional_groups_parameters_ordered()
    self.toolbox = self.meta_parameter.generate_toolbox(ordered_parameters.values())

    # Apply each constraint as a decorator on the toolbox "evaluate" alias.
    for constraint in self.constraint or ():
        self.toolbox.decorate("evaluate", constraint.generate(list(ordered_parameters.keys())))

    # One cost-function weight is required per observation.
    n_weights = len(self.meta_parameter.cost_function_weight)
    n_observations = len(self.cost_function.observations)
    if n_weights != n_observations:
        msg = (
            "The cost function weight must have the same length as the number of observations. "
            f"Got {n_weights} and {n_observations}."
        )
        raise ValueError(msg)

update_logbook(logbook)

Update the logbook with the new data and save to disk if a path is provided.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def update_logbook(self: GeneticAlgorithm, logbook: Logbook) -> None:
    """Merge new generation data into the logbook and persist it when a save path is set."""
    entry = logbook if isinstance(logbook, Logbook) else Logbook(logbook)

    # First entry: adopt it as-is; otherwise append the new generation to the history.
    self.logbook = entry if self.logbook is None else self.logbook.append_new_generation(entry)

    if self.save is not None:
        self.logbook.to_parquet(self.save)

optimize()

This is the main function. Use it to optimize your model.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def optimize(self: GeneticAlgorithm) -> Logbook:
    """Run the genetic algorithm and return a copy of the resulting logbook."""
    start, population = self._initialization()

    for generation in range(start, self.meta_parameter.NGEN):
        logger.info(f"Generation {generation} / {self.meta_parameter.NGEN}.")

        # SCM order: select first, then cross and mutate via the variation operator.
        offspring = self.toolbox.select(population, self.meta_parameter.POP_SIZE)
        offspring = self.meta_parameter.variation(
            offspring, self.toolbox, self.meta_parameter.CXPB, self.meta_parameter.MUTPB
        )

        self.update_logbook(self._evaluate(offspring, generation))
        population[:] = offspring

    return self.logbook.copy()

individual_creator(cost_function_weight)

Create a custom individual class for DEAP genetic algorithms.

This individual class inherits from list and includes a fitness attribute. It is redefined to work with the Dask framework, which does not support the default DEAP individual structure created with deap.creator.create.

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/genetic_algorithm.py
def individual_creator(cost_function_weight: tuple[Number, ...]) -> type:
    """Create a custom individual class for DEAP genetic algorithms.

    This individual class inherits from `list` and includes a fitness attribute. It is redefined to work with the
    Dask framework, which does not support the default DEAP individual structure created with `deap.creator.create`.

    Parameters
    ----------
    cost_function_weight: tuple[Number, ...]
        The objective weights stored as the DEAP ``Fitness.weights`` class attribute.

    """

    class Fitness(base.Fitness):
        """Fitness class to store the fitness of an individual."""

        # Class-level weights shared by every fitness of this Individual type.
        weights = cost_function_weight

    class Individual(list):
        """Individual class to store the parameters of an individual."""

        def __init__(self: Individual, iterator: Sequence, values: Sequence[Number] = ()) -> None:
            # The default empty `values` leaves the fitness without values, so the
            # individual will be (re-)evaluated by the algorithm.
            super().__init__(iterator)
            self.fitness = Fitness(values=values)

    return Individual

Factory

seapopym_optimization.algorithm.genetic_algorithm.factory

Factory for creating configured GeneticAlgorithm instances.

This module provides factory methods to simplify the creation of GeneticAlgorithm instances with different evaluation strategies, hiding configuration complexity for business users.

GeneticAlgorithmFactory

Factory for creating GeneticAlgorithm instances with different configurations.

This factory simplifies genetic algorithm creation by encapsulating the configuration logic for evaluation strategies and distribution.

See Also

seapopym_optimization.algorithm.genetic_algorithm.genetic_algorithm.GeneticAlgorithm : Main GA class

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/factory.py
class GeneticAlgorithmFactory:
    """Factory for creating GeneticAlgorithm instances with different configurations.

    This factory simplifies genetic algorithm creation by encapsulating
    the configuration logic for evaluation strategies and distribution.

    See Also
    --------
    seapopym_optimization.algorithm.genetic_algorithm.genetic_algorithm.GeneticAlgorithm : Main GA class

    """

    @staticmethod
    def create_sequential(
        meta_parameter: GeneticAlgorithmParameters, cost_function: CostFunctionProtocol, **kwargs: Any
    ) -> GeneticAlgorithm:
        """Create a GA in sequential mode.

        Simplest evaluation mode, suitable for small populations
        or situations where parallelization is not necessary.

        Parameters
        ----------
        meta_parameter : GeneticAlgorithmParameters
            Genetic algorithm parameters
        cost_function : CostFunctionProtocol
            Cost function to optimize
        **kwargs
            Additional arguments for GeneticAlgorithm

        Returns
        -------
        GeneticAlgorithm
            Instance configured in sequential mode

        Examples
        --------
        >>> ga = GeneticAlgorithmFactory.create_sequential(meta_params, cost_function)
        >>> results = ga.optimize()

        """
        logger.info("Creating genetic algorithm in sequential mode")

        return GeneticAlgorithm(
            meta_parameter=meta_parameter,
            cost_function=cost_function,
            evaluation_strategy=SequentialEvaluation(cost_function),
            **kwargs,
        )

    @staticmethod
    def create_parallel(
        meta_parameter: GeneticAlgorithmParameters, cost_function: CostFunctionProtocol, n_jobs: int = -1, **kwargs: Any
    ) -> GeneticAlgorithm:
        """Create a GA in parallel mode using multiprocessing.

        Uses ProcessPoolExecutor to evaluate individuals across
        multiple CPU cores for improved performance.

        Parameters
        ----------
        meta_parameter : GeneticAlgorithmParameters
            Genetic algorithm parameters
        cost_function : CostFunctionProtocol
            Cost function to optimize
        n_jobs : int, default=-1
            Number of parallel jobs. If -1, use all available CPUs
        **kwargs
            Additional arguments for GeneticAlgorithm

        Returns
        -------
        GeneticAlgorithm
            Instance configured in parallel mode

        Examples
        --------
        >>> ga = GeneticAlgorithmFactory.create_parallel(meta_params, cost_function, n_jobs=4)
        >>> results = ga.optimize()

        """
        logger.info("Creating genetic algorithm in parallel mode with %d jobs", n_jobs)

        return GeneticAlgorithm(
            meta_parameter=meta_parameter,
            cost_function=cost_function,
            evaluation_strategy=ParallelEvaluation(cost_function, n_jobs=n_jobs),
            **kwargs,
        )

    @staticmethod
    def create_distributed(
        meta_parameter: GeneticAlgorithmParameters,
        cost_function: CostFunctionProtocol,
        client: Client,
        **kwargs: Any,
    ) -> GeneticAlgorithm:
        """Create a GA in distributed mode with Dask.

        Automatically detects if data is already distributed (Futures) and distributes
        if necessary. Uses Dask client.map() with distributed data to evaluate
        individuals across multiple workers efficiently.

        WARNING: This method modifies the cost_function in-place by replacing
        forcing and observations data with Dask Futures.

        Parameters
        ----------
        meta_parameter : GeneticAlgorithmParameters
            Genetic algorithm parameters
        cost_function : CostFunctionProtocol
            Cost function to optimize (will be modified in-place)
        client : Client
            Dask client for distributed computing
        **kwargs
            Additional arguments for GeneticAlgorithm

        Returns
        -------
        GeneticAlgorithm
            GA instance configured for distributed execution

        Raises
        ------
        TypeError
            If client is not a Dask Client instance

        Examples
        --------
        >>> from dask.distributed import Client
        >>> client = Client()
        >>> ga = GeneticAlgorithmFactory.create_distributed(
        ...     meta_params, cost_function, client
        ... )
        >>> results = ga.optimize()
        >>> client.close()

        """
        if not isinstance(client, Client):
            msg = "client must be a dask.distributed.Client instance"
            raise TypeError(msg)

        logger.info("Creating genetic algorithm in distributed mode")

        # Check forcing and distribute if necessary (modify in-place)
        if isinstance(cost_function.forcing, Future):
            logger.info("Forcing already distributed (Future detected). Using existing Future.")
        else:
            logger.info("Distributing forcing to Dask workers with broadcast=True...")
            cost_function.forcing = client.scatter(cost_function.forcing, broadcast=True)

        # Check and distribute observations dict (modify in-place).
        # Reassigning values while iterating items() is safe: the key set is unchanged.
        for name, obs in cost_function.observations.items():
            if isinstance(obs, Future):
                logger.info("Observation '%s' already distributed (Future detected). Using existing Future.", name)
            else:
                logger.info("Distributing observation '%s' to Dask workers with broadcast=True...", name)
                # Distribute the entire observation object
                cost_function.observations[name] = client.scatter(obs, broadcast=True)

        # Create distributed evaluation strategy with explicit client
        evaluation_strategy = DistributedEvaluation(cost_function, client)

        # Create and return GA instance
        return GeneticAlgorithm(
            meta_parameter=meta_parameter,
            cost_function=cost_function,
            evaluation_strategy=evaluation_strategy,
            **kwargs,
        )

create_sequential(meta_parameter, cost_function, **kwargs) staticmethod

Create a GA in sequential mode.

Simplest evaluation mode, suitable for small populations or situations where parallelization is not necessary.

Parameters

meta_parameter : GeneticAlgorithmParameters Genetic algorithm parameters cost_function : CostFunctionProtocol Cost function to optimize **kwargs Additional arguments for GeneticAlgorithm

Returns

GeneticAlgorithm Instance configured in sequential mode

Examples

ga = GeneticAlgorithmFactory.create_sequential(meta_params, cost_function) results = ga.optimize()

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/factory.py
@staticmethod
def create_sequential(
    meta_parameter: GeneticAlgorithmParameters, cost_function: CostFunctionProtocol, **kwargs: Any
) -> GeneticAlgorithm:
    """Create a GA configured for sequential evaluation.

    This is the simplest evaluation mode, suitable for small populations or
    situations where parallelization is not necessary.

    Parameters
    ----------
    meta_parameter : GeneticAlgorithmParameters
        Genetic algorithm parameters.
    cost_function : CostFunctionProtocol
        Cost function to optimize.
    **kwargs
        Additional arguments forwarded to GeneticAlgorithm.

    Returns
    -------
    GeneticAlgorithm
        Instance configured in sequential mode.

    Examples
    --------
    >>> ga = GeneticAlgorithmFactory.create_sequential(meta_params, cost_function)
    >>> results = ga.optimize()

    """
    logger.info("Creating genetic algorithm in sequential mode")

    strategy = SequentialEvaluation(cost_function)
    return GeneticAlgorithm(
        meta_parameter=meta_parameter,
        cost_function=cost_function,
        evaluation_strategy=strategy,
        **kwargs,
    )

create_parallel(meta_parameter, cost_function, n_jobs=-1, **kwargs) staticmethod

Create a GA in parallel mode using multiprocessing.

Uses ProcessPoolExecutor to evaluate individuals across multiple CPU cores for improved performance.

Parameters

meta_parameter : GeneticAlgorithmParameters Genetic algorithm parameters cost_function : CostFunctionProtocol Cost function to optimize n_jobs : int, default=-1 Number of parallel jobs. If -1, use all available CPUs **kwargs Additional arguments for GeneticAlgorithm

Returns

GeneticAlgorithm Instance configured in parallel mode

Examples

ga = GeneticAlgorithmFactory.create_parallel(meta_params, cost_function, n_jobs=4) results = ga.optimize()

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/factory.py
@staticmethod
def create_parallel(
    meta_parameter: GeneticAlgorithmParameters, cost_function: CostFunctionProtocol, n_jobs: int = -1, **kwargs: Any
) -> GeneticAlgorithm:
    """Create a GA configured for multiprocessing-based evaluation.

    Individuals are evaluated across multiple CPU cores via a
    ProcessPoolExecutor for improved performance.

    Parameters
    ----------
    meta_parameter : GeneticAlgorithmParameters
        Genetic algorithm parameters.
    cost_function : CostFunctionProtocol
        Cost function to optimize.
    n_jobs : int, default=-1
        Number of parallel jobs. If -1, use all available CPUs.
    **kwargs
        Additional arguments forwarded to GeneticAlgorithm.

    Returns
    -------
    GeneticAlgorithm
        Instance configured in parallel mode.

    Examples
    --------
    >>> ga = GeneticAlgorithmFactory.create_parallel(meta_params, cost_function, n_jobs=4)
    >>> results = ga.optimize()

    """
    logger.info("Creating genetic algorithm in parallel mode with %d jobs", n_jobs)

    strategy = ParallelEvaluation(cost_function, n_jobs=n_jobs)
    return GeneticAlgorithm(
        meta_parameter=meta_parameter,
        cost_function=cost_function,
        evaluation_strategy=strategy,
        **kwargs,
    )

create_distributed(meta_parameter, cost_function, client, **kwargs) staticmethod

Create a GA in distributed mode with Dask.

Automatically detects if data is already distributed (Futures) and distributes if necessary. Uses Dask client.map() with distributed data to evaluate individuals across multiple workers efficiently.

WARNING: This method modifies the cost_function in-place by replacing forcing and observations data with Dask Futures.

Parameters

meta_parameter : GeneticAlgorithmParameters Genetic algorithm parameters cost_function : CostFunctionProtocol Cost function to optimize (will be modified in-place) client : Client Dask client for distributed computing **kwargs Additional arguments for GeneticAlgorithm

Returns

GeneticAlgorithm GA instance configured for distributed execution

Raises

TypeError If client is not a Dask Client instance

Examples

from dask.distributed import Client client = Client() ga = GeneticAlgorithmFactory.create_distributed( ... meta_params, cost_function, client ... ) results = ga.optimize() client.close()

Source code in packages/seapopym-optimization/src/seapopym_optimization/algorithm/genetic_algorithm/factory.py
@staticmethod
def create_distributed(
    meta_parameter: GeneticAlgorithmParameters,
    cost_function: CostFunctionProtocol,
    client: Client,
    **kwargs: Any,
) -> GeneticAlgorithm:
    """Create a GA in distributed mode with Dask.

    Data already held as Dask Futures is reused as-is; anything else is
    scattered to the workers with broadcast=True, so every worker can evaluate
    individuals without re-transferring forcing or observation data.

    WARNING: the cost_function is modified in-place -- its forcing and
    observations entries are replaced by Dask Futures.

    Parameters
    ----------
    meta_parameter : GeneticAlgorithmParameters
        Genetic algorithm parameters
    cost_function : CostFunctionProtocol
        Cost function to optimize (will be modified in-place)
    client : Client
        Dask client for distributed computing
    **kwargs
        Additional arguments for GeneticAlgorithm

    Returns
    -------
    GeneticAlgorithm
        GA instance configured for distributed execution

    Raises
    ------
    TypeError
        If client is not a Dask Client instance

    Examples
    --------
    >>> from dask.distributed import Client
    >>> client = Client()
    >>> ga = GeneticAlgorithmFactory.create_distributed(
    ...     meta_params, cost_function, client
    ... )
    >>> results = ga.optimize()
    >>> client.close()

    """
    if not isinstance(client, Client):
        msg = "client must be a dask.distributed.Client instance"
        raise TypeError(msg)

    logger.info("Creating genetic algorithm in distributed mode")

    # Scatter the forcing unless it already lives on the cluster as a Future.
    if not isinstance(cost_function.forcing, Future):
        logger.info("Distributing forcing to Dask workers with broadcast=True...")
        cost_function.forcing = client.scatter(cost_function.forcing, broadcast=True)
    else:
        logger.info("Forcing already distributed (Future detected). Using existing Future.")

    # Scatter each observation in turn unless it is already a Future.
    # Reassigning an existing key while iterating items() is safe (no resize).
    for name, obs in cost_function.observations.items():
        if not isinstance(obs, Future):
            logger.info("Distributing observation '%s' to Dask workers with broadcast=True...", name)
            cost_function.observations[name] = client.scatter(obs, broadcast=True)
        else:
            logger.info("Observation '%s' already distributed (Future detected). Using existing Future.", name)

    # Assemble the GA with a distributed evaluation strategy bound to this client.
    return GeneticAlgorithm(
        meta_parameter=meta_parameter,
        cost_function=cost_function,
        evaluation_strategy=DistributedEvaluation(cost_function, client),
        **kwargs,
    )

Functional Groups

Parameters

seapopym_optimization.functional_group.base_functional_group

A module that contains the base class for functional group declaration and parameter management in the optimization process.

Parameter dataclass

The definition of a parameter to optimize.

Parameters

name: str The name of the parameter. lower_bound: float The lower bound of the parameter. upper_bound: float The upper bound of the parameter. init_method: Callable[[float, float], float], optional The method used to get the initial value of a parameter. Default is a random uniform distribution that excludes the bound values.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
@dataclass
class Parameter:
    """The definition of a parameter to optimize.

    Parameters
    ----------
    name: str
        The name of the parameter.
    lower_bound: float
        The lower bound of the parameter. Must be strictly less than `upper_bound`.
    upper_bound: float
        The upper bound of the parameter.
    init_method: Callable[[float, float], float]
        The method used to get the initial value of a parameter, called with
        (lower_bound, upper_bound).
        NOTE(review): this field is required -- older docs mention a default
        random-uniform init, but no default is defined here.

    """

    name: str
    lower_bound: float
    upper_bound: float
    init_method: Callable[[float, float], float]

    def __post_init__(self: Parameter) -> None:
        """Check that the parameter bounds are correctly ordered."""
        # Equal bounds are rejected too, so the message says "strictly less than"
        # (the previous message incorrectly claimed "<=").
        if self.lower_bound >= self.upper_bound:
            msg = f"Lower bound ({self.lower_bound}) must be strictly less than upper bound ({self.upper_bound})."
            raise ValueError(msg)

__post_init__()

Check that the parameter is correctly defined.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def __post_init__(self: Parameter) -> None:
    """Check that the parameter bounds are correctly ordered."""
    # Equal bounds are rejected too, so the message says "strictly less than"
    # (the previous message incorrectly claimed "<=").
    if self.lower_bound >= self.upper_bound:
        msg = f"Lower bound ({self.lower_bound}) must be strictly less than upper bound ({self.upper_bound})."
        raise ValueError(msg)

AbstractFunctionalGroup dataclass

Bases: ABC

The Generic structure used to store the parameters of a functional group as used in SeapoPym.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
@dataclass
class AbstractFunctionalGroup(ABC):
    """The Generic structure used to store the parameters of a functional group as used in SeapoPym."""

    name: str

    @property
    def parameters(self: AbstractFunctionalGroup) -> tuple:
        """Return the parameters representing the functional group. Order of declaration is preserved."""
        # Every dataclass field except `name` is a parameter; declaration order is kept.
        return tuple(getattr(self, f.name) for f in fields(self) if f.name != "name")

    def as_dict(self: AbstractFunctionalGroup) -> dict:
        """Return the functional group as a dictionary with parameter names as keys (without functional group name)."""
        mapping = {}
        for f in fields(self):
            if f.name == "name":
                continue
            mapping[f.name] = getattr(self, f.name)
        return mapping

    def get_parameters_to_optimize(self: AbstractFunctionalGroup) -> Sequence[Parameter]:
        """Return the parameters to optimize as a sequence of `Parameter`."""
        # Only `Parameter` instances are free; plain values are fixed constants.
        return tuple(p for p in self.parameters if isinstance(p, Parameter))

parameters property

Return the parameters representing the functional group. Order of declaration is preserved.

as_dict()

Return the functional group as a dictionary with parameter names as keys (without functional group name).

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def as_dict(self: AbstractFunctionalGroup) -> dict:
    """Return the functional group as a dictionary with parameter names as keys (without functional group name)."""
    mapping = {}
    for f in fields(self):
        if f.name != "name":
            mapping[f.name] = getattr(self, f.name)
    return mapping

get_parameters_to_optimize()

Return the parameters to optimize as a sequence of Parameter.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def get_parameters_to_optimize(self: AbstractFunctionalGroup) -> Sequence[Parameter]:
    """Return the parameters to optimize as a sequence of `Parameter`."""
    # Only `Parameter` instances are kept; fixed float values are filtered out.
    selected = [p for p in self.parameters if isinstance(p, Parameter)]
    return tuple(selected)

FunctionalGroupSet dataclass

The structure used to generate the matrix of all parameters for all functional groups.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
@dataclass
class FunctionalGroupSet[T: AbstractFunctionalGroup]:
    """The structure used to generate the matrix of all parameters for all functional groups."""

    functional_groups: Sequence[T]

    def __post_init__(self: FunctionalGroupSet) -> None:
        """Check that the functional groups are correctly typed."""
        if not all(isinstance(group, AbstractFunctionalGroup) for group in self.functional_groups):
            msg = "All functional groups must be instances of AbstractFunctionalGroup."
            raise TypeError(msg)

    def functional_groups_name(self: FunctionalGroupSet) -> Sequence[str]:
        """Return the ordered list of the functional groups name."""
        return tuple(group.name for group in self.functional_groups)

    def unique_functional_groups_parameters_ordered(self: FunctionalGroupSet) -> dict[str, Parameter]:
        """Return the unique optimized parameters of all functional groups in the order of declaration.

        Used to setup toolbox for optimization algorithms.
        """
        all_param = tuple(chain.from_iterable(group.get_parameters_to_optimize() for group in self.functional_groups))
        unique_params = {}
        for param in all_param:
            if param.name not in unique_params:
                unique_params[param.name] = param
        return unique_params

    def generate(self: FunctionalGroupSet, x: Sequence[float]) -> list[T]:
        """Generate a list of dictionaries representing the functional groups with their parameters values.

        The order of the parameters is defined by the `unique_functional_groups_parameters_ordered` method.
        The input `x` should match the order of the parameters returned by that method.
        It is used by the `configuration_generator` to generate the model.

        Parameters
        ----------
        x: Sequence[float]
            A sequence of float values representing the parameters to set for each functional group.

        Returns
        -------
        list[AbstractFunctionalGroup]
            A list of functional groups with their parameters and their corresponding values.

        """
        keys = list(self.unique_functional_groups_parameters_ordered().keys())

        try:
            parameters_values = dict(zip(keys, x, strict=True))
        except ValueError as e:
            msg = (
                f"Cost function parameters {x} do not match the expected parameters {keys}. "
                "Please check your parameters definition."
            )
            raise ValueError(msg) from e

        result = []
        for group in self.functional_groups:
            param_names = list(group.as_dict().keys())
            param_values = [
                parameters_values.get(param.name, np.nan) if isinstance(param, Parameter) else param
                for param in group.parameters
            ]
            # Create dictionary with updated parameter values and preserve the name
            group_dict = dict(zip(param_names, param_values, strict=True))
            group_dict["name"] = group.name
            # Use type(group) instead of T to instantiate the concrete class
            result.append(type(group)(**group_dict))
        return result

__post_init__()

Check that the functional groups are correctly typed.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def __post_init__(self: FunctionalGroupSet) -> None:
    """Check that the functional groups are correctly typed."""
    for group in self.functional_groups:
        if not isinstance(group, AbstractFunctionalGroup):
            msg = "All functional groups must be instances of AbstractFunctionalGroup."
            raise TypeError(msg)

functional_groups_name()

Return the ordered list of the functional groups name.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def functional_groups_name(self: FunctionalGroupSet) -> Sequence[str]:
    """Return the ordered list of the functional groups name."""
    names = [group.name for group in self.functional_groups]
    return tuple(names)

unique_functional_groups_parameters_ordered()

Return the unique optimized parameters of all functional groups in the order of declaration.

Used to setup toolbox for optimization algorithms.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def unique_functional_groups_parameters_ordered(self: FunctionalGroupSet) -> dict[str, Parameter]:
    """Return the unique optimized parameters of all functional groups in the order of declaration.

    Used to setup toolbox for optimization algorithms.
    """
    # First occurrence of each parameter name wins; dict preserves insertion order.
    unique_params: dict[str, Parameter] = {}
    for group in self.functional_groups:
        for param in group.get_parameters_to_optimize():
            unique_params.setdefault(param.name, param)
    return unique_params

generate(x)

Generate a list of dictionaries representing the functional groups with their parameters values.

The order of the parameters is defined by the unique_functional_groups_parameters_ordered method. The input x should match the order of the parameters returned by that method. It is used by the configuration_generator to generate the model.

Parameters

x: Sequence[float] A sequence of float values representing the parameters to set for each functional group.

Returns

list[AbstractFunctionalGroup] A list of functional groups with their parameters and their corresponding values.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/base_functional_group.py
def generate(self: FunctionalGroupSet, x: Sequence[float]) -> list[T]:
    """Generate a list of dictionaries representing the functional groups with their parameters values.

    The order of the parameters is defined by the `unique_functional_groups_parameters_ordered` method.
    The input `x` should match the order of the parameters returned by that method.
    It is used by the `configuration_generator` to generate the model.

    Parameters
    ----------
    x: Sequence[float]
        A sequence of float values representing the parameters to set for each functional group.

    Returns
    -------
    list[AbstractFunctionalGroup]
        A list of functional groups with their parameters and their corresponding values.

    """
    # Ordered names of the unique optimized parameters; `x` must follow this order.
    keys = list(self.unique_functional_groups_parameters_ordered().keys())

    try:
        # strict=True raises ValueError when len(x) != len(keys).
        parameters_values = dict(zip(keys, x, strict=True))
    except ValueError as e:
        msg = (
            f"Cost function parameters {x} do not match the expected parameters {keys}. "
            "Please check your parameters definition."
        )
        raise ValueError(msg) from e

    result = []
    for group in self.functional_groups:
        param_names = list(group.as_dict().keys())
        # Fixed (non-Parameter) values are kept as-is; optimized `Parameter` fields
        # are replaced by their value from `x` (np.nan if the name is absent).
        param_values = [
            parameters_values.get(param.name, np.nan) if isinstance(param, Parameter) else param
            for param in group.parameters
        ]
        # Create dictionary with updated parameter values and preserve the name
        group_dict = dict(zip(param_names, param_values, strict=True))
        group_dict["name"] = group.name
        # Use type(group) instead of T to instantiate the concrete class
        result.append(type(group)(**group_dict))
    return result

NoTransport Implementation

seapopym_optimization.functional_group.no_transport_functional_groups

This module contains the functional group definitions used by the SeapoPym NoTransport model.

NoTransportFunctionalGroup dataclass

Bases: AbstractFunctionalGroup

The parameters of a functional group as they are defined in the SeapoPym NoTransport model.

Source code in packages/seapopym-optimization/src/seapopym_optimization/functional_group/no_transport_functional_groups.py
@dataclass
class NoTransportFunctionalGroup(AbstractFunctionalGroup):
    """The parameters of a functional group as they are defined in the SeapoPym NoTransport model."""

    # Each field is either a fixed float value or a `Parameter` left free for optimization.
    day_layer: float | Parameter  # vertical layer occupied during the day
    night_layer: float | Parameter  # vertical layer occupied during the night
    energy_transfert: float | Parameter  # NOTE(review): energy transfer coefficient -- confirm units
    lambda_temperature_0: float | Parameter  # presumably base rate at reference temperature -- TODO confirm
    gamma_lambda_temperature: float | Parameter  # presumably temperature sensitivity of lambda -- TODO confirm
    tr_0: float | Parameter  # presumably base transfer/recruitment time -- TODO confirm
    gamma_tr: float | Parameter  # presumably temperature sensitivity of tr_0 -- TODO confirm

Cost Function

Core

seapopym_optimization.cost_function.cost_function

This module contains the cost function used to optimize the parameters of the SeapoPym model.

CostFunction dataclass

The cost function generator for SeapoPym models.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/cost_function.py
@dataclass(kw_only=True)
class CostFunction:
    """The cost function generator for SeapoPym models."""

    # TODO(Jules): We can gather configuration generators and functional groups in a single object later if needed.
    configuration_generator: ConfigurationGeneratorProtocol
    functional_groups: FunctionalGroupSet[AbstractFunctionalGroup]
    forcing: ForcingParameterProtocol
    kernel: KernelParameterProtocol
    observations: dict[str, ObservationProtocol] | Sequence[ObservationProtocol]  # Dict or Sequence of observations
    processor: AbstractScoreProcessor  # Processor for computing scores from state and observations

    def __post_init__(self: CostFunction) -> None:
        """Convert observations to dict if needed and validate observation names match dictionary keys."""
        # Convert Sequence to dict if needed
        if not isinstance(self.observations, dict):
            self.observations = {obs.name: obs for obs in self.observations}

        # Validate that observation names match dictionary keys
        for name, obs in self.observations.items():
            if obs.name != name:
                msg = f"Observation name mismatch: key='{name}', obs.name='{obs.name}'"
                raise ValueError(msg)

    # NOTE(Jules): Forcing and observations must be passed as parameter of the cost function to be used with Dask
    # and scattered to workers. They cannot be attributes of the class.
    def _cost_function(
        self: CostFunction,
        args: np.ndarray,
        forcing: ForcingParameterProtocol,
        observations: dict[str, ObservationProtocol],
    ) -> tuple:
        """Evaluate the cost function for given parameters.

        Parameters
        ----------
        args : np.ndarray
            Individual parameters to evaluate
        forcing : ForcingParameterProtocol
            Forcing parameters (resolved from Future if distributed)
        observations : dict[str, ObservationProtocol]
            Dictionary of observations (resolved from Futures if distributed)

        Returns
        -------
        tuple
            Fitness values for each observation (in dict order)

        """
        # Build the full model configuration for this candidate parameter vector.
        configuration = self.configuration_generator.generate(
            functional_group_parameters=self.functional_groups.generate(args),
            forcing_parameters=forcing,
            kernel=self.kernel,
        )

        # Create model from configuration and run it
        # (context manager ensures the model resources are released after the run)
        with self.configuration_generator.model_class.from_configuration(configuration) as model:
            model.run()
            state = model.state

            # Compute score for each observation (dict preserves insertion order in Python 3.7+)
            return tuple(self.processor.process(state, obs) for obs in observations.values())

    def get_evaluator(self: CostFunction) -> Callable[..., tuple[Number, ...]]:
        """Return the evaluation function to be called on workers.

        This method is used by distributed evaluation strategies to obtain
        the core evaluation function without captured parameters.

        Returns
        -------
        Callable[..., tuple[Number, ...]]
            Function that takes (args, forcing, observations) and returns a tuple of fitness values

        Examples
        --------
        >>> evaluator = cost_function.get_evaluator()
        >>> fitness = evaluator(args, forcing_data, observations_data)

        """
        return self._cost_function

    def get_distributed_parameters(self: CostFunction) -> dict[str, Any]:
        """Return parameters that should be distributed to workers as a dictionary.

        Dask will automatically resolve any Futures contained in this dictionary
        when it's passed as an argument to client.map().

        Returns
        -------
        dict[str, Any]
            Dictionary with keys:
            - 'forcing': ForcingParameter or Future
            - 'observations': Dict of observations {name: observation} or {name: Future}

        Notes
        -----
        If you subclass CostFunction and add new distributed parameters,
        override this method to include them in the returned dictionary.

        Examples
        --------
        >>> params = cost_function.get_distributed_parameters()
        >>> params['forcing']
        <ForcingParameter or Future>
        >>> params['observations']
        {'Biomass': <TimeSeriesObservation or Future>, ...}

        See Also
        --------
        get_evaluator : Get the evaluation function to use with these parameters

        """
        return {
            "forcing": self.forcing,
            "observations": self.observations,
        }

__post_init__()

Convert observations to dict if needed and validate observation names match dictionary keys.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/cost_function.py
def __post_init__(self: CostFunction) -> None:
    """Convert observations to dict if needed and validate observation names match dictionary keys."""
    # Normalize: a sequence of observations becomes a dict keyed by observation name.
    if not isinstance(self.observations, dict):
        self.observations = {obs.name: obs for obs in self.observations}

    # Each key must agree with the observation's own name attribute.
    for key, obs in self.observations.items():
        if obs.name == key:
            continue
        msg = f"Observation name mismatch: key='{key}', obs.name='{obs.name}'"
        raise ValueError(msg)

get_evaluator()

Return the evaluation function to be called on workers.

This method is used by distributed evaluation strategies to obtain the core evaluation function without captured parameters.

Returns

Callable[..., tuple[Number, ...]] Function that takes (args, forcing, observations) and returns a tuple of fitness values

Examples

evaluator = cost_function.get_evaluator() fitness = evaluator(args, forcing_data, observations_data)

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/cost_function.py
def get_evaluator(self: CostFunction) -> Callable[..., tuple[Number, ...]]:
    """Return the evaluation function to be called on workers.

    Distributed evaluation strategies use this to obtain the core evaluation
    function without any captured parameters.

    Returns
    -------
    Callable[..., tuple[Number, ...]]
        Function that takes (args, forcing, observations) and returns a tuple of fitness values

    Examples
    --------
    >>> evaluator = cost_function.get_evaluator()
    >>> fitness = evaluator(args, forcing_data, observations_data)

    """
    evaluator = self._cost_function
    return evaluator

get_distributed_parameters()

Return parameters that should be distributed to workers as a dictionary.

Dask will automatically resolve any Futures contained in this dictionary when it's passed as an argument to client.map().

Returns

dict[str, Any] Dictionary with keys: - 'forcing': ForcingParameter or Future - 'observations': Dict of observations {name: observation} or {name: Future}

Notes

If you subclass CostFunction and add new distributed parameters, override this method to include them in the returned dictionary.

Examples

params = cost_function.get_distributed_parameters() params['forcing'] params['observations']

See Also

get_evaluator : Get the evaluation function to use with these parameters

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/cost_function.py
def get_distributed_parameters(self: CostFunction) -> dict[str, Any]:
    """Return parameters that should be distributed to workers as a dictionary.

    Dask will automatically resolve any Futures contained in this dictionary
    when it's passed as an argument to client.map().

    Returns
    -------
    dict[str, Any]
        Dictionary with keys:
        - 'forcing': ForcingParameter or Future
        - 'observations': Dict of observations {name: observation} or {name: Future}

    Notes
    -----
    If you subclass CostFunction and add new distributed parameters,
    override this method to include them in the returned dictionary.

    Examples
    --------
    >>> params = cost_function.get_distributed_parameters()
    >>> params['forcing']
    <ForcingParameter or Future>
    >>> params['observations']
    {'Biomass': <TimeSeriesObservation or Future>, ...}

    See Also
    --------
    get_evaluator : Get the evaluation function to use with these parameters

    """
    distributed: dict[str, Any] = {}
    distributed["forcing"] = self.forcing
    distributed["observations"] = self.observations
    return distributed

Processors

seapopym_optimization.cost_function.processor

Observation processing components for cost function evaluation.

AbstractScoreProcessor

Bases: ABC

Abstract class for processing model state and observations to return a score.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
class AbstractScoreProcessor(ABC):
    """Abstract class for processing model state and observations to return a score."""

    def __init__(self, comparator: MetricProtocol[xr.DataArray, ObservationProtocol]) -> None:
        """Initialize with a comparator metric."""
        # The comparator is the metric callable applied by subclasses in `process`.
        self.comparator = comparator

    @abstractmethod
    def process(self, state: SeapopymState, observation: ObservationProtocol) -> Number:
        """Process model state and observation to return a score."""
        # Concrete processors compare a prediction derived from `state` with
        # `observation` and return a single numeric score.

__init__(comparator)

Initialize with a comparator metric.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def __init__(self, comparator: MetricProtocol[xr.DataArray, ObservationProtocol]) -> None:
    """Initialize with a comparator metric."""
    # Store the metric callable; subclasses apply it in `process`.
    self.comparator = comparator

process(state, observation) abstractmethod

Process model state and observation to return a score.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
@abstractmethod
def process(self, state: SeapopymState, observation: ObservationProtocol) -> Number:
    """Process model state and observation to return a score."""
    # Abstract: concrete processors compare the state-derived prediction with
    # the observation and return a single numeric score.

TimeSeriesScoreProcessor

Bases: AbstractScoreProcessor

Processes observations in time series format by applying preprocessing and comparison metrics.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
class TimeSeriesScoreProcessor(AbstractScoreProcessor):
    """Processes observations in time series format by applying preprocessing and comparison metrics."""

    def __init__(
        self,
        comparator: MetricProtocol[xr.DataArray, ObservationProtocol],
        preprocess: None | Literal["resample", "interpolate"] = None,
    ) -> None:
        """Initialize with a comparator metric.

        Parameters
        ----------
        comparator: MetricProtocol
            Metric used to compare the formatted prediction with the observation.
        preprocess: None | Literal["resample", "interpolate"]
            Optional temporal alignment: "resample" averages the prediction over
            the observation interval; "interpolate" additionally fills the
            resulting gaps along the time dimension.

        """
        super().__init__(comparator)
        self.preprocess = preprocess

    def _extract_observation_type(
        self: TimeSeriesScoreProcessor, state: SeapopymState, observation_type: DayCycle
    ) -> Sequence[int]:
        """Extract functional group positions based on observation type."""
        if observation_type is DayCycle.DAY:
            return state[ConfigurationLabels.day_layer]
        if observation_type is DayCycle.NIGHT:
            return state[ConfigurationLabels.night_layer]
        msg = f"Unknown observation type: {observation_type}"
        raise ValueError(msg)

    def _format_prediction(
        self: TimeSeriesScoreProcessor,
        prediction: xr.DataArray,
        observation: TimeSeriesObservation,
        fg_positions: Sequence[int],
    ) -> xr.DataArray:
        """Ensure prediction has the correct dimensions."""
        # "interpolate" implies "resample": both modes first average the
        # prediction over the observation interval.
        if self.preprocess in ("resample", "interpolate"):
            prediction = prediction.resample({CoordinatesLabels.time: observation.observation_interval}).mean()
            msg = "Prediction resampled to match observation interval."
            logger.debug(msg)

        if self.preprocess == "interpolate":
            # Interpolate prediction outputs to match observation interval.
            # (was a no-op bare string statement, now a real comment)
            prediction = prediction.interpolate_na(dim=CoordinatesLabels.time)
            msg = "Interpolate prediction interval to match observation interval."
            logger.info(msg)

        # Align the prediction onto the observation's coordinates.
        return prediction.sel(
            {
                CoordinatesLabels.functional_group: fg_positions,
                CoordinatesLabels.time: observation.observation[CoordinatesLabels.time],
                CoordinatesLabels.X: observation.observation[CoordinatesLabels.X],
                CoordinatesLabels.Y: observation.observation[CoordinatesLabels.Y],
            },
        )

    def _pre_process_prediction(self, state: SeapopymState, observation: TimeSeriesObservation) -> xr.DataArray:
        """Pre-process prediction to match observation dimensions."""
        fg_positions = self._extract_observation_type(state, observation.observation_type)
        prediction = state[ForcingLabels.biomass]
        # Convert the prediction to the observation's units before comparison.
        prediction = prediction.pint.quantify().pint.to(observation.observation.units).pint.dequantify()
        prediction = self._format_prediction(prediction, observation, fg_positions)

        # Sum over functional_group dimension, squeeze size-1 dimensions
        summed = prediction.sum(CoordinatesLabels.functional_group)
        return summed.squeeze()

    def process(self, state: SeapopymState, observation: TimeSeriesObservation) -> Number:
        """Compare prediction with observation by applying the comparator. Can pre-process data if needed."""
        prediction = self._pre_process_prediction(state, observation)
        return self.comparator(prediction, observation.observation)

__init__(comparator, preprocess=None)

Initialize with a comparator metric.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def __init__(
    self,
    comparator: MetricProtocol[xr.DataArray, ObservationProtocol],
    preprocess: None | Literal["resample", "interpolate"] = None,
) -> None:
    """Initialize with a comparator metric."""
    # preprocess selects optional temporal alignment: "resample" averages the
    # prediction over the observation interval; "interpolate" also fills gaps.
    super().__init__(comparator)
    self.preprocess = preprocess

process(state, observation)

Compare prediction with observation by applying the comparator. Can pre-process data if needed.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def process(self, state: SeapopymState, observation: TimeSeriesObservation) -> Number:
    """Compare prediction with observation by applying the comparator. Can pre-process data if needed."""
    formatted = self._pre_process_prediction(state, observation)
    score = self.comparator(formatted, observation.observation)
    return score

LogTimeSeriesScoreProcessor

Bases: TimeSeriesScoreProcessor

Processes observations in time series format by applying log preprocessing and comparison metrics.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
class LogTimeSeriesScoreProcessor(TimeSeriesScoreProcessor):
    """Processes observations in time series format by applying log preprocessing and comparison metrics.

    Log10(1 + biomass) is applied to both prediction and observation to avoid
    negative values. Observation values must be in mgC/m2.
    (This note was previously a stray bare-string statement after the class
    docstring -- a no-op at runtime, invisible to documentation tools.)
    """

    def process(self, state: SeapopymState, observation: TimeSeriesObservation) -> Number:
        """Compare log prediction with log observation by applying the comparator. Can pre-process data if needed."""
        prediction = self._pre_process_prediction(state, observation)
        # log10(1 + x) keeps zero values finite while compressing large magnitudes.
        return self.comparator(xr.ufuncs.log10(1 + prediction), xr.ufuncs.log10(1 + observation.observation))

process(state, observation)

Compare log prediction with log observation by applying the comparator. Can pre-process data if needed.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def process(self, state: SeapopymState, observation: TimeSeriesObservation) -> Number:
    """Apply the comparator to the log-transformed prediction and observation."""
    prediction = self._pre_process_prediction(state, observation)
    log_prediction = xr.ufuncs.log10(1 + prediction)
    log_observation = xr.ufuncs.log10(1 + observation.observation)
    return self.comparator(log_prediction, log_observation)

SpatialScoreProcessor

Bases: AbstractScoreProcessor

Processes observations in spatial format by applying comparison metrics.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
class SpatialScoreProcessor(AbstractScoreProcessor):
    """Processes observations in spatial format by applying comparison metrics."""

    def _extract_observation_type(
        self: SpatialScoreProcessor, state: SeapopymState, observation_type: DayCycle
    ) -> Sequence[int]:
        """Return the layer position of each functional group for the requested day-cycle phase."""
        if observation_type is DayCycle.NIGHT:
            return state[ConfigurationLabels.night_layer]
        if observation_type is DayCycle.DAY:
            return state[ConfigurationLabels.day_layer]
        msg = f"Unknown observation type: {observation_type}"
        raise ValueError(msg)

    def _pre_process_prediction(self, state: SeapopymState, observation: SpatialObservation) -> xr.DataArray:
        """Pre-process prediction to match observation dimensions."""
        group_layers = self._extract_observation_type(state, observation.observation_type)

        # Convert the predicted biomass to the observation's units.
        biomass = state[ForcingLabels.biomass]
        biomass = biomass.pint.quantify().pint.to(observation.observation.units).pint.dequantify()

        # Nearest-neighbour selection of the observed points.
        # We assume the observation carries time, X and Y coordinates.
        selection = {
            CoordinatesLabels.functional_group: group_layers,
            CoordinatesLabels.time: observation.observation[CoordinatesLabels.time],
            CoordinatesLabels.X: observation.observation[CoordinatesLabels.X],
            CoordinatesLabels.Y: observation.observation[CoordinatesLabels.Y],
        }
        selected = biomass.sel(selection, method="nearest")

        # Aggregate the functional groups, then drop size-1 dimensions.
        return selected.sum(CoordinatesLabels.functional_group).squeeze()

    def process(self, state: SeapopymState, observation: SpatialObservation) -> Number:
        """Compare prediction with observation by applying the comparator."""
        return self.comparator(self._pre_process_prediction(state, observation), observation.observation)

process(state, observation)

Compare prediction with observation by applying the comparator.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def process(self, state: SeapopymState, observation: SpatialObservation) -> Number:
    """Score the spatially-matched prediction against the observation."""
    matched_prediction = self._pre_process_prediction(state, observation)
    return self.comparator(matched_prediction, observation.observation)

aggregate_biomass_by_layer(data, position, name, layer_coordinates, layer_coordinates_name='layer')

Aggregate biomass data by layer coordinates.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/processor.py
def aggregate_biomass_by_layer(
    data: xr.DataArray,
    position: Sequence[int],
    name: str,
    layer_coordinates: Sequence[int],
    layer_coordinates_name: str = "layer",
) -> xr.DataArray:
    """Aggregate biomass data by layer coordinates.

    Each functional group is tagged with the layer it occupies, groups sharing a
    layer are summed, and layers with no group are filled with zero biomass.
    """
    # Tag every functional group with its layer position.
    layer_tags = xr.DataArray(
        np.asarray(position),
        dims=[CoordinatesLabels.functional_group],
        coords={CoordinatesLabels.functional_group: data[CoordinatesLabels.functional_group].data},
        name=layer_coordinates_name,
        attrs={"axis": "Z"},
    )
    tagged = data.assign_coords({layer_coordinates_name: layer_tags})
    summed = tagged.groupby(layer_coordinates_name).sum(dim=CoordinatesLabels.functional_group)
    # Reindex on the full layer axis so absent layers appear with zero biomass.
    complete = summed.reindex({layer_coordinates_name: layer_coordinates}).fillna(0)
    return complete.rename(name)

Metrics

seapopym_optimization.cost_function.metric

Protocols and implementations for metrics to compare model outputs with observations.

MetricProtocol

Bases: Protocol

Protocol for comparing prediction data with observations.

All future metric functions should follow this protocol.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/metric.py
@runtime_checkable
class MetricProtocol[U, V](Protocol):
    """Protocol for comparing prediction data with observations.

    All future metric functions should follow this protocol.

    Type parameters:
    U -- type of the prediction (e.g. an xarray.DataArray in the processors).
    V -- type of the observation object.
    """

    def __call__(self, prediction: U, observation: V) -> Number:
        """Compare prediction to observation and return a score."""
        ...

__call__(prediction, observation)

Compare prediction to observation and return a score.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/metric.py
def __call__(self, prediction: U, observation: V) -> Number:
    """Compare prediction to observation and return a score.

    Protocol stub: concrete metrics provide the actual comparison logic.
    """
    ...

rmse_comparator(prediction, observation)

Calculate Root Mean Square Error (RMSE) between prediction and observation.

Parameters

prediction : ArrayLike Predicted values. observation : ArrayLike Observed values.

Returns

Number RMSE value.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/metric.py
def rmse_comparator(prediction: ArrayLike, observation: ArrayLike) -> Number:
    """Calculate Root Mean Square Error (RMSE) between prediction and observation.

    Parameters
    ----------
    prediction : ArrayLike
        Predicted values.
    observation : ArrayLike
        Observed values.

    Returns
    -------
    Number
        RMSE value.

    """
    squared_errors = (prediction - observation) ** 2
    return np.sqrt(np.mean(squared_errors))

nrmse_std_comparator(prediction, observation)

Calculate Normalized RMSE (by standard deviation) between prediction and observation.

The RMSE is divided by the standard deviation of the observation to provide a scale-invariant error metric.

Parameters

prediction : ArrayLike Predicted values. observation : ArrayLike Observed values.

Returns

Number Normalized RMSE value.

Source code in packages/seapopym-optimization/src/seapopym_optimization/cost_function/metric.py
def nrmse_std_comparator(prediction: ArrayLike, observation: ArrayLike) -> Number:
    """Calculate Normalized RMSE (by standard deviation) between prediction and observation.

    The RMSE is divided by the standard deviation of the observation to provide a scale-invariant
    error metric.

    Parameters
    ----------
    prediction : ArrayLike
        Predicted values.
    observation : ArrayLike
        Observed values.

    Returns
    -------
    Number
        Normalized RMSE value.

    """
    rmse = rmse_comparator(prediction, observation)
    normalisation = observation.std()
    return rmse / normalisation

Observations

Base

seapopym_optimization.observations.protocol

Protocol for observations used in cost functions.

ObservationProtocol

Bases: Protocol

Protocol for observations used in cost function evaluation.

Any observation object used in the optimization framework must implement this protocol. Observations represent empirical or reference data against which model predictions are compared.

Attributes

name : str Unique identifier or name of the observation. observation : object The observation data. The type and structure depend on the cost function processor (e.g., xarray.Dataset for time series or spatial data, numpy.ndarray for arrays).

Source code in packages/seapopym-optimization/src/seapopym_optimization/observations/protocol.py
@runtime_checkable
class ObservationProtocol(Protocol):
    """Structural type for observations consumed in cost function evaluation.

    Every observation object plugged into the optimization framework must expose these
    attributes. An observation carries the empirical or reference data that model
    predictions are scored against.

    Attributes
    ----------
    name : str
        Unique identifier or name of the observation.
    observation : object
        The observation data. Its concrete type and structure depend on the cost
        function processor (e.g., xarray.Dataset for time series or spatial data,
        numpy.ndarray for arrays).

    """

    name: str
    observation: object

Time Series

seapopym_optimization.observations.time_serie

This module contains the cost function used to optimize the parameters of the SeapoPym model.

TimeSeriesObservation dataclass

Bases: Observation

The structure used to store the observations as a time series.

Meaning that the observation is a time series of biomass values at a given location and layer.

Source code in packages/seapopym-optimization/src/seapopym_optimization/observations/time_serie.py
@dataclass(kw_only=True)
class TimeSeriesObservation(Observation):
    """Observation stored as a biomass time series.

    The data describes biomass values through time at a single location and layer.
    """

    def __post_init__(self: TimeSeriesObservation) -> None:
        """Check that the observation data is compliant with the format of the predicted biomass."""
        super().__post_init__()

        # A time series must be punctual in space: exactly one X, Y and Z value.
        for coord in (CoordinatesLabels.X, CoordinatesLabels.Y, CoordinatesLabels.Z):
            if self.observation.cf.coords[coord].data.size == 1:
                continue
            msg = (
                f"Multiple {coord} coordinates found in the observation Dataset. "
                "The observation must be a time series with a single X, Y and Z (i.e. Seapodym layer) coordinate."
            )
            raise ValueError(msg)

__post_init__()

Check that the observation data is compliant with the format of the predicted biomass.

Source code in packages/seapopym-optimization/src/seapopym_optimization/observations/time_serie.py
def __post_init__(self: TimeSeriesObservation) -> None:
    """Check that the observation data is compliant with the format of the predicted biomass.

    Raises
    ------
    ValueError
        If the observation spans more than one X, Y or Z coordinate.

    """
    super().__post_init__()

    # A time series is punctual in space: each spatial axis must hold exactly one value.
    for coord in [CoordinatesLabels.X, CoordinatesLabels.Y, CoordinatesLabels.Z]:
        if self.observation.cf.coords[coord].data.size != 1:
            msg = (
                f"Multiple {coord} coordinates found in the observation Dataset. "
                "The observation must be a time series with a single X, Y and Z (i.e. Seapodym layer) coordinate."
            )
            raise ValueError(msg)

Spatial

seapopym_optimization.observations.spatial

This module contains the cost function used to optimize the parameters of the SeapoPym model.

SpatialObservation dataclass

Bases: Observation

The structure used to store the observations as a spatial dataset.

Meaning that the observation is a set of biomass values at given locations and times. The observation data must be an xarray.DataArray with a single dimension (e.g. "index" or "obs_id") and coordinates for time, latitude, and longitude.

Source code in packages/seapopym-optimization/src/seapopym_optimization/observations/spatial.py
@dataclass(kw_only=True)
class SpatialObservation(Observation):
    """Observation stored as a spatial dataset.

    The data is a set of biomass values at given locations and times: an
    xarray.DataArray with a single dimension (e.g. "index" or "obs_id") and
    coordinates for time, latitude, and longitude.
    """

    def __post_init__(self: SpatialObservation) -> None:
        """Check that the observation data is compliant with the format of the predicted biomass."""
        super().__post_init__()

        # Every spatial observation must carry time, X, Y and Z coordinates.
        for coord in (CoordinatesLabels.time, CoordinatesLabels.X, CoordinatesLabels.Y, CoordinatesLabels.Z):
            if coord in self.observation.coords:
                continue
            msg = f"Coordinate {coord} must be in the observation DataArray."
            raise ValueError(msg)

__post_init__()

Check that the observation data is compliant with the format of the predicted biomass.

Source code in packages/seapopym-optimization/src/seapopym_optimization/observations/spatial.py
def __post_init__(self: SpatialObservation) -> None:
    """Check that the observation data is compliant with the format of the predicted biomass.

    Raises
    ------
    ValueError
        If any of the time, X, Y or Z coordinates is missing from the observation.

    """
    super().__post_init__()

    # Check for required coordinates
    required_coords = [CoordinatesLabels.time, CoordinatesLabels.X, CoordinatesLabels.Y, CoordinatesLabels.Z]
    for coord in required_coords:
        if coord not in self.observation.coords:
            msg = f"Coordinate {coord} must be in the observation DataArray."
            raise ValueError(msg)

Constraints

seapopym_optimization.constraint.energy_transfert_constraint

All the constraints (as penalty functions) used by the DEAP library to constrain parameter initialization.

EnergyCoefficientConstraint dataclass

Constraint to ensure that the sum of all energy transfer coefficients is within a specified range.

This constraint is used to apply a penalty if the sum of the coefficients is greater than max_energy_coef_value or less than min_energy_coef_value.

Attributes
parameters_name: Sequence[str]
    The names of the parameters that are involved in the constraint, typically the energy transfer
    coefficients.
min_energy_coef_value: float
    The minimum allowed value for the sum of the energy transfer coefficients.
max_energy_coef_value: float
    The maximum allowed value for the sum of the energy transfer coefficients.
Source code in packages/seapopym-optimization/src/seapopym_optimization/constraint/energy_transfert_constraint.py
@dataclass
class EnergyCoefficientConstraint:
    """Constraint to ensure that the sum of all energy transfer coefficients is within a specified range.

    This constraint is used to apply a penalty if the sum of the coefficients is greater than `max_energy_coef_value`
    or less than `min_energy_coef_value`.

    Attributes
    ----------
        parameters_name: Sequence[str]
            The names of the parameters that are involved in the constraint, typically the energy transfer
            coefficients.
        min_energy_coef_value: float
            The minimum allowed value for the sum of the energy transfer coefficients.
        max_energy_coef_value: float
            The maximum allowed value for the sum of the energy transfer coefficients.

    """

    parameters_name: Sequence[str]
    min_energy_coef_value: float
    max_energy_coef_value: float

    def _feasible(self, selected_index: list[int]) -> Callable[[Sequence[float]], bool]:
        """Check if the energy transfer sum is within allowed bounds.

        Validates that the sum of coefficients stays between the minimum and maximum defined values.
        """

        def feasible(individual: Sequence[float], min_coef: float, max_coef: float) -> bool:
            # Generator expression: no intermediate list is built.
            total_coef = sum(individual[i] for i in selected_index)
            return min_coef <= total_coef <= max_coef

        return partial(feasible, min_coef=self.min_energy_coef_value, max_coef=self.max_energy_coef_value)

    def generate(self, parameter_names: list[str]) -> "tools.DeltaPenalty":
        """Generate the DeltaPenalty object used by the DEAP library.

        Apply the penalty on individuals that do not satisfy the constraint.

        Raises
        ------
        ValueError
            If one of `parameters_name` is absent from `parameter_names` (via `list.index`).
        """

        def generate_index(ordered_names: list[str]) -> list[int]:
            """List the index of the `parameters_name` in the `ordered_names` sequence.

            This should be used by the feasible function to retrieve the position of the selected parameters.
            """
            return [ordered_names.index(param) for param in self.parameters_name]

        feasible = self._feasible(selected_index=generate_index(parameter_names))
        # delta=inf: an infeasible individual receives an infinitely bad fitness.
        return tools.DeltaPenalty(feasibility=feasible, delta=np.inf)

generate(parameter_names)

Generate the DeltaPenalty object used by the DEAP library.

Apply the penalty on individuals that do not satisfy the constraint.

Source code in packages/seapopym-optimization/src/seapopym_optimization/constraint/energy_transfert_constraint.py
def generate(self, parameter_names: list[str]) -> tools.DeltaPenalty:
    """Generate the DeltaPenalty object used by the DEAP library.

    Apply the penalty on individuals that do not satisfy the constraint.
    """
    # Position of each constrained parameter within the ordered name list;
    # the feasibility predicate uses these indices to pick the coefficients.
    selected_index = [parameter_names.index(param) for param in self.parameters_name]
    feasibility_check = self._feasible(selected_index=selected_index)
    return tools.DeltaPenalty(feasibility=feasibility_check, delta=np.inf)