Source code for paidiverpy.models.general_config

"""Configuration module."""

from pathlib import Path
from typing import Any
from typing import ClassVar
from typing import Literal
from pydantic import Field
from pydantic import model_validator
from paidiverpy.models.open_params import ImageOpenArgs
from paidiverpy.models.step_config import ConvertConfig
from paidiverpy.models.step_config import SamplingConfig
from paidiverpy.utils.base_model import BaseModel
from paidiverpy.utils.data import PaidiverpyData
from paidiverpy.utils.object_store import path_is_remote


class GeneralConfig(BaseModel):
    """General configuration class.

    This class is used to define the general configuration from the
    configuration file or from the input from the user.

    Validation happens in two stages:

    * ``validate_fields`` (before-validator) fills in the sample-data
      settings, normalises local paths to ``Path`` objects and turns the
      ``sampling`` / ``convert`` step dictionaries into their config models.
    * ``check_required_fields`` (after-validator) makes sure an output path
      and some image source (``sample_data`` or ``input_path``) are present.
    """

    name: str = Field("raw", description="Name of the first step (the step to open images)")
    step_name: str = Field(
        "open",
        description="Step name. This is a placeholder for the first step name and should not be used in the configuration file.",
    )
    sample_data: Literal["plankton_csv", "benthic_csv", "benthic_ifdo", "nef_raw", "benthic_raw_images"] | None = Field(
        None,
        description="Sample data to use for testing. If provided, it will override input_path, metadata_path, and metadata_type.",
    )
    input_path: str | Path | None = Field(None, description="Input path for image data. Can be a local path or a remote URL.")
    output_path: str | Path = Field("output", description="Output path for results. Can be a local path or a remote URL.")
    metadata_path: str | Path | None = Field(None, description="Path to metadata. Can be a local path or a remote URL.")
    metadata_type: (
        Literal[
            "IFDO",
            "CSV_FILE",
        ]
        | None
    ) = Field(None, description="Type of metadata. Can be 'IFDO' or 'CSV_FILE'")
    image_open_args: str | ImageOpenArgs = Field(
        "",
        description=(
            "Arguments to use when opening images. It can be a string with the image "
            "format or an ImageOpenArgs object. If it is a empty string, the type will be inferred from the file extension."
        ),
    )
    append_data_to_metadata: str | None = Field(
        None,
        description=("Path to append data to metadata. If provided, it will be used to append data to the metadata file."),
    )
    metadata_conventions: str | None = Field(
        None,
        description=(
            "Metadata conventions to apply. "
            "If not provided, it will use the default conventions name described in the documentation."
        ),
    )
    n_jobs: int = Field(1, description="Number of jobs for parallel processing")
    local_cluster: dict[str, Any] | None = Field(None, description="Parameters for the local cluster")
    dask_config_kwargs: dict[str, Any] | None = Field(
        None,
        description=(
            "Dask configuration keyword arguments. If provided, it will be used to update the Dask configuration settings."
        ),
    )
    track_changes: bool = Field(
        True,
        description="Whether to track config changes. If True, it will store in memory the output images on each step",
    )
    rename: Literal["UUID", "datetime"] | None = Field(
        None,
        description="Field name to use for renaming. If not provided, the name will be the same as the input file name.",
    )
    sampling: list[SamplingConfig] | None = Field(
        None,
        description=(
            "Sampling step configurations to be applied to the images before processing them. If not provided, no sampling will be applied."
        ),
    )
    convert: list[ConvertConfig] | None = Field(
        None,
        description=(
            "Convert step configurations to be applied to the images before processing them. If not provided, no conversion will be applied."
        ),
    )

    # The JSON-schema hint documents the two accepted input shapes: either the
    # explicit input/metadata triple, or a bundled sample dataset.
    model_config: ClassVar[dict[str, object]] = {
        "frozen": False,
        "json_schema_extra": {
            "anyOf": [
                {"required": ["input_path", "metadata_path", "metadata_type"], "not": {"required": ["sample_data"]}},
                {"required": ["sample_data"]},
            ]
        },
    }

    @model_validator(mode="before")
    @classmethod
    def validate_fields(cls, values: dict[str, Any]) -> dict[str, Any]:
        """Validate and normalise the raw configuration values.

        Args:
            values (dict): The raw values to validate. Anything that is not a
                ``dict`` (for example an already-built instance) is returned
                untouched so that the regular field validation can deal with it.

        Returns:
            dict: A new dictionary with the normalised values. The dictionary
            passed in, and the step dictionaries it contains, are not modified.
        """
        # A "before" validator receives the raw input, which is not guaranteed
        # to be a mapping; only dictionaries are pre-processed here.
        if not isinstance(values, dict):
            return values

        # Work on a shallow copy so the caller's configuration is not mutated.
        values = dict(values)

        sample_data = values.get("sample_data")
        if sample_data:
            # Sample data overrides the input and metadata related settings.
            information = PaidiverpyData().load(sample_data)
            values["input_path"] = Path(information["input_path"])
            values["metadata_path"] = Path(information["metadata_path"])
            values["metadata_type"] = information["metadata_type"]
            values["image_open_args"] = information["image_open_args"]
            values["append_data_to_metadata"] = information.get("append_data_to_metadata")
        else:
            input_path = values.get("input_path")
            if input_path:
                # Remote locations are kept verbatim; local ones become Path objects.
                values["input_path"] = input_path if path_is_remote(input_path) else Path(str(input_path))

        # The output location is normalised whether or not sample data is used.
        output_path = values.get("output_path")
        if output_path:
            values["output_path"] = output_path if path_is_remote(output_path) else Path(str(output_path))

        # Convert step configurations into StepConfig instances
        step_models = {"sampling": SamplingConfig, "convert": ConvertConfig}
        for step_type, step_model in step_models.items():
            steps = values.get(step_type)
            if not steps:
                continue
            values[step_type] = [
                # "step_name" and "name" are always forced to the step type.
                # Entries that are not dictionaries (e.g. already-built config
                # objects) are passed through for the field validation.
                step_model(**{**step, "step_name": step_type, "name": step_type}) if isinstance(step, dict) else step
                for step in steps
            ]
        return values

    @model_validator(mode="after")
    def check_required_fields(self) -> "GeneralConfig":
        """Ensure output_path is provided and either sample_data or input_path is set.

        Returns:
            GeneralConfig: The validated instance.

        Raises:
            ValueError: If ``output_path`` is empty, or if neither
                ``sample_data`` nor ``input_path`` is provided.
        """
        if not self.output_path:
            msg = "'output_path' is required."
            raise ValueError(msg)
        if not self.sample_data and not self.input_path:
            msg = "Either 'sample_data' or 'input_path' must be provided."
            raise ValueError(msg)
        return self

    def update(self, **updates: Any) -> "GeneralConfig":
        """Update the model in-place with new values.

        The new values are assigned, the whole configuration is validated
        again, and the validated values are copied back onto this instance.

        Args:
            **updates: Field names and their new values.

        Returns:
            GeneralConfig: This instance, to allow call chaining.
        """
        for key, value in updates.items():
            setattr(self, key, value)
        validated = self.__class__.model_validate(self.model_dump())
        # Copy the attributes of the validated instance rather than its dump:
        # dumping turns nested models (sampling/convert steps, image open
        # arguments) back into plain dictionaries.
        for key in type(self).model_fields:
            setattr(self, key, getattr(validated, key))
        return self