# Source code for paidiverpy.models.general_config

"""Configuration module."""

from pathlib import Path
from typing import Any
from typing import ClassVar
from typing import Literal
from pydantic import Field
from pydantic import model_validator
from paidiverpy.models.open_params import ImageOpenArgs
from paidiverpy.models.step_config import ConvertConfig
from paidiverpy.models.step_config import SamplingConfig
from paidiverpy.utils.base_model import BaseModel
from paidiverpy.utils.data import PaidiverpyData
from paidiverpy.utils.object_store import path_is_remote



[docs]
class GeneralConfig(BaseModel):
    """General configuration class.

    Holds the general configuration, coming either from the configuration file
    or from direct user input. Either ``sample_data`` or ``input_path`` must be
    provided; when ``sample_data`` is given, the input/metadata settings are
    loaded from the bundled sample-data description and override user values.
    """

    name: str = Field("raw", description="Name of the first step (the step to open images)")
    step_name: str = Field(
        "open", description="Step name. This is a placeholder for the first step name and should not be used in the configuration file."
    )
    sample_data: Literal["plankton_csv", "benthic_csv", "benthic_ifdo", "nef_raw", "benthic_raw_images"] | None = Field(
        None,
        description="Sample data to use for testing. If provided, it will override input_path, metadata_path, and metadata_type.",
    )
    input_path: str | Path | None = Field(None, description="Input path for image data. Can be a local path or a remote URL.")
    output_path: str | Path = Field("output", description="Output path for results. Can be a local path or a remote URL.")
    metadata_path: str | Path | None = Field(None, description="Path to metadata. Can be a local path or a remote URL.")
    metadata_type: (
        Literal[
            "IFDO",
            "CSV_FILE",
        ]
        | None
    ) = Field(None, description="Type of metadata. Can be 'IFDO' or 'CSV_FILE'")
    image_open_args: str | ImageOpenArgs = Field(
        "",
        description=(
            "Arguments to use when opening images. It can be a string with the image "
            "format or an ImageOpenArgs object. If it is a empty string, the type will be inferred from the file extension."
        ),
    )
    append_data_to_metadata: str | None = Field(
        None,
        description=("Path to append data to metadata. If provided, it will be used to append data to the metadata file."),
    )
    metadata_conventions: str | None = Field(
        None,
        description=("Metadata conventions to apply. If not provided, it will use the default conventions name described in the documentation."),
    )
    n_jobs: int = Field(1, description="Number of jobs for parallel processing")
    local_cluster: dict[str, Any] | None = Field(None, description="Parameters for the local cluster")
    dask_config_kwargs: dict[str, Any] | None = Field(
        None,
        description=(
            "Dask configuration keyword arguments. If provided, it will be used to update the Dask configuration settings."
        ),
    )
    track_changes: bool = Field(True, description="Whether to track config changes. If True, it will store in memory the output images on each step")
    rename: Literal["UUID", "datetime"] | None = Field(
        None, description="Field name to use for renaming. If not provided, the name will be the same as the input file name."
    )
    sampling: list[SamplingConfig] | None = Field(
        None,
        description=(
            "Sampling step configurations to be applied to the images before processing them. If not provided, no sampling will be applied."
        ),
    )

    convert: list[ConvertConfig] | None = Field(
        None,
        description=(
            "Convert step configurations to be applied to the images before processing them. If not provided, no conversion will be applied."
        ),
    )

    # The JSON schema advertises the same rule enforced by check_required_fields:
    # either the explicit input/metadata triple or sample_data.
    model_config: ClassVar[dict[str, object]] = {
        "frozen": False,
        "json_schema_extra": {
            "anyOf": [
                {"required": ["input_path", "metadata_path", "metadata_type"], "not": {"required": ["sample_data"]}},
                {"required": ["sample_data"]},
            ]
        },
    }

    @model_validator(mode="before")
    @classmethod
    def validate_fields(cls, values: Any) -> Any:
        """Normalise the raw input before field validation.

        When ``sample_data`` is set, the input path, metadata path/type, image
        open arguments and ``append_data_to_metadata`` are replaced by the values
        returned by ``PaidiverpyData().load``. Otherwise a non-remote
        ``input_path`` is converted to a ``Path``. A non-remote ``output_path`` is
        always converted to a ``Path``. ``sampling`` and ``convert`` entries given
        as dicts are turned into ``SamplingConfig`` / ``ConvertConfig`` instances
        with ``step_name`` and ``name`` forced to the step type.

        Args:
            values (Any): The raw input. Only dict input is normalised; anything
                else is returned unchanged for pydantic to handle.

        Returns:
            Any: A normalised copy of the input dict, or the input itself when it
                is not a dict.
        """
        # A "before" validator may receive non-dict input (e.g. a model instance).
        if not isinstance(values, dict):
            return values

        # Work on a shallow copy so the caller's mapping is never mutated.
        values = dict(values)

        sample_data = values.get("sample_data")
        if sample_data:
            information = PaidiverpyData().load(sample_data)
            values["input_path"] = Path(information["input_path"])
            values["metadata_path"] = Path(information["metadata_path"])
            values["metadata_type"] = information["metadata_type"]
            values["image_open_args"] = information["image_open_args"]
            values["append_data_to_metadata"] = information.get("append_data_to_metadata")
        else:
            input_path = values.get("input_path")
            if input_path:
                values["input_path"] = input_path if path_is_remote(input_path) else Path(str(input_path))

        output_path = values.get("output_path")
        if output_path:
            values["output_path"] = output_path if path_is_remote(output_path) else Path(str(output_path))

        # Convert step configurations into their step config instances.
        step_models = {"sampling": SamplingConfig, "convert": ConvertConfig}
        for step_type, step_model in step_models.items():
            steps = values.get(step_type)
            if not steps:
                continue
            values[step_type] = [
                # Dict entries are copied (not mutated) with the step identifiers
                # forced; anything else (e.g. an already-built config instance) is
                # left for field validation to accept or reject.
                step_model(**{**step, "step_name": step_type, "name": step_type}) if isinstance(step, dict) else step
                for step in steps
            ]

        return values

    @model_validator(mode="after")
    def check_required_fields(self) -> "GeneralConfig":
        """Ensure output_path is provided and either sample_data or input_path is set.

        Returns:
            GeneralConfig: The validated instance.

        Raises:
            ValueError: If ``output_path`` is empty, or if neither ``sample_data``
                nor ``input_path`` is provided.
        """
        if not self.output_path:
            msg = "'output_path' is required."
            raise ValueError(msg)

        if not (self.sample_data or self.input_path):
            msg = "Either 'sample_data' or 'input_path' must be provided."
            raise ValueError(msg)

        return self

    def update(self, **updates: Any) -> "GeneralConfig":
        """Update the model in-place with new values and re-validate it.

        The new values are assigned first, then the whole model is re-validated
        from its dumped state and the validated field values are copied back.
        If re-validation raises, the values assigned in the first step remain
        on the instance.

        Args:
            **updates (Any): Field names and their new values.

        Returns:
            GeneralConfig: This same instance, updated.
        """
        for key, value in updates.items():
            setattr(self, key, value)

        model_cls = type(self)
        validated = model_cls.model_validate(self.model_dump())

        # Copy the validated attributes themselves rather than a dump of them, so
        # nested models (sampling / convert / image_open_args) stay model
        # instances instead of being downgraded to plain dicts.
        for key in model_cls.model_fields:
            setattr(self, key, getattr(validated, key))
        return self