paidiverpy.metadata_parser#

__init__.py for metadata_parser module.

Submodules#

Classes#

MetadataParser

Class for parsing metadata files.

Package Contents#

class paidiverpy.metadata_parser.MetadataParser(config: paidiverpy.config.configuration.Configuration | None = None, use_dask: bool = False, metadata_path: str | None = None, metadata_type: str | None = None, metadata_conventions: str | None = None, append_data_to_metadata: str | None = None)[source]#

Class for parsing metadata files.

Parameters:
  • config (Configuration | None) – Configuration object.

  • use_dask (bool) – Whether to use Dask for parallel processing.

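  • metadata_path (str | None) – Path to the metadata file.

  • metadata_type (str | None) – Type of the metadata file.

  • metadata_conventions (str | None) – Metadata conventions setting (not described in the original docstring; see the signature above).

  • append_data_to_metadata (str | None) – Path to the file with additional data.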

Raises:
open_metadata() → pandas.DataFrame[source]#

Open metadata file.

Raises:

ValueError – Metadata type is not supported.

Returns:

Metadata DataFrame.

Return type:

pd.DataFrame

set_metadata(metadata: pandas.DataFrame | None = None, dataset_metadata: dict[str, Any] | None = None) → None[source]#

Set the metadata.

Parameters:
  • metadata (pd.DataFrame | None) – The metadata to set.

  • dataset_metadata (dict | None) – The dataset metadata to set.

export_metadata(output_format: str = 'csv', output_path: str = 'metadata', metadata: pandas.DataFrame | None = None, dataset_metadata: dict[str, Any] | None = None, from_step: int = -1) → None[source]#

Export metadata to a file.

Parameters:
  • output_format (str, optional) – Format of the output file. It can be “csv”, “json”, “IFDO”, or “croissant”. Defaults to “csv”.

  • output_path (str, optional) – Path to the output file. Defaults to “metadata”.

  • metadata (pd.DataFrame, optional) – Metadata DataFrame. Defaults to None.

  • dataset_metadata (dict, optional) – Dataset metadata. Defaults to None.

  • from_step (int, optional) – Step from which to export metadata. Defaults to -1, which means the last step.

compute() → None[source]#

Compute the metadata if it is a Dask DataFrame.

__repr__() → str[source]#

Return the string representation of the metadata.

Returns:

String representation of the metadata.

Return type:

str

static convert_metadata_to(dataset_metadata: dict[str, Any], metadata: pandas.DataFrame, output_path: str, output_format: str, from_step: int = -1) → None[source]#

Convert metadata to specified format.

Parameters:
  • dataset_metadata (dict) – Dataset metadata.

  • metadata (pd.DataFrame) – Metadata to convert.

  • output_path (str) – Path to save the converted metadata.

  • output_format (str) – Type of metadata to convert to. It can be “csv”, “json”, “IFDO”, or “croissant”.

  • from_step (int) – Step to filter metadata. Default is -1, which means the last step.

static group_metadata_and_dataset_metadata(metadata: pandas.DataFrame, dataset_metadata: dict[str, Any]) → pandas.DataFrame[source]#

Group metadata and dataset metadata.

Parameters:
  • metadata (pd.DataFrame) – Metadata DataFrame.

  • dataset_metadata (dict) – Dataset metadata.

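  • metadata_type (str) – Metadata type. Defaults to “IFDO”. (Note: this parameter is not part of the signature shown above; the docstring entry appears to be out of date.)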

Returns:

Combined metadata DataFrame.

Return type:

pd.DataFrame