# Source code for dimcat.data.datasets.processed

"""This module contains subclasses of Dataset. They reflect a particular processing status in terms of the previously
applied Slicers, Groupers, and Analyzers. Each of them yields a copied Dataset object exposing additional methods,
which are defined in the relevant mixin classes.
"""
from __future__ import annotations

import logging
from typing import TYPE_CHECKING, List, Optional

from dimcat.base import DimcatConfig
from dimcat.data.resources.base import Rs
from dimcat.dc_exceptions import NoMatchingResourceFoundError

from .base import Dataset

if TYPE_CHECKING:
    from dimcat.data.resources import Result

module_logger = logging.getLogger(__name__)


class _ProcessedMixin:
    """Common ancestor of the mixins that are combined with Dataset to form the processed Dataset subclasses.

    The class defines no behaviour of its own; it only provides a shared base for the mixins in this module.
    """


class _SlicedMixin(_ProcessedMixin):
    """Marker mixin identifying a Dataset as sliced; it currently adds no methods of its own."""


class _GroupedMixin(_ProcessedMixin):
    """Marker mixin identifying a Dataset as grouped; it currently adds no methods of its own."""


class _AnalyzedMixin(_ProcessedMixin):
    """Mixin exposing convenience accessors for the "results" package of the outputs catalog."""

    def __init__(self, *args, **kwargs):
        """Initializes the object cooperatively and makes sure that the outputs contain a "results" package."""
        super().__init__(*args, **kwargs)
        if not self.outputs.has_package("results"):
            self.outputs.make_new_package(package_name="results")

    def add_result(self, result: Result) -> None:
        """Adds a result to the "results" package of the outputs catalog."""
        self.add_output(resource=result, package_name="results")

    def get_result(self, regex: Optional[str] = None) -> Rs:
        """Returns the last result that matches the given regex or, if None, the last result added.

        Args:
            regex:
                Regular expression passed on to :meth:`get_results_by_regex`. If None, the lookup is delegated
                to the package's ``get_resource_by_name()`` called without a name.

        Returns:
            The last element of the list of matching results, or whatever the package returns for a nameless lookup.

        Raises:
            NoMatchingResourceFoundError: If a regex was given but no result matches it.
        """
        if regex is None:
            return self.outputs.get_package("results").get_resource_by_name()
        matches = self.get_results_by_regex(regex=regex)
        if not matches:
            # ``matches`` is a plain list and has no ``package_name`` attribute, so the name of the package
            # that was searched is passed explicitly.
            raise NoMatchingResourceFoundError(regex, "results")
        return matches[-1]

    def get_result_by_config(self, config: DimcatConfig) -> Rs:
        """Returns the result from the "results" package that the package matches to the given config."""
        results = self.outputs.get_package("results")
        return results.get_resource_by_config(config=config)

    def get_result_by_name(self, name: str) -> Rs:
        """Returns the result from the "results" package that has the given name."""
        results = self.outputs.get_package("results")
        return results.get_resource_by_name(name=name)

    def get_results_by_regex(self, regex: str) -> List[Rs]:
        """Returns the results from the "results" package that the package matches to the given regex."""
        results = self.outputs.get_package("results")
        return results.get_resources_by_regex(regex=regex)

    def get_results_by_type(self, resource_type: type) -> List[Rs]:
        """Returns the results from the "results" package that the package matches to the given resource type."""
        results = self.outputs.get_package("results")
        return results.get_resources_by_type(resource_type=resource_type)


class SlicedGroupedAnalyzedDataset(_SlicedMixin, _GroupedMixin, _AnalyzedMixin, Dataset):
    """Dataset subclass combining the sliced, grouped, and analyzed mixins."""


class SlicedGroupedDataset(_SlicedMixin, _GroupedMixin, Dataset):
    """Dataset subclass combining the sliced and grouped mixins."""


class SlicedAnalyzedDataset(_SlicedMixin, _AnalyzedMixin, Dataset):
    """Dataset subclass combining the sliced and analyzed mixins."""


class GroupedAnalyzedDataset(_GroupedMixin, _AnalyzedMixin, Dataset):
    """Dataset subclass combining the grouped and analyzed mixins."""


class SlicedDataset(_SlicedMixin, Dataset):
    """Dataset subclass marking the data as sliced."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Creates the sliced counterpart of the given Dataset, keeping its grouped/analyzed status.

        The call is dispatched to the class that combines "sliced" with the mixins the given dataset already
        carries. A plain Dataset is handed to the parent implementation.

        Args:
            dataset: The Dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that ends up being called.

        Returns:
            The object returned by the selected ``from_dataset()``, or None if ``dataset`` is neither an
            instance of one of the mixins checked here nor a Dataset.
        """
        already_grouped = isinstance(dataset, _GroupedMixin)
        already_analyzed = isinstance(dataset, _AnalyzedMixin)
        if already_grouped and already_analyzed:
            return SlicedGroupedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if already_grouped:
            return SlicedGroupedDataset.from_dataset(dataset, **kwargs)
        if already_analyzed:
            return SlicedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None


class GroupedDataset(_GroupedMixin, Dataset):
    """Dataset subclass marking the data as grouped."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Creates the grouped counterpart of the given Dataset, keeping its sliced/analyzed status.

        The call is dispatched to the class that combines "grouped" with the mixins the given dataset already
        carries. A plain Dataset is handed to the parent implementation.

        Args:
            dataset: The Dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that ends up being called.

        Returns:
            The object returned by the selected ``from_dataset()``, or None if ``dataset`` is neither an
            instance of one of the mixins checked here nor a Dataset.
        """
        already_sliced = isinstance(dataset, _SlicedMixin)
        already_analyzed = isinstance(dataset, _AnalyzedMixin)
        if already_sliced and already_analyzed:
            return SlicedGroupedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if already_sliced:
            return SlicedGroupedDataset.from_dataset(dataset, **kwargs)
        if already_analyzed:
            return GroupedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None


class AnalyzedDataset(_AnalyzedMixin, Dataset):
    """Dataset subclass marking the data as analyzed."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Creates the analyzed counterpart of the given Dataset, keeping its sliced/grouped status.

        The call is dispatched to the class that combines "analyzed" with the mixins the given dataset already
        carries. A plain Dataset is handed to the parent implementation.

        Args:
            dataset: The Dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that ends up being called.

        Returns:
            The object returned by the selected ``from_dataset()``, or None if ``dataset`` is neither an
            instance of one of the mixins checked here nor a Dataset.
        """
        already_grouped = isinstance(dataset, _GroupedMixin)
        already_sliced = isinstance(dataset, _SlicedMixin)
        if already_grouped and already_sliced:
            return SlicedGroupedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if already_grouped:
            return GroupedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if already_sliced:
            return SlicedAnalyzedDataset.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None


# Register each combined class with every single-status class whose status it includes.
# NOTE(review): ``register`` is presumably ``abc.ABCMeta.register`` (virtual subclassing), so that e.g.
# ``isinstance(x, SlicedDataset)`` also holds for the combined classes -- confirm against the Dataset base.

# Everything that has been sliced
SlicedDataset.register(SlicedGroupedDataset)
SlicedDataset.register(SlicedAnalyzedDataset)
SlicedDataset.register(SlicedGroupedAnalyzedDataset)

# Everything that has been grouped
GroupedDataset.register(SlicedGroupedDataset)
GroupedDataset.register(GroupedAnalyzedDataset)
GroupedDataset.register(SlicedGroupedAnalyzedDataset)

# Everything that has been analyzed
AnalyzedDataset.register(SlicedAnalyzedDataset)
AnalyzedDataset.register(GroupedAnalyzedDataset)
AnalyzedDataset.register(SlicedGroupedAnalyzedDataset)