"""This module contains subclasses of Dataset. They reflect a particular processing status in terms of the previously
applied Slicers, Groupers, and Analyzers. Each of them yields a copied Dataset object exposing additional methods,
which are defined in the relevant mixin classes.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, List, Optional
from dimcat.base import DimcatConfig
from dimcat.data.resources.base import Rs
from dimcat.dc_exceptions import NoMatchingResourceFoundError
from .base import Dataset
if TYPE_CHECKING:
from dimcat.data.resources import Result
# Module-level logger, named after this module via ``__name__``.
module_logger = logging.getLogger(__name__)
class _ProcessedMixin:
    """Common root of the mixins from which the processed Dataset subclasses are assembled.

    It defines no attributes or methods of its own; it only gives the mixins a shared ancestor.
    """
class _SlicedMixin(_ProcessedMixin):
    """Marker mixin for datasets that have been sliced; it adds no members of its own."""
class _GroupedMixin(_ProcessedMixin):
    """Marker mixin for datasets that have been grouped; it adds no members of its own."""
class _AnalyzedMixin(_ProcessedMixin):
    """Mixin giving a dataset access to the analysis results kept in its "results" output package.

    The methods rely on the host class providing ``self.outputs`` (used here via ``has_package``,
    ``make_new_package`` and ``get_package``) and ``self.add_output``.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the host class, then make sure the outputs contain a "results" package."""
        super().__init__(*args, **kwargs)
        if not self.outputs.has_package("results"):
            self.outputs.make_new_package(package_name="results")

    def add_result(self, result: Result):
        """Add a result to the "results" package of the outputs.

        Args:
            result: The result resource to store.
        """
        self.add_output(resource=result, package_name="results")

    def get_result(self, regex: Optional[str] = None):
        """Return the last result matching the given regex or, if None, the last result added.

        Args:
            regex: Pattern used to select results. If None, the lookup is delegated to the package's
                ``get_resource_by_name()`` without a name (expected to yield the most recently added
                result -- confirm against the package implementation).

        Returns:
            The last element of the list of results matching ``regex``, or the package's default
            resource when ``regex`` is None.

        Raises:
            NoMatchingResourceFoundError: If ``regex`` is given and no result matches it.
        """
        package = self.outputs.get_package("results")
        if regex is None:
            return package.get_resource_by_name()
        matches = self.get_results_by_regex(regex=regex)
        if not matches:
            # The package name must be read from the package itself: ``matches`` is a plain (empty)
            # list here and has no ``package_name`` attribute.
            raise NoMatchingResourceFoundError(regex, package.package_name)
        return matches[-1]

    def get_result_by_config(self, config: DimcatConfig) -> Rs:
        """Return the result that the "results" package looks up for the given config.

        Args:
            config: Configuration passed on to the package's ``get_resource_by_config()``.
        """
        package = self.outputs.get_package("results")
        return package.get_resource_by_config(config=config)

    def get_result_by_name(self, name: str) -> Rs:
        """Return the result stored under the given name in the "results" package.

        Args:
            name: Name passed on to the package's ``get_resource_by_name()``.
        """
        package = self.outputs.get_package("results")
        return package.get_resource_by_name(name=name)

    def get_results_by_regex(self, regex: str) -> List[Rs]:
        """Return all results in the "results" package that match the given regex.

        Args:
            regex: Pattern passed on to the package's ``get_resources_by_regex()``.
        """
        package = self.outputs.get_package("results")
        return package.get_resources_by_regex(regex=regex)

    def get_results_by_type(self, resource_type: type) -> List[Rs]:
        """Return all results in the "results" package that are of the given resource type.

        Args:
            resource_type: Type passed on to the package's ``get_resources_by_type()``.
        """
        package = self.outputs.get_package("results")
        return package.get_resources_by_type(resource_type=resource_type)
class SlicedGroupedAnalyzedDataset(
    _SlicedMixin,
    _GroupedMixin,
    _AnalyzedMixin,
    Dataset,
):
    """Dataset that has been sliced, grouped, and analyzed.

    Everything is inherited from the three mixins and from Dataset; the class adds nothing itself.
    """
class SlicedGroupedDataset(
    _SlicedMixin,
    _GroupedMixin,
    Dataset,
):
    """Dataset that has been sliced and grouped.

    Everything is inherited from the two mixins and from Dataset; the class adds nothing itself.
    """
class SlicedAnalyzedDataset(
    _SlicedMixin,
    _AnalyzedMixin,
    Dataset,
):
    """Dataset that has been sliced and analyzed.

    Everything is inherited from the two mixins and from Dataset; the class adds nothing itself.
    """
class GroupedAnalyzedDataset(
    _GroupedMixin,
    _AnalyzedMixin,
    Dataset,
):
    """Dataset that has been grouped and analyzed.

    Everything is inherited from the two mixins and from Dataset; the class adds nothing itself.
    """
class SlicedDataset(_SlicedMixin, Dataset):
    """Dataset that has been sliced."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Build the sliced counterpart of ``dataset``, keeping its grouped/analyzed status.

        Args:
            dataset: The dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that is eventually called.

        Returns:
            The object returned by ``from_dataset()`` of the class combining "sliced" with the
            statuses ``dataset`` already has, or ``None`` if ``dataset`` is not a Dataset and
            carries neither the grouped nor the analyzed mixin.
        """
        is_grouped = isinstance(dataset, _GroupedMixin)
        is_analyzed = isinstance(dataset, _AnalyzedMixin)
        # Statuses already present on the input select the combined class to delegate to.
        combined_class = {
            (True, True): SlicedGroupedAnalyzedDataset,
            (True, False): SlicedGroupedDataset,
            (False, True): SlicedAnalyzedDataset,
        }.get((is_grouped, is_analyzed))
        if combined_class is not None:
            return combined_class.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None
class GroupedDataset(_GroupedMixin, Dataset):
    """Dataset that has been grouped."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Build the grouped counterpart of ``dataset``, keeping its sliced/analyzed status.

        Args:
            dataset: The dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that is eventually called.

        Returns:
            The object returned by ``from_dataset()`` of the class combining "grouped" with the
            statuses ``dataset`` already has, or ``None`` if ``dataset`` is not a Dataset and
            carries neither the sliced nor the analyzed mixin.
        """
        is_sliced = isinstance(dataset, _SlicedMixin)
        is_analyzed = isinstance(dataset, _AnalyzedMixin)
        # Statuses already present on the input select the combined class to delegate to.
        combined_class = {
            (True, True): SlicedGroupedAnalyzedDataset,
            (True, False): SlicedGroupedDataset,
            (False, True): GroupedAnalyzedDataset,
        }.get((is_sliced, is_analyzed))
        if combined_class is not None:
            return combined_class.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None
class AnalyzedDataset(_AnalyzedMixin, Dataset):
    """Dataset that has been analyzed."""

    @classmethod
    def from_dataset(cls, dataset: Dataset, **kwargs):
        """Build the analyzed counterpart of ``dataset``, keeping its sliced/grouped status.

        Args:
            dataset: The dataset to convert.
            **kwargs: Passed on unchanged to the ``from_dataset()`` that is eventually called.

        Returns:
            The object returned by ``from_dataset()`` of the class combining "analyzed" with the
            statuses ``dataset`` already has, or ``None`` if ``dataset`` is not a Dataset and
            carries neither the grouped nor the sliced mixin.
        """
        is_grouped = isinstance(dataset, _GroupedMixin)
        is_sliced = isinstance(dataset, _SlicedMixin)
        # Statuses already present on the input select the combined class to delegate to.
        combined_class = {
            (True, True): SlicedGroupedAnalyzedDataset,
            (True, False): GroupedAnalyzedDataset,
            (False, True): SlicedAnalyzedDataset,
        }.get((is_grouped, is_sliced))
        if combined_class is not None:
            return combined_class.from_dataset(dataset, **kwargs)
        if isinstance(dataset, Dataset):
            return super().from_dataset(dataset, **kwargs)
        return None
# The combined classes do not inherit from the single-status classes, so each one is registered with
# every single-status class whose status it includes.
# NOTE(review): ``register`` is presumably ``abc.ABCMeta.register`` inherited via Dataset, which would
# make isinstance()/issubclass() checks against the single-status classes succeed -- confirm.
for _status_class, _combined_classes in (
    (
        SlicedDataset,
        (SlicedGroupedDataset, SlicedAnalyzedDataset, SlicedGroupedAnalyzedDataset),
    ),
    (
        GroupedDataset,
        (SlicedGroupedDataset, GroupedAnalyzedDataset, SlicedGroupedAnalyzedDataset),
    ),
    (
        AnalyzedDataset,
        (SlicedAnalyzedDataset, GroupedAnalyzedDataset, SlicedGroupedAnalyzedDataset),
    ),
):
    for _combined_class in _combined_classes:
        _status_class.register(_combined_class)
# Remove the loop temporaries so the module namespace stays as it was.
del _status_class, _combined_classes, _combined_class