# Source code for dimcat.steps.groupers.columns
import logging
from typing import Optional
import marshmallow as mm
import pandas as pd
from dimcat.data.resources import DimcatResource, FeatureName
from dimcat.dc_exceptions import ResourceIsMissingFeatureColumnError
from dimcat.steps.groupers.base import Grouper
# Module-level logger named after this module's import path (``__name__``).
module_logger = logging.getLogger(__name__)
class ColumnGrouper(Grouper):
    """Grouper that, like its subclasses, groups resources by one particular column, if they contain it."""

    class Schema(Grouper.Schema):
        # Name of the column to group by, declared as a marshmallow string field.
        grouped_column = mm.fields.Str()

    def __init__(
        self,
        grouped_column: str,
        level_name: Optional[str] = None,
        **kwargs,
    ):
        """Store the column to group by and initialize the parent grouper.

        Args:
            grouped_column: Name of the column that :meth:`check_resource` requires to be present.
            level_name: Handed to the parent initializer. When None, ``grouped_column`` is used instead.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        effective_level_name = grouped_column if level_name is None else level_name
        super().__init__(level_name=effective_level_name, **kwargs)
        # Assigned only after the parent initializer has run.
        self.grouped_column: str = grouped_column

    def check_resource(self, resource: DimcatResource) -> None:
        """Run the parent's checks, then verify that the resource contains the grouped column.

        Args:
            resource: The resource whose ``df.columns`` are inspected.

        Raises:
            ResourceIsMissingFeatureColumnError: If ``grouped_column`` is not among ``resource.df.columns``.
        """
        super().check_resource(resource)
        if self.grouped_column in resource.df.columns:
            return
        raise ResourceIsMissingFeatureColumnError(
            resource.resource_name, self.grouped_column
        )
class MeasureGrouper(ColumnGrouper):
    """Column grouper whose defaults group by the ``"mn"`` column under the level name ``"measure"``."""

    def __init__(
        self,
        grouped_column: str = "mn",
        level_name: str = "measure",
        **kwargs,
    ):
        """Initialize the parent column grouper with measure-specific defaults.

        Args:
            grouped_column: Name of the column to group by. Defaults to ``"mn"``.
            level_name: Name handed on as ``level_name``. Defaults to ``"measure"``.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(
            grouped_column=grouped_column,
            level_name=level_name,
            **kwargs,
        )
class ModeGrouper(ColumnGrouper):
    """Column grouper whose defaults group by the ``"localkey_mode"`` column under the level name ``"mode"``."""

    # NOTE(review): presumably read by the base class to restrict which features
    # this grouper accepts -- confirm against Grouper, which is not visible here.
    _allowed_features = (
        FeatureName.HarmonyLabels,
        FeatureName.KeyAnnotations,
    )

    def __init__(
        self,
        grouped_column: str = "localkey_mode",
        level_name: str = "mode",
        **kwargs,
    ):
        """Initialize the parent column grouper with mode-specific defaults.

        Args:
            grouped_column: Name of the column to group by. Defaults to ``"localkey_mode"``.
            level_name: Name handed on as ``level_name``. Defaults to ``"mode"``.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super().__init__(
            grouped_column=grouped_column,
            level_name=level_name,
            **kwargs,
        )