Source code for dimcat.steps.groupers.metadata
import logging
from numbers import Number
from typing import Dict, List, Sequence
import pandas as pd
from dimcat import Dataset
from dimcat.data.resources import DimcatIndex
from dimcat.steps.groupers import CustomPieceGrouper
from dimcat.utils import get_middle_composition_year
from typing_extensions import Self
# Module-level logger, named after this module's import path via ``__name__``.
module_logger = logging.getLogger(__name__)
class YearGrouper(CustomPieceGrouper):
    """Piece grouper whose groups are keyed by middle composition year.

    The grouping can either be supplied explicitly (see :meth:`from_grouping` and
    ``grouped_units`` in :meth:`__init__`) or derived from a dataset's metadata
    via :meth:`fit_to_dataset`.
    """

    @classmethod
    def from_grouping(
        cls,
        grouping: Dict[Number, List[tuple]],
        level_names: Sequence[str] = ("middle_composition_year", "corpus", "piece"),
        sort: bool = False,
        raise_if_multiple_membership: bool = False,
    ) -> Self:
        """Creates a YearGrouper from a dictionary of piece groups.

        All arguments are forwarded unchanged to the parent class's
        ``from_grouping``; this override only supplies year-specific defaults.

        Args:
            grouping: A dictionary where keys are group names and values are lists of index tuples.
            level_names:
                Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.
            sort: By default the returned MultiIndex is not sorted. Set True to sort it.
            raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

        Returns:
            Whatever the parent class's ``from_grouping`` returns for these arguments.
        """
        return super().from_grouping(
            grouping=grouping,
            level_names=level_names,
            sort=sort,
            raise_if_multiple_membership=raise_if_multiple_membership,
        )

    def __init__(
        self,
        level_name: str = "middle_composition_year",
        grouped_units: DimcatIndex | pd.MultiIndex | None = None,
        **kwargs,
    ):
        """Initializes the grouper by delegating to the parent class.

        Args:
            level_name: Forwarded to the parent constructor as ``level_name``.
            grouped_units:
                Forwarded to the parent constructor as ``grouped_units``. May be left as None,
                e.g. when the grouping is to be computed later via :meth:`fit_to_dataset`.
            **kwargs: Any further keyword arguments are forwarded to the parent constructor.
        """
        super().__init__(level_name=level_name, grouped_units=grouped_units, **kwargs)

    def fit_to_dataset(self, dataset: Dataset) -> None:
        """Computes the grouping from the dataset's metadata and stores it in ``grouped_units``.

        The raw metadata is passed to :func:`get_middle_composition_year`; the resulting
        values are sorted and grouped by themselves, so that each distinct year becomes one
        group. Any previously set grouping is replaced (an info message is logged if it was
        non-empty).

        Args:
            dataset: The dataset whose metadata is used to determine the groups.
        """
        metadata = dataset.get_metadata(raw=True)
        sorted_composition_years = get_middle_composition_year(metadata).sort_values()
        # Grouping the series by its own values maps each distinct year to the index
        # labels that carry it.
        # NOTE: pandas' groupby drops NaN keys by default (dropna=True), so entries
        # without a year -- if the helper can return any -- end up in no group.
        grouping = sorted_composition_years.groupby(
            sorted_composition_years, sort=True
        ).groups
        # NOTE(review): the group level name is hard-coded here rather than taken from the
        # ``level_name`` passed to __init__ -- confirm that this is intended.
        group_index = DimcatIndex.from_grouping(
            grouping, ("middle_composition_year", "corpus", "piece")
        )
        if len(self.grouped_units) > 0:
            self.logger.info(f"Replacing existing grouping with {group_index}")
        self.grouped_units = group_index