dimcat.steps.groupers package#

Submodules#

dimcat.steps.groupers.annotations module#

class dimcat.steps.groupers.annotations.HasCadenceAnnotationsGrouper(level_name: str = 'has_cadence_annotations', grouped_units: Optional[Union[DimcatIndex, MultiIndex]] = None, smallest_unit: UnitOfAnalysis = UnitOfAnalysis.SLICE, **kwargs)[source]#

Bases: CriterionGrouper

Boolean grouper that categorizes slices, pieces, or groups by whether they have at least one cadence label or not.

static compute_criterion(unit: D) Hashable[source]#

Returns True if the unit has a column called ‘cadence’ containing at least one non-null value.

classmethod from_grouping(grouping: Dict[bool, List[tuple]], level_names: Sequence[str] = ('has_cadence_annotations', 'corpus', 'piece'), sort: bool = False, raise_if_multiple_membership: bool = False) Self[source]#

Creates a HasCadenceAnnotations grouper from a dictionary of piece groups. Keys should be True and False.

Args:

    grouping: A dictionary where keys are group names and values are lists of index tuples.

    level_names: Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.

    sort: By default the returned MultiIndex is not sorted. Set True to sort it.

    raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

class dimcat.steps.groupers.annotations.HasHarmonyLabelsGrouper(level_name: str = 'has_harmony_labels', grouped_units: Optional[IX] = None, **kwargs)[source]#

Bases: CustomPieceGrouper

Boolean grouper that categorizes pieces by whether they have at least one DCML harmony label or not.

fit_to_dataset(dataset: Dataset) None[source]#

Adjust the PipelineStep to the passed dataset.

Parameters:

dataset – The dataset to adjust to.

classmethod from_grouping(grouping: Dict[bool, List[tuple]], level_names: Sequence[str] = ('has_harmony_labels', 'corpus', 'piece'), sort: bool = False, raise_if_multiple_membership: bool = False) Self[source]#

Creates a HasHarmonyLabels grouper from a dictionary of piece groups. Keys should be True and False.

Args:

    grouping: A dictionary where keys are group names and values are lists of index tuples.

    level_names: Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.

    sort: By default the returned MultiIndex is not sorted. Set True to sort it.

    raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

dimcat.steps.groupers.base module#

class dimcat.steps.groupers.base.CorpusGrouper(level_name: str = 'corpus', **kwargs)[source]#

Bases: _IdGrouper

Results will be grouped by the ‘corpus’ part of the (‘corpus’, ‘piece’) index.

check_resource(resource: DimcatResource) None[source]#

Check if the resource is eligible for processing.

Raises:
class dimcat.steps.groupers.base.CriterionGrouper(level_name: str = 'criterion', grouped_units: Optional[Union[DimcatIndex, MultiIndex]] = None, smallest_unit: UnitOfAnalysis = UnitOfAnalysis.SLICE, **kwargs)[source]#

Bases: MappingGrouper

Groupers that are fitted to a Dataset by applying their compute_criterion() method to the units of analysis for a particular resource and grouping the chunks according to the method’s outputs.

class Schema(*, only: Optional[Union[Sequence[str], AbstractSet[str]]] = None, exclude: Union[Sequence[str], AbstractSet[str]] = (), many: Optional[bool] = None, load_only: Union[Sequence[str], AbstractSet[str]] = (), dump_only: Union[Sequence[str], AbstractSet[str]] = (), partial: Optional[Union[bool, Sequence[str], AbstractSet[str]]] = None, unknown: Optional[Literal['exclude', 'include', 'raise']] = None)[source]#

Bases: Schema

dump_fields: dict[str, Field]#
exclude: set[Any] | MutableSet[Any]#
fields: dict[str, Field]#

Dictionary mapping field_names -> Field objects

load_fields: dict[str, Field]#
opts: Any = <marshmallow.schema.SchemaOpts object>#
unknown: types.UnknownOption#
check_dataset(dataset: Dataset) None[source]#

Check if the dataset is eligible for processing.

Raises:
static compute_criterion(unit: D) Hashable[source]#
fit_to_dataset(dataset: Dataset) None[source]#

Adjust the PipelineStep to the passed dataset.

Parameters:

dataset – The dataset to adjust to.

property required_feature: FeatureName#
property smallest_unit: UnitOfAnalysis#
class dimcat.steps.groupers.base.CustomPieceGrouper(level_name: str = 'piece_group', grouped_units: Optional[Union[DimcatIndex, MultiIndex]] = None, **kwargs)[source]#

Bases: MappingGrouper

Allows for grouping by specifying a {group_name: [piece_index_tuples]} dictionary.

classmethod from_grouping(grouping: Dict[Hashable, List[tuple]], level_names: Sequence[str] = ('piece_group', 'corpus', 'piece'), sort: bool = False, raise_if_multiple_membership: bool = False) Self[source]#

Creates a CustomPieceGrouper from a dictionary of piece groups.

Args:

    grouping: A dictionary where keys are group names and values are lists of index tuples.

    level_names: Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.

    sort: By default the returned MultiIndex is not sorted. Set True to sort it.

    raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

property grouped_units: PieceIndex#
class dimcat.steps.groupers.base.GroupedUnitsField(nested=<class 'dimcat.data.resources.dc.DimcatIndex.Schema'>, **kwargs)[source]#

Bases: Nested

class dimcat.steps.groupers.base.Grouper(level_name: str = 'group', **kwargs)[source]#

Bases: ResourceTransformation

class Schema(*, only: Optional[Union[Sequence[str], AbstractSet[str]]] = None, exclude: Union[Sequence[str], AbstractSet[str]] = (), many: Optional[bool] = None, load_only: Union[Sequence[str], AbstractSet[str]] = (), dump_only: Union[Sequence[str], AbstractSet[str]] = (), partial: Optional[Union[bool, Sequence[str], AbstractSet[str]]] = None, unknown: Optional[Literal['exclude', 'include', 'raise']] = None)[source]#

Bases: Schema

dump_fields: dict[str, Field]#
exclude: set[Any] | MutableSet[Any]#
fields: dict[str, Field]#

Dictionary mapping field_names -> Field objects

load_fields: dict[str, Field]#
opts: Any = <marshmallow.schema.SchemaOpts object>#
unknown: types.UnknownOption#
property level_name: str#
transform_resource(resource: DimcatResource) D[source]#

Apply the grouper to a Feature.

class dimcat.steps.groupers.base.MappingGrouper(level_name: str = 'group', grouped_units: Optional[Union[DimcatIndex, MultiIndex]] = None, **kwargs)[source]#

Bases: Grouper

Superclass for all Groupers that function on the basis of a {group_name: [index_tuples]} dictionary.

class Schema(*, only: Optional[Union[Sequence[str], AbstractSet[str]]] = None, exclude: Union[Sequence[str], AbstractSet[str]] = (), many: Optional[bool] = None, load_only: Union[Sequence[str], AbstractSet[str]] = (), dump_only: Union[Sequence[str], AbstractSet[str]] = (), partial: Optional[Union[bool, Sequence[str], AbstractSet[str]]] = None, unknown: Optional[Literal['exclude', 'include', 'raise']] = None)[source]#

Bases: Schema

deal_with_dict(data, **kwargs)[source]#
dump_fields: dict[str, Field]#
exclude: set[Any] | MutableSet[Any]#
fields: dict[str, Field]#

Dictionary mapping field_names -> Field objects

load_fields: dict[str, Field]#
opts: Any = <marshmallow.schema.SchemaOpts object>#
unknown: types.UnknownOption#
check_resource(resource: DimcatResource) None[source]#

Check if the resource is eligible for processing.

Raises:
classmethod from_grouping(grouping: Dict[Hashable, List[tuple]], level_names: Sequence[str] = ('group', 'corpus', 'piece'), sort: bool = False, raise_if_multiple_membership: bool = False) Self[source]#

Creates a MappingGrouper from a dictionary of groups.

Args:

    grouping: A dictionary where keys are group names and values are lists of index tuples.

    level_names: Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.

    sort: By default the returned MultiIndex is not sorted. Set True to sort it.

    raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

property grouped_units: DimcatIndex#
transform_resource(resource: DimcatResource) D[source]#

Apply the grouper to a Feature.

class dimcat.steps.groupers.base.PieceGrouper(level_name: str = 'piece', **kwargs)[source]#

Bases: _IdGrouper

Results will be grouped by the ‘piece’ part of the (‘corpus’, ‘piece’) index.

check_resource(resource: DimcatResource) None[source]#

Check if the resource is eligible for processing.

Raises:

dimcat.steps.groupers.columns module#

class dimcat.steps.groupers.columns.ColumnGrouper(grouped_column: str, level_name: Optional[str] = None, **kwargs)[source]#

Bases: Grouper

This grouper and its subclasses group resources by a particular column, if they contain it.

class Schema(*, only: Optional[Union[Sequence[str], AbstractSet[str]]] = None, exclude: Union[Sequence[str], AbstractSet[str]] = (), many: Optional[bool] = None, load_only: Union[Sequence[str], AbstractSet[str]] = (), dump_only: Union[Sequence[str], AbstractSet[str]] = (), partial: Optional[Union[bool, Sequence[str], AbstractSet[str]]] = None, unknown: Optional[Literal['exclude', 'include', 'raise']] = None)[source]#

Bases: Schema

dump_fields: dict[str, Field]#
exclude: set[Any] | MutableSet[Any]#
fields: dict[str, Field]#

Dictionary mapping field_names -> Field objects

load_fields: dict[str, Field]#
opts: Any = <marshmallow.schema.SchemaOpts object>#
unknown: types.UnknownOption#
check_resource(resource: DimcatResource) None[source]#

Check if the resource is eligible for processing.

Raises:
transform_resource(resource: DimcatResource) DataFrame[source]#

Apply the grouper to a Feature.

class dimcat.steps.groupers.columns.MeasureGrouper(grouped_column: str = 'mn', level_name: str = 'measure', **kwargs)[source]#

Bases: ColumnGrouper

class dimcat.steps.groupers.columns.ModeGrouper(grouped_column: str = 'localkey_mode', level_name: str = 'mode', **kwargs)[source]#

Bases: ColumnGrouper

dimcat.steps.groupers.metadata module#

class dimcat.steps.groupers.metadata.YearGrouper(level_name: str = 'middle_composition_year', grouped_units: Optional[Union[DimcatIndex, MultiIndex]] = None, **kwargs)[source]#

Bases: CustomPieceGrouper

fit_to_dataset(dataset: Dataset) None[source]#

Adjust the PipelineStep to the passed dataset.

Parameters:

dataset – The dataset to adjust to.

classmethod from_grouping(grouping: Dict[Number, List[tuple]], level_names: Sequence[str] = ('middle_composition_year', 'corpus', 'piece'), sort: bool = False, raise_if_multiple_membership: bool = False) Self[source]#

Creates a YearGrouper from a dictionary of piece groups.

Args:

    grouping: A dictionary where keys are group names and values are lists of index tuples.

    level_names: Names for the levels of the MultiIndex, i.e. one for the group level and one per level in the tuples.

    sort: By default the returned MultiIndex is not sorted. Set True to sort it.

    raise_if_multiple_membership: If True, raises a ValueError if a member is in multiple groups.

Module contents#