zae_engine.operation.run_length의 소스 코드

from dataclasses import dataclass
from typing import List, Union, Optional
from itertools import groupby



[문서]
@dataclass
class Run:
    """
    Represents a run in Run-Length Encoding (RLE).

    Attributes
    ----------
    start_index : int
        The starting index of the run.
    end_index : int
        The ending index of the run.
    value : int
        The value of the run.
    """

    start_index: int
    end_index: int
    value: int

    def __repr__(self):
        """Provide a string representation of the Run object."""
        return f"Run(start_index={self.start_index}, end_index={self.end_index}, value={self.value})"




[문서]
class RunList:
    """
    Stores the results of Run-Length Encoding (RLE) and provides methods
    to access raw and filtered runs.

    Attributes
    ----------
    all_runs : List[Run]
        A list of all runs without any filtering.
    sense : int
        The minimum length of runs to be considered in filtering.
    original_length : int
        The length of the original list that was encoded.
    """

    def __init__(self, all_runs: List[Run], sense: int, original_length: int):
        """
        Initializes the RunList object.

        Parameters
        ----------
        all_runs : List[Run]
            A list of all runs obtained from encoding.
        sense : int
            The minimum length of runs to be considered in filtering.
        original_length : int
            The length of the original list that was encoded.
        """
        self.all_runs = all_runs
        self.sense = sense
        self.original_length = original_length


[문서]
    def raw(self) -> List[Run]:
        """
        Returns all runs without any filtering.

        Returns
        -------
        List[Run]
            A list of all runs.
        """
        return self.all_runs



[문서]
    def filtered(self) -> List[Run]:
        """
        Returns runs that meet or exceed the specified sense value.

        Returns
        -------
        List[Run]
            A list of filtered runs.
        """
        return [run for run in self.all_runs if (run.end_index - run.start_index + 1) >= self.sense]


    def __repr__(self):
        """Provide a string representation of the RunList object."""
        return f"RunList(all_runs={self.all_runs}, sense={self.sense}, original_length={self.original_length})"




[문서]
class RunLengthCodec:
    """
    A codec class for Run-Length Encoding (RLE) and decoding.

    Parameters
    ----------
    tol_merge : int, optional
        The tolerance value to merge close runs. Default is 20.
    remove_incomplete : bool, optional
        Whether to remove incomplete runs during sanitization. Default is False.
    merge_closed : bool, optional
        Whether to merge close runs during sanitization. Default is False.
    base_class : int, optional
        The base class value to be excluded from runs. Default is 0.

    Methods
    -------
    encode(x: List[int], sense: int) -> RunList:
        Encodes a list of integers into RLE runs.
    decode(encoded_runs: RunList) -> List[int]:
        Decodes RLE runs back into the original list of integers.
    sanitize(run_list: RunList) -> RunList:
        Cleans and merges runs based on the codec's parameters.
    __call__(data: Union[List[int], List[List[int]], RunList, List[RunList]], sense: Optional[int] = None) -> Union[RunList, List[RunList], List[int], List[List[int]]]:
        Encodes or decodes data based on the input type.
    """

    def __init__(
        self, tol_merge: int = 20, remove_incomplete: bool = False, merge_closed: bool = False, base_class: int = 0
    ):
        """
        Initialize the RunLengthCodec with specified parameters.

        Parameters
        ----------
        tol_merge : int, optional
            The tolerance value to merge close runs. Default is 20.
        remove_incomplete : bool, optional
            Whether to remove runs that start at index 0 or end at the last index. Default is False.
        merge_closed : bool, optional
            Whether to merge runs that are close to each other. Default is False.
        base_class : int, optional
            The base class value to exclude from runs. Runs with this value will not be encoded. Default is 0.
        """
        self.tol_merge = tol_merge
        self.remove_incomplete = remove_incomplete
        self.merge_closed = merge_closed
        self.base_class = base_class


[문서]
    def encode(self, x: List[int], sense: int) -> RunList:
        """
        Encode a list of integers using Run-Length Encoding (RLE).

        This method converts a sequence of integers into a list of runs.
        Each run is represented as a `Run` object containing the start index,
        end index, and the value of the run. Runs with a length smaller than
        the specified `sense` are ignored in the `filtered()` method.

        Parameters
        ----------
        x : List[int]
            The input list of integers to be encoded.
        sense : int
            The minimum length of runs to be considered. Runs shorter than this
            value are excluded from the output when calling `filtered()`.

        Returns
        -------
        RunList
            A `RunList` object containing all runs, the sense value, and the original list length.
        """
        original_length = len(x)
        if not x:
            all_runs = []
        else:
            all_runs = []
            current_index = 0
            for value, group in groupby(x):
                group_list = list(group)
                run_length = len(group_list)
                start_index = current_index
                end_index = current_index + run_length - 1
                if value != self.base_class:
                    all_runs.append(Run(start_index=start_index, end_index=end_index, value=value))
                current_index += run_length
        run_list = RunList(all_runs=all_runs, sense=sense, original_length=original_length)
        sanitized_run_list = self.sanitize(run_list)
        return sanitized_run_list



[문서]
    def decode(self, encoded_runs: RunList) -> List[int]:
        """
        Decode a list of RLE runs back to the original list of integers.

        This method reconstructs the original sequence of integers from a `RunList` object.
        Each `Run` specifies the start index, end index, and the value to be filled in that range.
        The length of the output list is determined by the `original_length` stored in `RunList`.

        Parameters
        ----------
        encoded_runs : RunList
            A `RunList` object containing runs to be decoded.

        Returns
        -------
        List[int]
            The decoded list of integers reconstructed from the runs.
        """
        if not encoded_runs.all_runs:
            return [self.base_class] * encoded_runs.original_length  # Return background if no runs

        decoded = [self.base_class] * encoded_runs.original_length  # Initialize with background label 0

        for run in encoded_runs.all_runs:
            on = max(run.start_index, 0)  # Ensure non-negative start index
            off = min(run.end_index, encoded_runs.original_length - 1)  # Clamp to maximum index
            cls = run.value

            # Assign the class label to the specified range
            for i in range(on, off + 1):
                decoded[i] = cls

        return decoded



[문서]
    def sanitize(self, run_list: RunList) -> RunList:
        """
        Clean and merge runs based on the codec's parameters.

        This function processes the RunList by:
            1. Removing incomplete runs (if remove_incomplete is True).
            2. Merging close runs (if merge_closed is True), while respecting the base_class.

        Parameters
        ----------
        run_list : RunList
            The RunList object to be sanitized.

        Returns
        -------
        RunList
            The sanitized RunList object.
        """
        all_runs = run_list.all_runs.copy()

        # 1. Remove incomplete runs
        if self.remove_incomplete:
            # Incomplete runs are those that start at 0 or end at original_length -1
            sanitized_runs = [
                run for run in all_runs if run.start_index != 0 and run.end_index != run_list.original_length - 1
            ]
        else:
            sanitized_runs = all_runs

        # 2. Merge close runs
        if self.merge_closed and sanitized_runs:
            # Sort runs by start_index to ensure order
            sanitized_runs.sort(key=lambda run: run.start_index)

            merged_runs = []
            current_run = sanitized_runs[0]

            for next_run in sanitized_runs[1:]:
                gap = next_run.start_index - current_run.end_index - 1

                if gap <= self.tol_merge:
                    if current_run.value == next_run.value:
                        # Same value runs, merge them
                        current_run.end_index = next_run.end_index
                    else:
                        # Different value runs, decide which one to merge based on class
                        # Base class should not be involved in merging
                        if current_run.value == self.base_class or next_run.value == self.base_class:
                            # Do not merge with base class
                            merged_runs.append(current_run)
                            current_run = next_run
                        elif next_run.value < current_run.value:
                            # Merge current_run into next_run (retain next_run's value)
                            current_run.end_index = next_run.end_index
                            current_run.value = next_run.value
                        else:
                            # Merge next_run into current_run (retain current_run's value)
                            current_run.end_index = next_run.end_index
                else:
                    merged_runs.append(current_run)
                    current_run = next_run

            merged_runs.append(current_run)
            sanitized_runs = merged_runs

        return RunList(all_runs=sanitized_runs, sense=run_list.sense, original_length=run_list.original_length)



[문서]
    def __call__(
        self, data: Union[List[int], List[List[int]], RunList, List[RunList]], sense: Optional[int] = None
    ) -> Union[RunList, List[RunList], List[int], List[List[int]]]:
        """
        Encode or decode data based on its type.

        If the input `data` is a list of integers, it encodes the list using RLE.
        If the input `data` is a list of lists of integers (batch), it encodes each list.
        If the input `data` is a `RunList`, it decodes it back to the original list of integers.
        If the input `data` is a list of `RunList` (batch), it decodes each `RunList`.

        Parameters
        ----------
        data : Union[List[int], List[List[int]], RunList, List[RunList]]
            The data to be encoded or decoded.
            - If `List[int]`, the data will be encoded.
            - If `List[List[int]]`, the data will be batch encoded.
            - If `RunList`, the data will be decoded.
            - If `List[RunList]`, the data will be batch decoded.
        sense : Optional[int], optional
            The minimum length of runs to be considered during encoding.
            Required if `data` is a `List[int]` or `List[List[int]]`.
            Ignored if `data` is a `RunList` or `List[RunList]`.
            Default is None.

        Returns
        -------
        Union[RunList, List[RunList], List[int], List[List[int]]]
            - Returns a `RunList` object if encoding a single list.
            - Returns a list of `RunList` objects if encoding a batch of lists.
            - Returns a list of integers if decoding a single `RunList`.
            - Returns a list of lists of integers if decoding a batch of `RunList`s.

        Raises
        ------
        ValueError
            If `sense` is not provided when encoding.
        TypeError
            If `data` is neither a list of integers, list of lists of integers, `RunList`, nor a list of `RunList`s.
        """
        if isinstance(data, list):
            if all(isinstance(item, list) for item in data):
                # Batch encoding
                if sense is None:
                    raise ValueError("Parameter 'sense' must be provided for encoding.")
                return [self.encode(sample, sense) for sample in data]
            elif all(isinstance(item, int) for item in data):
                # Single encoding
                if sense is None:
                    raise ValueError("Parameter 'sense' must be provided for encoding.")
                return self.encode(data, sense)
            else:
                raise TypeError("Input list must be a list of integers or a list of lists of integers.")
        elif isinstance(data, RunList):
            # Single decoding
            return self.decode(data)
        elif isinstance(data, list) and all(isinstance(item, RunList) for item in data):
            # Batch decoding
            return [self.decode(run_list) for run_list in data]
        else:
            raise TypeError(
                "Input data must be a list of integers, list of lists of integers, RunList, or list of RunList objects."
            )