# Source code for zae_engine.operation.run_length

from dataclasses import dataclass
from typing import List, Union, Optional
from itertools import groupby


@dataclass
class Run:
    """
    A single run produced by Run-Length Encoding (RLE).

    Attributes
    ----------
    start_index : int
        Index of the first element covered by the run.
    end_index : int
        Index of the last element covered by the run (inclusive).
    value : int
        The value repeated across the run.
    """

    start_index: int
    end_index: int
    value: int

    def __repr__(self):
        """Return a constructor-like string describing this run."""
        text = f"Run(start_index={self.start_index}, end_index={self.end_index}, value={self.value})"
        return text
class RunList:
    """
    Container for the outcome of Run-Length Encoding (RLE).

    Gives access both to every stored run and to the subset of runs that are
    at least ``sense`` elements long.

    Attributes
    ----------
    all_runs : List[Run]
        Every stored run, with no length filtering applied.
    sense : int
        Minimum run length used by :meth:`filtered`.
    original_length : int
        Length of the list that was encoded.
    """

    def __init__(self, all_runs: List[Run], sense: int, original_length: int):
        """
        Store the runs together with the filtering threshold and source length.

        Parameters
        ----------
        all_runs : List[Run]
            Runs obtained from encoding.
        sense : int
            Minimum run length used by :meth:`filtered`.
        original_length : int
            Length of the list that was encoded.
        """
        self.original_length = original_length
        self.sense = sense
        self.all_runs = all_runs

    def raw(self) -> List[Run]:
        """
        Return every stored run, unfiltered.

        Returns
        -------
        List[Run]
            The stored list of runs itself (not a copy).
        """
        return self.all_runs

    def filtered(self) -> List[Run]:
        """
        Return only the runs whose length is at least ``sense``.

        Returns
        -------
        List[Run]
            A new list holding the runs that pass the length threshold.
        """
        kept = []
        for candidate in self.all_runs:
            # Both indices are inclusive, hence the +1.
            span = candidate.end_index - candidate.start_index + 1
            if span >= self.sense:
                kept.append(candidate)
        return kept

    def __repr__(self):
        """Return a constructor-like string describing this RunList."""
        text = f"RunList(all_runs={self.all_runs}, sense={self.sense}, original_length={self.original_length})"
        return text
class RunLengthCodec:
    """
    A codec class for Run-Length Encoding (RLE) and decoding.

    Parameters
    ----------
    tol_merge : int, optional
        Largest gap (number of elements lying between two runs) for which the
        runs are still merged during sanitization. Default is 20.
    remove_incomplete : bool, optional
        Whether to remove incomplete runs during sanitization. Default is False.
    merge_closed : bool, optional
        Whether to merge close runs during sanitization. Default is False.
    base_class : int, optional
        The base class value to be excluded from runs. Default is 0.

    Methods
    -------
    encode(x: List[int], sense: int) -> RunList:
        Encodes a list of integers into RLE runs.
    decode(encoded_runs: RunList) -> List[int]:
        Decodes RLE runs back into a list of integers.
    sanitize(run_list: RunList) -> RunList:
        Cleans and merges runs based on the codec's parameters.
    __call__(data, sense=None):
        Encodes or decodes data based on the input type.
    """

    def __init__(
        self, tol_merge: int = 20, remove_incomplete: bool = False, merge_closed: bool = False, base_class: int = 0
    ):
        """
        Initialize the RunLengthCodec with specified parameters.

        Parameters
        ----------
        tol_merge : int, optional
            Largest gap between two runs for which they are still merged.
            Default is 20.
        remove_incomplete : bool, optional
            Whether to remove runs that start at index 0 or end at the last
            index. Default is False.
        merge_closed : bool, optional
            Whether to merge runs that are close to each other. Default is False.
        base_class : int, optional
            The base class value to exclude from runs. Runs with this value
            will not be encoded. Default is 0.
        """
        self.tol_merge = tol_merge
        self.remove_incomplete = remove_incomplete
        self.merge_closed = merge_closed
        self.base_class = base_class

    def encode(self, x: List[int], sense: int) -> RunList:
        """
        Encode a list of integers using Run-Length Encoding (RLE).

        Consecutive equal values are grouped into `Run` objects (inclusive
        start/end indices plus the value). Groups equal to ``base_class`` are
        skipped. The result is passed through :meth:`sanitize` before being
        returned. ``sense`` is only stored; it takes effect when
        ``RunList.filtered()`` is called.

        Parameters
        ----------
        x : List[int]
            The input list of integers to be encoded. An empty list yields a
            `RunList` without runs.
        sense : int
            The minimum run length kept by ``RunList.filtered()``.

        Returns
        -------
        RunList
            The sanitized runs, the sense value, and the original list length.
        """
        original_length = len(x)
        all_runs = []
        current_index = 0
        # groupby over an empty input simply produces no groups, so no special
        # case is needed for empty lists.
        for value, group in groupby(x):
            run_length = sum(1 for _ in group)
            if value != self.base_class:
                all_runs.append(
                    Run(start_index=current_index, end_index=current_index + run_length - 1, value=value)
                )
            current_index += run_length

        run_list = RunList(all_runs=all_runs, sense=sense, original_length=original_length)
        return self.sanitize(run_list)

    def decode(self, encoded_runs: RunList) -> List[int]:
        """
        Decode a `RunList` back to a list of integers.

        The output has ``encoded_runs.original_length`` elements, initialised
        to ``base_class``. Each run overwrites its index range with its value;
        runs are applied in list order, so later runs win where runs overlap.
        Indices outside the output are clamped, and runs lying completely
        outside are ignored.

        Parameters
        ----------
        encoded_runs : RunList
            A `RunList` object containing runs to be decoded.

        Returns
        -------
        List[int]
            The decoded list of integers reconstructed from the runs.
        """
        length = encoded_runs.original_length
        decoded = [self.base_class] * length

        for run in encoded_runs.all_runs:
            on = max(run.start_index, 0)  # Ensure non-negative start index
            off = min(run.end_index, length - 1)  # Clamp to maximum index
            if off < on:
                # Nothing of this run falls inside the output. Skipping here
                # also keeps negative values out of the slice below.
                continue
            decoded[on : off + 1] = [run.value] * (off - on + 1)

        return decoded

    def sanitize(self, run_list: RunList) -> RunList:
        """
        Clean and merge runs based on the codec's parameters.

        This function processes the RunList by:
        1. Removing incomplete runs (if ``remove_incomplete`` is True), i.e.
           runs that start at index 0 or end at ``original_length - 1``.
        2. Merging close runs (if ``merge_closed`` is True), while respecting
           the base class.

        The input ``run_list`` and the `Run` objects it holds are left
        unmodified; merged runs are new `Run` objects.

        Parameters
        ----------
        run_list : RunList
            The RunList object to be sanitized.

        Returns
        -------
        RunList
            A new RunList holding the sanitized runs, with the same ``sense``
            and ``original_length`` as the input.
        """
        if self.remove_incomplete:
            last_index = run_list.original_length - 1
            sanitized_runs = [
                run for run in run_list.all_runs if run.start_index != 0 and run.end_index != last_index
            ]
        else:
            sanitized_runs = list(run_list.all_runs)

        if self.merge_closed and sanitized_runs:
            sanitized_runs = self._merge_close_runs(sanitized_runs)

        return RunList(all_runs=sanitized_runs, sense=run_list.sense, original_length=run_list.original_length)

    def _merge_close_runs(self, runs: List[Run]) -> List[Run]:
        """Merge runs separated by at most ``tol_merge`` elements and return them as new `Run` objects."""
        ordered = sorted(runs, key=lambda run: run.start_index)

        def clone(run: Run) -> Run:
            # Merging rewrites end_index/value, so work on copies to keep the
            # caller's Run objects intact.
            return Run(start_index=run.start_index, end_index=run.end_index, value=run.value)

        merged_runs = []
        current_run = clone(ordered[0])
        for next_run in ordered[1:]:
            gap = next_run.start_index - current_run.end_index - 1
            same_value = current_run.value == next_run.value
            touches_base = current_run.value == self.base_class or next_run.value == self.base_class

            # Runs stay separate when they are too far apart, or when they
            # differ in value and one of them is the base class.
            if gap > self.tol_merge or (not same_value and touches_base):
                merged_runs.append(current_run)
                current_run = clone(next_run)
                continue

            # max() keeps the merged run from shrinking when next_run lies
            # inside current_run (possible for hand-built RunLists).
            current_run.end_index = max(current_run.end_index, next_run.end_index)
            if not same_value and next_run.value < current_run.value:
                # When values differ, the smaller class value is retained.
                current_run.value = next_run.value

        merged_runs.append(current_run)
        return merged_runs

    def __call__(
        self, data: Union[List[int], List[List[int]], RunList, List[RunList]], sense: Optional[int] = None
    ) -> Union[RunList, List[RunList], List[int], List[List[int]]]:
        """
        Encode or decode data based on its type.

        Parameters
        ----------
        data : Union[List[int], List[List[int]], RunList, List[RunList]]
            The data to be encoded or decoded.
            - If `List[int]`, the data will be encoded.
            - If `List[List[int]]`, the data will be batch encoded.
            - If `RunList`, the data will be decoded.
            - If `List[RunList]` (non-empty), the data will be batch decoded.
            An empty list is handled as an empty encoding batch and yields ``[]``.
        sense : Optional[int], optional
            The minimum length of runs to be considered during encoding.
            Required when encoding; ignored when decoding. Default is None.

        Returns
        -------
        Union[RunList, List[RunList], List[int], List[List[int]]]
            - A `RunList` object if encoding a single list.
            - A list of `RunList` objects if encoding a batch of lists.
            - A list of integers if decoding a single `RunList`.
            - A list of lists of integers if decoding a batch of `RunList`s.

        Raises
        ------
        ValueError
            If `sense` is not provided when encoding.
        TypeError
            If `data` is neither a list of integers, list of lists of integers,
            `RunList`, nor a list of `RunList`s.
        """
        if isinstance(data, RunList):
            # Single decoding
            return self.decode(data)

        if not isinstance(data, list):
            raise TypeError(
                "Input data must be a list of integers, list of lists of integers, RunList, or list of RunList objects."
            )

        # Batch decoding must be tested before the encoding cases; otherwise a
        # list of RunList objects falls through to the TypeError below. The
        # `data and` guard keeps an empty list on the encoding path, because
        # all() is True for an empty iterable.
        if data and all(isinstance(item, RunList) for item in data):
            return [self.decode(run_list) for run_list in data]

        if all(isinstance(item, list) for item in data):
            # Batch encoding
            if sense is None:
                raise ValueError("Parameter 'sense' must be provided for encoding.")
            return [self.encode(sample, sense) for sample in data]

        if all(isinstance(item, int) for item in data):
            # Single encoding
            if sense is None:
                raise ValueError("Parameter 'sense' must be provided for encoding.")
            return self.encode(data, sense)

        raise TypeError("Input list must be a list of integers or a list of lists of integers.")