Coverage for src / rtflite / pagination / strategies / base.py: 96%

24 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-08 04:50 +0000

1from abc import ABC, abstractmethod 

2from typing import Any 

3 

4import polars as pl 

5from pydantic import BaseModel, ConfigDict, Field 

6 

7from ...attributes import TableAttributes 

8from ...input import RTFBody, RTFPage 

9 

10 

class PageContext(BaseModel):
    """Holds all data and metadata required to render a single page.

    Only the core data, page-state and layout fields are required; every
    other field has a default and is filled in later by pagination
    strategies or processors (see the per-section comments below).
    """

    # `data` is a polars DataFrame, which is not a pydantic-native type, so
    # arbitrary types have to be allowed for the model to accept it.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Core data: the page's position in the document and its slice of rows.
    # NOTE(review): whether page_number is 0- or 1-based is not visible
    # here -- confirm against the strategies that build these objects.
    page_number: int
    total_pages: int
    data: pl.DataFrame

    # Page state
    is_first_page: bool
    is_last_page: bool

    # Layout
    col_widths: list[float]

    # Content flags
    needs_header: bool = True

    # Base attributes for the table body (sliced/processed)
    table_attrs: TableAttributes | None = None

    # Feature-specific metadata (populated by strategies or processors);
    # None means the corresponding feature has not set anything.
    subline_header: dict[str, Any] | None = None
    pageby_header_info: dict[str, Any] | None = None
    group_boundaries: list[dict[str, Any]] | None = None

    # Finalized attributes (populated by PageProcessor).
    # These override the document-level attributes for this specific page.
    final_body_attrs: TableAttributes | None = None
    # default_factory gives every instance its own fresh dict.
    component_borders: dict[str, Any] = Field(default_factory=dict)

43 

44 

class PaginationContext(BaseModel):
    """Context passed to the strategy to perform pagination.

    Bundles the inputs handed to ``PaginationStrategy.paginate``: the table
    data, the RTF body/page settings, the column widths and the table
    attributes, plus a few optional extras that default to "not provided".
    """

    # The polars DataFrame fields are not pydantic-native types, so
    # arbitrary types have to be allowed for the model to accept them.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Required inputs
    df: pl.DataFrame
    rtf_body: RTFBody
    rtf_page: RTFPage
    col_widths: list[float]
    # No default: callers must pass this explicitly, but None is accepted.
    table_attrs: TableAttributes | None

    # Optional inputs
    # NOTE(review): the exact meaning of these three fields is defined by
    # the strategies that read them -- confirm there before relying on it.
    additional_rows_per_page: int = 0
    row_metadata: pl.DataFrame | None = None
    removed_column_indices: list[int] | None = None

58 

59 

class PaginationStrategy(ABC):
    """Abstract base class for pagination strategies.

    Subclasses must override :meth:`paginate`; the class cannot be
    instantiated while that method is still abstract.
    """

    @abstractmethod
    def paginate(self, context: PaginationContext) -> list[PageContext]:
        """Split the document into pages based on the strategy.

        Args:
            context: The pagination inputs (data, layout and attributes).

        Returns:
            The ``PageContext`` objects describing the resulting pages.

        The base implementation does nothing and returns ``None``; it only
        defines the interface. The docstring alone is a valid function
        body, so no placeholder statement is needed.
        """