Coverage for src/rtflite/services/advanced_pagination_service.py: 90%
98 statements
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-25 22:35 +0000
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-25 22:35 +0000
1"""Advanced Pagination Service for rtflite.
3This service provides enhanced pagination capabilities using the PageDict system,
4enabling page_index-like functionality while maintaining backward compatibility.
5"""
7from typing import Any
9import polars as pl
11from ..pagination import PageBreakType, PageConfig, PageDict, PageIndexManager
12from .document_service import RTFDocumentService
class AdvancedPaginationService:
    """Service for advanced pagination features using the PageDict system.

    The service builds a ``PageDict`` for a document (single DataFrame or a
    list of DataFrames), exposes lookups between rows and pages, and offers
    summary, validation and legacy-format helpers on top of it.
    """

    def __init__(self) -> None:
        self.document_service = RTFDocumentService()
        # Populated by create_page_dict(); None until then.
        self.page_dict: PageDict | None = None
        # Lazily created by get_page_index_manager() for the current page_dict.
        self.page_index_manager: PageIndexManager | None = None

    def create_page_dict(self, document, nrow_per_page: int | None = None) -> PageDict:
        """Create a PageDict for the given document.

        Args:
            document: RTF document to create PageDict for
            nrow_per_page: Override the document's nrow setting. Any falsy
                value (``None`` or ``0``) falls back to ``document.rtf_page.nrow``.

        Returns:
            PageDict instance with calculated page configurations
        """
        # Use document's nrow or provided override
        nrow = nrow_per_page or document.rtf_page.nrow

        # Calculate additional rows needed for headers, footnotes, etc.
        additional_rows = self.document_service.calculate_additional_rows_per_page(
            document
        )

        page_dict = PageDict(nrow_per_page=nrow)

        if isinstance(document.df, list):
            # Multi-section document (list of DataFrames)
            self._process_multi_section_document(document, page_dict, additional_rows)
        else:
            # Single DataFrame
            self._process_single_dataframe(
                document.df, document.rtf_body, page_dict, additional_rows
            )

        self.page_dict = page_dict
        # A manager cached for a previous PageDict would keep operating on the
        # old pages; drop it so get_page_index_manager() rebuilds it on demand.
        self.page_index_manager = None
        return page_dict

    def get_page_index_manager(self) -> PageIndexManager:
        """Get or create a PageIndexManager for the current PageDict.

        Raises:
            ValueError: If create_page_dict() has not been called yet.
        """
        if self.page_dict is None:
            raise ValueError("Must create PageDict first using create_page_dict()")

        if self.page_index_manager is None:
            self.page_index_manager = PageIndexManager(self.page_dict)

        return self.page_index_manager

    def _process_single_dataframe(
        self, df: pl.DataFrame, rtf_body, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a single DataFrame for pagination.

        Pagination parameters are read from ``rtf_body``; attributes that are
        absent fall back to ``None`` (``page_by``, ``subline_by``) or ``False``
        (``new_page``). The resulting pages are written into ``page_dict``.
        """
        page_dict.calculate_pages_from_dataframe(
            df=df,
            page_by=getattr(rtf_body, "page_by", None),
            subline_by=getattr(rtf_body, "subline_by", None),
            new_page=getattr(rtf_body, "new_page", False),
            additional_rows_per_page=additional_rows,
        )

    def _process_multi_section_document(
        self, document, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a multi-section document (list of DataFrames).

        Each section is paginated on its own and then merged into
        ``page_dict`` with its row indices shifted by the number of rows in
        all preceding sections.
        """
        # One body per section when rtf_body is a list, otherwise shared.
        bodies_per_section = isinstance(document.rtf_body, list)
        current_row_offset = 0

        for section_idx, df in enumerate(document.df):
            rtf_body = (
                document.rtf_body[section_idx]
                if bodies_per_section
                else document.rtf_body
            )

            # Paginate the section in a temporary PageDict of the same page size
            section_page_dict = PageDict(nrow_per_page=page_dict.nrow_per_page)
            self._process_single_dataframe(
                df, rtf_body, section_page_dict, additional_rows
            )

            # Merge section pages into main PageDict with offset
            self._merge_section_pages(
                section_page_dict, page_dict, current_row_offset, section_idx
            )

            current_row_offset += df.height

    def _merge_section_pages(
        self,
        section_page_dict: PageDict,
        main_page_dict: PageDict,
        row_offset: int,
        section_idx: int,
    ) -> None:
        """Merge section pages into the main PageDict.

        Args:
            section_page_dict: Pages computed for one section.
            main_page_dict: Destination PageDict; receives the shifted pages.
            row_offset: Value added to each page's start_row and end_row.
            section_idx: Zero-based section index; used for the
                ``"Section N"`` header (one-based) appended to each page.
        """
        # Capture the page-number offset once, before any page is added,
        # because total_pages is read from the PageDict being extended.
        page_offset = main_page_dict.total_pages
        section_label = f"Section {section_idx + 1}"

        for page_num, config in section_page_dict.page_configs.items():
            # Copy the containers so the merged config does not share them
            # with the temporary section config.
            main_page_dict.add_page_config(
                PageConfig(
                    page_number=page_num + page_offset,
                    start_row=config.start_row + row_offset,
                    end_row=config.end_row + row_offset,
                    break_type=config.break_type,
                    section_headers=list(config.section_headers) + [section_label],
                    subline_header=config.subline_header,
                    group_context=dict(config.group_context),
                    forced_content=set(config.forced_content),
                )
            )

    def get_page_for_row(self, row_index: int) -> int:
        """Get the page number where a specific row appears.

        Returns 1 when no PageDict exists or when no page contains the row.
        If several pages contain the row, the first one in the PageDict's
        iteration order is returned.
        """
        if self.page_dict is None:
            return 1

        for page_num, config in self.page_dict.page_configs.items():
            if config.start_row <= row_index <= config.end_row:
                return page_num

        return 1  # Default to first page

    def get_rows_for_page(self, page_num: int) -> tuple[int, int]:
        """Get the ``(start_row, end_row)`` range for a specific page.

        Returns ``(0, 0)`` when no PageDict exists or the page is not found.
        """
        if self.page_dict is None:
            return (0, 0)

        config = self.page_dict.get_page_config(page_num)
        if config:
            return (config.start_row, config.end_row)

        return (0, 0)

    def force_content_to_page(self, content_id: str, page_num: int) -> None:
        """Force specific content to appear on a specific page (page_index functionality).

        Raises:
            ValueError: If create_page_dict() has not been called yet.
        """
        self.get_page_index_manager().assign_content_to_page(content_id, page_num)

    def get_pagination_summary(self) -> dict[str, Any]:
        """Get a summary of the pagination configuration.

        Returns ``{"error": "No PageDict available"}`` when no PageDict has
        been created; otherwise a dict with the total page count, rows per
        page, break-type summary and one entry per page.
        """
        if self.page_dict is None:
            return {"error": "No PageDict available"}

        page_configs: dict[int, dict[str, Any]] = {
            page_num: {
                "rows": f"{config.start_row}-{config.end_row}",
                "row_count": config.row_count,
                "break_type": config.break_type.value,
                "is_section_start": config.is_section_start,
                "section_headers": config.section_headers,
                "subline_header": config.subline_header,
                "forced_content_count": len(config.forced_content),
            }
            for page_num, config in self.page_dict.page_configs.items()
        }

        return {
            "total_pages": self.page_dict.total_pages,
            "nrow_per_page": self.page_dict.nrow_per_page,
            "break_types": self.page_dict.get_page_break_summary(),
            "page_configs": page_configs,
        }

    def convert_to_legacy_format(self) -> list[dict[str, Any]]:
        """Convert PageDict to legacy page info format for backward compatibility.

        Returns an empty list when no PageDict has been created.
        """
        if self.page_dict is None:
            return []

        return [dict(page_info) for page_info in self.page_dict.to_legacy_page_info()]

    def optimize_pagination(self) -> None:
        """Optimize pagination for better balance and readability.

        Does nothing when no PageIndexManager has been created yet.
        """
        # Identity check: an existing manager must be used even if the object
        # happens to evaluate as falsy.
        if self.page_index_manager is not None:
            self.page_index_manager.optimize_page_distribution()

    def validate_pagination(self) -> list[str]:
        """Validate the pagination configuration and return any issues.

        Checks, in order: pages without content rows, overlapping row ranges
        between consecutive pages (by page number), and rows between 0 and
        the last page's end_row that no page covers.
        """
        if self.page_dict is None:
            return ["No PageDict available"]

        issues: list[str] = []

        # Check for empty pages
        for page_num, config in self.page_dict.page_configs.items():
            if config.row_count <= 0:
                issues.append(f"Page {page_num} has no content rows")

        # Check for overlapping page ranges between neighbouring pages
        sorted_pages = sorted(self.page_dict.page_configs.items())
        for (_, current_page), (_, next_page) in zip(sorted_pages, sorted_pages[1:]):
            if current_page.end_row >= next_page.start_row:
                issues.append(
                    f"Pages {current_page.page_number} and {next_page.page_number} have overlapping row ranges"
                )

        # Check for missing row coverage
        if sorted_pages:
            total_expected_rows = sorted_pages[-1][1].end_row + 1
            covered_rows: set[int] = set()
            for _, config in sorted_pages:
                covered_rows.update(range(config.start_row, config.end_row + 1))

            missing_rows = set(range(total_expected_rows)) - covered_rows
            if missing_rows:
                issues.append(f"Missing row coverage for rows: {sorted(missing_rows)}")

        return issues