Coverage for src/rtflite/services/advanced_pagination_service.py: 90%
98 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
"""Advanced Pagination Service for rtflite.

This service provides enhanced pagination capabilities using the PageDict system,
enabling page_index-like functionality while maintaining backward compatibility.
"""
7from typing import Any
9import polars as pl
11from ..pagination import PageBreakType, PageConfig, PageDict, PageIndexManager
12from .document_service import RTFDocumentService
class AdvancedPaginationService:
    """Service for advanced pagination features using the PageDict system.

    The service builds a ``PageDict`` for a document, keeps it on the
    instance, and exposes lookup, summary, conversion and validation helpers
    that read from it.

    Attributes:
        document_service: ``RTFDocumentService`` used to calculate the
            additional rows needed per page.
        page_dict: PageDict stored by the most recent ``create_page_dict``
            call, or ``None`` if it has not been called yet.
        page_index_manager: Lazily created ``PageIndexManager`` for
            ``page_dict``, or ``None`` until first requested.
    """

    def __init__(self) -> None:
        """Initialise the service with no PageDict and no index manager."""
        self.document_service = RTFDocumentService()
        self.page_dict: PageDict | None = None
        self.page_index_manager: PageIndexManager | None = None

    def create_page_dict(self, document, nrow_per_page: int | None = None) -> PageDict:
        """Create a PageDict for the given document.

        Args:
            document: RTF document to create the PageDict for. Its ``df``,
                ``rtf_body`` and ``rtf_page.nrow`` attributes are read.
            nrow_per_page: Override for the document's nrow setting. A falsy
                value (``None`` or ``0``) falls back to
                ``document.rtf_page.nrow``.

        Returns:
            The newly built PageDict, which is also stored on
            ``self.page_dict``.
        """
        # Use the provided override, or the document's nrow when it is falsy.
        nrow = nrow_per_page or document.rtf_page.nrow

        # Additional rows needed for headers, footnotes, etc.
        additional_rows = self.document_service.calculate_additional_rows_per_page(
            document
        )

        page_dict = PageDict(nrow_per_page=nrow)

        if isinstance(document.df, list):
            # Multi-section document (list of DataFrames).
            self._process_multi_section_document(document, page_dict, additional_rows)
        else:
            # Single DataFrame.
            self._process_single_dataframe(
                document.df, document.rtf_body, page_dict, additional_rows
            )

        self.page_dict = page_dict
        # A manager cached for a previous PageDict would keep pointing at the
        # old object; drop it so get_page_index_manager() rebuilds it.
        self.page_index_manager = None
        return page_dict

    def get_page_index_manager(self) -> PageIndexManager:
        """Get or create a PageIndexManager for the current PageDict.

        Returns:
            The cached manager, created on first use from ``self.page_dict``.

        Raises:
            ValueError: If ``create_page_dict()`` has not been called yet.
        """
        if self.page_dict is None:
            raise ValueError("Must create PageDict first using create_page_dict()")

        if self.page_index_manager is None:
            self.page_index_manager = PageIndexManager(self.page_dict)

        return self.page_index_manager

    def _process_single_dataframe(
        self, df: pl.DataFrame, rtf_body, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a single DataFrame for pagination.

        Args:
            df: DataFrame whose rows are paginated.
            rtf_body: Body object from which ``page_by``, ``subline_by`` and
                ``new_page`` are read; missing attributes default to
                ``None``, ``None`` and ``False`` respectively.
            page_dict: PageDict that receives the calculated pages.
            additional_rows: Additional rows per page passed through to
                ``calculate_pages_from_dataframe``.
        """
        page_dict.calculate_pages_from_dataframe(
            df=df,
            page_by=getattr(rtf_body, "page_by", None),
            subline_by=getattr(rtf_body, "subline_by", None),
            new_page=getattr(rtf_body, "new_page", False),
            additional_rows_per_page=additional_rows,
        )

    def _process_multi_section_document(
        self, document, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a multi-section document (list of DataFrames).

        Each section is paginated into its own temporary PageDict and then
        merged into ``page_dict`` with row indices shifted by the combined
        height of the preceding sections.

        Args:
            document: Document whose ``df`` is a list of DataFrames and whose
                ``rtf_body`` is either a matching list or a single body
                shared by every section.
            page_dict: Main PageDict that receives the merged pages.
            additional_rows: Additional rows per page for every section.
        """
        current_row_offset = 0
        body_is_list = isinstance(document.rtf_body, list)

        for section_idx, df in enumerate(document.df):
            rtf_body = document.rtf_body[section_idx] if body_is_list else document.rtf_body

            # Paginate this section on its own, using the main nrow setting.
            section_page_dict = PageDict(nrow_per_page=page_dict.nrow_per_page)
            self._process_single_dataframe(
                df, rtf_body, section_page_dict, additional_rows
            )

            # Merge section pages into the main PageDict with the row offset.
            self._merge_section_pages(
                section_page_dict, page_dict, current_row_offset, section_idx
            )

            current_row_offset += df.height

    def _merge_section_pages(
        self,
        section_page_dict: PageDict,
        main_page_dict: PageDict,
        row_offset: int,
        section_idx: int,
    ) -> None:
        """Merge section pages into the main PageDict.

        Args:
            section_page_dict: PageDict holding the pages of one section.
            main_page_dict: PageDict that receives the re-numbered pages.
            row_offset: Amount added to each page's ``start_row``/``end_row``.
            section_idx: Zero-based section index; ``"Section <idx + 1>"`` is
                appended to each merged page's section headers.
        """
        # Read once, before adding pages, so every page of this section is
        # shifted by the same amount.
        page_offset = main_page_dict.total_pages
        section_label = f"Section {section_idx + 1}"

        for page_num, config in section_page_dict.page_configs.items():
            main_page_dict.add_page_config(
                PageConfig(
                    page_number=page_num + page_offset,
                    start_row=config.start_row + row_offset,
                    end_row=config.end_row + row_offset,
                    break_type=config.break_type,
                    section_headers=config.section_headers + [section_label],
                    subline_header=config.subline_header,
                    # Copies keep the merged page independent of the
                    # temporary section PageDict.
                    group_context=config.group_context.copy(),
                    forced_content=config.forced_content.copy(),
                )
            )

    def get_page_for_row(self, row_index: int) -> int:
        """Get the page number where a specific row appears.

        Args:
            row_index: Row index to look up (compared inclusively against
                each page's ``start_row`` and ``end_row``).

        Returns:
            The number of the first page whose range contains the row, or
            ``1`` when there is no PageDict or no page contains the row.
        """
        if self.page_dict is None:
            return 1

        for page_num, config in self.page_dict.page_configs.items():
            if config.start_row <= row_index <= config.end_row:
                return page_num

        return 1  # Default to first page

    def get_rows_for_page(self, page_num: int) -> tuple[int, int]:
        """Get the row range for a specific page.

        Args:
            page_num: Page number to look up.

        Returns:
            ``(start_row, end_row)`` of the page, or ``(0, 0)`` when there is
            no PageDict or the page has no configuration.
        """
        if self.page_dict is None:
            return (0, 0)

        config = self.page_dict.get_page_config(page_num)
        if config is not None:
            return (config.start_row, config.end_row)

        return (0, 0)

    def force_content_to_page(self, content_id: str, page_num: int) -> None:
        """Force specific content onto a specific page (page_index functionality).

        Args:
            content_id: Identifier of the content to assign.
            page_num: Page the content is assigned to.

        Raises:
            ValueError: If no PageDict has been created yet.
        """
        self.get_page_index_manager().assign_content_to_page(content_id, page_num)

    def get_pagination_summary(self) -> dict[str, Any]:
        """Get a summary of the pagination configuration.

        Returns:
            ``{"error": "No PageDict available"}`` when no PageDict exists;
            otherwise a dict with ``total_pages``, ``nrow_per_page``,
            ``break_types`` and a per-page ``page_configs`` mapping.
        """
        if self.page_dict is None:
            return {"error": "No PageDict available"}

        page_configs: dict[int, dict[str, Any]] = {
            page_num: {
                "rows": f"{config.start_row}-{config.end_row}",
                "row_count": config.row_count,
                "break_type": config.break_type.value,
                "is_section_start": config.is_section_start,
                "section_headers": config.section_headers,
                "subline_header": config.subline_header,
                "forced_content_count": len(config.forced_content),
            }
            for page_num, config in self.page_dict.page_configs.items()
        }

        return {
            "total_pages": self.page_dict.total_pages,
            "nrow_per_page": self.page_dict.nrow_per_page,
            "break_types": self.page_dict.get_page_break_summary(),
            "page_configs": page_configs,
        }

    def convert_to_legacy_format(self) -> list[dict[str, Any]]:
        """Convert PageDict to legacy page info format for backward compatibility.

        Returns:
            The result of ``PageDict.to_legacy_page_info()``, or an empty
            list when no PageDict exists.
        """
        if self.page_dict is None:
            return []

        return self.page_dict.to_legacy_page_info()

    def optimize_pagination(self) -> None:
        """Optimize pagination for better balance and readability.

        Does nothing unless a PageIndexManager has already been created via
        ``get_page_index_manager()``.
        """
        if self.page_index_manager is not None:
            self.page_index_manager.optimize_page_distribution()

    def validate_pagination(self) -> list[str]:
        """Validate the pagination configuration and return any issues.

        Three checks are run: pages with ``row_count <= 0``, consecutive
        pages (by page number) whose row ranges overlap, and rows in
        ``0..end_row`` of the last page that no page covers.

        Returns:
            Human-readable issue messages; empty when nothing was found.
        """
        if self.page_dict is None:
            return ["No PageDict available"]

        # Check for empty pages.
        issues: list[str] = [
            f"Page {page_num} has no content rows"
            for page_num, config in self.page_dict.page_configs.items()
            if config.row_count <= 0
        ]

        # Check for overlapping page ranges between neighbouring pages.
        sorted_pages = sorted(self.page_dict.page_configs.items())
        for (_, current_page), (_, next_page) in zip(sorted_pages, sorted_pages[1:]):
            if current_page.end_row >= next_page.start_row:
                issues.append(
                    f"Pages {current_page.page_number} and {next_page.page_number} "
                    "have overlapping row ranges"
                )

        # Check for missing row coverage.
        if sorted_pages:
            # Expected rows run from 0 to the end_row of the highest-numbered page.
            total_expected_rows = sorted_pages[-1][1].end_row + 1
            covered_rows: set[int] = set()
            for _, config in sorted_pages:
                covered_rows.update(range(config.start_row, config.end_row + 1))

            missing_rows = set(range(total_expected_rows)) - covered_rows
            if missing_rows:
                issues.append(f"Missing row coverage for rows: {sorted(missing_rows)}")

        return issues