Coverage for src/rtflite/services/advanced_pagination_service.py: 90%

98 statements  

« prev     ^ index     » next       coverage.py v7.10.5, created at 2025-08-25 22:35 +0000

1"""Advanced Pagination Service for rtflite. 

2 

3This service provides enhanced pagination capabilities using the PageDict system, 

4enabling page_index-like functionality while maintaining backward compatibility. 

5""" 

6 

7from typing import Any 

8 

9import polars as pl 

10 

11from ..pagination import PageBreakType, PageConfig, PageDict, PageIndexManager 

12from .document_service import RTFDocumentService 

13 

14 

class AdvancedPaginationService:
    """Service for advanced pagination features using the PageDict system.

    Typical use is to call :meth:`create_page_dict` for a document and then
    query or validate the resulting layout through the other methods. All
    query methods degrade gracefully (returning a neutral default) when no
    PageDict has been created yet, except :meth:`get_page_index_manager` and
    :meth:`force_content_to_page`, which raise ``ValueError``.
    """

    def __init__(self) -> None:
        self.document_service = RTFDocumentService()
        # Set by create_page_dict(); None until then.
        self.page_dict: PageDict | None = None
        # Created lazily by get_page_index_manager().
        self.page_index_manager: PageIndexManager | None = None

    def create_page_dict(self, document, nrow_per_page: int | None = None) -> PageDict:
        """Create a PageDict for the given document.

        The result is also stored on ``self.page_dict`` and any previously
        cached PageIndexManager is discarded, because it was built for the
        PageDict that is being replaced.

        Args:
            document: RTF document to create PageDict for
            nrow_per_page: Override the document's nrow setting. A falsy
                value (``None`` or ``0``) falls back to
                ``document.rtf_page.nrow``.

        Returns:
            PageDict instance with calculated page configurations
        """
        # Use document's nrow or provided override
        nrow = nrow_per_page or document.rtf_page.nrow

        # Calculate additional rows needed for headers, footnotes, etc.
        additional_rows = self.document_service.calculate_additional_rows_per_page(
            document
        )

        page_dict = PageDict(nrow_per_page=nrow)

        if isinstance(document.df, list):
            # Multi-section document (list of DataFrames)
            self._process_multi_section_document(document, page_dict, additional_rows)
        else:
            # Single DataFrame
            self._process_single_dataframe(
                document.df, document.rtf_body, page_dict, additional_rows
            )

        self.page_dict = page_dict
        # A manager cached for the previous PageDict would otherwise keep being
        # returned by get_page_index_manager() and operate on stale data.
        self.page_index_manager = None
        return page_dict

    def get_page_index_manager(self) -> PageIndexManager:
        """Get or create a PageIndexManager for the current PageDict.

        Returns:
            The cached manager, created on first use.

        Raises:
            ValueError: If create_page_dict() has not been called yet.
        """
        if self.page_dict is None:
            raise ValueError("Must create PageDict first using create_page_dict()")

        if self.page_index_manager is None:
            self.page_index_manager = PageIndexManager(self.page_dict)

        return self.page_index_manager

    def _process_single_dataframe(
        self, df: pl.DataFrame, rtf_body, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a single DataFrame for pagination.

        Args:
            df: DataFrame whose rows are paginated.
            rtf_body: Object optionally carrying ``page_by``, ``subline_by``
                and ``new_page`` attributes; missing attributes default to
                ``None``, ``None`` and ``False`` respectively.
            page_dict: PageDict that receives the calculated pages.
            additional_rows: Extra rows per page passed through as
                ``additional_rows_per_page``.
        """
        # Extract pagination parameters from rtf_body
        page_by = getattr(rtf_body, "page_by", None)
        subline_by = getattr(rtf_body, "subline_by", None)
        new_page = getattr(rtf_body, "new_page", False)

        # Calculate page configurations
        page_dict.calculate_pages_from_dataframe(
            df=df,
            page_by=page_by,
            subline_by=subline_by,
            new_page=new_page,
            additional_rows_per_page=additional_rows,
        )

    def _process_multi_section_document(
        self, document, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Process a multi-section document (list of DataFrames).

        Each section is paginated on its own into a temporary PageDict and
        then merged into ``page_dict`` with row indices shifted by the number
        of rows in all preceding sections.

        Args:
            document: Document whose ``df`` is a list of DataFrames.
                ``rtf_body`` may be a list (indexed per section) or a single
                object shared by every section.
            page_dict: Main PageDict that receives the merged pages.
            additional_rows: Extra rows per page, forwarded to each section.
        """
        current_row_offset = 0
        body_is_list = isinstance(document.rtf_body, list)

        for section_idx, df in enumerate(document.df):
            rtf_body = document.rtf_body[section_idx] if body_is_list else document.rtf_body

            # Create a temporary PageDict for this section
            section_page_dict = PageDict(nrow_per_page=page_dict.nrow_per_page)
            self._process_single_dataframe(
                df, rtf_body, section_page_dict, additional_rows
            )

            # Merge section pages into main PageDict with offset
            self._merge_section_pages(
                section_page_dict, page_dict, current_row_offset, section_idx
            )

            current_row_offset += df.height

    def _merge_section_pages(
        self,
        section_page_dict: PageDict,
        main_page_dict: PageDict,
        row_offset: int,
        section_idx: int,
    ) -> None:
        """Merge section pages into the main PageDict.

        Args:
            section_page_dict: PageDict holding the pages of one section.
            main_page_dict: PageDict the pages are added to.
            row_offset: Value added to each page's ``start_row``/``end_row``.
            section_idx: Zero-based section index; a ``"Section N"`` label
                (one-based) is appended to each page's section headers.
        """
        # Pages of this section are numbered after those already present.
        page_offset = main_page_dict.total_pages

        for page_num, config in section_page_dict.page_configs.items():
            # Build a fresh config (copying the containers) with adjusted
            # page number and row indices.
            new_config = PageConfig(
                page_number=page_num + page_offset,
                start_row=config.start_row + row_offset,
                end_row=config.end_row + row_offset,
                break_type=config.break_type,
                section_headers=list(config.section_headers)
                + [f"Section {section_idx + 1}"],
                subline_header=config.subline_header,
                group_context=dict(config.group_context),
                forced_content=set(config.forced_content),
            )

            main_page_dict.add_page_config(new_config)

    def get_page_for_row(self, row_index: int) -> int:
        """Get the page number where a specific row appears.

        Returns:
            The number of the first page whose inclusive row range contains
            ``row_index``; ``1`` when no PageDict exists or no page matches.
        """
        if self.page_dict is None:
            return 1

        for page_num, config in self.page_dict.page_configs.items():
            if config.start_row <= row_index <= config.end_row:
                return page_num

        return 1  # Default to first page

    def get_rows_for_page(self, page_num: int) -> tuple[int, int]:
        """Get the row range for a specific page.

        Returns:
            ``(start_row, end_row)`` of the page, or ``(0, 0)`` when no
            PageDict exists or the page has no configuration.
        """
        if self.page_dict is None:
            return (0, 0)

        config = self.page_dict.get_page_config(page_num)
        if config:
            return (config.start_row, config.end_row)

        return (0, 0)

    def force_content_to_page(self, content_id: str, page_num: int) -> None:
        """Force specific content to appear on a specific page (page_index functionality).

        Raises:
            ValueError: If create_page_dict() has not been called yet.
        """
        manager = self.get_page_index_manager()
        manager.assign_content_to_page(content_id, page_num)

    def get_pagination_summary(self) -> dict[str, Any]:
        """Get a summary of the pagination configuration.

        Returns:
            A dict with ``total_pages``, ``nrow_per_page``, ``break_types``
            and a per-page ``page_configs`` mapping, or
            ``{"error": "No PageDict available"}`` when no PageDict exists.
        """
        if self.page_dict is None:
            return {"error": "No PageDict available"}

        page_configs: dict[int, dict[str, Any]] = {
            page_num: {
                "rows": f"{config.start_row}-{config.end_row}",
                "row_count": config.row_count,
                "break_type": config.break_type.value,
                "is_section_start": config.is_section_start,
                "section_headers": config.section_headers,
                "subline_header": config.subline_header,
                "forced_content_count": len(config.forced_content),
            }
            for page_num, config in self.page_dict.page_configs.items()
        }

        return {
            "total_pages": self.page_dict.total_pages,
            "nrow_per_page": self.page_dict.nrow_per_page,
            "break_types": self.page_dict.get_page_break_summary(),
            "page_configs": page_configs,
        }

    def convert_to_legacy_format(self) -> list[dict[str, Any]]:
        """Convert PageDict to legacy page info format for backward compatibility.

        Returns:
            A list of plain dict copies of the entries produced by
            ``to_legacy_page_info()``; empty when no PageDict exists.
        """
        if self.page_dict is None:
            return []

        return [dict(page_info) for page_info in self.page_dict.to_legacy_page_info()]

    def optimize_pagination(self) -> None:
        """Optimize pagination for better balance and readability.

        Does nothing unless a PageIndexManager has already been created via
        get_page_index_manager().
        """
        if self.page_index_manager:
            self.page_index_manager.optimize_page_distribution()

    def validate_pagination(self) -> list[str]:
        """Validate the pagination configuration and return any issues.

        Checks, in order: pages without content rows, overlapping row ranges
        between consecutively numbered pages, and rows in
        ``0..end_row of the highest-numbered page`` not covered by any page.

        Returns:
            Human-readable issue descriptions; empty when nothing is wrong.
        """
        issues: list[str] = []

        if self.page_dict is None:
            issues.append("No PageDict available")
            return issues

        page_configs = self.page_dict.page_configs

        # Check for empty pages
        issues.extend(
            f"Page {page_num} has no content rows"
            for page_num, config in page_configs.items()
            if config.row_count <= 0
        )

        # Configs ordered by page number (keys are unique, so this matches
        # sorting the (page_num, config) items).
        ordered = [page_configs[page_num] for page_num in sorted(page_configs)]

        # Check for overlapping page ranges between neighbouring pages
        for current_page, next_page in zip(ordered, ordered[1:]):
            if current_page.end_row >= next_page.start_row:
                issues.append(
                    f"Pages {current_page.page_number} and {next_page.page_number} "
                    "have overlapping row ranges"
                )

        # Check for missing row coverage
        if ordered:
            total_expected_rows = ordered[-1].end_row + 1
            covered_rows: set[int] = set()
            for config in ordered:
                covered_rows.update(range(config.start_row, config.end_row + 1))

            # Iterating the range keeps the result in ascending order.
            missing_rows = [
                row for row in range(total_expected_rows) if row not in covered_rows
            ]
            if missing_rows:
                issues.append(f"Missing row coverage for rows: {missing_rows}")

        return issues