Coverage for src/rtflite/services/advanced_pagination_service.py: 90%

98 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 16:35 +0000

1"""Advanced Pagination Service for rtflite. 

2 

3This service provides enhanced pagination capabilities using the PageDict system, 

4enabling page_index-like functionality while maintaining backward compatibility. 

5""" 

6 

7from typing import Any 

8 

9import polars as pl 

10 

11from ..pagination import PageBreakType, PageConfig, PageDict, PageIndexManager 

12from .document_service import RTFDocumentService 

13 

14 

class AdvancedPaginationService:
    """Service for advanced pagination features using the PageDict system.

    The service builds a ``PageDict`` for a document (``create_page_dict``),
    keeps it on ``self.page_dict`` and lazily creates a ``PageIndexManager``
    for it (``get_page_index_manager``). The remaining methods are read-only
    queries or thin delegations to those two objects.
    """

    def __init__(self) -> None:
        self.document_service = RTFDocumentService()
        # Both stay None until create_page_dict() / get_page_index_manager()
        # have been called.
        self.page_dict: PageDict | None = None
        self.page_index_manager: PageIndexManager | None = None

    def create_page_dict(self, document, nrow_per_page: int | None = None) -> PageDict:
        """Create a PageDict for the given document.

        The new PageDict replaces ``self.page_dict``. Any previously cached
        ``PageIndexManager`` is discarded because it was built for the old
        PageDict.

        Args:
            document: RTF document to create the PageDict for. Its ``df`` may
                be a single DataFrame or a list of DataFrames (one per section).
            nrow_per_page: Override for the document's ``rtf_page.nrow``
                setting. A falsy value (``None`` or ``0``) means "use the
                document's setting".

        Returns:
            PageDict instance with calculated page configurations.
        """
        # `or` (not `is None`) is deliberate: an override of 0 is treated like
        # "no override", exactly as before.
        nrow = nrow_per_page or document.rtf_page.nrow

        # Rows taken by headers, footnotes, etc. as computed by the document service.
        additional_rows = self.document_service.calculate_additional_rows_per_page(
            document
        )

        page_dict = PageDict(nrow_per_page=nrow)

        if isinstance(document.df, list):
            # Multi-section document: one DataFrame per section.
            self._process_multi_section_document(document, page_dict, additional_rows)
        else:
            self._process_single_dataframe(
                document.df, document.rtf_body, page_dict, additional_rows
            )

        self.page_dict = page_dict
        # A manager created earlier wraps the previous PageDict; keeping it
        # would make get_page_index_manager() hand out a stale manager.
        self.page_index_manager = None
        return page_dict

    def get_page_index_manager(self) -> PageIndexManager:
        """Get or create the PageIndexManager for the current PageDict.

        Raises:
            ValueError: If ``create_page_dict()`` has not been called yet.
        """
        if self.page_dict is None:
            raise ValueError("Must create PageDict first using create_page_dict()")

        if self.page_index_manager is None:
            self.page_index_manager = PageIndexManager(self.page_dict)

        return self.page_index_manager

    def _process_single_dataframe(
        self, df: pl.DataFrame, rtf_body, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Calculate the page configurations of ``page_dict`` for one DataFrame.

        ``page_by``, ``subline_by`` and ``new_page`` are read from ``rtf_body``
        and default to ``None``, ``None`` and ``False`` when the attribute is
        absent.
        """
        page_dict.calculate_pages_from_dataframe(
            df=df,
            page_by=getattr(rtf_body, "page_by", None),
            subline_by=getattr(rtf_body, "subline_by", None),
            new_page=getattr(rtf_body, "new_page", False),
            additional_rows_per_page=additional_rows,
        )

    def _process_multi_section_document(
        self, document, page_dict: PageDict, additional_rows: int
    ) -> None:
        """Paginate every section of ``document`` and merge the result into ``page_dict``.

        Each DataFrame in ``document.df`` is paginated in its own temporary
        PageDict; its pages are then appended to ``page_dict`` with row indices
        shifted by the number of rows in all preceding sections.
        """
        # When rtf_body is not a list, the same body settings apply to every section.
        bodies_are_per_section = isinstance(document.rtf_body, list)
        current_row_offset = 0

        for section_idx, df in enumerate(document.df):
            rtf_body = (
                document.rtf_body[section_idx]
                if bodies_are_per_section
                else document.rtf_body
            )

            section_page_dict = PageDict(nrow_per_page=page_dict.nrow_per_page)
            self._process_single_dataframe(
                df, rtf_body, section_page_dict, additional_rows
            )

            self._merge_section_pages(
                section_page_dict, page_dict, current_row_offset, section_idx
            )

            current_row_offset += df.height

    def _merge_section_pages(
        self,
        section_page_dict: PageDict,
        main_page_dict: PageDict,
        row_offset: int,
        section_idx: int,
    ) -> None:
        """Append the pages of one section to the main PageDict.

        Page numbers are shifted by the number of pages already in
        ``main_page_dict`` and row indices by ``row_offset``. Each copied page
        additionally gets a ``"Section <n>"`` entry (1-based) appended to its
        section headers.
        """
        # Read once up front: total_pages grows as configs are added below.
        page_offset = main_page_dict.total_pages
        section_label = f"Section {section_idx + 1}"

        for page_num, config in section_page_dict.page_configs.items():
            main_page_dict.add_page_config(
                PageConfig(
                    page_number=page_num + page_offset,
                    start_row=config.start_row + row_offset,
                    end_row=config.end_row + row_offset,
                    break_type=config.break_type,
                    section_headers=[*config.section_headers, section_label],
                    subline_header=config.subline_header,
                    # Copies keep the merged page independent of the temporary one.
                    group_context=config.group_context.copy(),
                    forced_content=config.forced_content.copy(),
                )
            )

    def get_page_for_row(self, row_index: int) -> int:
        """Get the page number where a specific row appears.

        Returns ``1`` when no PageDict exists or when no page contains
        ``row_index``.
        """
        if self.page_dict is None:
            return 1

        for page_num, config in self.page_dict.page_configs.items():
            if config.start_row <= row_index <= config.end_row:
                return page_num

        return 1  # Default to first page

    def get_rows_for_page(self, page_num: int) -> tuple[int, int]:
        """Get the ``(start_row, end_row)`` range for a specific page.

        Returns ``(0, 0)`` when no PageDict exists or the page has no config.
        """
        if self.page_dict is None:
            return (0, 0)

        config = self.page_dict.get_page_config(page_num)
        # Explicit None check so a config object that happens to be falsy is
        # still reported with its real range.
        if config is not None:
            return (config.start_row, config.end_row)

        return (0, 0)

    def force_content_to_page(self, content_id: str, page_num: int) -> None:
        """Force specific content to appear on a specific page (page_index functionality).

        Raises:
            ValueError: If ``create_page_dict()`` has not been called yet.
        """
        self.get_page_index_manager().assign_content_to_page(content_id, page_num)

    def get_pagination_summary(self) -> dict[str, Any]:
        """Get a summary of the pagination configuration.

        Returns ``{"error": "No PageDict available"}`` when no PageDict exists;
        otherwise a dict with the keys ``total_pages``, ``nrow_per_page``,
        ``break_types`` and ``page_configs`` (per-page details keyed by page
        number).
        """
        if self.page_dict is None:
            return {"error": "No PageDict available"}

        page_configs: dict[int, dict[str, Any]] = {
            page_num: {
                "rows": f"{config.start_row}-{config.end_row}",
                "row_count": config.row_count,
                "break_type": config.break_type.value,
                "is_section_start": config.is_section_start,
                "section_headers": config.section_headers,
                "subline_header": config.subline_header,
                "forced_content_count": len(config.forced_content),
            }
            for page_num, config in self.page_dict.page_configs.items()
        }

        return {
            "total_pages": self.page_dict.total_pages,
            "nrow_per_page": self.page_dict.nrow_per_page,
            "break_types": self.page_dict.get_page_break_summary(),
            "page_configs": page_configs,
        }

    def convert_to_legacy_format(self) -> list[dict[str, Any]]:
        """Convert the PageDict to the legacy page info format.

        Returns an empty list when no PageDict exists.
        """
        if self.page_dict is None:
            return []

        return self.page_dict.to_legacy_page_info()

    def optimize_pagination(self) -> None:
        """Optimize pagination for better balance and readability.

        Does nothing when no PageIndexManager has been created yet.
        """
        if self.page_index_manager is not None:
            self.page_index_manager.optimize_page_distribution()

    def validate_pagination(self) -> list[str]:
        """Validate the pagination configuration and return any issues.

        Checks, in this order: pages without content rows, neighbouring pages
        (by page number) whose row ranges overlap, and rows between 0 and the
        largest ``end_row`` that no page covers.

        Returns:
            Human-readable issue descriptions; empty when nothing was found.
        """
        if self.page_dict is None:
            return ["No PageDict available"]

        page_configs = self.page_dict.page_configs

        # Check for empty pages
        issues = [
            f"Page {page_num} has no content rows"
            for page_num, config in page_configs.items()
            if config.row_count <= 0
        ]

        # Configs ordered by page number.
        ordered = [page_configs[page_num] for page_num in sorted(page_configs)]

        # Check for overlapping page ranges
        for current_page, next_page in zip(ordered, ordered[1:]):
            if current_page.end_row >= next_page.start_row:
                issues.append(
                    f"Pages {current_page.page_number} and {next_page.page_number} have overlapping row ranges"
                )

        # Check for missing row coverage
        if ordered:
            # Use the largest end_row rather than the last page's: when pages
            # are out of row order the last page may end early and would hide
            # gaps below an earlier page's rows.
            total_expected_rows = max(config.end_row for config in ordered) + 1

            covered_rows: set[int] = set()
            for config in ordered:
                covered_rows.update(range(config.start_row, config.end_row + 1))

            missing_rows = set(range(total_expected_rows)) - covered_rows
            if missing_rows:
                issues.append(f"Missing row coverage for rows: {sorted(missing_rows)}")

        return issues