Coverage for src / rtflite / services / encoding_service.py: 90%

184 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-08 04:50 +0000

1"""RTF encoding service that handles document component encoding.""" 

2 

3from collections.abc import Sequence 

4 

5 

6class RTFEncodingService: 

7 """Service class that handles RTF component encoding operations. 

8 

9 This class extracts encoding logic from RTFDocument to improve separation 

10 of concerns and enable better testing and maintainability. 

11 """ 

12 

def __init__(self):
    """Create the service and attach a fresh RTF syntax generator.

    The generator is stored on ``self.syntax`` and is what the font table,
    color table and page settings encoders delegate to.
    """
    # Imported lazily inside the constructor, as in the original layout.
    from ..rtf import RTFSyntaxGenerator

    generator = RTFSyntaxGenerator()
    self.syntax = generator

17 

def encode_spanning_row(
    self,
    text: str,
    page_width: float,
    rtf_body_attrs=None,
    col_idx: int = 0,
) -> Sequence[str]:
    """Build a one-cell table row whose single cell is ``page_width`` wide.

    Styling is taken from ``rtf_body_attrs`` when it is given; every
    attribute that is missing or ``None`` falls back to a built-in default.

    Args:
        text: Text placed in the spanning cell.
        page_width: Width assigned to the single cell.
        rtf_body_attrs: Optional attribute object to read styling from.
        col_idx: Column whose attribute values are used (row 0 is always
            the reference row).

    Returns:
        Whatever ``Row._as_rtf()`` produces for the spanning row.
    """
    from ..attributes import BroadcastValue
    from ..row import Border, Cell, Row, TextContent

    def resolve(attr_name, fallback):
        # No attribute object, or attribute unset -> use the fallback.
        raw = (
            None
            if rtf_body_attrs is None
            else getattr(rtf_body_attrs, attr_name, None)
        )
        if raw is None:
            return fallback
        # Resolve the value at (row 0, col_idx) through BroadcastValue.
        return BroadcastValue(value=raw, dimension=None).iloc(0, col_idx)

    # (TextContent keyword, attribute name, fallback) for the cell text.
    text_spec = (
        ("font", "text_font", 0),
        ("size", "text_font_size", 18),
        ("format", "text_format", ""),
        ("color", "text_color", ""),
        ("background_color", "text_background_color", ""),
        ("justification", "text_justification", "c"),
        ("indent_first", "text_indent_first", 0),
        ("indent_left", "text_indent_left", 0),
        ("indent_right", "text_indent_right", 0),
        ("space", "text_space", 1),
        ("space_before", "text_space_before", 15),
        ("space_after", "text_space_after", 15),
        ("convert", "text_convert", False),
        ("hyphenation", "text_hyphenation", True),
    )
    text_kwargs = {
        keyword: resolve(attr_name, fallback)
        for keyword, attr_name, fallback in text_spec
    }

    # Border styles share both the attribute name and the Cell keyword.
    border_sides = ("border_left", "border_right", "border_top", "border_bottom")
    border_styles = {side: resolve(side, "single") for side in border_sides}

    vertical_alignment = resolve("cell_vertical_justification", "bottom")
    row_justification = resolve("cell_justification", "c")
    row_height = resolve("cell_height", 0.15)

    spanning_cell = Cell(
        text=TextContent(text=text, **text_kwargs),
        width=page_width,
        border_left=Border(style=border_styles["border_left"]),
        border_right=Border(style=border_styles["border_right"]),
        border_top=Border(style=border_styles["border_top"]),
        border_bottom=Border(style=border_styles["border_bottom"]),
        vertical_justification=vertical_alignment,
    )

    spanning_row = Row(
        row_cells=[spanning_cell],
        justification=row_justification,
        height=row_height,
    )
    return spanning_row._as_rtf()

109 

def encode_document_start(self) -> str:
    """Return the fixed opening of the RTF document.

    The result is the ``{\\rtf1\\ansi`` group opener, a newline, and the
    ``\\deff0\\deflang1033`` control words.
    """
    opener = "{\\rtf1\\ansi"
    defaults = "\\deff0\\deflang1033"
    return "\n".join((opener, defaults))

113 

def encode_font_table(self) -> str:
    """Return the font table produced by ``self.syntax.generate_font_table()``."""
    font_table = self.syntax.generate_font_table()
    return font_table

117 

118 def encode_color_table( 

119 self, document=None, used_colors: Sequence[str] | None = None 

120 ) -> str: 

121 """Encode RTF color table with comprehensive 657-color support. 

122 

123 Args: 

124 document: RTF document to analyze for color usage (preferred) 

125 used_colors: Color names used in the document. If None and a 

126 document is provided, colors are auto-detected. 

127 

128 Returns: 

129 RTF color table string (empty if no colors beyond black/"" are used) 

130 """ 

131 if document is not None and used_colors is None: 

132 # Auto-detect colors from document 

133 from ..services.color_service import color_service 

134 

135 used_colors = color_service.collect_document_colors(document) 

136 

137 return self.syntax.generate_color_table(used_colors) 

138 

def encode_page_settings(self, page_config) -> str:
    """Return the page settings generated from ``page_config``.

    Args:
        page_config: Object exposing ``width``, ``height``, ``margin`` and
            ``orientation``.

    Returns:
        The string returned by ``self.syntax.generate_page_settings``.
    """
    settings = (
        page_config.width,
        page_config.height,
        page_config.margin,
        page_config.orientation,
    )
    return self.syntax.generate_page_settings(*settings)

154 

def encode_page_header(self, header_config, method: str = "line") -> str:
    """Wrap the encoded header text in an RTF ``{\\header ...}`` group.

    Args:
        header_config: Header configuration; may be ``None``.
        method: Encoding method forwarded to ``header_config._encode_text``.

    Returns:
        The header group, or ``""`` when there is no configuration or its
        text is empty.
    """
    has_text = header_config is not None and bool(header_config.text)
    if not has_text:
        return ""

    encoded = header_config._encode_text(text=header_config.text, method=method)
    return f"{{\\header{encoded}}}"

172 

def encode_page_footer(self, footer_config, method: str = "line") -> str:
    """Wrap the encoded footer text in an RTF ``{\\footer ...}`` group.

    Args:
        footer_config: Footer configuration; may be ``None``.
        method: Encoding method forwarded to ``footer_config._encode_text``.

    Returns:
        The footer group, or ``""`` when there is no configuration or its
        text is empty.
    """
    has_text = footer_config is not None and bool(footer_config.text)
    if not has_text:
        return ""

    encoded = footer_config._encode_text(text=footer_config.text, method=method)
    return f"{{\\footer{encoded}}}"

189 

def encode_title(self, title_config, method: str = "line") -> str:
    """Encode the title text via the configuration's own text encoder.

    Args:
        title_config: Title configuration; any falsy value is treated as
            "no title".
        method: Encoding method forwarded to ``title_config._encode_text``.

    Returns:
        The result of ``title_config._encode_text``, or ``""`` when the
        configuration is falsy or has no text.
    """
    if title_config and title_config.text:
        return title_config._encode_text(text=title_config.text, method=method)
    return ""

205 

def encode_subline(self, subline_config, method: str = "line") -> str:
    """Encode the subline text via the configuration's own text encoder.

    Args:
        subline_config: Subline configuration; may be ``None``.
        method: Encoding method forwarded to ``subline_config._encode_text``.

    Returns:
        The result of ``subline_config._encode_text``, or ``""`` when the
        configuration is ``None`` or has no text.
    """
    if subline_config is not None and subline_config.text:
        return subline_config._encode_text(text=subline_config.text, method=method)
    return ""

221 

222 def encode_footnote( 

223 self, 

224 footnote_config, 

225 page_number: int | None = None, 

226 page_col_width: float | None = None, 

227 border_style: str | None = None, 

228 ) -> Sequence[str]: 

229 """Encode footnote component with advanced formatting. 

230 

231 Args: 

232 footnote_config: RTFFootnote configuration 

233 page_number: Page number for footnote 

234 page_col_width: Page column width for calculations 

235 border_style: Optional border style to override defaults 

236 

237 Returns: 

238 List of RTF footnote strings 

239 """ 

240 if footnote_config is None: 

241 return [] 

242 

243 rtf_attrs = footnote_config 

244 

245 # Apply explicitly passed border style 

246 if border_style: 

247 # Create a copy with modified border 

248 rtf_attrs = rtf_attrs.model_copy() 

249 rtf_attrs.border_bottom = [[border_style]] 

250 

251 # Check if footnote should be rendered as table or paragraph 

252 if hasattr(rtf_attrs, "as_table") and not rtf_attrs.as_table: 

253 # Render as paragraph (plain text) 

254 if isinstance(rtf_attrs.text, list): 

255 text_list = rtf_attrs.text 

256 else: 

257 text_list = [rtf_attrs.text] if rtf_attrs.text else [] 

258 

259 # Use TextAttributes._encode_text method directly for paragraph rendering 

260 return rtf_attrs._encode_text(text_list, method="paragraph") 

261 else: 

262 # Render as table (default behavior) 

263 if page_col_width is not None: 

264 from ..row import Utils 

265 

266 col_total_width = page_col_width 

267 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, col_total_width) 

268 

269 # Create DataFrame from text string 

270 import polars as pl 

271 

272 df = pl.DataFrame([[rtf_attrs.text]]) 

273 return rtf_attrs._encode(df, col_widths) 

274 else: 

275 # Fallback without column width calculations 

276 import polars as pl 

277 

278 df = pl.DataFrame([[rtf_attrs.text]]) 

279 return rtf_attrs._encode(df) 

280 

281 def encode_source( 

282 self, 

283 source_config, 

284 page_number: int | None = None, 

285 page_col_width: float | None = None, 

286 border_style: str | None = None, 

287 ) -> Sequence[str]: 

288 """Encode source component with advanced formatting. 

289 

290 Args: 

291 source_config: RTFSource configuration 

292 page_number: Page number for source 

293 page_col_width: Page column width for calculations 

294 border_style: Optional border style to override defaults 

295 

296 Returns: 

297 List of RTF source strings 

298 """ 

299 if source_config is None: 

300 return [] 

301 

302 rtf_attrs = source_config 

303 

304 # Apply explicitly passed border style 

305 if border_style: 

306 # Create a copy with modified border 

307 rtf_attrs = rtf_attrs.model_copy() 

308 rtf_attrs.border_bottom = [[border_style]] 

309 

310 # Check if source should be rendered as table or paragraph 

311 if hasattr(rtf_attrs, "as_table") and not rtf_attrs.as_table: 

312 # Render as paragraph (plain text) 

313 if isinstance(rtf_attrs.text, list): 

314 text_list = rtf_attrs.text 

315 else: 

316 text_list = [rtf_attrs.text] if rtf_attrs.text else [] 

317 

318 # Use TextAttributes._encode_text method directly for paragraph rendering 

319 return rtf_attrs._encode_text(text_list, method="paragraph") 

320 else: 

321 # Render as table (default behavior) 

322 if page_col_width is not None: 

323 from ..row import Utils 

324 

325 col_total_width = page_col_width 

326 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, col_total_width) 

327 

328 # Create DataFrame from text string 

329 import polars as pl 

330 

331 df = pl.DataFrame([[rtf_attrs.text]]) 

332 return rtf_attrs._encode(df, col_widths) 

333 else: 

334 # Fallback without column width calculations 

335 import polars as pl 

336 

337 df = pl.DataFrame([[rtf_attrs.text]]) 

338 return rtf_attrs._encode(df) 

339 

def prepare_dataframe_for_body_encoding(self, df, rtf_attrs):
    """Drop grouping columns from the body table and realign its attributes.

    Columns listed in ``rtf_attrs.subline_by`` are always removed. Columns
    listed in ``rtf_attrs.page_by`` are removed unless ``rtf_attrs.new_page``
    is truthy and ``pageby_row`` (default ``"column"``) equals ``"column"``.

    When at least one column is removed, ``rtf_attrs`` is deep-copied and
    every list/tuple-valued field of the copy (except ``col_rel_width``) is
    expanded to the full ``(rows, cols)`` grid of the original frame and
    stripped of the removed column positions. ``col_rel_width`` is sliced
    the same way, but only when its length equals the original column count.

    Args:
        df: Input DataFrame
        rtf_attrs: RTFBody attributes

    Returns:
        Tuple of ``(processed_df, original_df, processed_attrs)``.
        ``processed_df`` has the grouping columns removed, ``original_df``
        is an untouched clone of the input, and ``processed_attrs`` is the
        realigned copy of the attributes. When no column is removed,
        ``processed_attrs`` is the very same ``rtf_attrs`` object.

    Raises:
        ValueError: Propagated from ``columns.index`` when a column
            scheduled for removal is not present in ``df``.
    """
    original_df = df.clone()
    processed_df = df.clone()

    # Collect the names of every column that must not appear in the table.
    columns_to_remove = set()

    if rtf_attrs.subline_by is not None:
        columns_to_remove.update(rtf_attrs.subline_by)

    if rtf_attrs.page_by is not None:
        # page_by columns stay in the table only when new_page is set and
        # pageby_row is "column"; in every other case they are dropped.
        keep_page_by_columns = (
            rtf_attrs.new_page
            and getattr(rtf_attrs, "pageby_row", "column") == "column"
        )
        if not keep_page_by_columns:
            columns_to_remove.update(rtf_attrs.page_by)

    if not columns_to_remove:
        # Nothing to drop: hand back the caller's attributes unchanged.
        return processed_df, original_df, rtf_attrs

    processed_df = processed_df.select(
        [col for col in processed_df.columns if col not in columns_to_remove]
    )

    # Modify a deep copy so the caller's attributes are never mutated.
    processed_attrs = rtf_attrs.model_copy(deep=True)

    from ..attributes import BroadcastValue

    # Positions of the removed columns in the original frame. A set gives
    # O(1) membership tests in the per-cell filters below.
    original_columns = original_df.columns
    removed_indices = {original_columns.index(col) for col in columns_to_remove}

    rows, cols = original_df.shape

    for attr_name in type(processed_attrs).model_fields:
        if attr_name == "col_rel_width":
            continue  # One-dimensional; handled separately below.

        val = getattr(processed_attrs, attr_name)
        if not isinstance(val, (list, tuple)):
            # None and scalar values need no slicing.
            continue

        # Expand to the full grid, then drop the removed column positions
        # from every row.
        expanded = BroadcastValue(value=val, dimension=(rows, cols)).to_list()
        sliced_expanded = (
            [
                [item for i, item in enumerate(row_data) if i not in removed_indices]
                for row_data in expanded
            ]
            if expanded
            else []
        )
        setattr(processed_attrs, attr_name, sliced_expanded)

    current_widths = processed_attrs.col_rel_width
    if current_widths is not None and len(current_widths) == cols:
        # Only slice widths that were given per original column.
        processed_attrs.col_rel_width = [
            w for i, w in enumerate(current_widths) if i not in removed_indices
        ]

    # Note: group_by suppression is not performed here.
    return processed_df, original_df, processed_attrs

455 

456 def encode_column_header( 

457 self, df, rtf_attrs, page_col_width: float 

458 ) -> Sequence[str] | None: 

459 """Encode column header component with column width support. 

460 

461 Args: 

462 df: DataFrame containing header data 

463 rtf_attrs: RTFColumnHeader attributes 

464 page_col_width: Page column width for calculations 

465 

466 Returns: 

467 List of RTF header strings 

468 """ 

469 if rtf_attrs is None: 

470 return None 

471 

472 # Convert text list to DataFrame for encoding if needed 

473 import polars as pl 

474 

475 df_to_encode = df 

476 if isinstance(df, (list, tuple)): 

477 # Create DataFrame from list 

478 schema = [f"col_{i + 1}" for i in range(len(df))] 

479 df_to_encode = pl.DataFrame([df], schema=schema, orient="row") 

480 elif df is None and rtf_attrs.text: 

481 # Fallback to rtf_attrs.text if df is None 

482 text = rtf_attrs.text 

483 if isinstance(text, (list, tuple)): 

484 schema = [f"col_{i + 1}" for i in range(len(text))] 

485 df_to_encode = pl.DataFrame([text], schema=schema, orient="row") 

486 

487 if df_to_encode is None: 

488 return None 

489 

490 dim = df_to_encode.shape 

491 

492 rtf_attrs.col_rel_width = rtf_attrs.col_rel_width or [1] * dim[1] 

493 rtf_attrs = rtf_attrs._set_default() 

494 

495 from ..row import Utils 

496 

497 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, page_col_width) 

498 

499 return rtf_attrs._encode(df_to_encode, col_widths) 

500 

def encode_page_break(self, page_config, page_margin_encode_func) -> str:
    """Build the page break sequence followed by the page setup.

    The result is a ``\\page`` control word surrounded by two tiny
    ``{\\pard\\fs2\\par}`` paragraphs, then the paper size in twips and the
    margin string produced by ``page_margin_encode_func``.

    Args:
        page_config: Object exposing ``width`` and ``height``; both are
            multiplied by ``RTFConstants.TWIPS_PER_INCH`` and truncated
            with ``int``.
        page_margin_encode_func: Zero-argument callable returning the
            encoded margin settings.

    Returns:
        RTF page break string
    """
    from ..core import RTFConstants

    twips_per_inch = RTFConstants.TWIPS_PER_INCH
    paper_width = int(page_config.width * twips_per_inch)
    paper_height = int(page_config.height * twips_per_inch)
    margins = page_margin_encode_func()

    spacer = "{\\pard\\fs2\\par}"
    return (
        f"{spacer}\\page{spacer}\n"
        f"\\paperw{paper_width}\\paperh{paper_height}\n\n"
        f"{margins}\n"
    )

520 

def encode_page_margin(self, page_config) -> str:
    """Encode the page margins as a line of RTF control words.

    Each entry of ``page_config.margin`` is converted with
    ``Utils._inch_to_twip`` and appended to its control word, in the order
    left, right, top, bottom, header, footer.

    Args:
        page_config: Object whose ``margin`` holds exactly six values.

    Returns:
        The concatenated control words followed by a newline.

    Raises:
        ValueError: When ``margin`` does not hold exactly six values
            (enforced by ``zip(..., strict=True)``).
    """
    from ..row import Utils

    control_words = (
        "\\margl",
        "\\margr",
        "\\margt",
        "\\margb",
        "\\headery",
        "\\footery",
    )
    twips = [Utils._inch_to_twip(inches) for inches in page_config.margin]

    pieces = []
    for word, value in zip(control_words, twips, strict=True):
        pieces.append(f"{word}{value}")
    return "".join(pieces) + "\n"