Coverage for src/rtflite/input.py: 87%

340 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-17 01:22 +0000

1from collections.abc import Sequence 

2from pathlib import Path 

3from typing import Any 

4 

5from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator 

6 

7from rtflite.attributes import TableAttributes, TextAttributes 

8from rtflite.core.constants import RTFConstants 

9from rtflite.row import BORDER_CODES 

10 

11 

12class AttributeDefaultsMixin: 

13 """Mixin class for common attribute default setting patterns.""" 

14 

15 def _set_attribute_defaults(self, exclude_attrs: set[Any] | None = None) -> None: 

16 """Set default values for text attributes by converting scalars to lists/tuples.""" 

17 exclude_attrs = exclude_attrs or set() 

18 for attr, value in self.__dict__.items(): 

19 if attr not in exclude_attrs: 

20 if isinstance(value, (str, int, float, bool)): 

21 setattr(self, attr, [value]) 

22 elif isinstance(value, list): 

23 setattr(self, attr, tuple(value)) 

24 

25 

class RTFTextComponent(TextAttributes, AttributeDefaultsMixin):
    """Shared base for the paragraph-style RTF text components.

    Subclasses supply their own default attribute values by overriding
    ``_get_component_defaults``; everything else (field declarations, text
    coercion, construction flow) lives here.
    """

    text: Sequence[str] | None = Field(default=None, description="Text content")
    text_indent_reference: str | None = Field(
        default="table",
        description="Reference point for indentation ('page' or 'table')",
    )

    @field_validator("text", mode="before")
    def convert_text(cls, v):
        """Accept a bare string for ``text`` by wrapping it in a list."""
        return ValidationHelpers.convert_string_to_sequence(v)

    def __init__(self, **data):
        """Build the component from its defaults overlaid with ``data``.

        Caller-supplied keyword arguments take precedence over the
        component-specific defaults.
        """
        merged = {**self._get_component_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _set_default(self):
        """Normalize attribute values after validation and return ``self``."""
        self._set_attribute_defaults()
        return self

    def _get_component_defaults(self) -> dict:
        """Return the default attribute values for this component.

        The base implementation returns the common text defaults; subclasses
        override it to provide their own.
        """
        return DefaultsFactory.get_text_defaults()

59 

60 

class ValidationHelpers:
    """Stateless validation routines shared by the RTF component models."""

    @staticmethod
    def convert_string_to_sequence(v: Any) -> Any:
        """Wrap a bare string in a one-item list.

        Args:
            v: Incoming field value.

        Returns:
            ``[v]`` when ``v`` is a ``str``; otherwise ``v`` itself, unchanged
            (this includes ``None``).
        """
        return [v] if isinstance(v, str) else v

    @staticmethod
    def validate_boolean_field(v: Any, field_name: str) -> bool:
        """Return ``v`` if it is a real ``bool``, otherwise fail.

        Args:
            v: Value to check.
            field_name: Name used in the error message.

        Raises:
            ValueError: If ``v`` is not an instance of ``bool``.
        """
        if isinstance(v, bool):
            return v
        raise ValueError(
            f"{field_name} must be a boolean, got {type(v).__name__}: {v}"
        )

81 

82 

class DefaultsFactory:
    """Builders for the default attribute dictionaries of RTF components.

    Every method returns a freshly built dictionary, so callers are free to
    mutate the result.
    """

    @staticmethod
    def get_text_defaults() -> dict:
        """Return the defaults shared by all text components."""
        return {
            "text_font": [1],
            "text_font_size": [9],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1.0],
            "text_space_before": [RTFConstants.DEFAULT_SPACE_BEFORE],
            "text_space_after": [RTFConstants.DEFAULT_SPACE_AFTER],
            "text_hyphenation": [True],
        }

    @staticmethod
    def get_page_header_defaults() -> dict:
        """Return text defaults adjusted for page headers (right-justified)."""
        return {
            **DefaultsFactory.get_text_defaults(),
            "text_font_size": [12],
            "text_justification": ["r"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }

    @staticmethod
    def get_page_footer_defaults() -> dict:
        """Return text defaults adjusted for page footers (centered)."""
        return {
            **DefaultsFactory.get_text_defaults(),
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }

    @staticmethod
    def get_title_defaults() -> dict:
        """Return text defaults adjusted for titles (centered, extra spacing)."""
        return {
            **DefaultsFactory.get_text_defaults(),
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_space_before": [180.0],
            "text_space_after": [180.0],
            "text_convert": [True],  # Enable LaTeX conversion for titles
            "text_indent_reference": "table",
        }

    @staticmethod
    def get_subline_defaults() -> dict:
        """Return text defaults adjusted for sublines (left-justified)."""
        return {
            **DefaultsFactory.get_text_defaults(),
            "text_font_size": [9],
            "text_justification": ["l"],
            "text_convert": [False],
            "text_indent_reference": "table",
        }

    @staticmethod
    def get_table_defaults() -> dict:
        """Return the defaults shared by table-style components.

        Apart from ``col_rel_width``, every value is a nested (row x column)
        list holding a single cell.
        """
        return {
            "col_rel_width": [1.0],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_format": [[""]],
            "text_font_size": [[9]],
            "text_justification": [["l"]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[True]],
        }

    @staticmethod
    def get_border_defaults(as_table: bool) -> dict:
        """Return border defaults for table or plain-text rendering.

        Args:
            as_table: When truthy, the left, right and top borders are
                ``"single"``; otherwise they are empty. The bottom border is
                empty in both modes.
        """
        # "single" mirrors R2RTF as_table=TRUE; "" mirrors as_table=FALSE.
        side_style = "single" if as_table else ""
        return {
            "border_left": [[side_style]],
            "border_right": [[side_style]],
            "border_top": [[side_style]],
            "border_bottom": [[""]],
        }

200 

201 

class RTFPage(BaseModel):
    """Configure RTF page layout and pagination settings.

    The RTFPage component controls page dimensions, margins, orientation,
    and pagination behavior including rows per page and border styles for
    first/last rows across page boundaries.

    Examples:
        Basic portrait page with custom margins:
        ```python
        page = RTFPage(
            orientation="portrait",
            margin=[1.0, 1.0, 1.5, 1.0, 1.5, 1.0]  # left, right, top, bottom, header, footer
        )
        ```

        Landscape layout for wide tables:
        ```python
        page = RTFPage(
            orientation="landscape",
            nrow=30,  # Fewer rows due to landscape
            border_first="double",  # Double border on first row
            border_last="single"  # Single border on last row
        )
        ```

    Attributes:
        nrow: Total number of rows per page including ALL components:
            - Column headers (if displayed)
            - Data rows
            - Footnotes (if present)
            - Source lines (if present)
            This is NOT just data rows - it's the complete row budget.

        border_first: Border style for the first row of the table.
            Defaults to "double".

        border_last: Border style for the last row of the table.
            Defaults to "double".

    Note:
        Unset ``width``, ``height``, ``margin``, ``col_width`` and ``nrow``
        are filled in after validation with orientation-specific defaults
        (see ``_set_portrait_defaults`` / ``_set_landscape_defaults``).
    """

    orientation: str | None = Field(
        default="portrait", description="Page orientation ('portrait' or 'landscape')"
    )

    @field_validator("orientation")
    def validate_orientation(cls, v):
        """Reject any orientation other than 'portrait' or 'landscape'."""
        if v not in ["portrait", "landscape"]:
            raise ValueError(
                f"Invalid orientation. Must be 'portrait' or 'landscape'. Given: {v}"
            )
        return v

    width: float | None = Field(default=None, description="Page width in inches")
    height: float | None = Field(default=None, description="Page height in inches")
    margin: Sequence[float] | None = Field(
        default=None,
        description="Page margins [left, right, top, bottom, header, footer] in inches",
    )

    @field_validator("margin")
    def validate_margin(cls, v):
        """Require exactly six margin values when a margin is given."""
        if v is not None and len(v) != 6:
            raise ValueError("Margin must be a sequence of 6 values.")
        return v

    nrow: int | None = Field(
        default=None,
        description="Total rows per page including headers, data, footnotes, and sources. NOT just data rows - this is the complete page row budget.",
    )

    border_first: str | None = Field(
        default="double", description="First row border style"
    )
    border_last: str | None = Field(
        default="double", description="Last row border style"
    )
    col_width: float | None = Field(
        default=None, description="Total width of table columns in inches"
    )
    use_color: bool | None = Field(
        default=False, description="Whether to use color in the document"
    )

    page_title: str = Field(
        default="all",
        description="Where to display titles in multi-page documents ('first', 'last', 'all')",
    )
    page_footnote: str = Field(
        default="last",
        description="Where to display footnotes in multi-page documents ('first', 'last', 'all')",
    )
    page_source: str = Field(
        default="last",
        description="Where to display source in multi-page documents ('first', 'last', 'all')",
    )

    @field_validator("border_first", "border_last")
    def validate_border(cls, v, info):
        """Require the border style to be a key of ``BORDER_CODES``.

        Raises:
            ValueError: If ``v`` is not a known border style. The message
                names the offending field.
        """
        if v not in BORDER_CODES:
            # The field name comes from the validation info object; the model
            # class has no ``__field_name__`` attribute.
            raise ValueError(
                f"{info.field_name.capitalize()} with invalid border style: {v}"
            )
        return v

    @field_validator("page_title", "page_footnote", "page_source")
    def validate_page_placement(cls, v):
        """Require the placement to be 'first', 'last' or 'all'."""
        valid_options = {"first", "last", "all"}
        if v not in valid_options:
            raise ValueError(
                f"Invalid page placement option '{v}'. Must be one of {valid_options}"
            )
        return v

    @field_validator("width", "height", "nrow", "col_width")
    def validate_width_height(cls, v, info):
        """Require a strictly positive value when one is given.

        Raises:
            ValueError: If ``v`` is not ``None`` and is less than or equal to
                zero. The message names the offending field.
        """
        if v is not None and v <= 0:
            # See validate_border: the field name is taken from ``info``.
            raise ValueError(
                f"{info.field_name.capitalize()} must be greater than 0."
            )
        return v

    def __init__(self, **data):
        """Validate ``data`` and then fill in orientation-based defaults."""
        super().__init__(**data)
        self._set_default()

    def _set_default(self):
        """Set default values based on page orientation and return ``self``."""
        if self.orientation == "portrait":
            self._set_portrait_defaults()
        elif self.orientation == "landscape":
            self._set_landscape_defaults()

        self._validate_margin_length()
        return self

    def _set_portrait_defaults(self) -> None:
        """Fill unset (falsy) layout fields with the portrait defaults."""
        self.width = self.width or 8.5
        self.height = self.height or 11
        self.margin = self.margin or [1.25, 1, 1.75, 1.25, 1.75, 1.00625]
        # Default column width is derived from the (possibly custom) width.
        self.col_width = self.col_width or self.width - 2.25
        self.nrow = self.nrow or 40

    def _set_landscape_defaults(self) -> None:
        """Fill unset (falsy) layout fields with the landscape defaults."""
        self.width = self.width or 11
        self.height = self.height or 8.5
        self.margin = self.margin or [1.0, 1.0, 2, 1.25, 1.25, 1.25]
        # Default column width is derived from the (possibly custom) width.
        self.col_width = self.col_width or self.width - 2.5
        self.nrow = self.nrow or 24

    def _validate_margin_length(self) -> None:
        """Validate that margin has exactly 6 values.

        Raises:
            ValueError: If a margin is set and its length is not 6.
        """
        if self.margin is not None and len(self.margin) != 6:
            raise ValueError("Margin length must be 6.")

362 

363 

class RTFPageHeader(RTFTextComponent):
    """Text component for page headers.

    When no ``text`` is supplied, the header falls back to a
    "Page X of Y" string built from RTF field codes. Attribute defaults come
    from ``DefaultsFactory.get_page_header_defaults()``.

    Examples:
        Default page numbering:
        ```python
        header = RTFPageHeader()  # Shows "Page X of Y"
        ```

        Custom header text:
        ```python
        header = RTFPageHeader(
            text="Protocol ABC-123 | Confidential",
            text_justification=["c"]  # Center align
        )
        ```

        Header with page number:
        ```python
        header = RTFPageHeader(
            text="Study Report - Page \\\\chpgn",  # Current page number
            text_format=["b"],  # Bold
            text_font_size=[10]
        )
        ```

    Note:
        - Default text is "Page \\\\chpgn of {\\\\field{\\\\*\\\\fldinst NUMPAGES }}"
        - Text conversion is disabled by default to preserve RTF field codes
        - Right-aligned by default
    """

    def __init__(self, **data):
        """Create the header, supplying the page-number text when none is given."""
        data.setdefault("text", "Page \\chpgn of {\\field{\\*\\fldinst NUMPAGES }}")
        super().__init__(**data)

    def _get_component_defaults(self) -> dict:
        """Return the page-header attribute defaults."""
        return DefaultsFactory.get_page_header_defaults()

408 

409 

class RTFPageFooter(RTFTextComponent):
    """Text component for page footers.

    Attribute defaults come from
    ``DefaultsFactory.get_page_footer_defaults()``.

    Examples:
        Simple footer:
        ```python
        footer = RTFPageFooter(
            text="Company Confidential"
        )
        ```

        Multi-line footer:
        ```python
        footer = RTFPageFooter(
            text=[
                "Proprietary and Confidential",
                "Do Not Distribute"
            ],
            text_font_size=[8, 8]
        )
        ```

        Footer with timestamp:
        ```python
        footer = RTFPageFooter(
            text="Generated: 2024-01-15 14:30:00 | program.py",
            text_justification=["l"],  # Left align
            text_font_size=[8]
        )
        ```

    Note:
        - Center-aligned by default
        - Text conversion is disabled by default
    """

    def _get_component_defaults(self) -> dict:
        """Return the page-footer attribute defaults."""
        footer_defaults = DefaultsFactory.get_page_footer_defaults()
        return footer_defaults

452 

453 

class RTFSubline(RTFTextComponent):
    """Text component for sublines; left-justified by default."""

    def _get_component_defaults(self) -> dict:
        """Return the subline attribute defaults."""
        subline_defaults = DefaultsFactory.get_subline_defaults()
        return subline_defaults

459 

460 

class RTFTableTextComponent(TableAttributes):
    """Shared base for table-style text components such as footnotes and sources.

    Subclasses customize behavior through two hooks:
    ``_get_default_as_table`` (the default rendering mode) and
    ``_get_component_overrides`` (attribute defaults layered on top of the
    common table and border defaults).
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(default=None, description="Text content")
    as_table: bool = Field(
        description="Whether to render as table (True) or plain text (False)",
    )

    @field_validator("text", mode="before")
    def convert_text(cls, v):
        """Accept a bare string for ``text`` by wrapping it in a list."""
        return ValidationHelpers.convert_string_to_sequence(v)

    @field_validator("as_table", mode="before")
    def validate_as_table(cls, v):
        """Require ``as_table`` to be a real ``bool`` (no coercion)."""
        return ValidationHelpers.validate_boolean_field(v, "as_table")

    def __init__(self, **data):
        """Build the component from layered defaults overlaid with ``data``.

        ``as_table`` is resolved first (falling back to the component's
        default) because the border defaults depend on it.
        """
        if "as_table" not in data:
            data["as_table"] = self._get_default_as_table()

        merged = {**self._get_component_table_defaults(data["as_table"]), **data}
        super().__init__(**merged)
        self._process_text_conversion()

    def _get_default_as_table(self) -> bool:
        """Return the ``as_table`` value used when the caller gives none."""
        return True

    def _get_component_table_defaults(self, as_table: bool) -> dict:
        """Combine table, border and component-specific defaults.

        Later layers win: table defaults, then border defaults for the given
        ``as_table`` mode, then the component overrides.
        """
        table_layer = DefaultsFactory.get_table_defaults()
        border_layer = DefaultsFactory.get_border_defaults(as_table)
        override_layer = self._get_component_overrides()
        return {**table_layer, **border_layer, **override_layer}

    def _get_component_overrides(self) -> dict:
        """Return component-specific defaults; text conversion is on by default."""
        return {"text_convert": [[True]]}

    def _process_text_conversion(self) -> None:
        """Collapse the ``text`` sequence into one ``\\line``-separated string.

        An empty sequence is stored as an empty list; ``None`` is left as is.
        """
        if self.text is None:
            return
        if not isinstance(self.text, Sequence):
            return
        if len(self.text) == 0:
            self.text = []
        else:
            self.text = "\\line ".join(self.text)

    def _set_default(self):
        """Wrap every scalar attribute in a one-item list and return ``self``."""
        for name, current in self.__dict__.items():
            if not isinstance(current, (str, int, float, bool)):
                continue
            setattr(self, name, [current])
        return self

526 

527 

class RTFFootnote(RTFTableTextComponent):
    """Footnote component for explanatory notes and citations.

    Accepts one or several footnote lines and renders either as a table
    (with borders) or as plain text.

    Examples:
        Single footnote:
        ```python
        footnote = RTFFootnote(
            text="CI = Confidence Interval; N = Number of subjects"
        )
        ```

        Multiple footnotes:
        ```python
        footnote = RTFFootnote(
            text=[
                "* p-value from ANCOVA model",
                "** Missing values were imputed using LOCF",
                "*** Baseline is defined as last value before first dose"
            ]
        )
        ```

        Footnote without table borders:
        ```python
        footnote = RTFFootnote(
            text="Data cutoff date: 2023-12-31",
            as_table=False  # No borders around footnote
        )
        ```

    Note:
        - Multiple footnote lines are joined with \\\\line separator
        - Text conversion is enabled by default
        - Default rendering includes table borders (as_table=True)
    """

    def _get_default_as_table(self) -> bool:
        """Footnotes render as a table unless told otherwise."""
        render_as_table = True
        return render_as_table

570 

571 

class RTFSource(RTFTableTextComponent):
    """Source component for data source citations.

    Accepts one or several source lines. By default it renders as plain text
    without borders and with centered text.

    Examples:
        Simple source citation:
        ```python
        source = RTFSource(
            text="Source: ADAE dataset, generated 2024-01-15"
        )
        ```

        Multiple source lines:
        ```python
        source = RTFSource(
            text=[
                "Dataset: ADAE version 3.0",
                "Program: ae_summary.py",
                "Generated: 2024-01-15 14:30:00"
            ]
        )
        ```

        Source with table borders:
        ```python
        source = RTFSource(
            text="Database lock: 2023-12-31",
            as_table=True,  # Add borders around source
            text_justification=[["l"]]  # Left align instead of center
        )
        ```

    Note:
        - Center-aligned by default
        - Rendered without borders by default (as_table=False)
        - Text conversion is enabled by default
    """

    def _get_default_as_table(self) -> bool:
        """Sources render as plain text unless told otherwise."""
        render_as_table = False
        return render_as_table

    def _get_component_overrides(self) -> dict:
        """Extend the inherited overrides with centered text justification."""
        return {
            **super()._get_component_overrides(),
            "text_justification": [["c"]],
        }

624 

625 

class RTFTitle(RTFTextComponent):
    """Title component with centered text and text conversion enabled.

    Accepts one or several title lines. Attribute defaults come from
    ``DefaultsFactory.get_title_defaults()``.

    Examples:
        Single line title:
        ```python
        title = RTFTitle(text="Adverse Events Summary")
        ```

        Multi-line title with formatting:
        ```python
        title = RTFTitle(
            text=["Clinical Study Report", "Safety Analysis Set"],
            text_format=["b", ""]  # First line bold, second normal
        )
        ```

        Title with LaTeX symbols:
        ```python
        title = RTFTitle(
            text="Efficacy Analysis (\\\\alpha = 0.05)"
        )
        # Renders as: Efficacy Analysis (alpha = 0.05) with Greek alpha symbol
        ```

    Note:
        Text conversion is enabled by default for titles, converting:
        - LaTeX symbols (e.g., \\\\alpha to Greek alpha, \\\\beta to Greek beta)
        - Subscripts (e.g., x_1 to x with subscript 1)
        - Other mathematical notation
    """

    def _get_component_defaults(self) -> dict:
        """Return the title attribute defaults."""
        title_defaults = DefaultsFactory.get_title_defaults()
        return title_defaults

664 

665 

class RTFColumnHeader(TableAttributes):
    """Configure column headers for RTF tables.

    Header text is given as a list of strings, one per column. When polars is
    importable, the validated text is stored as a single-row
    ``polars.DataFrame`` with columns named ``col_1``, ``col_2``, ...

    Examples:
        Simple column headers:
        ```python
        header = RTFColumnHeader(
            text=["Name", "Age", "Treatment", "Response"]
        )
        ```

        Headers with custom formatting:
        ```python
        header = RTFColumnHeader(
            text=["Subject", "Baseline", "Week 4", "Week 8"],
            text_format=["b", "b", "b", "b"],  # All bold
            text_justification=["l", "c", "c", "c"],  # Left, center, center, center
            border_bottom=["double", "double", "double", "double"]
        )
        ```

        Multi-row headers with col_rel_width:
        ```python
        # First row spans multiple columns
        header1 = RTFColumnHeader(
            text=["Patient Info", "Treatment Results"],
            col_rel_width=[2, 3]  # Spans 2 and 3 columns respectively
        )
        # Second row with individual columns
        header2 = RTFColumnHeader(
            text=["ID", "Age", "Drug A", "Drug B", "Placebo"],
            col_rel_width=[1, 1, 1, 1, 1]
        )
        ```

    Note:
        - A legacy ``df`` keyword is accepted in place of ``text``
        - Use col_rel_width to create spanning headers
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(
        default=None, description="Column header text. List of strings, one per column."
    )

    @field_validator("text", mode="before")
    def convert_text_before(cls, v):
        """Normalize raw ``text`` input before type validation.

        A bare string becomes a one-item list and a list/tuple made only of
        strings becomes a list. Anything else is passed through unchanged.
        """
        if v is None:
            return v
        if isinstance(v, str):
            return [v]
        all_strings = isinstance(v, (list, tuple)) and all(
            isinstance(item, str) for item in v
        )
        if all_strings:
            return list(v)
        return v

    @field_validator("text", mode="after")
    def convert_text_after(cls, v):
        """Turn validated list/tuple text into a one-row polars DataFrame.

        If polars cannot be imported, the value is returned unchanged.
        """
        if v is None or not isinstance(v, (list, tuple)):
            return v
        try:
            import polars as pl

            column_names = [f"col_{i + 1}" for i in range(len(v))]
            return pl.DataFrame([v], schema=column_names, orient="row")
        except ImportError:
            return v

    def __init__(self, **data):
        """Build the header from its defaults overlaid with ``data``."""
        data = self._handle_backwards_compatibility(data)
        merged = {**self._get_column_header_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _handle_backwards_compatibility(self, data: dict) -> dict:
        """Translate a legacy ``df`` argument into ``text``.

        Only applies when ``df`` is present and ``text`` is not; ``data`` is
        modified in place and returned.
        """
        if "df" in data and "text" not in data:
            legacy_df = data.pop("df")
            data["text"] = self._convert_dataframe_to_text(legacy_df)
        return data

    def _convert_dataframe_to_text(self, df) -> list | None:
        """Extract header text from a polars DataFrame.

        Returns ``None`` when polars is unavailable or ``df`` is not a
        ``polars.DataFrame``.
        """
        try:
            import polars as pl

            if isinstance(df, pl.DataFrame):
                return self._handle_dataframe_orientation(df)
        except ImportError:
            pass
        return None

    def _handle_dataframe_orientation(self, df) -> list:
        """Read header labels from a column- or row-oriented DataFrame.

        A frame with several rows and exactly one column is read down that
        column; any other frame contributes its first row.
        """
        row_count, column_count = df.shape
        if row_count > 1 and column_count == 1:
            # Column-oriented: the labels are the values of the only column.
            return df.get_column(df.columns[0]).to_list()
        # Row-oriented: the labels are the first row.
        return list(df.row(0))

    def _get_column_header_defaults(self) -> dict:
        """Return the default attribute values for column headers."""
        return {
            "border_left": ["single"],
            "border_right": ["single"],
            "border_top": ["single"],
            "border_bottom": [""],
            "border_width": [15],
            "cell_height": [0.15],
            "cell_justification": ["c"],
            "cell_vertical_justification": ["bottom"],
            "text_font": [1],
            "text_format": [""],
            "text_font_size": [9],
            "text_justification": ["c"],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1],
            "text_space_before": [15],
            "text_space_after": [15],
            "text_hyphenation": [False],
            "text_convert": [True],
        }

    def _set_default(self):
        """Wrap every scalar attribute in a one-item list and return ``self``."""
        for name, current in self.__dict__.items():
            if not isinstance(current, (str, int, float, bool)):
                continue
            setattr(self, name, [current])

        return self

807 

808 

class RTFBody(TableAttributes):
    """Configure table body formatting and layout.

    Holds the formatting attributes of the table body together with the
    control fields for column headers, grouping (``group_by``), page breaks
    (``page_by``, ``new_page``) and section headers (``subline_by``).

    Examples:
        Basic table with custom column widths:
        ```python
        body = RTFBody(
            col_rel_width=[3, 2, 2, 2],
            text_justification=[["l", "c", "c", "c"]]
        )
        ```

        Using group_by to suppress duplicate values:
        ```python
        body = RTFBody(
            group_by=["SITE", "SUBJECT"],
            col_rel_width=[2, 2, 3, 1]
        )
        ```

        Using subline_by for section headers:
        ```python
        body = RTFBody(
            subline_by=["SITE", "STUDY"],  # Creates paragraph headers
            col_rel_width=[3, 2, 2]  # Note: subline_by columns are removed from table
        )
        ```

    Note:
        - ``group_by``, ``page_by`` and ``subline_by`` accept a single string,
          which is wrapped in a list
        - ``new_page=True`` is rejected unless ``page_by`` is specified
        - How ``subline_by`` affects rendering is decided outside this class
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    as_colheader: bool = Field(
        default=True, description="Whether to display column headers"
    )
    group_by: Sequence[str] | None = Field(
        default=None,
        description="Column names for hierarchical value suppression. Values are shown only on first occurrence within groups, with page context restoration for multi-page tables.",
    )
    page_by: Sequence[str] | None = Field(
        default=None,
        description="Column names to trigger page breaks when values change",
    )
    new_page: bool = Field(
        default=False,
        description="Force new page before table. Automatically set to True when using subline_by.",
    )
    pageby_header: bool = Field(
        default=True, description="Repeat column headers on new pages"
    )
    pageby_row: str = Field(
        default="column",
        description="Page break handling: 'column' (keep column) or 'first_row' (use first row as header)",
    )
    subline_by: Sequence[str] | None = Field(
        default=None,
        description="Column names to create paragraph headers. These columns are removed from the table and their values appear as section headers above each group. Forces pagination.",
    )
    last_row: bool = Field(
        default=True,
        description="Whether the table contains the last row of the final table",
    )

    @field_validator("group_by", "page_by", "subline_by", mode="before")
    def convert_text(cls, v):
        """Wrap a bare column name in a list; pass other values through."""
        return [v] if isinstance(v, str) else v

    @field_validator("pageby_row")
    def validate_pageby_row(cls, v):
        """Require ``pageby_row`` to be 'column' or 'first_row'."""
        if v in ("column", "first_row"):
            return v
        raise ValueError(
            f"Invalid pageby_row. Must be 'column' or 'first_row'. Given: {v}"
        )

    def __init__(self, **data):
        """Build the body from its defaults overlaid with ``data``.

        Caller-supplied keyword arguments take precedence over the defaults.
        """
        body_defaults = {
            "border_left": [["single"]],
            "border_right": [["single"]],
            "border_first": [["single"]],
            "border_last": [["single"]],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_font_size": [[9]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[False]],
            "text_convert": [[True]],
        }
        super().__init__(**{**body_defaults, **data})
        self._set_default()

    def _set_default(self):
        """Normalize attributes, fill fallbacks, check consistency; return ``self``."""
        self._set_table_attribute_defaults()
        self._set_border_defaults()
        self._validate_page_by_logic()
        return self

    def _set_table_attribute_defaults(self) -> None:
        """Wrap scalar attributes in one-item lists, except control attributes."""
        control_attrs = {
            "as_colheader",
            "page_by",
            "new_page",
            "pageby_header",
            "pageby_row",
            "subline_by",
            "last_row",
        }

        for name, current in self.__dict__.items():
            if name in control_attrs:
                continue
            if isinstance(current, (str, int, float, bool)):
                setattr(self, name, [current])

    def _set_border_defaults(self) -> None:
        """Give falsy border and justification attributes a fallback value."""
        fallbacks = (
            ("border_top", [[""]]),
            ("border_bottom", [[""]]),
            ("border_left", [["single"]]),
            ("border_right", [["single"]]),
            ("border_first", [["single"]]),
            ("border_last", [["single"]]),
            ("cell_vertical_justification", [["center"]]),
            ("text_justification", [["c"]]),
        )
        for name, fallback in fallbacks:
            setattr(self, name, getattr(self, name) or fallback)

    def _validate_page_by_logic(self) -> None:
        """Reject ``new_page`` when no ``page_by`` columns are given.

        Raises:
            ValueError: If ``new_page`` is truthy while ``page_by`` is ``None``.
        """
        if self.new_page and self.page_by is None:
            raise ValueError("`new_page` must be `False` if `page_by` is not specified")

966 

967 

class RTFFigure(BaseModel):
    """Figure component describing images to embed in an RTF document.

    Stores one or more image paths together with their sizes, horizontal
    alignment and position relative to the table content. Every path is
    checked for existence when the model is validated.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Figure data
    figures: str | Path | list[str | Path] | None = Field(
        default=None,
        description="Image file path(s) - single path or list of paths to PNG, JPEG, or EMF files",
    )

    # Figure dimensions
    fig_height: float | list[float] = Field(
        default=5.0, description="Height of figures in inches (single value or list)"
    )
    fig_width: float | list[float] = Field(
        default=5.0, description="Width of figures in inches (single value or list)"
    )

    # Figure positioning
    fig_align: str = Field(
        default="center",
        description="Horizontal alignment of figures ('left', 'center', 'right')",
    )
    fig_pos: str = Field(
        default="after",
        description="Position relative to table content ('before' or 'after')",
    )

    @field_validator("fig_height", "fig_width", mode="before")
    def convert_dimensions(cls, v):
        """Wrap a single numeric dimension in a list; pass other values through."""
        return [v] if isinstance(v, (int, float)) else v

    @field_validator("fig_align")
    def validate_alignment(cls, v):
        """Require ``fig_align`` to be 'left', 'center' or 'right'."""
        valid_alignments = ["left", "center", "right"]
        if v in valid_alignments:
            return v
        raise ValueError(
            f"Invalid fig_align. Must be one of {valid_alignments}. Given: {v}"
        )

    @field_validator("fig_pos")
    def validate_position(cls, v):
        """Require ``fig_pos`` to be 'before' or 'after'."""
        valid_positions = ["before", "after"]
        if v in valid_positions:
            return v
        raise ValueError(
            f"Invalid fig_pos. Must be one of {valid_positions}. Given: {v}"
        )

    @model_validator(mode="after")
    def validate_figure_data(self):
        """Normalize ``figures`` to a list and verify every file exists.

        Raises:
            FileNotFoundError: If any listed path does not exist.
        """
        if self.figures is None:
            return self

        # A single path is stored as a one-item list.
        if isinstance(self.figures, (str, Path)):
            self.figures = [self.figures]

        for fig_path in self.figures:
            if not Path(fig_path).exists():
                raise FileNotFoundError(f"Figure file not found: {fig_path}")

        return self