Coverage for src / rtflite / input.py: 87%

337 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-28 05:09 +0000

1from collections.abc import Sequence 

2from pathlib import Path 

3from typing import Any 

4 

5from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator 

6 

7from rtflite.attributes import TableAttributes, TextAttributes 

8from rtflite.core.constants import RTFConstants 

9from rtflite.row import BORDER_CODES 

10 

11 

12class AttributeDefaultsMixin: 

13 """Mixin class for common attribute default setting patterns.""" 

14 

15 def _set_attribute_defaults(self, exclude_attrs: set[Any] | None = None) -> None: 

16 """Convert scalar text attributes to sequences for default handling.""" 

17 exclude_attrs = exclude_attrs or set() 

18 for attr, value in self.__dict__.items(): 

19 if attr not in exclude_attrs: 

20 if isinstance(value, (str, int, float, bool)): 

21 setattr(self, attr, [value]) 

22 elif isinstance(value, list): 

23 setattr(self, attr, tuple(value)) 

24 

25 

class RTFTextComponent(TextAttributes, AttributeDefaultsMixin):
    """Shared base for the paragraph-style RTF components.

    RTFPageHeader, RTFPageFooter, RTFSubline and RTFTitle all derive from this
    class; they differ only in the defaults returned by
    ``_get_component_defaults``.
    """

    text: Sequence[str] | None = Field(default=None, description="Text content")
    text_indent_reference: str | None = Field(
        default="table",
        description="Reference point for indentation ('page' or 'table')",
    )

    @field_validator("text", mode="before")
    def convert_text(cls, v):
        """Accept a bare string for ``text`` by wrapping it in a list."""
        return ValidationHelpers.convert_string_to_sequence(v)

    def __init__(self, **data):
        """Build the component from its defaults overlaid with ``data``.

        Caller-supplied keyword arguments win over the component defaults.
        """
        merged = {**self._get_component_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _set_default(self):
        """Normalize attribute values into sequence form and return ``self``."""
        self._set_attribute_defaults()
        return self

    def _get_component_defaults(self) -> dict:
        """Return the default keyword arguments; subclasses override this."""
        return DefaultsFactory.get_text_defaults()

59 

60 

class ValidationHelpers:
    """Small, stateless validation routines shared by the input models."""

    @staticmethod
    def convert_string_to_sequence(v: Any) -> Any:
        """Wrap a string in a one-item list; return any other value unchanged.

        ``None`` and non-string values (lists, tuples, ...) pass through as-is.
        """
        return [v] if isinstance(v, str) else v

    @staticmethod
    def validate_boolean_field(v: Any, field_name: str) -> bool:
        """Return ``v`` if it is a real ``bool``, otherwise raise.

        Args:
            v: Value to check.
            field_name: Name used in the error message.

        Raises:
            ValueError: If ``v`` is not an instance of ``bool``.
        """
        if isinstance(v, bool):
            return v
        raise ValueError(
            f"{field_name} must be a boolean, got {type(v).__name__}: {v}"
        )

81 

82 

class DefaultsFactory:
    """Builders for the default keyword arguments of the RTF components.

    Every method returns a freshly built dictionary, so callers may mutate
    the result without affecting later calls.
    """

    @staticmethod
    def get_text_defaults() -> dict:
        """Return the defaults shared by all paragraph-style text components."""
        return {
            "text_font": [1],
            "text_font_size": [9],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1.0],
            "text_space_before": [RTFConstants.DEFAULT_SPACE_BEFORE],
            "text_space_after": [RTFConstants.DEFAULT_SPACE_AFTER],
            "text_hyphenation": [True],
        }

    @staticmethod
    def get_page_header_defaults() -> dict:
        """Return the text defaults overlaid with page header settings."""
        header_overrides = {
            "text_font_size": [12],
            "text_justification": ["r"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }
        return DefaultsFactory.get_text_defaults() | header_overrides

    @staticmethod
    def get_page_footer_defaults() -> dict:
        """Return the text defaults overlaid with page footer settings."""
        footer_overrides = {
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }
        return DefaultsFactory.get_text_defaults() | footer_overrides

    @staticmethod
    def get_title_defaults() -> dict:
        """Return the text defaults overlaid with title settings."""
        title_overrides = {
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_space_before": [180.0],
            "text_space_after": [180.0],
            "text_convert": [True],  # Enable LaTeX conversion for titles
            "text_indent_reference": "table",
        }
        return DefaultsFactory.get_text_defaults() | title_overrides

    @staticmethod
    def get_subline_defaults() -> dict:
        """Return the text defaults overlaid with subline settings."""
        subline_overrides = {
            "text_font_size": [9],
            "text_justification": ["l"],
            "text_convert": [False],
            "text_indent_reference": "table",
        }
        return DefaultsFactory.get_text_defaults() | subline_overrides

    @staticmethod
    def get_table_defaults() -> dict:
        """Return the defaults shared by table-style components.

        Most values are nested one level deeper (``[[value]]``) than the
        paragraph-style defaults; ``col_rel_width`` is a flat list.
        """
        return {
            "col_rel_width": [1.0],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_format": [[""]],
            "text_font_size": [[9]],
            "text_justification": [["l"]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[True]],
        }

    @staticmethod
    def get_border_defaults(as_table: bool) -> dict:
        """Return the border defaults for the given rendering mode.

        Args:
            as_table: Truthy for table rendering (single left, right and top
                borders, R2RTF ``as_table=TRUE`` behavior); falsy for plain
                text rendering (no borders, R2RTF ``as_table=FALSE`` behavior).

        Returns:
            Mapping of the four border attributes. The bottom border is
            empty in both modes.
        """
        outline = "single" if as_table else ""
        return {
            "border_left": [[outline]],
            "border_right": [[outline]],
            "border_top": [[outline]],
            "border_bottom": [[""]],
        }

200 

201 

class RTFPage(BaseModel):
    """Configure RTF page layout and pagination settings.

    The RTFPage component controls page dimensions, margins, orientation,
    and pagination behavior including rows per page and border styles for
    first/last rows across page boundaries.

    Examples:
        Basic portrait page with custom margins:
        ```python
        page = RTFPage(
            orientation="portrait",
            margin=[
                1.0,
                1.0,
                1.5,
                1.0,
                1.5,
                1.0,
            ],  # left, right, top, bottom, header, footer
        )
        ```

        Landscape layout for wide tables:
        ```python
        page = RTFPage(
            orientation="landscape",
            nrow=30,  # Fewer rows due to landscape
            border_first="double",  # Double border on first row
            border_last="single"  # Single border on last row
        )
        ```

    Attributes:
        nrow: Total number of rows per page including ALL components:
            - Column headers (if displayed)
            - Data rows
            - Footnotes (if present)
            - Source lines (if present)
            This is NOT just data rows - it's the complete row budget.

        border_first: Border style for the first row of the table.
            Defaults to "double" for emphasis.

        border_last: Border style for the last row of the table.
            Defaults to "double" for closure.

    Note:
        The nrow parameter represents the total row capacity of a page,
        not just data rows. Plan accordingly when setting this value.

        Any of width, height, margin, col_width and nrow left unset are
        filled in after validation with orientation-specific defaults.
    """

    orientation: str | None = Field(
        default="portrait", description="Page orientation ('portrait' or 'landscape')"
    )

    @field_validator("orientation")
    def validate_orientation(cls, v):
        """Reject any orientation other than 'portrait' or 'landscape'."""
        if v not in ["portrait", "landscape"]:
            raise ValueError(
                f"Invalid orientation. Must be 'portrait' or 'landscape'. Given: {v}"
            )
        return v

    width: float | None = Field(default=None, description="Page width in inches")
    height: float | None = Field(default=None, description="Page height in inches")
    margin: Sequence[float] | None = Field(
        default=None,
        description="Page margins [left, right, top, bottom, header, footer] in inches",
    )

    @field_validator("margin")
    def validate_margin(cls, v):
        """Require exactly six margin values when a margin is supplied."""
        if v is not None and len(v) != 6:
            raise ValueError("Margin must be a sequence of 6 values.")
        return v

    nrow: int | None = Field(
        default=None,
        description=(
            "Total rows per page including headers, data, footnotes, and "
            "sources. NOT just data rows - this is the complete page row budget."
        ),
    )

    border_first: str | None = Field(
        default="double", description="First row border style"
    )
    border_last: str | None = Field(
        default="double", description="Last row border style"
    )
    col_width: float | None = Field(
        default=None, description="Total width of table columns in inches"
    )
    use_color: bool | None = Field(
        default=False, description="Whether to use color in the document"
    )

    page_title: str = Field(
        default="all",
        description=(
            "Where to display titles in multi-page documents ('first', 'last', 'all')"
        ),
    )
    page_footnote: str = Field(
        default="last",
        description=(
            "Where to display footnotes in multi-page documents ('first', "
            "'last', 'all')"
        ),
    )
    page_source: str = Field(
        default="last",
        description=(
            "Where to display source in multi-page documents ('first', 'last', 'all')"
        ),
    )

    @field_validator("border_first", "border_last")
    def validate_border(cls, v, info):
        """Reject border styles that are not keys of ``BORDER_CODES``.

        ``info`` is the validation context pydantic passes to two-argument
        field validators; its ``field_name`` identifies which of the two
        fields is being checked. (The model class has no ``__field_name__``
        attribute, so reading it there raised ``AttributeError`` instead of
        the intended ``ValueError``.)
        """
        if v not in BORDER_CODES:
            field = info.field_name or "border"
            raise ValueError(f"{field.capitalize()} with invalid border style: {v}")
        return v

    @field_validator("page_title", "page_footnote", "page_source")
    def validate_page_placement(cls, v):
        """Restrict placement options to 'first', 'last' or 'all'."""
        valid_options = {"first", "last", "all"}
        if v not in valid_options:
            raise ValueError(
                f"Invalid page placement option '{v}'. Must be one of {valid_options}"
            )
        return v

    @field_validator("width", "height", "nrow", "col_width")
    def validate_width_height(cls, v, info):
        """Require a strictly positive value when one is supplied.

        The failing field is named via ``info.field_name`` (see
        ``validate_border`` for why ``cls.__field_name__`` cannot be used).
        """
        if v is not None and v <= 0:
            field = info.field_name or "value"
            raise ValueError(f"{field.capitalize()} must be greater than 0.")
        return v

    def __init__(self, **data):
        """Validate ``data`` and then fill in orientation-specific defaults."""
        super().__init__(**data)
        self._set_default()

    def _set_default(self):
        """Set default values based on page orientation and return ``self``."""
        if self.orientation == "portrait":
            self._set_portrait_defaults()
        elif self.orientation == "landscape":
            self._set_landscape_defaults()

        self._validate_margin_length()
        return self

    def _set_portrait_defaults(self) -> None:
        """Fill unset dimensions with the portrait defaults."""
        self.width = self.width or 8.5
        self.height = self.height or 11
        self.margin = self.margin or [1.25, 1, 1.75, 1.25, 1.75, 1.00625]
        # Default column width is derived from the (possibly defaulted) width.
        self.col_width = self.col_width or self.width - 2.25
        self.nrow = self.nrow or 40

    def _set_landscape_defaults(self) -> None:
        """Fill unset dimensions with the landscape defaults."""
        self.width = self.width or 11
        self.height = self.height or 8.5
        self.margin = self.margin or [1.0, 1.0, 2, 1.25, 1.25, 1.25]
        # Default column width is derived from the (possibly defaulted) width.
        self.col_width = self.col_width or self.width - 2.5
        self.nrow = self.nrow or 24

    def _validate_margin_length(self) -> None:
        """Validate that margin has exactly 6 values.

        Raises:
            ValueError: If a margin is set and its length is not 6.
        """
        if self.margin is not None and len(self.margin) != 6:
            raise ValueError("Margin length must be 6.")

379 

380 

class RTFPageHeader(RTFTextComponent):
    """Page header component shown at the top of each page.

    Typical uses are page numbering, document titles, or study identifiers.
    When no text is given, the header shows an automatic "Page X of Y"
    counter built from RTF field codes.

    Examples:
        Default page numbering:
        ```python
        header = RTFPageHeader()  # Shows "Page X of Y"
        ```

        Custom header text:
        ```python
        header = RTFPageHeader(
            text="Protocol ABC-123 | Confidential",
            text_justification=["c"]  # Center align
        )
        ```

        Header with page number:
        ```python
        header = RTFPageHeader(
            text="Study Report - Page \\\\chpgn",  # Current page number
            text_format=["b"],  # Bold
            text_font_size=[10]
        )
        ```

    Note:
        - Default text is "Page \\\\chpgn of {\\\\field{\\\\*\\\\fldinst NUMPAGES }}"
        - Text conversion is disabled by default to preserve RTF field codes
        - Right-aligned by default
    """

    def __init__(self, **data):
        """Create the header, supplying the page-counter text when none is given."""
        data.setdefault("text", "Page \\chpgn of {\\field{\\*\\fldinst NUMPAGES }}")
        super().__init__(**data)

    def _get_component_defaults(self) -> dict:
        """Return the page header defaults from ``DefaultsFactory``."""
        header_defaults = DefaultsFactory.get_page_header_defaults()
        return header_defaults

425 

426 

class RTFPageFooter(RTFTextComponent):
    """Page footer component shown at the bottom of each page.

    Typical uses are confidentiality notices, timestamps, or file paths.
    Footers are center-aligned unless configured otherwise.

    Examples:
        Simple footer:
        ```python
        footer = RTFPageFooter(
            text="Company Confidential"
        )
        ```

        Multi-line footer:
        ```python
        footer = RTFPageFooter(
            text=[
                "Proprietary and Confidential",
                "Do Not Distribute"
            ],
            text_font_size=[8, 8]
        )
        ```

        Footer with timestamp:
        ```python
        footer = RTFPageFooter(
            text="Generated: 2024-01-15 14:30:00 | program.py",
            text_justification=["l"],  # Left align
            text_font_size=[8]
        )
        ```

    Note:
        - Center-aligned by default
        - Text conversion is disabled by default to preserve special characters
        - Appears on every page of the document
    """

    def _get_component_defaults(self) -> dict:
        """Return the page footer defaults from ``DefaultsFactory``."""
        footer_defaults = DefaultsFactory.get_page_footer_defaults()
        return footer_defaults

469 

470 

class RTFSubline(RTFTextComponent):
    """Subline text component; left-aligned by default."""

    def _get_component_defaults(self) -> dict:
        """Return the subline defaults from ``DefaultsFactory``."""
        subline_defaults = DefaultsFactory.get_subline_defaults()
        return subline_defaults

476 

477 

class RTFTableTextComponent(TableAttributes):
    """Shared base for table-style text components (footnotes and sources).

    RTFFootnote and RTFSource derive from this class and differ only in
    their default ``as_table`` value and text justification.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(default=None, description="Text content")
    as_table: bool = Field(
        description="Whether to render as table (True) or plain text (False)",
    )

    @field_validator("text", mode="before")
    def convert_text(cls, v):
        """Accept a bare string for ``text`` by wrapping it in a list."""
        return ValidationHelpers.convert_string_to_sequence(v)

    @field_validator("as_table", mode="before")
    def validate_as_table(cls, v):
        """Require ``as_table`` to be a real boolean (no coercion)."""
        return ValidationHelpers.validate_boolean_field(v, "as_table")

    def __init__(self, **data):
        """Build the component from mode-dependent defaults plus ``data``.

        ``as_table`` falls back to the component default when omitted, and
        its value selects the border defaults. Caller-supplied keyword
        arguments win over all defaults.
        """
        if "as_table" not in data:
            data["as_table"] = self._get_default_as_table()

        merged = {**self._get_component_table_defaults(data["as_table"]), **data}
        super().__init__(**merged)
        self._process_text_conversion()

    def _get_default_as_table(self) -> bool:
        """Return the default ``as_table`` value; subclasses override this."""
        return True

    def _get_component_table_defaults(self, as_table: bool) -> dict:
        """Combine table, border and component-specific defaults.

        Later sources take precedence: table defaults, then border defaults
        for ``as_table``, then the component overrides.
        """
        table_part = DefaultsFactory.get_table_defaults()
        border_part = DefaultsFactory.get_border_defaults(as_table)
        override_part = self._get_component_overrides()
        return {**table_part, **border_part, **override_part}

    def _get_component_overrides(self) -> dict:
        """Return component-specific overrides; subclasses may extend this."""
        return {"text_convert": [[True]]}  # Default: enable text conversion

    def _process_text_conversion(self) -> None:
        """Join the text lines into one string separated by RTF line breaks.

        An empty sequence is stored as an empty list; ``None`` is left as-is.
        """
        lines = self.text
        if lines is None or not isinstance(lines, Sequence):
            return
        self.text = "\\line ".join(lines) if len(lines) != 0 else []

    def _set_default(self):
        """Wrap scalar attribute values in one-item lists and return ``self``."""
        for name, current in self.__dict__.items():
            if not isinstance(current, (str, int, float, bool)):
                continue
            setattr(self, name, [current])
        return self

539 

540 

class RTFFootnote(RTFTableTextComponent):
    """Footnote component for explanatory notes and citations.

    Footnote text is displayed at the bottom of tables. Several footnote
    lines may be given, and the footnote can be rendered as a table (with
    borders) or as plain text. Text conversion is enabled by default.

    Examples:
        Single footnote:
        ```python
        footnote = RTFFootnote(
            text="CI = Confidence Interval; N = Number of subjects"
        )
        ```

        Multiple footnotes:
        ```python
        footnote = RTFFootnote(
            text=[
                "* p-value from ANCOVA model",
                "** Missing values were imputed using LOCF",
                "*** Baseline is defined as last value before first dose"
            ]
        )
        ```

        Footnote without table borders:
        ```python
        footnote = RTFFootnote(
            text="Data cutoff date: 2023-12-31",
            as_table=False  # No borders around footnote
        )
        ```

    Note:
        - Multiple footnote lines are joined with \\\\line separator
        - Text conversion is enabled by default (LaTeX symbols supported)
        - Default rendering includes table borders (as_table=True)
    """

    def _get_default_as_table(self) -> bool:
        """Return ``True``: footnotes render as a table unless told otherwise."""
        render_as_table = True
        return render_as_table

583 

584 

class RTFSource(RTFTableTextComponent):
    """Source component for data source citations.

    Source information is displayed at the very bottom of the document and
    is typically used for dataset names, program references, or generation
    timestamps. By default it is rendered as plain text without borders.

    Examples:
        Simple source citation:
        ```python
        source = RTFSource(
            text="Source: ADAE dataset, generated 2024-01-15"
        )
        ```

        Multiple source lines:
        ```python
        source = RTFSource(
            text=[
                "Dataset: ADAE version 3.0",
                "Program: ae_summary.py",
                "Generated: 2024-01-15 14:30:00"
            ]
        )
        ```

        Source with table borders:
        ```python
        source = RTFSource(
            text="Database lock: 2023-12-31",
            as_table=True,  # Add borders around source
            text_justification=[["l"]]  # Left align instead of center
        )
        ```

    Note:
        - Center-aligned by default
        - Rendered without borders by default (as_table=False)
        - Text conversion is enabled by default
    """

    def _get_default_as_table(self) -> bool:
        """Return ``False``: sources render as plain text unless told otherwise."""
        render_as_table = False
        return render_as_table

    def _get_component_overrides(self) -> dict:
        """Extend the base overrides with center justification for sources."""
        return {
            **super()._get_component_overrides(),
            "text_justification": [["c"]],  # Center justification for sources
        }

637 

638 

class RTFTitle(RTFTextComponent):
    """Title component with centered text and LaTeX conversion enabled.

    Titles are displayed at the top of the document or table. Several title
    lines may be given, and LaTeX-style text conversion is applied for
    mathematical symbols and formatting.

    Examples:
        Single line title:
        ```python
        title = RTFTitle(text="Adverse Events Summary")
        ```

        Multi-line title with formatting:
        ```python
        title = RTFTitle(
            text=["Clinical Study Report", "Safety Analysis Set"],
            text_format=["b", ""]  # First line bold, second normal
        )
        ```

        Title with LaTeX symbols:
        ```python
        title = RTFTitle(
            text="Efficacy Analysis (\\\\alpha = 0.05)"
        )
        # Renders as: Efficacy Analysis (alpha = 0.05) with Greek alpha symbol
        ```

    Note:
        Text conversion is enabled by default for titles, converting:
        - LaTeX symbols (e.g., \\\\alpha to Greek alpha, \\\\beta to Greek beta)
        - Subscripts (e.g., x_1 to x with subscript 1)
        - Other mathematical notation
    """

    def _get_component_defaults(self) -> dict:
        """Return the title defaults from ``DefaultsFactory``."""
        title_defaults = DefaultsFactory.get_title_defaults()
        return title_defaults

677 

678 

class RTFColumnHeader(TableAttributes):
    """Configure column headers for RTF tables.

    Column headers appear at the top of tables and repeat on each page in
    multi-page documents. Multi-row headers and flexible column spanning
    are supported.

    Examples:
        Simple column headers:
        ```python
        header = RTFColumnHeader(
            text=["Name", "Age", "Treatment", "Response"]
        )
        ```

        Headers with custom formatting:
        ```python
        header = RTFColumnHeader(
            text=["Subject", "Baseline", "Week 4", "Week 8"],
            text_format=["b", "b", "b", "b"],  # All bold
            text_justification=["l", "c", "c", "c"],  # Left, center, center, center
            border_bottom=["double", "double", "double", "double"]
        )
        ```

        Multi-row headers with col_rel_width:
        ```python
        # First row spans multiple columns
        header1 = RTFColumnHeader(
            text=["Patient Info", "Treatment Results"],
            col_rel_width=[2, 3]  # Spans 2 and 3 columns respectively
        )
        # Second row with individual columns
        header2 = RTFColumnHeader(
            text=["ID", "Age", "Drug A", "Drug B", "Placebo"],
            col_rel_width=[1, 1, 1, 1, 1]
        )
        ```

    Note:
        - Headers automatically repeat on each page in multi-page documents
        - Use col_rel_width to create spanning headers
        - Border styles from RTFPage are applied to the first row
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(
        default=None, description="Column header text. List of strings, one per column."
    )

    @field_validator("text", mode="before")
    def convert_text_before(cls, v):
        """Normalize raw ``text`` input before type validation.

        A bare string becomes a one-item list, and a list or tuple made up
        entirely of strings becomes a new list. Anything else is returned
        unchanged.
        """
        if v is None:
            return v
        if isinstance(v, str):
            return [v]
        is_string_collection = isinstance(v, (list, tuple)) and all(
            isinstance(item, str) for item in v
        )
        return list(v) if is_string_collection else v

    @field_validator("text", mode="after")
    def convert_text_after(cls, v):
        """Convert a validated list or tuple into a one-row polars DataFrame.

        Columns are named ``col_1``, ``col_2``, ... If polars cannot be
        imported, the value is returned unchanged.
        """
        if v is None or not isinstance(v, (list, tuple)):
            return v
        try:
            import polars as pl

            column_names = [f"col_{position + 1}" for position in range(len(v))]
            return pl.DataFrame([v], schema=column_names, orient="row")
        except ImportError:
            return v

    def __init__(self, **data):
        """Build the header from its defaults overlaid with ``data``.

        A legacy ``df`` argument is translated to ``text`` first; caller
        supplied keyword arguments win over the defaults.
        """
        data = self._handle_backwards_compatibility(data)
        merged = {**self._get_column_header_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _handle_backwards_compatibility(self, data: dict) -> dict:
        """Translate a legacy ``df`` argument into ``text``.

        Only applies when ``df`` is present and ``text`` is not; ``df`` is
        then removed from ``data``.
        """
        has_legacy_frame = "df" in data and "text" not in data
        if has_legacy_frame:
            legacy_frame = data.pop("df")
            data["text"] = self._convert_dataframe_to_text(legacy_frame)
        return data

    def _convert_dataframe_to_text(self, df) -> list | None:
        """Extract header text from a polars DataFrame.

        Returns ``None`` when polars cannot be imported or ``df`` is not a
        polars DataFrame.
        """
        try:
            import polars as pl

            if isinstance(df, pl.DataFrame):
                return self._handle_dataframe_orientation(df)
        except ImportError:
            pass
        return None

    def _handle_dataframe_orientation(self, df) -> list:
        """Read header values from either a single column or the first row.

        A frame with several rows and exactly one column is treated as
        column-oriented and its only column is returned; every other shape
        is treated as row-oriented and its first row is returned.
        """
        row_count, column_count = df.shape[0], df.shape[1]
        if row_count > 1 and column_count == 1:
            # Column-oriented: the single column holds the header values
            only_column = df.columns[0]
            return df.get_column(only_column).to_list()
        # Row-oriented: take first row
        return list(df.row(0))

    def _get_column_header_defaults(self) -> dict:
        """Return the default keyword arguments for column headers."""
        return {
            "border_left": ["single"],
            "border_right": ["single"],
            "border_top": ["single"],
            "border_bottom": [""],
            "border_width": [15],
            "cell_height": [0.15],
            "cell_justification": ["c"],
            "cell_vertical_justification": ["bottom"],
            "text_font": [1],
            "text_format": [""],
            "text_font_size": [9],
            "text_justification": ["c"],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1],
            "text_space_before": [15],
            "text_space_after": [15],
            "text_hyphenation": [False],
            "text_convert": [True],
        }

    def _set_default(self):
        """Wrap scalar attribute values in one-item lists and return ``self``."""
        for name, current in self.__dict__.items():
            if not isinstance(current, (str, int, float, bool)):
                continue
            setattr(self, name, [current])

        return self

820 

821 

class RTFBody(TableAttributes):
    """Configure table body formatting and layout.

    The RTFBody component controls how data is displayed in the RTF table:
    column widths, text formatting, borders, and advanced features such as
    group_by for value suppression and subline_by for section headers.

    Examples:
        Basic table with custom column widths:
        ```python
        body = RTFBody(
            col_rel_width=[3, 2, 2, 2],
            text_justification=[["l", "c", "c", "c"]]
        )
        ```

        Using group_by to suppress duplicate values:
        ```python
        body = RTFBody(
            group_by=["SITE", "SUBJECT"],
            col_rel_width=[2, 2, 3, 1]
        )
        ```

        Using subline_by for section headers:
        ```python
        body = RTFBody(
            subline_by=["SITE", "STUDY"],  # Creates paragraph headers
            col_rel_width=[3, 2, 2]  # Note: subline_by columns are removed from table
        )
        ```

    Note:
        When using subline_by:
        - The specified columns are removed from the table display
        - Values appear as paragraph headers before each section
        - Pagination is automatically enabled (new_page=True)
        - Formatting attributes apply uniformly to the entire table
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    as_colheader: bool = Field(
        default=True, description="Whether to display column headers"
    )
    group_by: Sequence[str] | None = Field(
        default=None,
        description=(
            "Column names for hierarchical value suppression. Values appear "
            "only on the first occurrence within groups, with page context "
            "restoration for multi-page tables."
        ),
    )
    page_by: Sequence[str] | None = Field(
        default=None,
        description="Column names to trigger page breaks when values change",
    )
    new_page: bool = Field(
        default=False,
        description=(
            "Force a new page before the table. Automatically set to True when "
            "using subline_by."
        ),
    )
    pageby_header: bool = Field(
        default=True, description="Repeat column headers on new pages"
    )
    pageby_row: str = Field(
        default="column",
        description=(
            "Page break handling: 'column' (keep column) or 'first_row' (use "
            "first row as header)"
        ),
    )
    subline_by: Sequence[str] | None = Field(
        default=None,
        description=(
            "Column names to create paragraph headers. These columns are "
            "removed from the table and their values appear as section headers "
            "above each group. Forces pagination."
        ),
    )
    last_row: bool = Field(
        default=True,
        description="Whether the table contains the last row of the final table",
    )

    @field_validator("group_by", "page_by", "subline_by", mode="before")
    def convert_text(cls, v):
        """Accept a single column name by wrapping a bare string in a list."""
        return [v] if isinstance(v, str) else v

    @field_validator("pageby_row")
    def validate_pageby_row(cls, v):
        """Reject any ``pageby_row`` other than 'column' or 'first_row'."""
        if v in ("column", "first_row"):
            return v
        raise ValueError(
            f"Invalid pageby_row. Must be 'column' or 'first_row'. Given: {v}"
        )

    def __init__(self, **data):
        """Build the body from its defaults overlaid with ``data``.

        Caller-supplied keyword arguments win over the defaults.
        """
        body_defaults = {
            "border_left": [["single"]],
            "border_right": [["single"]],
            "border_first": [["single"]],
            "border_last": [["single"]],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_font_size": [[9]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[False]],
            "text_convert": [[True]],
        }

        super().__init__(**{**body_defaults, **data})
        self._set_default()

    def _set_default(self):
        """Normalize attributes, fill fallbacks, validate, and return ``self``."""
        self._set_table_attribute_defaults()
        self._set_border_defaults()
        self._validate_page_by_logic()
        return self

    def _set_table_attribute_defaults(self) -> None:
        """Wrap scalar attributes in one-item lists, skipping control fields."""
        control_fields = {
            "as_colheader",
            "page_by",
            "new_page",
            "pageby_header",
            "pageby_row",
            "subline_by",
            "last_row",
        }

        for name, current in self.__dict__.items():
            if name in control_fields:
                continue
            if isinstance(current, (str, int, float, bool)):
                setattr(self, name, [current])

    def _set_border_defaults(self) -> None:
        """Fill empty border and justification attributes with fallbacks.

        Each attribute is reassigned to itself when truthy, otherwise to the
        fallback listed below.
        """
        fallbacks = (
            ("border_top", [[""]]),
            ("border_bottom", [[""]]),
            ("border_left", [["single"]]),
            ("border_right", [["single"]]),
            ("border_first", [["single"]]),
            ("border_last", [["single"]]),
            ("cell_vertical_justification", [["center"]]),
            ("text_justification", [["c"]]),
        )
        for name, fallback in fallbacks:
            setattr(self, name, getattr(self, name) or fallback)

    def _validate_page_by_logic(self) -> None:
        """Validate that page_by and new_page settings are consistent.

        Raises:
            ValueError: If ``new_page`` is set while ``page_by`` is ``None``.
        """
        if self.new_page and self.page_by is None:
            raise ValueError("`new_page` must be `False` if `page_by` is not specified")

993 

994 

class RTFFigure(BaseModel):
    """Figure component for embedding images in RTF documents.

    Holds one or more image paths together with their sizes, horizontal
    alignment, and position relative to the table content.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Figure data
    figures: str | Path | list[str | Path] | None = Field(
        default=None,
        description=(
            "Image file path(s)-single path or list of paths to PNG, JPEG, or EMF files"
        ),
    )

    # Figure dimensions
    fig_height: float | list[float] = Field(
        default=5.0, description="Height of figures in inches (single value or list)"
    )
    fig_width: float | list[float] = Field(
        default=5.0, description="Width of figures in inches (single value or list)"
    )

    # Figure positioning
    fig_align: str = Field(
        default="center",
        description="Horizontal alignment of figures ('left', 'center', 'right')",
    )
    fig_pos: str = Field(
        default="after",
        description="Position relative to table content ('before' or 'after')",
    )

    @field_validator("fig_height", "fig_width", mode="before")
    def convert_dimensions(cls, v):
        """Wrap a single numeric dimension in a one-item list."""
        return [v] if isinstance(v, (int, float)) else v

    @field_validator("fig_align")
    def validate_alignment(cls, v):
        """Reject alignments other than 'left', 'center' or 'right'."""
        valid_alignments = ["left", "center", "right"]
        if v in valid_alignments:
            return v
        raise ValueError(
            f"Invalid fig_align. Must be one of {valid_alignments}. Given: {v}"
        )

    @field_validator("fig_pos")
    def validate_position(cls, v):
        """Reject positions other than 'before' or 'after'."""
        valid_positions = ["before", "after"]
        if v in valid_positions:
            return v
        raise ValueError(
            f"Invalid fig_pos. Must be one of {valid_positions}. Given: {v}"
        )

    @model_validator(mode="after")
    def validate_figure_data(self):
        """Store figure paths as a list and check that each file exists.

        Raises:
            FileNotFoundError: For the first path that does not exist.
        """
        if self.figures is None:
            return self

        # Convert single path to list
        if isinstance(self.figures, (str, Path)):
            self.figures = [self.figures]

        # Report the first path that does not exist on disk
        for candidate in self.figures:
            if Path(candidate).exists():
                continue
            raise FileNotFoundError(f"Figure file not found: {candidate}")

        return self