Coverage for src/rtflite/input.py: 87%
340 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-17 01:22 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-17 01:22 +0000
from collections.abc import Sequence
from pathlib import Path
from typing import Any

from pydantic import (
    BaseModel,
    ConfigDict,
    Field,
    ValidationInfo,
    field_validator,
    model_validator,
)

from rtflite.attributes import TableAttributes, TextAttributes
from rtflite.core.constants import RTFConstants
from rtflite.row import BORDER_CODES


12class AttributeDefaultsMixin:
13 """Mixin class for common attribute default setting patterns."""
15 def _set_attribute_defaults(self, exclude_attrs: set[Any] | None = None) -> None:
16 """Set default values for text attributes by converting scalars to lists/tuples."""
17 exclude_attrs = exclude_attrs or set()
18 for attr, value in self.__dict__.items():
19 if attr not in exclude_attrs:
20 if isinstance(value, (str, int, float, bool)):
21 setattr(self, attr, [value])
22 elif isinstance(value, list):
23 setattr(self, attr, tuple(value))
class RTFTextComponent(TextAttributes, AttributeDefaultsMixin):
    """Consolidated base class for text-based RTF components.

    RTFPageHeader, RTFPageFooter, RTFSubline, and RTFTitle share this
    structure and differ only in the defaults they return from
    ``_get_component_defaults``.
    """

    text: Sequence[str] | None = Field(default=None, description="Text content")
    text_indent_reference: str | None = Field(
        default="table",
        description="Reference point for indentation ('page' or 'table')",
    )

    @field_validator("text", mode="before")
    @classmethod
    def convert_text(cls, v):
        """Wrap a bare string in a one-item list before field validation."""
        return ValidationHelpers.convert_string_to_sequence(v)

    def __init__(self, **data):
        """Build the component from its defaults overlaid with ``data``.

        Args:
            **data: Field values supplied by the caller; any key given here
                replaces the component default of the same name.
        """
        # Caller-supplied values win over the component defaults.
        merged = {**self._get_component_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _set_default(self):
        """Normalize attribute values via the mixin and return ``self``."""
        self._set_attribute_defaults()
        return self

    def _get_component_defaults(self) -> dict:
        """Override in subclasses to provide component-specific defaults."""
        return DefaultsFactory.get_text_defaults()


class ValidationHelpers:
    """Shared validation routines used by the component models."""

    @staticmethod
    def convert_string_to_sequence(v: Any) -> Any:
        """Return ``[v]`` when ``v`` is a string; otherwise return ``v`` unchanged.

        ``None`` and non-string values pass through untouched.
        """
        return [v] if isinstance(v, str) else v

    @staticmethod
    def validate_boolean_field(v: Any, field_name: str) -> bool:
        """Return ``v`` if it is a ``bool``.

        Args:
            v: Value to check.
            field_name: Name used in the error message.

        Raises:
            ValueError: If ``v`` is not an instance of ``bool``.
        """
        if isinstance(v, bool):
            return v
        raise ValueError(
            f"{field_name} must be a boolean, got {type(v).__name__}: {v}"
        )


class DefaultsFactory:
    """Builds the default keyword dictionaries used by the RTF components.

    Every method returns a freshly constructed dictionary, so callers may
    mutate the result freely.
    """

    @staticmethod
    def get_text_defaults() -> dict:
        """Return the text attribute defaults shared by all text components."""
        return {
            "text_font": [1],
            "text_font_size": [9],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1.0],
            "text_space_before": [RTFConstants.DEFAULT_SPACE_BEFORE],
            "text_space_after": [RTFConstants.DEFAULT_SPACE_AFTER],
            "text_hyphenation": [True],
        }

    @staticmethod
    def get_page_header_defaults() -> dict:
        """Return the text defaults with the page header overrides applied."""
        header_overrides = {
            "text_font_size": [12],
            "text_justification": ["r"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }
        return DefaultsFactory.get_text_defaults() | header_overrides

    @staticmethod
    def get_page_footer_defaults() -> dict:
        """Return the text defaults with the page footer overrides applied."""
        footer_overrides = {
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_convert": [False],  # Preserve RTF field codes
            "text_indent_reference": "page",
        }
        return DefaultsFactory.get_text_defaults() | footer_overrides

    @staticmethod
    def get_title_defaults() -> dict:
        """Return the text defaults with the title overrides applied."""
        title_overrides = {
            "text_font_size": [12],
            "text_justification": ["c"],
            "text_space_before": [180.0],
            "text_space_after": [180.0],
            "text_convert": [True],  # Enable LaTeX conversion for titles
            "text_indent_reference": "table",
        }
        return DefaultsFactory.get_text_defaults() | title_overrides

    @staticmethod
    def get_subline_defaults() -> dict:
        """Return the text defaults with the subline overrides applied."""
        subline_overrides = {
            "text_font_size": [9],
            "text_justification": ["l"],
            "text_convert": [False],
            "text_indent_reference": "table",
        }
        return DefaultsFactory.get_text_defaults() | subline_overrides

    @staticmethod
    def get_table_defaults() -> dict:
        """Return the attribute defaults shared by table-style components."""
        return {
            "col_rel_width": [1.0],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_format": [[""]],
            "text_font_size": [[9]],
            "text_justification": [["l"]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[True]],
        }

    @staticmethod
    def get_border_defaults(as_table: bool) -> dict:
        """Return border defaults for the given rendering mode.

        Args:
            as_table: ``True`` for table rendering (single left/right/top
                borders, R2RTF as_table=TRUE behavior); ``False`` for plain
                text rendering (no borders, R2RTF as_table=FALSE behavior).

        Returns:
            A dictionary with the four ``border_*`` keys. The bottom border
            is empty in both modes.
        """
        side_style = "single" if as_table else ""
        return {
            "border_left": [[side_style]],
            "border_right": [[side_style]],
            "border_top": [[side_style]],
            "border_bottom": [[""]],
        }


class RTFPage(BaseModel):
    """Configure RTF page layout and pagination settings.

    The RTFPage component controls page dimensions, margins, orientation,
    and pagination behavior including rows per page and border styles for
    first/last rows across page boundaries.

    Examples:
        Basic portrait page with custom margins:
        ```python
        page = RTFPage(
            orientation="portrait",
            margin=[1.0, 1.0, 1.5, 1.0, 1.5, 1.0]  # left, right, top, bottom, header, footer
        )
        ```

        Landscape layout for wide tables:
        ```python
        page = RTFPage(
            orientation="landscape",
            nrow=30,  # Fewer rows due to landscape
            border_first="double",  # Double border on first row
            border_last="single"  # Single border on last row
        )
        ```

    Attributes:
        nrow: Total number of rows per page including ALL components:
            - Column headers (if displayed)
            - Data rows
            - Footnotes (if present)
            - Source lines (if present)
            This is NOT just data rows - it's the complete row budget.

        border_first: Border style for the first row of the table.
            Defaults to "double" for emphasis.

        border_last: Border style for the last row of the table.
            Defaults to "double" for closure.

    Note:
        The nrow parameter represents the total row capacity of a page,
        not just data rows. Plan accordingly when setting this value.
    """

    orientation: str | None = Field(
        default="portrait", description="Page orientation ('portrait' or 'landscape')"
    )

    @field_validator("orientation")
    @classmethod
    def validate_orientation(cls, v):
        """Reject any orientation other than 'portrait' or 'landscape'."""
        if v not in ["portrait", "landscape"]:
            raise ValueError(
                f"Invalid orientation. Must be 'portrait' or 'landscape'. Given: {v}"
            )
        return v

    width: float | None = Field(default=None, description="Page width in inches")
    height: float | None = Field(default=None, description="Page height in inches")
    margin: Sequence[float] | None = Field(
        default=None,
        description="Page margins [left, right, top, bottom, header, footer] in inches",
    )

    @field_validator("margin")
    @classmethod
    def validate_margin(cls, v):
        """Require a supplied margin to contain exactly six values."""
        if v is not None and len(v) != 6:
            raise ValueError("Margin must be a sequence of 6 values.")
        return v

    nrow: int | None = Field(
        default=None,
        description="Total rows per page including headers, data, footnotes, and sources. NOT just data rows - this is the complete page row budget.",
    )

    border_first: str | None = Field(
        default="double", description="First row border style"
    )
    border_last: str | None = Field(
        default="double", description="Last row border style"
    )
    col_width: float | None = Field(
        default=None, description="Total width of table columns in inches"
    )
    use_color: bool | None = Field(
        default=False, description="Whether to use color in the document"
    )

    page_title: str = Field(
        default="all",
        description="Where to display titles in multi-page documents ('first', 'last', 'all')",
    )
    page_footnote: str = Field(
        default="last",
        description="Where to display footnotes in multi-page documents ('first', 'last', 'all')",
    )
    page_source: str = Field(
        default="last",
        description="Where to display source in multi-page documents ('first', 'last', 'all')",
    )

    @field_validator("border_first", "border_last")
    @classmethod
    def validate_border(cls, v, info: ValidationInfo):
        """Require the border style to be a key of ``BORDER_CODES``.

        Raises:
            ValueError: If ``v`` is not in ``BORDER_CODES``; the message names
                the offending field.
        """
        if v not in BORDER_CODES:
            # The field name comes from the validation context; model classes
            # have no ``__field_name__`` attribute to read it from.
            label = (info.field_name or "field").capitalize()
            raise ValueError(f"{label} with invalid border style: {v}")
        return v

    @field_validator("page_title", "page_footnote", "page_source")
    @classmethod
    def validate_page_placement(cls, v):
        """Require the placement to be 'first', 'last', or 'all'."""
        valid_options = {"first", "last", "all"}
        if v not in valid_options:
            raise ValueError(
                f"Invalid page placement option '{v}'. Must be one of {valid_options}"
            )
        return v

    @field_validator("width", "height", "nrow", "col_width")
    @classmethod
    def validate_width_height(cls, v, info: ValidationInfo):
        """Require a supplied dimension or row count to be strictly positive.

        Raises:
            ValueError: If ``v`` is not ``None`` and is less than or equal to
                zero; the message names the offending field.
        """
        if v is not None and v <= 0:
            label = (info.field_name or "field").capitalize()
            raise ValueError(f"{label} must be greater than 0.")
        return v

    def __init__(self, **data):
        """Validate ``data`` and then fill in orientation-based defaults."""
        super().__init__(**data)
        self._set_default()

    def _set_default(self):
        """Set default values based on page orientation and return ``self``."""
        if self.orientation == "portrait":
            self._set_portrait_defaults()
        elif self.orientation == "landscape":
            self._set_landscape_defaults()

        self._validate_margin_length()
        return self

    def _set_portrait_defaults(self) -> None:
        """Fill unset (falsy) layout fields with the portrait defaults."""
        self.width = self.width or 8.5
        self.height = self.height or 11
        self.margin = self.margin or [1.25, 1, 1.75, 1.25, 1.75, 1.00625]
        # Depends on the width resolved above.
        self.col_width = self.col_width or self.width - 2.25
        self.nrow = self.nrow or 40

    def _set_landscape_defaults(self) -> None:
        """Fill unset (falsy) layout fields with the landscape defaults."""
        self.width = self.width or 11
        self.height = self.height or 8.5
        self.margin = self.margin or [1.0, 1.0, 2, 1.25, 1.25, 1.25]
        # Depends on the width resolved above.
        self.col_width = self.col_width or self.width - 2.5
        self.nrow = self.nrow or 24

    def _validate_margin_length(self) -> None:
        """Validate that margin has exactly 6 values.

        Raises:
            ValueError: If ``margin`` is set and its length is not 6.
        """
        if self.margin is not None and len(self.margin) != 6:
            raise ValueError("Margin length must be 6.")


class RTFPageHeader(RTFTextComponent):
    """RTF page header component for document headers.

    The RTFPageHeader appears at the top of every page, typically used for
    page numbering, document titles, or study identifiers. Right-aligned by
    default with automatic page numbering.

    Examples:
        Default page numbering:
        ```python
        header = RTFPageHeader()  # Shows "Page X of Y"
        ```

        Custom header text:
        ```python
        header = RTFPageHeader(
            text="Protocol ABC-123 | Confidential",
            text_justification=["c"]  # Center align
        )
        ```

        Header with page number:
        ```python
        header = RTFPageHeader(
            text="Study Report - Page \\\\chpgn",  # Current page number
            text_format=["b"],  # Bold
            text_font_size=[10]
        )
        ```

    Note:
        - Default text is "Page \\\\chpgn of {\\\\field{\\\\*\\\\fldinst NUMPAGES }}"
        - Text conversion is disabled by default to preserve RTF field codes
        - Right-aligned by default
    """

    def __init__(self, **data):
        """Create the header, supplying the page-numbering text when none is given."""
        # Only a missing ``text`` key triggers the default; an explicit value
        # (including ``None``) is passed through as is.
        data.setdefault("text", "Page \\chpgn of {\\field{\\*\\fldinst NUMPAGES }}")
        super().__init__(**data)

    def _get_component_defaults(self) -> dict:
        """Return the page header defaults from ``DefaultsFactory``."""
        header_defaults = DefaultsFactory.get_page_header_defaults()
        return header_defaults


class RTFPageFooter(RTFTextComponent):
    """RTF page footer component for document footers.

    The RTFPageFooter appears at the bottom of every page, typically used for
    confidentiality notices, timestamps, or file paths. Center-aligned by default.

    Examples:
        Simple footer:
        ```python
        footer = RTFPageFooter(
            text="Company Confidential"
        )
        ```

        Multi-line footer:
        ```python
        footer = RTFPageFooter(
            text=[
                "Proprietary and Confidential",
                "Do Not Distribute"
            ],
            text_font_size=[8, 8]
        )
        ```

        Footer with timestamp:
        ```python
        footer = RTFPageFooter(
            text="Generated: 2024-01-15 14:30:00 | program.py",
            text_justification=["l"],  # Left align
            text_font_size=[8]
        )
        ```

    Note:
        - Center-aligned by default
        - Text conversion is disabled by default to preserve special characters
        - Appears on every page of the document
    """

    def _get_component_defaults(self) -> dict:
        """Return the page footer defaults from ``DefaultsFactory``."""
        footer_defaults = DefaultsFactory.get_page_footer_defaults()
        return footer_defaults


class RTFSubline(RTFTextComponent):
    """RTF subline component; its defaults left-align the text."""

    def _get_component_defaults(self) -> dict:
        """Return the subline defaults from ``DefaultsFactory``."""
        subline_defaults = DefaultsFactory.get_subline_defaults()
        return subline_defaults


class RTFTableTextComponent(TableAttributes):
    """Consolidated base class for table-based text components (footnotes and sources).

    This class unifies RTFFootnote and RTFSource which share nearly identical structure
    with only different default values for as_table and text justification.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(default=None, description="Text content")
    as_table: bool = Field(
        description="Whether to render as table (True) or plain text (False)",
    )

    @field_validator("text", mode="before")
    @classmethod
    def convert_text(cls, v):
        """Wrap a bare string in a one-item list before field validation."""
        return ValidationHelpers.convert_string_to_sequence(v)

    @field_validator("as_table", mode="before")
    @classmethod
    def validate_as_table(cls, v):
        """Require ``as_table`` to be a real ``bool`` (no coercion)."""
        return ValidationHelpers.validate_boolean_field(v, "as_table")

    def __init__(self, **data):
        """Build the component from mode-dependent defaults overlaid with ``data``.

        Args:
            **data: Field values supplied by the caller. When ``as_table`` is
                absent, the component's own default is used; the chosen mode
                then selects the border defaults.
        """
        # Set as_table default if not provided
        if "as_table" not in data:
            data["as_table"] = self._get_default_as_table()

        merged = self._get_component_table_defaults(data["as_table"])
        merged.update(data)  # caller-supplied values win over defaults
        super().__init__(**merged)
        self._process_text_conversion()

    def _get_default_as_table(self) -> bool:
        """Override in subclasses to provide component-specific as_table default."""
        return True

    def _get_component_table_defaults(self, as_table: bool) -> dict:
        """Return table defaults, then border defaults, then component overrides.

        Later sources replace earlier ones for any shared key.
        """
        return {
            **DefaultsFactory.get_table_defaults(),
            **DefaultsFactory.get_border_defaults(as_table),
            **self._get_component_overrides(),
        }

    def _get_component_overrides(self) -> dict:
        """Override in subclasses to provide component-specific overrides."""
        return {"text_convert": [[True]]}  # Default: enable text conversion

    def _process_text_conversion(self) -> None:
        """Collapse the text sequence into one string joined by ``"\\line "``.

        An empty sequence is replaced by an empty list; ``None`` and
        non-sequence values are left untouched.
        """
        content = self.text
        if content is None or not isinstance(content, Sequence):
            return
        self.text = "\\line ".join(content) if len(content) != 0 else []

    def _set_default(self):
        """Wrap every scalar attribute in a one-item list and return ``self``."""
        # Snapshot the items so that assigning attributes cannot disturb the
        # dictionary iterator.
        for attr, value in list(self.__dict__.items()):
            if isinstance(value, (str, int, float, bool)):
                setattr(self, attr, [value])
        return self


class RTFFootnote(RTFTableTextComponent):
    """RTF footnote component for explanatory notes and citations.

    The RTFFootnote component displays footnote text at the bottom of tables.
    Supports multiple footnote lines and can be rendered as a table (with borders)
    or plain text. Text conversion is enabled by default.

    Examples:
        Single footnote:
        ```python
        footnote = RTFFootnote(
            text="CI = Confidence Interval; N = Number of subjects"
        )
        ```

        Multiple footnotes:
        ```python
        footnote = RTFFootnote(
            text=[
                "* p-value from ANCOVA model",
                "** Missing values were imputed using LOCF",
                "*** Baseline is defined as last value before first dose"
            ]
        )
        ```

        Footnote without table borders:
        ```python
        footnote = RTFFootnote(
            text="Data cutoff date: 2023-12-31",
            as_table=False  # No borders around footnote
        )
        ```

    Note:
        - Multiple footnote lines are joined with \\\\line separator
        - Text conversion is enabled by default (LaTeX symbols supported)
        - Default rendering includes table borders (as_table=True)
    """

    def _get_default_as_table(self) -> bool:
        """Footnotes render as a table unless the caller says otherwise."""
        render_as_table = True
        return render_as_table


class RTFSource(RTFTableTextComponent):
    """RTF source component for data source citations.

    The RTFSource component displays source information at the very bottom
    of the document. Typically used for dataset names, program references,
    or generation timestamps. Rendered as plain text without borders by default.

    Examples:
        Simple source citation:
        ```python
        source = RTFSource(
            text="Source: ADAE dataset, generated 2024-01-15"
        )
        ```

        Multiple source lines:
        ```python
        source = RTFSource(
            text=[
                "Dataset: ADAE version 3.0",
                "Program: ae_summary.py",
                "Generated: 2024-01-15 14:30:00"
            ]
        )
        ```

        Source with table borders:
        ```python
        source = RTFSource(
            text="Database lock: 2023-12-31",
            as_table=True,  # Add borders around source
            text_justification=[["l"]]  # Left align instead of center
        )
        ```

    Note:
        - Center-aligned by default
        - Rendered without borders by default (as_table=False)
        - Text conversion is enabled by default
    """

    def _get_default_as_table(self) -> bool:
        """Sources render as plain text unless the caller says otherwise."""
        render_as_table = False
        return render_as_table

    def _get_component_overrides(self) -> dict:
        """Return the parent overrides plus center justification for sources."""
        return {
            **super()._get_component_overrides(),
            "text_justification": [["c"]],
        }


class RTFTitle(RTFTextComponent):
    """RTF title component with center-aligned text and LaTeX conversion enabled.

    The RTFTitle component displays centered title text at the top of the document
    or table. It supports multiple title lines and LaTeX-style text conversion
    for mathematical symbols and formatting.

    Examples:
        Single line title:
        ```python
        title = RTFTitle(text="Adverse Events Summary")
        ```

        Multi-line title with formatting:
        ```python
        title = RTFTitle(
            text=["Clinical Study Report", "Safety Analysis Set"],
            text_format=["b", ""]  # First line bold, second normal
        )
        ```

        Title with LaTeX symbols:
        ```python
        title = RTFTitle(
            text="Efficacy Analysis (\\\\alpha = 0.05)"
        )
        # Renders as: Efficacy Analysis (alpha = 0.05) with Greek alpha symbol
        ```

    Note:
        Text conversion is enabled by default for titles, converting:
        - LaTeX symbols (e.g., \\\\alpha to Greek alpha, \\\\beta to Greek beta)
        - Subscripts (e.g., x_1 to x with subscript 1)
        - Other mathematical notation
    """

    def _get_component_defaults(self) -> dict:
        """Return the title defaults from ``DefaultsFactory``."""
        title_defaults = DefaultsFactory.get_title_defaults()
        return title_defaults


class RTFColumnHeader(TableAttributes):
    """Configure column headers for RTF tables.

    The RTFColumnHeader component defines column headers that appear at the
    top of tables and repeat on each page in multi-page documents. Supports
    multi-row headers and flexible column spanning.

    Examples:
        Simple column headers:
        ```python
        header = RTFColumnHeader(
            text=["Name", "Age", "Treatment", "Response"]
        )
        ```

        Headers with custom formatting:
        ```python
        header = RTFColumnHeader(
            text=["Subject", "Baseline", "Week 4", "Week 8"],
            text_format=["b", "b", "b", "b"],  # All bold
            text_justification=["l", "c", "c", "c"],  # Left, center, center, center
            border_bottom=["double", "double", "double", "double"]
        )
        ```

        Multi-row headers with col_rel_width:
        ```python
        # First row spans multiple columns
        header1 = RTFColumnHeader(
            text=["Patient Info", "Treatment Results"],
            col_rel_width=[2, 3]  # Spans 2 and 3 columns respectively
        )
        # Second row with individual columns
        header2 = RTFColumnHeader(
            text=["ID", "Age", "Drug A", "Drug B", "Placebo"],
            col_rel_width=[1, 1, 1, 1, 1]
        )
        ```

    Note:
        - Headers automatically repeat on each page in multi-page documents
        - Use col_rel_width to create spanning headers
        - Border styles from RTFPage are applied to the first row
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    text: Sequence[str] | None = Field(
        default=None, description="Column header text. List of strings, one per column."
    )

    @field_validator("text", mode="before")
    @classmethod
    def convert_text_before(cls, v):
        """Normalize raw ``text`` input before field validation.

        A bare string becomes a one-item list; a list or tuple made up
        entirely of strings is copied into a new list; anything else
        (including ``None``) is returned unchanged.
        """
        if v is None:
            return v
        if isinstance(v, str):
            return [v]
        if isinstance(v, (list, tuple)) and all(isinstance(item, str) for item in v):
            return list(v)
        return v

    @field_validator("text", mode="after")
    @classmethod
    def convert_text_after(cls, v):
        """Convert a validated list/tuple into a single-row polars DataFrame.

        Columns are named ``col_1`` .. ``col_n``. When polars cannot be
        imported, or ``v`` is not a list/tuple, ``v`` is returned unchanged.
        """
        if v is None or not isinstance(v, (list, tuple)):
            return v
        # polars is optional: only the import is guarded, so errors raised
        # while building the frame still surface to the caller.
        try:
            import polars as pl
        except ImportError:
            return v
        schema = [f"col_{position}" for position in range(1, len(v) + 1)]
        return pl.DataFrame([v], schema=schema, orient="row")

    def __init__(self, **data):
        """Build the header from its defaults overlaid with ``data``.

        Args:
            **data: Field values supplied by the caller. A legacy ``df``
                keyword is translated to ``text`` when ``text`` is absent.
        """
        data = self._handle_backwards_compatibility(data)
        merged = {**self._get_column_header_defaults(), **data}
        super().__init__(**merged)
        self._set_default()

    def _handle_backwards_compatibility(self, data: dict) -> dict:
        """Handle backwards compatibility for df parameter.

        ``df`` is consumed only when ``text`` was not supplied; otherwise
        ``data`` is returned untouched.
        """
        if "df" in data and "text" not in data:
            data["text"] = self._convert_dataframe_to_text(data.pop("df"))
        return data

    def _convert_dataframe_to_text(self, df) -> list | None:
        """Convert a polars DataFrame to a text list.

        Returns:
            The header strings, or ``None`` when polars is unavailable or
            ``df`` is not a polars DataFrame.
        """
        try:
            import polars as pl
        except ImportError:
            return None
        if isinstance(df, pl.DataFrame):
            return self._handle_dataframe_orientation(df)
        return None

    def _handle_dataframe_orientation(self, df) -> list:
        """Extract header values from ``df`` according to its shape."""
        n_rows, n_cols = df.shape
        if n_rows > 1 and n_cols == 1:
            # Column-oriented: the single column holds one header per row.
            return df.get_column(df.columns[0]).to_list()
        # Row-oriented: take the first row.
        return list(df.row(0))

    def _get_column_header_defaults(self) -> dict:
        """Get default configuration for column headers."""
        return {
            "border_left": ["single"],
            "border_right": ["single"],
            "border_top": ["single"],
            "border_bottom": [""],
            "border_width": [15],
            "cell_height": [0.15],
            "cell_justification": ["c"],
            "cell_vertical_justification": ["bottom"],
            "text_font": [1],
            "text_format": [""],
            "text_font_size": [9],
            "text_justification": ["c"],
            "text_indent_first": [0],
            "text_indent_left": [0],
            "text_indent_right": [0],
            "text_space": [1],
            "text_space_before": [15],
            "text_space_after": [15],
            "text_hyphenation": [False],
            "text_convert": [True],
        }

    def _set_default(self):
        """Wrap every scalar attribute in a one-item list and return ``self``."""
        # Snapshot the items so that assigning attributes cannot disturb the
        # dictionary iterator.
        for attr, value in list(self.__dict__.items()):
            if isinstance(value, (str, int, float, bool)):
                setattr(self, attr, [value])

        return self


class RTFBody(TableAttributes):
    """Configure table body formatting and layout.

    The RTFBody component controls how data is displayed in the RTF table,
    including column widths, text formatting, borders, and advanced features
    like group_by for value suppression and subline_by for section headers.

    Examples:
        Basic table with custom column widths:
        ```python
        body = RTFBody(
            col_rel_width=[3, 2, 2, 2],
            text_justification=[["l", "c", "c", "c"]]
        )
        ```

        Using group_by to suppress duplicate values:
        ```python
        body = RTFBody(
            group_by=["SITE", "SUBJECT"],
            col_rel_width=[2, 2, 3, 1]
        )
        ```

        Using subline_by for section headers:
        ```python
        body = RTFBody(
            subline_by=["SITE", "STUDY"],  # Creates paragraph headers
            col_rel_width=[3, 2, 2]  # Note: subline_by columns are removed from table
        )
        ```

    Note:
        When using subline_by:
        - The specified columns are removed from the table display
        - Values appear as paragraph headers before each section
        - Pagination is automatically enabled (new_page=True)
        - Formatting attributes apply uniformly to the entire table
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    as_colheader: bool = Field(
        default=True, description="Whether to display column headers"
    )
    group_by: Sequence[str] | None = Field(
        default=None,
        description="Column names for hierarchical value suppression. Values are shown only on first occurrence within groups, with page context restoration for multi-page tables.",
    )
    page_by: Sequence[str] | None = Field(
        default=None,
        description="Column names to trigger page breaks when values change",
    )
    new_page: bool = Field(
        default=False,
        description="Force new page before table. Automatically set to True when using subline_by.",
    )
    pageby_header: bool = Field(
        default=True, description="Repeat column headers on new pages"
    )
    pageby_row: str = Field(
        default="column",
        description="Page break handling: 'column' (keep column) or 'first_row' (use first row as header)",
    )
    subline_by: Sequence[str] | None = Field(
        default=None,
        description="Column names to create paragraph headers. These columns are removed from the table and their values appear as section headers above each group. Forces pagination.",
    )
    last_row: bool = Field(
        default=True,
        description="Whether the table contains the last row of the final table",
    )

    @field_validator("group_by", "page_by", "subline_by", mode="before")
    @classmethod
    def convert_text(cls, v):
        """Wrap a bare column name in a one-item list before validation."""
        return [v] if isinstance(v, str) else v

    @field_validator("pageby_row")
    @classmethod
    def validate_pageby_row(cls, v):
        """Reject any ``pageby_row`` other than 'column' or 'first_row'."""
        if v not in ["column", "first_row"]:
            raise ValueError(
                f"Invalid pageby_row. Must be 'column' or 'first_row'. Given: {v}"
            )
        return v

    def __init__(self, **data):
        """Build the body from its defaults overlaid with ``data``.

        Args:
            **data: Field values supplied by the caller; any key given here
                replaces the body default of the same name.
        """
        defaults = {
            "border_left": [["single"]],
            "border_right": [["single"]],
            "border_first": [["single"]],
            "border_last": [["single"]],
            "border_width": [[15]],
            "cell_height": [[0.15]],
            "cell_justification": [["c"]],
            "cell_vertical_justification": [["top"]],
            "text_font": [[1]],
            "text_font_size": [[9]],
            "text_indent_first": [[0]],
            "text_indent_left": [[0]],
            "text_indent_right": [[0]],
            "text_space": [[1]],
            "text_space_before": [[15]],
            "text_space_after": [[15]],
            "text_hyphenation": [[False]],
            "text_convert": [[True]],
        }

        # Caller-supplied values win over the defaults.
        super().__init__(**{**defaults, **data})
        self._set_default()

    def _set_default(self):
        """Normalize attributes, fill border defaults, validate, return ``self``."""
        self._set_table_attribute_defaults()
        self._set_border_defaults()
        self._validate_page_by_logic()
        return self

    def _set_table_attribute_defaults(self) -> None:
        """Set default values for table attributes, excluding special control attributes.

        Scalar attribute values are wrapped in a one-item list, except for the
        control fields listed below, which keep their scalar form.
        """
        excluded_attrs = {
            "as_colheader",
            "page_by",
            "new_page",
            "pageby_header",
            "pageby_row",
            "subline_by",
            "last_row",
        }

        # Snapshot the items so that assigning attributes cannot disturb the
        # dictionary iterator.
        for attr, value in list(self.__dict__.items()):
            if attr in excluded_attrs:
                continue
            if isinstance(value, (str, int, float, bool)):
                setattr(self, attr, [value])

    def _set_border_defaults(self) -> None:
        """Fill unset (falsy) border and justification attributes with defaults."""
        self.border_top = self.border_top or [[""]]
        self.border_bottom = self.border_bottom or [[""]]
        self.border_left = self.border_left or [["single"]]
        self.border_right = self.border_right or [["single"]]
        self.border_first = self.border_first or [["single"]]
        self.border_last = self.border_last or [["single"]]
        self.cell_vertical_justification = self.cell_vertical_justification or [
            ["center"]
        ]
        self.text_justification = self.text_justification or [["c"]]

    def _validate_page_by_logic(self) -> None:
        """Validate that page_by and new_page settings are consistent.

        Raises:
            ValueError: If ``new_page`` is true while ``page_by`` is ``None``.
        """
        if self.page_by is None and self.new_page:
            raise ValueError("`new_page` must be `False` if `page_by` is not specified")


class RTFFigure(BaseModel):
    """RTF Figure component for embedding images in RTF documents.

    This class handles figure embedding with support for multiple images,
    custom sizing, and proper RTF encoding.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Figure data
    figures: str | Path | list[str | Path] | None = Field(
        default=None,
        description="Image file path(s) - single path or list of paths to PNG, JPEG, or EMF files",
    )

    # Figure dimensions
    fig_height: float | list[float] = Field(
        default=5.0, description="Height of figures in inches (single value or list)"
    )
    fig_width: float | list[float] = Field(
        default=5.0, description="Width of figures in inches (single value or list)"
    )

    # Figure positioning
    fig_align: str = Field(
        default="center",
        description="Horizontal alignment of figures ('left', 'center', 'right')",
    )
    fig_pos: str = Field(
        default="after",
        description="Position relative to table content ('before' or 'after')",
    )

    @field_validator("fig_height", "fig_width", mode="before")
    @classmethod
    def convert_dimensions(cls, v):
        """Convert single value to list if needed."""
        return [v] if isinstance(v, (int, float)) else v

    @field_validator("fig_align")
    @classmethod
    def validate_alignment(cls, v):
        """Validate figure alignment value.

        Raises:
            ValueError: If ``v`` is not 'left', 'center', or 'right'.
        """
        valid_alignments = ["left", "center", "right"]
        if v not in valid_alignments:
            raise ValueError(
                f"Invalid fig_align. Must be one of {valid_alignments}. Given: {v}"
            )
        return v

    @field_validator("fig_pos")
    @classmethod
    def validate_position(cls, v):
        """Validate figure position value.

        Raises:
            ValueError: If ``v`` is not 'before' or 'after'.
        """
        valid_positions = ["before", "after"]
        if v not in valid_positions:
            raise ValueError(
                f"Invalid fig_pos. Must be one of {valid_positions}. Given: {v}"
            )
        return v

    @model_validator(mode="after")
    def validate_figure_data(self):
        """Validate figure paths and convert to list format.

        Raises:
            FileNotFoundError: If any listed figure path does not exist.
        """
        if self.figures is None:
            return self

        # Convert single path to list
        if isinstance(self.figures, (str, Path)):
            self.figures = [self.figures]

        # Validate that all files exist
        for fig_path in self.figures:
            if not Path(fig_path).exists():
                raise FileNotFoundError(f"Figure file not found: {fig_path}")

        return self