Coverage for src/rtflite/encode.py: 75%
130 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-10 22:38 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-10 22:38 +0000
1"""RTF Document class - main entry point for RTF generation.
3This module provides the RTFDocument class with a clean, service-oriented architecture.
4All complex logic has been delegated to specialized services and strategies.
5"""
7from collections.abc import Sequence
9import polars as pl
10from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
12from .input import (
13 RTFBody,
14 RTFColumnHeader,
15 RTFFigure,
16 RTFFootnote,
17 RTFPage,
18 RTFPageFooter,
19 RTFPageHeader,
20 RTFSource,
21 RTFSubline,
22 RTFTitle,
23)
24from .row import Utils
class RTFDocument(BaseModel):
    """Main class for creating RTF documents with tables, text, and figures.

    RTFDocument is the central class for generating Rich Text Format (RTF) files
    containing formatted tables, titles, footnotes, and other document elements.
    It provides a comprehensive API for creating professional documents commonly
    used in clinical trials, scientific research, and data reporting.

    A document is built either from tabular data (``df``) or from a figure
    (``rtf_figure``); exactly one of the two must be provided.

    Examples:
        Simple table with title:
        ```python
        import rtflite as rtf
        import polars as pl

        df = pl.DataFrame({
            "Subject": ["001", "002", "003"],
            "Age": [45, 52, 38],
            "Treatment": ["Drug A", "Drug B", "Placebo"]
        })

        doc = rtf.RTFDocument(
            df=df,
            rtf_title=rtf.RTFTitle(text="Patient Demographics"),
            rtf_body=rtf.RTFBody(col_rel_width=[2, 1, 2])
        )
        doc.write_rtf("demographics.rtf")
        ```

        Multi-page document with headers and footers:
        ```python
        doc = rtf.RTFDocument(
            df=large_df,
            rtf_page=rtf.RTFPage(nrow=40, orientation="landscape"),
            rtf_page_header=rtf.RTFPageHeader(),  # Default page numbering
            rtf_page_footer=rtf.RTFPageFooter(text="Confidential"),
            rtf_title=rtf.RTFTitle(text="Clinical Study Results"),
            rtf_column_header=rtf.RTFColumnHeader(
                text=["Subject ID", "Visit", "Result", "Units"]
            ),
            rtf_body=rtf.RTFBody(
                col_rel_width=[2, 1, 1, 1],
                text_justification=[["l", "c", "r", "c"]]
            ),
            rtf_footnote=rtf.RTFFootnote(
                text="Results are mean +/- SD"
            )
        )
        doc.write_rtf("results.rtf")
        ```

        Document with grouped data and sublines:
        ```python
        doc = rtf.RTFDocument(
            df=grouped_df,
            rtf_body=rtf.RTFBody(
                group_by=["SITE", "TREATMENT"],  # Suppress duplicate values
                subline_by=["STUDY_PHASE"],      # Create section headers
                col_rel_width=[2, 2, 1, 1]
            )
        )
        ```

    Attributes:
        df: Data to display in the table. Can be a single DataFrame or list of
            DataFrames for multi-section documents. Accepts pandas or polars
            DataFrames (automatically converted to polars internally).

        rtf_page: Page configuration including size, orientation, margins, and
            pagination settings.

        rtf_page_header: Optional header appearing at the top of every page.

        rtf_page_footer: Optional footer appearing at the bottom of every page.

        rtf_title: Document title(s) displayed at the top.

        rtf_subline: Optional subject line displayed below the title.

        rtf_column_header: Column headers for the table. Can be a single header
            or list of headers for multi-row headers.

        rtf_body: Table body configuration including column widths, formatting,
            borders, and special features like group_by and subline_by.

        rtf_footnote: Optional footnote text displayed after the table.

        rtf_source: Optional source citation displayed at the very bottom.

        rtf_figure: Optional figure/image to embed in the document.

    Methods:
        rtf_encode(): Generate the complete RTF document as a string.
        write_rtf(file_path): Write the RTF document to a file.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Core data
    df: pl.DataFrame | list[pl.DataFrame] | None = Field(
        default=None,
        description="The DataFrame(s) containing the data for the RTF document. Accepts single DataFrame or list of DataFrames for multi-section documents. Accepts pandas or polars DataFrame, internally converted to polars. Optional when using figure-only documents.",
    )

    # Document structure
    rtf_page: RTFPage = Field(
        default_factory=lambda: RTFPage(),
        description="Page settings including size, orientation and margins",
    )
    rtf_page_header: RTFPageHeader | None = Field(
        default=None, description="Text to appear in the header of each page"
    )
    rtf_title: RTFTitle | None = Field(
        default_factory=lambda: RTFTitle(),
        description="Title section settings including text and formatting",
    )
    rtf_subline: RTFSubline | None = Field(
        default=None, description="Subject line text to appear below the title"
    )
    rtf_column_header: (
        Sequence[RTFColumnHeader] | Sequence[Sequence[RTFColumnHeader | None]]
    ) = Field(
        default_factory=lambda: [RTFColumnHeader()],
        description="Column header settings. For multi-section documents, use nested list format: [[header1], [header2], [None]] where None means no header for that section.",
    )
    rtf_body: RTFBody | Sequence[RTFBody] | None = Field(
        default_factory=lambda: RTFBody(),
        description="Table body section settings including column widths and formatting. For multi-section documents, provide a list of RTFBody objects.",
    )
    rtf_footnote: RTFFootnote | None = Field(
        default=None, description="Footnote text to appear at bottom of document"
    )
    rtf_source: RTFSource | None = Field(
        default=None, description="Data source citation text"
    )
    rtf_page_footer: RTFPageFooter | None = Field(
        default=None, description="Text to appear in the footer of each page"
    )
    rtf_figure: RTFFigure | None = Field(
        default=None, description="Figure/image content to embed in the document"
    )

    @field_validator("rtf_column_header", mode="before")
    @classmethod
    def convert_column_header_to_list(cls, v):
        """Wrap a single RTFColumnHeader in a list; pass any other value through.

        Flat and nested list inputs are returned unchanged and left to the
        field's own type validation.
        """
        if isinstance(v, RTFColumnHeader):
            return [v]
        return v

    @model_validator(mode="before")
    @classmethod
    def validate_dataframe(cls, values):
        """Convert the ``df`` input (single frame or list of frames) to polars.

        Polars DataFrames are kept as-is. Anything else is wrapped with
        narwhals and converted to a polars DataFrame.

        Raises:
            ValueError: If a frame cannot be interpreted or converted. The
                underlying error is chained as ``__cause__``.
        """
        if "df" not in values or values["df"] is None:
            return values

        # Imported lazily so narwhals is only loaded when data is supplied.
        import narwhals as nw

        def to_polars(frame, error_prefix):
            """Return *frame* as a polars DataFrame or raise ValueError."""
            if isinstance(frame, pl.DataFrame):
                return frame
            try:
                # `DataFrame.to_native()` would hand back the original
                # (e.g. pandas) object; `to_polars()` performs the conversion
                # that the `df` field type requires.
                return nw.from_native(frame).to_polars()
            except Exception as e:
                raise ValueError(f"{error_prefix}: {str(e)}") from e

        df = values["df"]
        if isinstance(df, list):
            # Multi-section input: convert every section, reporting the index
            # of the first frame that fails.
            values["df"] = [
                to_polars(
                    single_df, f"DataFrame at index {i} must be a valid DataFrame"
                )
                for i, single_df in enumerate(df)
            ]
        else:
            values["df"] = to_polars(df, "DataFrame must be a valid DataFrame")
        return values

    @model_validator(mode="after")
    def validate_column_names(self):
        """Validate df/figure exclusivity, section consistency and column references.

        Raises:
            ValueError: If neither or both of ``df`` and ``rtf_figure`` are
                given, if a figure is combined with table-style footnote or
                source, if multi-section list lengths disagree, if
                ``rtf_body`` does not match the shape of ``df``, or if a
                ``group_by`` / ``page_by`` / ``subline_by`` column is missing.
        """
        # Validate df and rtf_figure usage
        if self.df is None and self.rtf_figure is None:
            raise ValueError("Either 'df' or 'rtf_figure' must be provided")

        if self.df is not None and self.rtf_figure is not None:
            raise ValueError(
                "Cannot use both 'df' and 'rtf_figure' together. Use either tables or figures in a single document."
            )

        # When RTFFigure is used, enforce as_table=False for footnotes and sources
        if self.rtf_figure is not None:
            if self.rtf_footnote is not None and getattr(
                self.rtf_footnote, "as_table", True
            ):
                raise ValueError(
                    "When using RTFFigure, RTFFootnote must have as_table=False"
                )
            if self.rtf_source is not None and getattr(
                self.rtf_source, "as_table", False
            ):
                raise ValueError(
                    "When using RTFFigure, RTFSource must have as_table=False"
                )

        # Skip column validation if no DataFrame provided (figure-only documents)
        if self.df is None:
            return self

        if isinstance(self.df, list):
            # Multi-section: rtf_body must be a list of matching length
            if not isinstance(self.rtf_body, list):
                raise ValueError("When df is a list, rtf_body must also be a list")
            if len(self.df) != len(self.rtf_body):
                raise ValueError(
                    f"df list length ({len(self.df)}) must match rtf_body list length ({len(self.rtf_body)})"
                )

            # Nested header format needs one entry per section. The emptiness
            # guard mirrors __init__ and avoids an IndexError on an empty list.
            if self.rtf_column_header and isinstance(self.rtf_column_header[0], list):
                if len(self.rtf_column_header) != len(self.df):
                    raise ValueError(
                        f"rtf_column_header nested list length ({len(self.rtf_column_header)}) must match df list length ({len(self.df)})"
                    )

            # Per-section column validation
            for i, (section_df, section_body) in enumerate(zip(self.df, self.rtf_body)):
                self._validate_section_columns(section_df, section_body, i)
        else:
            # Single section: the body must be one RTFBody. None or a sequence
            # would otherwise fail later with an opaque AttributeError.
            if not isinstance(self.rtf_body, RTFBody):
                raise ValueError(
                    "When df is a single DataFrame, rtf_body must be a single RTFBody"
                )
            self._validate_section_columns(self.df, self.rtf_body, 0)

        return self

    def _validate_section_columns(self, df, body, section_index):
        """Check that every column named by *body* exists in *df*.

        Args:
            df: Section data; only its ``columns`` attribute is read.
            body: Section body whose ``group_by``, ``page_by`` and
                ``subline_by`` settings are checked (``None`` means unset).
            section_index: Position of the section, used in the error message.

        Raises:
            ValueError: On the first referenced column missing from *df*.
        """
        columns = df.columns
        section_label = f"section {section_index}" if section_index > 0 else "df"

        for option in ("group_by", "page_by", "subline_by"):
            referenced = getattr(body, option)
            if referenced is None:
                continue
            for column in referenced:
                if column not in columns:
                    raise ValueError(
                        f"`{option}` column {column} not found in {section_label}"
                    )

    def __init__(self, **data):
        """Validate the inputs, then fill in default widths and table spacing.

        After validation, every body without ``col_rel_width`` gets equal
        relative widths (one per DataFrame column), and every column header
        without ``col_rel_width`` inherits a copy of its body's widths.
        """
        super().__init__(**data)

        # Set default column widths based on DataFrame dimensions (if DataFrame provided)
        if self.df is not None:
            is_multi_section = isinstance(self.df, list)

            if is_multi_section:
                # Handle multi-section documents
                for section_df, section_body in zip(self.df, self.rtf_body):
                    dim = section_df.shape
                    section_body.col_rel_width = (
                        section_body.col_rel_width or [1] * dim[1]
                    )

                # Handle column headers for multi-section
                if self.rtf_column_header and isinstance(
                    self.rtf_column_header[0], list
                ):
                    # Nested list format: [[header1], [header2], [None]]
                    for section_headers, section_body in zip(
                        self.rtf_column_header, self.rtf_body
                    ):
                        if section_headers:  # Skip empty header lists
                            for header in section_headers:
                                # None entries mean "no header" and are skipped
                                if header and header.col_rel_width is None:
                                    header.col_rel_width = (
                                        section_body.col_rel_width.copy()
                                    )
                elif self.rtf_column_header:
                    # Flat list format - headers inherit the first section's widths
                    for header in self.rtf_column_header:
                        if header.col_rel_width is None:
                            header.col_rel_width = self.rtf_body[0].col_rel_width.copy()
            else:
                # Handle single section documents
                dim = self.df.shape
                self.rtf_body.col_rel_width = (
                    self.rtf_body.col_rel_width or [1] * dim[1]
                )

                # Inherit col_rel_width from rtf_body to rtf_column_header if not specified
                if self.rtf_column_header:
                    for header in self.rtf_column_header:
                        if header.col_rel_width is None:
                            header.col_rel_width = self.rtf_body.col_rel_width.copy()

        # Half of the horizontal space left over beside the table, in twips
        self._table_space = int(
            Utils._inch_to_twip(self.rtf_page.width - self.rtf_page.col_width) / 2
        )

        # Apply table spacing to text components if needed
        self._apply_table_spacing()

    def _apply_table_spacing(self):
        """Add the table spacing to components indented relative to the table.

        Applies to the subline, page header and page footer whose
        ``text_indent_reference`` is ``"table"``; their ``text_space_before``
        and ``text_space_after`` are increased by ``self._table_space``.
        """
        for component in [self.rtf_subline, self.rtf_page_header, self.rtf_page_footer]:
            if component is not None and component.text_indent_reference == "table":
                component.text_space_before = (
                    self._table_space + component.text_space_before
                )
                component.text_space_after = (
                    self._table_space + component.text_space_after
                )

    def rtf_encode(self) -> str:
        """Generate the complete RTF document as a string.

        Encoding is delegated to ``RTFEncodingEngine.encode_document``. The
        resulting string can be written to a file or processed further.

        Returns:
            str: Complete RTF document string ready to be saved as an .rtf file.

        Examples:
            ```python
            doc = RTFDocument(df=data, rtf_title=RTFTitle(text="Report"))
            rtf_string = doc.rtf_encode()
            # Can write manually or process further
            with open("output.rtf", "w") as f:
                f.write(rtf_string)
            ```
        """
        # Imported here rather than at module level.
        from .encoding import RTFEncodingEngine

        engine = RTFEncodingEngine()
        return engine.encode_document(self)

    def write_rtf(self, file_path: str) -> None:
        """Write the RTF document to a file.

        Generates the complete RTF document and writes it to the specified
        file path using UTF-8 encoding. The path is used exactly as given; no
        extension is added or checked.

        Args:
            file_path: Path where the RTF file should be saved. Can be absolute
                or relative path. Directory must exist.

        Examples:
            ```python
            doc = RTFDocument(df=data, rtf_title=RTFTitle(text="Report"))
            doc.write_rtf("output/report.rtf")
            ```

        Note:
            The method prints the file path to stdout for confirmation.
            Ensure the directory exists before calling this method.
        """
        print(file_path)
        rtf_code = self.rtf_encode()
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(rtf_code)