Coverage for src/rtflite/encode.py: 75%
129 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
1"""RTF Document class - main entry point for RTF generation.
3This module provides the RTFDocument class with a clean, service-oriented architecture.
4All complex logic has been delegated to specialized services and strategies.
5"""
7import polars as pl
8from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
10from .input import (
11 RTFBody,
12 RTFColumnHeader,
13 RTFFigure,
14 RTFFootnote,
15 RTFPage,
16 RTFPageFooter,
17 RTFPageHeader,
18 RTFSource,
19 RTFSubline,
20 RTFTitle,
21)
22from .row import Utils
class RTFDocument(BaseModel):
    """Main class for creating RTF documents with tables, text, and figures.

    RTFDocument is the central class for generating Rich Text Format (RTF) files
    containing formatted tables, titles, footnotes, and other document elements.
    It provides a comprehensive API for creating professional documents commonly
    used in clinical trials, scientific research, and data reporting.

    A document is built either from tabular data (``df``) or from a figure
    (``rtf_figure``); exactly one of the two must be provided.

    Examples:
        Simple table with title:
        ```python
        import rtflite as rtf
        import polars as pl

        df = pl.DataFrame({
            "Subject": ["001", "002", "003"],
            "Age": [45, 52, 38],
            "Treatment": ["Drug A", "Drug B", "Placebo"]
        })

        doc = rtf.RTFDocument(
            df=df,
            rtf_title=rtf.RTFTitle(text="Patient Demographics"),
            rtf_body=rtf.RTFBody(col_rel_width=[2, 1, 2])
        )
        doc.write_rtf("demographics.rtf")
        ```

        Multi-page document with headers and footers:
        ```python
        doc = rtf.RTFDocument(
            df=large_df,
            rtf_page=rtf.RTFPage(nrow=40, orientation="landscape"),
            rtf_page_header=rtf.RTFPageHeader(),  # Default page numbering
            rtf_page_footer=rtf.RTFPageFooter(text="Confidential"),
            rtf_title=rtf.RTFTitle(text="Clinical Study Results"),
            rtf_column_header=rtf.RTFColumnHeader(
                text=["Subject ID", "Visit", "Result", "Units"]
            ),
            rtf_body=rtf.RTFBody(
                col_rel_width=[2, 1, 1, 1],
                text_justification=[["l", "c", "r", "c"]]
            ),
            rtf_footnote=rtf.RTFFootnote(
                text="Results are mean +/- SD"
            )
        )
        doc.write_rtf("results.rtf")
        ```

        Document with grouped data and sublines:
        ```python
        doc = rtf.RTFDocument(
            df=grouped_df,
            rtf_body=rtf.RTFBody(
                group_by=["SITE", "TREATMENT"],  # Suppress duplicate values
                subline_by=["STUDY_PHASE"],  # Create section headers
                col_rel_width=[2, 2, 1, 1]
            )
        )
        ```

    Attributes:
        df: Data to display in the table. Can be a single DataFrame or list of
            DataFrames for multi-section documents. Accepts pandas or polars
            DataFrames (automatically converted to polars internally).

        rtf_page: Page configuration including size, orientation, margins, and
            pagination settings.

        rtf_page_header: Optional header appearing at the top of every page.

        rtf_page_footer: Optional footer appearing at the bottom of every page.

        rtf_title: Document title(s) displayed at the top.

        rtf_subline: Optional subject line displayed below the title.

        rtf_column_header: Column headers for the table. Can be a single header
            or list of headers for multi-row headers.

        rtf_body: Table body configuration including column widths, formatting,
            borders, and special features like group_by and subline_by.

        rtf_footnote: Optional footnote text displayed after the table.

        rtf_source: Optional source citation displayed at the very bottom.

        rtf_figure: Optional figure/image to embed in the document.

    Methods:
        rtf_encode(): Generate the complete RTF document as a string.
        write_rtf(file_path): Write the RTF document to a file.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Core data
    df: pl.DataFrame | list[pl.DataFrame] | None = Field(
        default=None,
        description="The DataFrame(s) containing the data for the RTF document. Accepts single DataFrame or list of DataFrames for multi-section documents. Accepts pandas or polars DataFrame, internally converted to polars. Optional when using figure-only documents.",
    )

    # Document structure
    rtf_page: RTFPage = Field(
        default_factory=lambda: RTFPage(),
        description="Page settings including size, orientation and margins",
    )
    rtf_page_header: RTFPageHeader | None = Field(
        default=None, description="Text to appear in the header of each page"
    )
    rtf_title: RTFTitle | None = Field(
        default_factory=lambda: RTFTitle(),
        description="Title section settings including text and formatting",
    )
    rtf_subline: RTFSubline | None = Field(
        default=None, description="Subject line text to appear below the title"
    )
    rtf_column_header: list[RTFColumnHeader] | list[list[RTFColumnHeader | None]] = (
        Field(
            default_factory=lambda: [RTFColumnHeader()],
            description="Column header settings. For multi-section documents, use nested list format: [[header1], [header2], [None]] where None means no header for that section.",
        )
    )
    rtf_body: RTFBody | list[RTFBody] | None = Field(
        default_factory=lambda: RTFBody(),
        description="Table body section settings including column widths and formatting. For multi-section documents, provide a list of RTFBody objects.",
    )
    rtf_footnote: RTFFootnote | None = Field(
        default=None, description="Footnote text to appear at bottom of document"
    )
    rtf_source: RTFSource | None = Field(
        default=None, description="Data source citation text"
    )
    rtf_page_footer: RTFPageFooter | None = Field(
        default=None, description="Text to appear in the footer of each page"
    )
    rtf_figure: RTFFigure | None = Field(
        default=None, description="Figure/image content to embed in the document"
    )

    @field_validator("rtf_column_header", mode="before")
    @classmethod
    def convert_column_header_to_list(cls, v):
        """Wrap a single RTFColumnHeader in a list; pass anything else through.

        Flat lists and nested (per-section) lists are returned unchanged so the
        regular field validation can handle them.
        """
        if isinstance(v, RTFColumnHeader):
            return [v]
        return v

    @staticmethod
    def _to_polars(frame, label):
        """Return ``frame`` as a polars DataFrame, converting via narwhals if needed.

        Args:
            frame: A polars DataFrame (returned unchanged) or any eager
                DataFrame type that narwhals can wrap.
            label: Prefix used in the error message to identify the input,
                e.g. ``"DataFrame"`` or ``"DataFrame at index 2"``.

        Raises:
            ValueError: If ``frame`` cannot be converted to a polars DataFrame.
        """
        if isinstance(frame, pl.DataFrame):
            return frame

        # Imported lazily so narwhals is only loaded when a conversion is needed.
        import narwhals as nw

        try:
            # narwhals' ``to_native()`` takes no arguments and would hand back
            # the original object; ``to_polars()`` performs the conversion.
            return nw.from_native(frame, eager_only=True).to_polars()
        except Exception as e:
            raise ValueError(f"{label} must be a valid DataFrame: {str(e)}") from e

    @model_validator(mode="before")
    @classmethod
    def validate_dataframe(cls, values):
        """Convert DataFrame(s) to polars for internal processing.

        Handles both a single DataFrame and a list of DataFrames; entries that
        are already polars DataFrames are kept as they are.

        Raises:
            ValueError: If a provided object cannot be converted to a polars
                DataFrame. For list input the message names the failing index.
        """
        # Before-validators may receive non-dict input; leave that untouched.
        if not isinstance(values, dict) or values.get("df") is None:
            return values

        df = values["df"]
        if isinstance(df, list):
            values["df"] = [
                cls._to_polars(single_df, f"DataFrame at index {i}")
                for i, single_df in enumerate(df)
            ]
        else:
            values["df"] = cls._to_polars(df, "DataFrame")
        return values

    @model_validator(mode="after")
    def validate_column_names(self):
        """Validate df/figure usage, multi-section consistency and column references.

        Raises:
            ValueError: If neither or both of ``df`` and ``rtf_figure`` are given,
                if a figure is combined with a table-style footnote/source, if
                the multi-section lists have inconsistent lengths, if the shape
                of ``rtf_body`` does not match the shape of ``df``, or if a
                ``group_by``/``page_by``/``subline_by`` column is missing.
        """
        # Validate df and rtf_figure usage
        if self.df is None and self.rtf_figure is None:
            raise ValueError("Either 'df' or 'rtf_figure' must be provided")

        if self.df is not None and self.rtf_figure is not None:
            raise ValueError(
                "Cannot use both 'df' and 'rtf_figure' together. Use either tables or figures in a single document."
            )

        # When RTFFigure is used, enforce as_table=False for footnotes and sources
        if self.rtf_figure is not None:
            if self.rtf_footnote is not None and getattr(
                self.rtf_footnote, "as_table", True
            ):
                raise ValueError(
                    "When using RTFFigure, RTFFootnote must have as_table=False"
                )
            if self.rtf_source is not None and getattr(
                self.rtf_source, "as_table", False
            ):
                raise ValueError(
                    "When using RTFFigure, RTFSource must have as_table=False"
                )

        # Skip column validation if no DataFrame provided (figure-only documents)
        if self.df is None:
            return self

        if isinstance(self.df, list):
            # Multi-section: rtf_body must be a list with matching length
            if not isinstance(self.rtf_body, list):
                raise ValueError("When df is a list, rtf_body must also be a list")
            if len(self.df) != len(self.rtf_body):
                raise ValueError(
                    f"df list length ({len(self.df)}) must match rtf_body list length ({len(self.rtf_body)})"
                )

            # Nested header lists must provide one entry per section. The
            # truthiness check avoids an IndexError on an empty header list.
            if (
                self.rtf_column_header
                and isinstance(self.rtf_column_header[0], list)
                and len(self.rtf_column_header) != len(self.df)
            ):
                raise ValueError(
                    f"rtf_column_header nested list length ({len(self.rtf_column_header)}) must match df list length ({len(self.df)})"
                )

            # Per-section column validation
            for i, (section_df, section_body) in enumerate(zip(self.df, self.rtf_body)):
                self._validate_section_columns(section_df, section_body, i)
        else:
            # Single section: fail with a clear message instead of an
            # AttributeError when rtf_body is missing or is a list.
            if self.rtf_body is None or isinstance(self.rtf_body, list):
                raise ValueError(
                    "When df is a single DataFrame, rtf_body must be a single RTFBody"
                )
            self._validate_section_columns(self.df, self.rtf_body, 0)

        return self

    def _validate_section_columns(self, df, body, section_index):
        """Validate column references for a single section.

        Args:
            df: DataFrame of the section; only its ``columns`` are inspected.
            body: Body settings whose ``group_by``, ``page_by`` and
                ``subline_by`` entries are checked against ``df``.
            section_index: Position of the section; index 0 is reported as
                ``"df"``, later sections as ``"section <index>"``.

        Raises:
            ValueError: If a referenced column is not present in ``df``.
        """
        columns = df.columns
        section_label = f"section {section_index}" if section_index > 0 else "df"

        # Checked in this order so the first reported error stays the same.
        for option in ("group_by", "page_by", "subline_by"):
            referenced = getattr(body, option)
            if referenced is None:
                continue
            for column in referenced:
                if column not in columns:
                    raise ValueError(
                        f"`{option}` column {column} not found in {section_label}"
                    )

    def __init__(self, **data):
        """Validate the inputs, then fill in default widths and table spacing.

        After pydantic validation this sets ``col_rel_width`` on each body to
        equal widths (one per DataFrame column) when it is unset, copies the
        body widths to column headers that have none, computes the table
        spacing from the page settings and applies it to text components.
        """
        super().__init__(**data)

        # Set default column widths based on DataFrame dimensions (if DataFrame provided)
        if self.df is not None:
            if isinstance(self.df, list):
                # Handle multi-section documents
                for section_df, section_body in zip(self.df, self.rtf_body):
                    section_body.col_rel_width = (
                        section_body.col_rel_width or [1] * section_df.shape[1]
                    )

                # Handle column headers for multi-section
                if self.rtf_column_header and isinstance(
                    self.rtf_column_header[0], list
                ):
                    # Nested list format: [[header1], [header2], [None]]
                    for section_headers, section_body in zip(
                        self.rtf_column_header, self.rtf_body
                    ):
                        if section_headers:  # Skip empty header lists
                            for header in section_headers:
                                # ``None`` entries mean "no header" for the section
                                if header and header.col_rel_width is None:
                                    header.col_rel_width = (
                                        section_body.col_rel_width.copy()
                                    )
                elif self.rtf_column_header:
                    # Flat list format - inherit widths from the first section only
                    for header in self.rtf_column_header:
                        if header.col_rel_width is None:
                            header.col_rel_width = self.rtf_body[0].col_rel_width.copy()
            else:
                # Handle single section documents
                self.rtf_body.col_rel_width = (
                    self.rtf_body.col_rel_width or [1] * self.df.shape[1]
                )

                # Inherit col_rel_width from rtf_body to rtf_column_header if not specified
                if self.rtf_column_header:
                    for header in self.rtf_column_header:
                        if header.col_rel_width is None:
                            header.col_rel_width = self.rtf_body.col_rel_width.copy()

        # Calculate table spacing for text components: half of the difference
        # between page width and column width, converted by Utils._inch_to_twip.
        self._table_space = int(
            Utils._inch_to_twip(self.rtf_page.width - self.rtf_page.col_width) / 2
        )

        # Apply table spacing to text components if needed
        self._apply_table_spacing()

    def _apply_table_spacing(self):
        """Apply table-based spacing to text components that reference the table.

        For the subline, page header and page footer, adds ``_table_space`` to
        ``text_space_before`` and ``text_space_after`` when the component's
        ``text_indent_reference`` is ``"table"``.
        """
        for component in [self.rtf_subline, self.rtf_page_header, self.rtf_page_footer]:
            if component is not None and component.text_indent_reference == "table":
                component.text_space_before = (
                    self._table_space + component.text_space_before
                )
                component.text_space_after = (
                    self._table_space + component.text_space_after
                )

    def rtf_encode(self) -> str:
        """Generate the complete RTF document as a string.

        The encoding itself is delegated to ``RTFEncodingEngine``. The
        resulting string can be written to a file or processed further.

        Returns:
            str: Complete RTF document string ready to be saved as an .rtf file.

        Examples:
            ```python
            doc = RTFDocument(df=data, rtf_title=RTFTitle(text="Report"))
            rtf_string = doc.rtf_encode()
            # Can write manually or process further
            with open("output.rtf", "w") as f:
                f.write(rtf_string)
            ```
        """
        # Imported inside the method rather than at module import time.
        from .encoding import RTFEncodingEngine

        engine = RTFEncodingEngine()
        return engine.encode_document(self)

    def write_rtf(self, file_path: str) -> None:
        """Write the RTF document to a file.

        Generates the complete RTF document and writes it to the specified file
        path using UTF-8 encoding. The path is used exactly as given; no file
        extension is added or checked.

        Args:
            file_path: Path where the RTF file should be saved. Can be absolute
                or relative path. Directory must exist.

        Examples:
            ```python
            doc = RTFDocument(df=data, rtf_title=RTFTitle(text="Report"))
            doc.write_rtf("output/report.rtf")
            ```

        Note:
            The method prints the file path to stdout for confirmation.
            Ensure the directory exists before calling this method.
        """
        print(file_path)
        rtf_code = self.rtf_encode()
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(rtf_code)