Coverage for src / rtflite / services / encoding_service.py: 90%
184 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
1"""RTF encoding service that handles document component encoding."""
3from collections.abc import Sequence
6class RTFEncodingService:
7 """Service class that handles RTF component encoding operations.
9 This class extracts encoding logic from RTFDocument to improve separation
10 of concerns and enable better testing and maintainability.
11 """
13 def __init__(self):
14 from ..rtf import RTFSyntaxGenerator
16 self.syntax = RTFSyntaxGenerator()
18 def encode_spanning_row(
19 self,
20 text: str,
21 page_width: float,
22 rtf_body_attrs=None,
23 col_idx: int = 0,
24 ) -> Sequence[str]:
25 """Generate a spanning table row (single cell spanning full width).
27 This is used for page_by group headers that span across all columns.
28 Works for both single-page and paginated documents.
30 Args:
31 text: Text to display in the spanning row
32 page_width: Total page width in inches
33 rtf_body_attrs: RTFBody attributes for styling (optional)
34 col_idx: Column index to inherit attributes from (default: 0)
36 Returns:
37 List of RTF strings for the spanning row
38 """
39 from ..attributes import BroadcastValue
40 from ..row import Border, Cell, Row, TextContent
42 def get_attr(attr_name, default_val):
43 if rtf_body_attrs is None:
44 return default_val
45 val = getattr(rtf_body_attrs, attr_name, None)
46 if val is None:
47 return default_val
48 # Use BroadcastValue to resolve the attribute for the specific column
49 # We use row 0 as the reference for column-based attributes
50 return BroadcastValue(value=val, dimension=None).iloc(0, col_idx)
52 # Extract attributes using the helper
53 font = get_attr("text_font", 0)
54 size = get_attr("text_font_size", 18)
55 text_format = get_attr("text_format", "")
56 color = get_attr("text_color", "")
57 bg_color = get_attr("text_background_color", "")
58 justification = get_attr("text_justification", "c")
60 indent_first = get_attr("text_indent_first", 0)
61 indent_left = get_attr("text_indent_left", 0)
62 indent_right = get_attr("text_indent_right", 0)
63 space = get_attr("text_space", 1)
64 space_before = get_attr("text_space_before", 15)
65 space_after = get_attr("text_space_after", 15)
66 convert = get_attr("text_convert", False)
67 hyphenation = get_attr("text_hyphenation", True)
69 border_left = get_attr("border_left", "single")
70 border_right = get_attr("border_right", "single")
71 border_top = get_attr("border_top", "single")
72 border_bottom = get_attr("border_bottom", "single")
74 v_just = get_attr("cell_vertical_justification", "bottom")
75 cell_just = get_attr("cell_justification", "c")
76 cell_height = get_attr("cell_height", 0.15)
78 # Create spanning cell
79 cell = Cell(
80 text=TextContent(
81 text=text,
82 font=font,
83 size=size,
84 format=text_format,
85 color=color,
86 background_color=bg_color,
87 justification=justification,
88 indent_first=indent_first,
89 indent_left=indent_left,
90 indent_right=indent_right,
91 space=space,
92 space_before=space_before,
93 space_after=space_after,
94 convert=convert,
95 hyphenation=hyphenation,
96 ),
97 width=page_width,
98 border_left=Border(style=border_left),
99 border_right=Border(style=border_right),
100 border_top=Border(style=border_top),
101 border_bottom=Border(style=border_bottom),
102 vertical_justification=v_just,
103 )
105 # Create row with single spanning cell
106 row = Row(row_cells=[cell], justification=cell_just, height=cell_height)
108 return row._as_rtf()
110 def encode_document_start(self) -> str:
111 """Encode RTF document start."""
112 return "{\\rtf1\\ansi\n\\deff0\\deflang1033"
114 def encode_font_table(self) -> str:
115 """Encode RTF font table."""
116 return self.syntax.generate_font_table()
118 def encode_color_table(
119 self, document=None, used_colors: Sequence[str] | None = None
120 ) -> str:
121 """Encode RTF color table with comprehensive 657-color support.
123 Args:
124 document: RTF document to analyze for color usage (preferred)
125 used_colors: Color names used in the document. If None and a
126 document is provided, colors are auto-detected.
128 Returns:
129 RTF color table string (empty if no colors beyond black/"" are used)
130 """
131 if document is not None and used_colors is None:
132 # Auto-detect colors from document
133 from ..services.color_service import color_service
135 used_colors = color_service.collect_document_colors(document)
137 return self.syntax.generate_color_table(used_colors)
139 def encode_page_settings(self, page_config) -> str:
140 """Encode RTF page settings.
142 Args:
143 page_config: RTFPage configuration object
145 Returns:
146 RTF page settings string
147 """
148 return self.syntax.generate_page_settings(
149 page_config.width,
150 page_config.height,
151 page_config.margin,
152 page_config.orientation,
153 )
155 def encode_page_header(self, header_config, method: str = "line") -> str:
156 """Encode page header component.
158 Args:
159 header_config: RTFPageHeader configuration
160 method: Encoding method
162 Returns:
163 RTF header string
164 """
165 if header_config is None or not header_config.text:
166 return ""
168 # Use the existing text encoding method
169 result = header_config._encode_text(text=header_config.text, method=method)
171 return f"{{\\header{result}}}"
173 def encode_page_footer(self, footer_config, method: str = "line") -> str:
174 """Encode page footer component.
176 Args:
177 footer_config: RTFPageFooter configuration
178 method: Encoding method
180 Returns:
181 RTF footer string
182 """
183 if footer_config is None or not footer_config.text:
184 return ""
186 # Use the existing text encoding method
187 result = footer_config._encode_text(text=footer_config.text, method=method)
188 return f"{{\\footer{result}}}"
190 def encode_title(self, title_config, method: str = "line") -> str:
191 """Encode title component.
193 Args:
194 title_config: RTFTitle configuration
195 method: Encoding method
197 Returns:
198 RTF title string
199 """
200 if not title_config or not title_config.text:
201 return ""
203 # Use the existing text encoding method
204 return title_config._encode_text(text=title_config.text, method=method)
206 def encode_subline(self, subline_config, method: str = "line") -> str:
207 """Encode subline component.
209 Args:
210 subline_config: RTFSubline configuration
211 method: Encoding method
213 Returns:
214 RTF subline string
215 """
216 if subline_config is None or not subline_config.text:
217 return ""
219 # Use the existing text encoding method
220 return subline_config._encode_text(text=subline_config.text, method=method)
222 def encode_footnote(
223 self,
224 footnote_config,
225 page_number: int | None = None,
226 page_col_width: float | None = None,
227 border_style: str | None = None,
228 ) -> Sequence[str]:
229 """Encode footnote component with advanced formatting.
231 Args:
232 footnote_config: RTFFootnote configuration
233 page_number: Page number for footnote
234 page_col_width: Page column width for calculations
235 border_style: Optional border style to override defaults
237 Returns:
238 List of RTF footnote strings
239 """
240 if footnote_config is None:
241 return []
243 rtf_attrs = footnote_config
245 # Apply explicitly passed border style
246 if border_style:
247 # Create a copy with modified border
248 rtf_attrs = rtf_attrs.model_copy()
249 rtf_attrs.border_bottom = [[border_style]]
251 # Check if footnote should be rendered as table or paragraph
252 if hasattr(rtf_attrs, "as_table") and not rtf_attrs.as_table:
253 # Render as paragraph (plain text)
254 if isinstance(rtf_attrs.text, list):
255 text_list = rtf_attrs.text
256 else:
257 text_list = [rtf_attrs.text] if rtf_attrs.text else []
259 # Use TextAttributes._encode_text method directly for paragraph rendering
260 return rtf_attrs._encode_text(text_list, method="paragraph")
261 else:
262 # Render as table (default behavior)
263 if page_col_width is not None:
264 from ..row import Utils
266 col_total_width = page_col_width
267 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, col_total_width)
269 # Create DataFrame from text string
270 import polars as pl
272 df = pl.DataFrame([[rtf_attrs.text]])
273 return rtf_attrs._encode(df, col_widths)
274 else:
275 # Fallback without column width calculations
276 import polars as pl
278 df = pl.DataFrame([[rtf_attrs.text]])
279 return rtf_attrs._encode(df)
281 def encode_source(
282 self,
283 source_config,
284 page_number: int | None = None,
285 page_col_width: float | None = None,
286 border_style: str | None = None,
287 ) -> Sequence[str]:
288 """Encode source component with advanced formatting.
290 Args:
291 source_config: RTFSource configuration
292 page_number: Page number for source
293 page_col_width: Page column width for calculations
294 border_style: Optional border style to override defaults
296 Returns:
297 List of RTF source strings
298 """
299 if source_config is None:
300 return []
302 rtf_attrs = source_config
304 # Apply explicitly passed border style
305 if border_style:
306 # Create a copy with modified border
307 rtf_attrs = rtf_attrs.model_copy()
308 rtf_attrs.border_bottom = [[border_style]]
310 # Check if source should be rendered as table or paragraph
311 if hasattr(rtf_attrs, "as_table") and not rtf_attrs.as_table:
312 # Render as paragraph (plain text)
313 if isinstance(rtf_attrs.text, list):
314 text_list = rtf_attrs.text
315 else:
316 text_list = [rtf_attrs.text] if rtf_attrs.text else []
318 # Use TextAttributes._encode_text method directly for paragraph rendering
319 return rtf_attrs._encode_text(text_list, method="paragraph")
320 else:
321 # Render as table (default behavior)
322 if page_col_width is not None:
323 from ..row import Utils
325 col_total_width = page_col_width
326 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, col_total_width)
328 # Create DataFrame from text string
329 import polars as pl
331 df = pl.DataFrame([[rtf_attrs.text]])
332 return rtf_attrs._encode(df, col_widths)
333 else:
334 # Fallback without column width calculations
335 import polars as pl
337 df = pl.DataFrame([[rtf_attrs.text]])
338 return rtf_attrs._encode(df)
340 def prepare_dataframe_for_body_encoding(self, df, rtf_attrs):
341 """Prepare DataFrame for body encoding with group_by and column removal.
343 Args:
344 df: Input DataFrame
345 rtf_attrs: RTFBody attributes
347 Returns:
348 Tuple of (processed_df, original_df) where processed_df has
349 transformations applied
350 """
351 original_df = df.clone()
352 processed_df = df.clone()
354 # Collect columns to remove
355 columns_to_remove = set()
357 # Remove subline_by columns from the processed DataFrame
358 if rtf_attrs.subline_by is not None:
359 columns_to_remove.update(rtf_attrs.subline_by)
361 # Remove page_by columns from table display
362 # page_by columns are shown as spanning rows, not as table columns
363 # The new_page flag only controls whether to force page breaks
364 # at group boundaries
365 if rtf_attrs.page_by is not None:
366 # Restore previous behavior:
367 # - If new_page=True: Respect pageby_row (default 'column' -> keep column)
368 # - If new_page=False: Always remove columns (legacy behavior
369 # implies spanning rows)
370 if rtf_attrs.new_page:
371 pageby_row = getattr(rtf_attrs, "pageby_row", "column")
372 if pageby_row != "column":
373 columns_to_remove.update(rtf_attrs.page_by)
374 else:
375 columns_to_remove.update(rtf_attrs.page_by)
377 # Apply column removal if any columns need to be removed
378 if columns_to_remove:
379 remaining_columns = [
380 col for col in processed_df.columns if col not in columns_to_remove
381 ]
382 processed_df = processed_df.select(remaining_columns)
384 # Create a copy of attributes to modify
385 processed_attrs = rtf_attrs.model_copy(deep=True)
387 # Handle attribute slicing for removed columns
388 # We need to slice list-based attributes to match the new column structure
389 from ..attributes import BroadcastValue
391 # Add footer content
392 # For now, we assume standard document footers are handled outside.
393 # But typically footers are page footers handled by RTFPageFooter.
394 # Get indices of removed columns in the original dataframe
395 removed_indices = [
396 original_df.columns.index(col) for col in columns_to_remove
397 ]
398 removed_indices.sort(reverse=True) # Sort reverse to remove safely
400 rows, cols = original_df.shape
402 # attributes to slice
403 # We iterate over all fields that could be list-based
404 for attr_name in type(processed_attrs).model_fields:
405 if attr_name == "col_rel_width":
406 continue # Handled separately below
408 val = getattr(processed_attrs, attr_name)
409 if val is None:
410 continue
412 # Check if it's a list/sequence that needs slicing
413 # We use BroadcastValue to expand it to full grid, then slice
414 if isinstance(val, (list, tuple)):
415 # Expand to full grid
416 expanded = BroadcastValue(
417 value=val, dimension=(rows, cols)
418 ).to_list()
420 # Slice each row
421 sliced_expanded = []
422 if expanded:
423 for row_data in expanded:
424 # Remove items at specified indices
425 new_row = [
426 item
427 for i, item in enumerate(row_data)
428 if i not in removed_indices
429 ]
430 sliced_expanded.append(new_row)
432 # Update attribute
433 setattr(processed_attrs, attr_name, sliced_expanded)
435 # Update col_rel_width separately (it's 1D usually)
436 if processed_attrs.col_rel_width is not None:
437 # Expand if needed (though usually 1D)
438 current_widths = processed_attrs.col_rel_width
439 # If it matches original columns, slice it
440 if len(current_widths) == cols:
441 new_widths = [
442 w
443 for i, w in enumerate(current_widths)
444 if i not in removed_indices
445 ]
446 processed_attrs.col_rel_width = new_widths
447 else:
448 processed_attrs = rtf_attrs
450 # Note: group_by suppression is handled in the pagination strategy
451 # for documents that need pagination. For non-paginated documents,
452 # group_by is handled separately in encode_body method.
454 return processed_df, original_df, processed_attrs
456 def encode_column_header(
457 self, df, rtf_attrs, page_col_width: float
458 ) -> Sequence[str] | None:
459 """Encode column header component with column width support.
461 Args:
462 df: DataFrame containing header data
463 rtf_attrs: RTFColumnHeader attributes
464 page_col_width: Page column width for calculations
466 Returns:
467 List of RTF header strings
468 """
469 if rtf_attrs is None:
470 return None
472 # Convert text list to DataFrame for encoding if needed
473 import polars as pl
475 df_to_encode = df
476 if isinstance(df, (list, tuple)):
477 # Create DataFrame from list
478 schema = [f"col_{i + 1}" for i in range(len(df))]
479 df_to_encode = pl.DataFrame([df], schema=schema, orient="row")
480 elif df is None and rtf_attrs.text:
481 # Fallback to rtf_attrs.text if df is None
482 text = rtf_attrs.text
483 if isinstance(text, (list, tuple)):
484 schema = [f"col_{i + 1}" for i in range(len(text))]
485 df_to_encode = pl.DataFrame([text], schema=schema, orient="row")
487 if df_to_encode is None:
488 return None
490 dim = df_to_encode.shape
492 rtf_attrs.col_rel_width = rtf_attrs.col_rel_width or [1] * dim[1]
493 rtf_attrs = rtf_attrs._set_default()
495 from ..row import Utils
497 col_widths = Utils._col_widths(rtf_attrs.col_rel_width, page_col_width)
499 return rtf_attrs._encode(df_to_encode, col_widths)
501 def encode_page_break(self, page_config, page_margin_encode_func) -> str:
502 """Generate proper RTF page break sequence matching r2rtf format.
504 Args:
505 page_config: RTFPage configuration
506 page_margin_encode_func: Function to encode page margins
508 Returns:
509 RTF page break string
510 """
511 from ..core import RTFConstants
513 page_setup = (
514 f"\\paperw{int(page_config.width * RTFConstants.TWIPS_PER_INCH)}"
515 f"\\paperh{int(page_config.height * RTFConstants.TWIPS_PER_INCH)}\n\n"
516 f"{page_margin_encode_func()}\n"
517 )
519 return f"{{\\pard\\fs2\\par}}\\page{{\\pard\\fs2\\par}}\n{page_setup}"
521 def encode_page_margin(self, page_config) -> str:
522 """Define RTF margin settings.
524 Args:
525 page_config: RTFPage configuration with margin settings
527 Returns:
528 RTF margin settings string
529 """
530 from ..row import Utils
532 margin_codes = [
533 "\\margl",
534 "\\margr",
535 "\\margt",
536 "\\margb",
537 "\\headery",
538 "\\footery",
539 ]
540 margins = [Utils._inch_to_twip(m) for m in page_config.margin]
541 margin = "".join(
542 f"{code}{margin}"
543 for code, margin in zip(margin_codes, margins, strict=True)
544 )
545 return margin + "\n"