Coverage for src / rtflite / assemble.py: 93%
152 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
1"""Assemble multiple RTF files into a single RTF or DOCX file."""
3import os
4from collections.abc import Sequence
5from copy import deepcopy
6from pathlib import Path
7from typing import TYPE_CHECKING
9if TYPE_CHECKING: # pragma: no cover
10 from docx.document import Document as DocxDocument
11 from docx.section import Section
13# from .input import RTFPage # Unused
def assemble_rtf(
    input_files: list[str],
    output_file: str,
) -> None:
    """Combine multiple RTF files into a single RTF file.

    The first file is copied from its first line. For every later file the
    leading lines are skipped, resuming two lines after the last line that
    mentions ``fcharset``. Every file except the last has its final closing
    brace removed and is followed by an RTF page command, so the combined
    output is closed only by the last file's closing brace.

    Args:
        input_files: List of paths to RTF files to combine. Nothing is
            written when the list is empty.
        output_file: Path to the output RTF file.

    Raises:
        FileNotFoundError: If any input file does not exist.
    """
    if not input_files:
        return

    # Fail early, naming every missing file, before reading anything.
    missing_files = [f for f in input_files if not os.path.exists(f)]
    if missing_files:
        raise FileNotFoundError(f"Missing files: {', '.join(missing_files)}")

    rtf_contents = []
    for f in input_files:
        with open(f, encoding="utf-8") as file:
            rtf_contents.append(file.readlines())

    def find_start_index(lines):
        """Return the index two past the last ``fcharset`` line, or 0."""
        last_idx = None
        for idx, line in enumerate(lines):
            if "fcharset" in line:
                last_idx = idx
        return 0 if last_idx is None else last_idx + 2

    def drop_closing_brace(lines):
        """Return ``lines`` without the final closing brace, if there is one."""
        # Trailing blank lines must not hide the closing brace.
        end = len(lines)
        while end > 0 and not lines[end - 1].strip():
            end -= 1
        if end == 0:
            # Empty (or whitespace-only) file: nothing to remove.
            return lines

        last = lines[end - 1].rstrip()
        # A backslash-escaped brace is literal text, not a group terminator.
        if not last.endswith("}") or last.endswith("\\}"):
            return lines

        kept = lines[: end - 1]
        if last.strip() != "}":
            # The brace shares its line with content: keep the content.
            kept.append(last[:-1] + "\n")
        return kept

    new_page_cmd = r"\page" + "\n"
    last_file = len(rtf_contents) - 1

    processed_parts = []
    for i, lines in enumerate(rtf_contents):
        # Only the first file contributes its leading lines.
        start_idx = find_start_index(lines) if i > 0 else 0

        if i < last_file:
            lines = drop_closing_brace(lines)

        processed_parts.extend(lines[start_idx:])

        if i < last_file:
            processed_parts.append(new_page_cmd)

    with open(output_file, "w", encoding="utf-8") as outfile:
        outfile.writelines(processed_parts)
88def assemble_docx(
89 input_files: list[str],
90 output_file: str,
91 landscape: bool | list[bool] = False,
92) -> None:
93 """Combine multiple RTF files into a single DOCX file.
95 Args:
96 input_files: List of paths to input RTF files.
97 output_file: Path to the output DOCX file.
98 landscape: Whether the output should be landscape. Can be a single bool
99 (applies to all) or a list of bools (one per file). Defaults to False.
100 """
101 try:
102 import docx # type: ignore
103 from docx.enum.section import WD_ORIENT # type: ignore
104 except ImportError as e:
105 raise ImportError(
106 "python-docx is required for assemble_docx. "
107 "Install it with: pip install 'rtflite[docx]'"
108 ) from e
110 if not input_files:
111 raise ValueError("Input files list cannot be empty")
113 # Check input files exist
114 missing_files = [f for f in input_files if not os.path.exists(f)]
115 if missing_files:
116 raise FileNotFoundError(f"Missing files: {', '.join(missing_files)}")
118 # Handle landscape argument
119 if isinstance(landscape, bool):
120 landscape_list = [landscape] * len(input_files)
121 else:
122 if len(landscape) != len(input_files):
123 raise ValueError("Length of landscape list must match input files")
124 landscape_list = landscape
126 # Create new document
127 doc = docx.Document()
129 for i, (input_file, is_landscape) in enumerate(
130 zip(input_files, landscape_list, strict=True)
131 ):
132 # Set orientation for the current section
133 section = doc.sections[-1]
134 if is_landscape:
135 section.orientation = WD_ORIENT.LANDSCAPE
136 w, h = section.page_width, section.page_height
137 if w is not None and h is not None and w < h: # If currently portrait
138 section.page_width = h
139 section.page_height = w
140 else:
141 section.orientation = WD_ORIENT.PORTRAIT
142 w, h = section.page_width, section.page_height
143 if w is not None and h is not None and w > h: # If currently landscape
144 section.page_width = h
145 section.page_height = w
147 # Absolute path needed for fields
148 abs_path = os.path.abspath(input_file)
150 # Escape backslashes for the field code
151 path_str = abs_path.replace("\\", "\\\\")
153 # Create INCLUDETEXT field
154 field_code = f'INCLUDETEXT "{path_str}"'
156 # Add "Table X" caption
157 p = doc.add_paragraph()
158 p.add_run("Table ")
159 _add_field(p, r"SEQ Table \* ARABIC")
160 p.add_run("\n") # Linebreak
162 # Add the INCLUDETEXT field
163 _add_field(p, field_code)
165 # Handle section breaks
166 if i < len(input_files) - 1:
167 doc.add_section()
169 doc.save(output_file)
def _add_field(paragraph, field_code):
    """Add a complex field to a paragraph.

    Appends the run sequence that makes up a complex field: a ``begin``
    field character, the instruction text, a ``separate`` field character,
    a placeholder result run, and an ``end`` field character.

    Args:
        paragraph: python-docx paragraph that receives the runs.
        field_code: Field instruction text, e.g. ``SEQ Table \\* ARABIC`` or
            ``INCLUDETEXT "path"``.
    """
    # This is low-level XML manipulation for python-docx to add fields
    from docx.oxml.ns import qn  # type: ignore
    from docx.oxml.shared import OxmlElement  # type: ignore

    def append_field_char(char_type):
        """Append a new run holding one ``w:fldChar`` of the given type."""
        run = paragraph.add_run()
        fld_char = OxmlElement("w:fldChar")
        fld_char.set(qn("w:fldCharType"), char_type)
        run._r.append(fld_char)

    append_field_char("begin")

    run = paragraph.add_run()
    instr_text = OxmlElement("w:instrText")
    # Preserve whitespace so the instruction is stored exactly as given.
    instr_text.set(qn("xml:space"), "preserve")
    instr_text.text = field_code
    run._r.append(instr_text)

    append_field_char("separate")

    # Add placeholder text so the field is visible/clickable. Decide on the
    # field name (first token) rather than a substring search, so an
    # INCLUDETEXT path that happens to contain "SEQ" is not mistaken for a
    # sequence field.
    name_tokens = field_code.split(maxsplit=1)
    is_sequence_field = bool(name_tokens) and name_tokens[0].upper() == "SEQ"
    if is_sequence_field:
        paragraph.add_run("1")
    else:
        paragraph.add_run("Error! Reference source not found.")

    append_field_char("end")
210def concatenate_docx(
211 input_files: Sequence[str | os.PathLike[str]],
212 output_file: str | os.PathLike[str],
213 landscape: bool | Sequence[bool] = False,
214) -> None:
215 """Concatenate DOCX files without relying on Word field toggles.
217 This helper is useful when `RTFDocument.write_docx` already produced DOCX
218 files and you need to stitch them together into a single document that can
219 be distributed without refreshing fields in Microsoft Word.
221 Args:
222 input_files: Ordered collection of DOCX file paths to combine. The
223 first document becomes the base; subsequent documents are appended
224 as new sections.
225 output_file: Path to the combined DOCX file.
226 landscape: Whether each appended section should be landscape. Accepts
227 a single boolean applied to every section or a list/tuple matching
228 ``input_files``.
230 Raises:
231 ImportError: If ``python-docx`` is not installed.
232 ValueError: If ``input_files`` is empty or the ``landscape`` list length
233 does not match ``input_files``.
234 FileNotFoundError: If any input file is missing.
235 """
236 try:
237 from docx import Document # type: ignore
238 from docx.enum.section import WD_SECTION # type: ignore
239 except ImportError as exc:
240 raise ImportError(
241 "python-docx is required for concatenate_docx. "
242 "Install it with: pip install 'rtflite[docx]'"
243 ) from exc
245 paths = [Path(path).expanduser() for path in input_files]
246 if not paths:
247 raise ValueError("Input files list cannot be empty")
249 missing_files = [str(path) for path in paths if not path.exists()]
250 if missing_files:
251 raise FileNotFoundError(f"Missing files: {', '.join(missing_files)}")
253 orientation_flags = _coerce_landscape_flags(landscape, len(paths))
255 combined_doc = Document(str(paths[0]))
256 _set_section_orientation(combined_doc.sections[0], orientation_flags[0])
258 for source_path, is_landscape in zip(paths[1:], orientation_flags[1:], strict=True):
259 combined_doc.add_section(WD_SECTION.NEW_PAGE)
260 _set_section_orientation(combined_doc.sections[-1], is_landscape)
261 _append_document_body(combined_doc, Document(str(source_path)))
263 output_path = Path(output_file).expanduser()
264 output_path.parent.mkdir(parents=True, exist_ok=True)
265 combined_doc.save(str(output_path))
268def _coerce_landscape_flags(
269 landscape: bool | Sequence[bool],
270 expected_length: int,
271) -> list[bool]:
272 """Normalize the ``landscape`` argument to a list and validate its length."""
273 if isinstance(landscape, bool):
274 return [landscape] * expected_length
276 flags = list(landscape)
277 if len(flags) != expected_length:
278 raise ValueError("Length of landscape list must match input files")
280 return flags
def _set_section_orientation(section: "Section", landscape: bool) -> None:
    """Apply the requested orientation to ``section``.

    The orientation attribute is always set. Page width and height are
    swapped only when both are known and their current proportions
    contradict the requested orientation.
    """
    from docx.enum.section import WD_ORIENT  # type: ignore

    if landscape:
        section.orientation = WD_ORIENT.LANDSCAPE
    else:
        section.orientation = WD_ORIENT.PORTRAIT

    page_w = section.page_width
    page_h = section.page_height
    if page_w is None or page_h is None:
        # Dimensions not available, so there is nothing to swap.
        return

    # Landscape expects width >= height; portrait expects width <= height.
    needs_swap = page_w < page_h if landscape else page_w > page_h
    if needs_swap:
        section.page_width, section.page_height = page_h, page_w
296def _append_document_body(target: "DocxDocument", source: "DocxDocument") -> None:
297 """Copy body content from ``source`` into ``target`` without section props."""
298 for element in list(source.element.body):
299 if element.tag.endswith("}sectPr"):
300 continue
301 target.element.body.append(deepcopy(element))