Coverage for src/rtflite/assemble.py: 93%

1"""Assemble multiple RTF files into a single RTF or DOCX file."""

3import os

4from collections.abc import Sequence

5from copy import deepcopy

6from pathlib import Path

7from typing import TYPE_CHECKING

9if TYPE_CHECKING: # pragma: no cover

10 from docx.document import Document as DocxDocument

11 from docx.section import Section

13# from .input import RTFPage # Unused

def _find_body_start(lines):
    """Return the index at which the body of a subsequent RTF file starts.

    The last line containing ``fcharset`` is located and the returned index
    is two past it, i.e. that line and the one following it are treated as
    header (presumably the line closing the font table - confirm against
    the RTF writer). Returns 0 when no line mentions ``fcharset``.
    """
    last_font_line = -1
    for index, line in enumerate(lines):
        if "fcharset" in line:
            last_font_line = index
    return last_font_line + 2 if last_font_line >= 0 else 0


def assemble_rtf(
    input_files: list[str],
    output_file: str,
) -> None:
    """Combine multiple RTF files into a single RTF file.

    The first file is kept from its first line. For every later file the
    header (everything up to the position reported by ``_find_body_start``)
    is dropped. For every file except the last, a final line consisting only
    of ``}`` is dropped and a ``\\page`` command line is appended, so the
    closing brace of the last file terminates the combined document.

    Args:
        input_files: List of paths to RTF files to combine. When empty, the
            function returns without writing anything.
        output_file: Path to the output RTF file.

    Raises:
        FileNotFoundError: If any of the input files does not exist.
    """
    if not input_files:
        return

    # Validate up front so nothing is written when an input is missing.
    missing_files = [f for f in input_files if not os.path.exists(f)]
    if missing_files:
        raise FileNotFoundError(f"Missing files: {', '.join(missing_files)}")

    rtf_contents = []
    for path in input_files:
        with open(path, encoding="utf-8") as handle:
            rtf_contents.append(handle.readlines())

    new_page_cmd = r"\page" + "\n"
    last_file = len(rtf_contents) - 1
    processed_parts = []

    for i, lines in enumerate(rtf_contents):
        # Only the first file contributes its header.
        start_idx = _find_body_start(lines) if i > 0 else 0

        end_idx = len(lines)
        # ``lines`` is empty for an empty input file; guard before indexing
        # so an empty file contributes no content instead of raising
        # IndexError.
        if i < last_file and lines and lines[-1].strip() == "}":
            # Remove last line (closing brace) for all but the last file.
            end_idx -= 1

        processed_parts.extend(lines[start_idx:end_idx])

        if i < last_file:
            processed_parts.append(new_page_cmd)

    with open(output_file, "w", encoding="utf-8") as outfile:
        outfile.writelines(processed_parts)

def assemble_docx(
    input_files: list[str],
    output_file: str,
    landscape: bool | list[bool] = False,
) -> None:
    """Combine multiple RTF files into a single DOCX file.

    A new DOCX document is created with one section per input file. Each
    section holds a single paragraph made of the text ``Table ``, a
    ``SEQ Table`` field, a line break and an ``INCLUDETEXT`` field that
    references the input file by absolute path. The RTF content itself is
    not read or copied by this function.

    Args:
        input_files: List of paths to input RTF files.
        output_file: Path to the output DOCX file.
        landscape: Whether the output should be landscape. Can be a single bool
            (applies to all) or a list of bools (one per file). Defaults to False.

    Raises:
        ImportError: If ``python-docx`` is not installed.
        ValueError: If ``input_files`` is empty or the ``landscape`` list
            length does not match ``input_files``.
        FileNotFoundError: If any input file is missing.
    """
    try:
        import docx  # type: ignore
    except ImportError as e:
        raise ImportError(
            "python-docx is required for assemble_docx. "
            "Install it with: pip install 'rtflite[docx]'"
        ) from e

    if not input_files:
        raise ValueError("Input files list cannot be empty")

    # Check input files exist
    missing_files = [f for f in input_files if not os.path.exists(f)]
    if missing_files:
        raise FileNotFoundError(f"Missing files: {', '.join(missing_files)}")

    # Shared with concatenate_docx: one flag per input file, length-checked.
    landscape_list = _coerce_landscape_flags(landscape, len(input_files))

    doc = docx.Document()
    last_index = len(input_files) - 1

    for i, (input_file, is_landscape) in enumerate(
        zip(input_files, landscape_list, strict=True)
    ):
        # The most recently added section is the one this file goes into.
        _set_section_orientation(doc.sections[-1], is_landscape)

        # Absolute path needed for fields; backslashes are doubled for the
        # field code.
        path_str = os.path.abspath(input_file).replace("\\", "\\\\")
        field_code = f'INCLUDETEXT "{path_str}"'

        # "Table X" caption followed by the INCLUDETEXT field, all in one
        # paragraph.
        p = doc.add_paragraph()
        p.add_run("Table ")
        _add_field(p, r"SEQ Table \* ARABIC")
        p.add_run("\n")  # Linebreak
        _add_field(p, field_code)

        # Start a new section for every file except the last one.
        if i < last_index:
            doc.add_section()

    doc.save(output_file)

170

171

def _add_field(paragraph, field_code):
    """Add a complex field to a paragraph.

    Five runs are appended to ``paragraph`` in order: a ``begin`` field
    character, the instruction text, a ``separate`` field character, a run
    of placeholder result text, and an ``end`` field character.

    Args:
        paragraph: python-docx paragraph to append the field runs to.
        field_code: Field instruction text, e.g. ``SEQ Table \\* ARABIC``.
    """
    # This is low-level XML manipulation for python-docx to add fields
    from docx.oxml.ns import qn  # type: ignore
    from docx.oxml.shared import OxmlElement  # type: ignore

    def append_field_char(char_type):
        # Each field character lives in its own, otherwise empty, run.
        marker = OxmlElement("w:fldChar")
        marker.set(qn("w:fldCharType"), char_type)
        paragraph.add_run()._r.append(marker)

    append_field_char("begin")

    instruction = OxmlElement("w:instrText")
    instruction.set(qn("xml:space"), "preserve")
    instruction.text = field_code
    paragraph.add_run()._r.append(instruction)

    append_field_char("separate")

    # Add placeholder text so the field is visible/clickable. The field type
    # is decided by the leading keyword rather than a substring search, so an
    # INCLUDETEXT path that merely contains "SEQ" is not mistaken for a
    # sequence field.
    tokens = field_code.split(None, 1)
    is_sequence_field = bool(tokens) and tokens[0].upper() == "SEQ"
    if is_sequence_field:
        paragraph.add_run("1")
    else:
        paragraph.add_run("Error! Reference source not found.")

    append_field_char("end")

208

209

def concatenate_docx(
    input_files: Sequence[str | os.PathLike[str]],
    output_file: str | os.PathLike[str],
    landscape: bool | Sequence[bool] = False,
) -> None:
    """Stitch several DOCX files into one document without Word fields.

    Intended for DOCX files that already exist (for example those written by
    `RTFDocument.write_docx`) and must be merged into a single file that can
    be handed out without refreshing fields in Microsoft Word.

    Args:
        input_files: DOCX paths in the order they should appear. The first
            file is opened as the base document; each later file is added
            behind a new-page section break.
        output_file: Destination path of the merged DOCX file. Missing parent
            directories are created.
        landscape: Orientation per section. Either one boolean used for every
            section or a list/tuple with one entry per item of
            ``input_files``.

    Raises:
        ImportError: If ``python-docx`` is not installed.
        ValueError: If ``input_files`` is empty or the ``landscape`` list length
            does not match ``input_files``.
        FileNotFoundError: If any input file is missing.
    """
    try:
        from docx import Document  # type: ignore
        from docx.enum.section import WD_SECTION  # type: ignore
    except ImportError as exc:
        raise ImportError(
            "python-docx is required for concatenate_docx. "
            "Install it with: pip install 'rtflite[docx]'"
        ) from exc

    # Normalise every entry to an expanded Path before any validation.
    docx_paths = []
    for entry in input_files:
        docx_paths.append(Path(entry).expanduser())
    if len(docx_paths) == 0:
        raise ValueError("Input files list cannot be empty")

    absent = []
    for candidate in docx_paths:
        if not candidate.exists():
            absent.append(str(candidate))
    if absent:
        raise FileNotFoundError(f"Missing files: {', '.join(absent)}")

    flags = _coerce_landscape_flags(landscape, len(docx_paths))

    # The first document is the base; the rest are appended section by section.
    base_path, *later_paths = docx_paths
    base_flag, *later_flags = flags

    merged = Document(str(base_path))
    _set_section_orientation(merged.sections[0], base_flag)

    for later_path, later_flag in zip(later_paths, later_flags, strict=True):
        merged.add_section(WD_SECTION.NEW_PAGE)
        _set_section_orientation(merged.sections[-1], later_flag)
        _append_document_body(merged, Document(str(later_path)))

    destination = Path(output_file).expanduser()
    destination.parent.mkdir(parents=True, exist_ok=True)
    merged.save(str(destination))

266

267

268def _coerce_landscape_flags(

269 landscape: bool | Sequence[bool],

270 expected_length: int,

271) -> list[bool]:

272 """Normalize the ``landscape`` argument to a list and validate its length."""

273 if isinstance(landscape, bool):

274 return [landscape] * expected_length

275

276 flags = list(landscape)

277 if len(flags) != expected_length:

278 raise ValueError("Length of landscape list must match input files")

279

280 return flags

281

282

def _set_section_orientation(section: "Section", landscape: bool) -> None:
    """Apply the requested orientation to ``section``.

    The orientation attribute is always written. Page width and height are
    exchanged only when both are set and their current relation contradicts
    the requested orientation (equal dimensions are left untouched).
    """
    from docx.enum.section import WD_ORIENT  # type: ignore

    if landscape:
        section.orientation = WD_ORIENT.LANDSCAPE
    else:
        section.orientation = WD_ORIENT.PORTRAIT

    page_w = section.page_width
    page_h = section.page_height
    if page_w is None or page_h is None:
        # Nothing to swap when a dimension is not defined on the section.
        return

    # Landscape wants width >= height, portrait wants width <= height.
    mismatch = page_w < page_h if landscape else page_w > page_h
    if mismatch:
        section.page_width, section.page_height = page_h, page_w

294

295

def _append_document_body(target: "DocxDocument", source: "DocxDocument") -> None:
    """Copy body content from ``source`` into ``target`` without section props.

    Every child of the source body except body-level ``sectPr`` elements is
    deep-copied into the target body. When the target body has its own
    body-level ``sectPr``, the copies are inserted immediately before it so
    that it remains the last child of the body; otherwise they are appended
    at the end. ``source`` is not modified.

    Args:
        target: Document receiving the copied content.
        source: Document whose body content is copied.
    """

    def is_section_properties(node) -> bool:
        # Comments / processing instructions have a non-string ``tag``.
        tag = node.tag
        return isinstance(tag, str) and tag.endswith("}sectPr")

    target_body = target.element.body

    # Position of the target's body-level sectPr (the last one, if several).
    insert_at = None
    for position, child in enumerate(target_body):
        if is_section_properties(child):
            insert_at = position

    # Snapshot the source children so iteration is independent of the tree.
    for element in list(source.element.body):
        if is_section_properties(element):
            continue
        clone = deepcopy(element)
        if insert_at is None:
            target_body.append(clone)
        else:
            # Keep source order while staying ahead of the trailing sectPr.
            target_body.insert(insert_at, clone)
            insert_at += 1

Coverage for src/rtflite/assemble.py: 93%

152 statements