Coverage for src / rtflite / convert.py: 80%
84 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-02 08:02 +0000
1import os
2import platform
3import re
4import shutil
5import subprocess
6from collections.abc import Sequence
7from pathlib import Path
9from packaging import version
11from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION
14class LibreOfficeConverter:
15 """Convert RTF documents to other formats using LibreOffice.
17 Convert RTF files to various formats including PDF, DOCX, HTML, and others
18 using LibreOffice in headless mode.
20 Requirements:
21 - LibreOffice 7.1 or later must be installed.
22 - Automatically finds LibreOffice in standard installation paths.
23 - For custom installations, provide `executable_path` parameter.
25 Note:
26 The converter runs LibreOffice in headless mode, so no GUI is required.
27 This makes it suitable for server environments and automated workflows.
28 """
30 def __init__(self, executable_path: str | Path | None = None):
31 """Initialize converter with optional executable path.
33 Args:
34 executable_path: Path (or executable name) to LibreOffice. If None,
35 searches standard installation locations for each platform.
37 Raises:
38 FileNotFoundError: If LibreOffice executable cannot be found.
39 ValueError: If LibreOffice version is below minimum requirement.
40 """
41 self.executable_path = self._resolve_executable_path(executable_path)
43 self._verify_version()
45 def _resolve_executable_path(self, executable_path: str | Path | None) -> Path:
46 """Resolve the LibreOffice executable path."""
47 if executable_path is None:
48 found_executable = self._find_executable()
49 if found_executable is None:
50 raise FileNotFoundError("Can't find LibreOffice executable.")
51 return found_executable
53 executable = os.fspath(executable_path)
54 expanded = os.path.expanduser(executable)
55 candidate = Path(expanded)
56 candidate_str = str(candidate)
57 looks_like_path = (
58 candidate.is_absolute()
59 or os.sep in candidate_str
60 or (os.altsep is not None and os.altsep in candidate_str)
61 )
62 if looks_like_path:
63 if candidate.is_file():
64 return candidate
65 raise FileNotFoundError(
66 f"LibreOffice executable not found at: {candidate}."
67 )
69 resolved_executable = shutil.which(executable)
70 if resolved_executable is None:
71 raise FileNotFoundError(f"Can't find LibreOffice executable: {executable}.")
72 return Path(resolved_executable)
74 def _find_executable(self) -> Path | None:
75 """Find LibreOffice executable in default locations."""
76 for name in ("soffice", "libreoffice"):
77 resolved = shutil.which(name)
78 if resolved is not None:
79 return Path(resolved)
81 system = platform.system()
82 if system not in DEFAULT_PATHS:
83 raise RuntimeError(f"Unsupported operating system: {system}.")
85 for path in DEFAULT_PATHS[system]:
86 candidate = Path(path)
87 if candidate.is_file():
88 return candidate
89 return None
91 def _verify_version(self):
92 """Verify LibreOffice version meets minimum requirement."""
93 try:
94 result = subprocess.run(
95 [str(self.executable_path), "--version"],
96 capture_output=True,
97 text=True,
98 check=True,
99 )
100 version_str = result.stdout.strip()
101 # Extract version number (for example, "24.8.3.2" from the output)
102 match = re.search(r"LibreOffice (\d+\.\d+)", version_str)
103 if not match:
104 raise ValueError(
105 f"Can't parse LibreOffice version from: {version_str}."
106 )
108 current_version = version.parse(match.group(1))
109 min_version = version.parse(MIN_VERSION)
111 if current_version < min_version:
112 raise RuntimeError(
113 "LibreOffice version "
114 f"{current_version} is below minimum required "
115 f"version {min_version}."
116 )
117 except subprocess.CalledProcessError as e:
118 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e
120 def convert(
121 self,
122 input_files: str | Path | Sequence[str | Path],
123 output_dir: str | Path,
124 format: str = "pdf",
125 overwrite: bool = False,
126 ) -> Path | Sequence[Path]:
127 """Convert RTF file(s) to specified format using LibreOffice.
129 Performs the actual conversion of RTF files to the target format using
130 LibreOffice in headless mode. Supports single file or batch conversion.
132 Args:
133 input_files: Path to input RTF file or list of paths. Can be string
134 or Path object. For batch conversion, provide a list/tuple.
135 output_dir: Directory where converted files will be saved. Created
136 if it doesn't exist. Can be string or Path object.
137 format: Target format for conversion. Supported formats:
139 - `'pdf'`: Portable Document Format (default)
140 - `'docx'`: Microsoft Word (Office Open XML)
141 - `'doc'`: Microsoft Word 97-2003
142 - `'html'`: HTML Document
143 - `'odt'`: OpenDocument Text
144 - `'txt'`: Plain Text
145 overwrite: If `True`, overwrites existing files in output directory.
146 If `False`, raises error if output file already exists.
148 Returns:
149 Path | Sequence[Path]: For single file input, returns Path to the
150 converted file. For multiple files, returns list of Paths.
152 Raises:
153 FileExistsError: If output file exists and overwrite=False.
154 RuntimeError: If LibreOffice conversion fails.
156 Examples:
157 Single file conversion:
158 ```python
159 converter = LibreOfficeConverter()
160 pdf_path = converter.convert(
161 "report.rtf",
162 output_dir="pdfs/",
163 format="pdf"
164 )
165 print(f"Created: {pdf_path}")
166 ```
168 Batch conversion with overwrite:
169 ```python
170 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"]
171 pdf_paths = converter.convert(
172 input_files=rtf_files,
173 output_dir="output/pdfs/",
174 format="pdf",
175 overwrite=True
176 )
177 for path in pdf_paths:
178 print(f"Converted: {path}")
179 ```
180 """
181 output_dir = Path(os.path.expanduser(str(output_dir)))
182 if not output_dir.exists():
183 output_dir.mkdir(parents=True)
185 # Handle single input file
186 if isinstance(input_files, (str, Path)):
187 input_path = Path(os.path.expanduser(str(input_files)))
188 if not input_path.exists():
189 raise FileNotFoundError(f"Input file not found: {input_path}.")
190 return self._convert_single_file(input_path, output_dir, format, overwrite)
192 # Handle multiple input files
193 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files]
194 for path in input_paths:
195 if not path.exists():
196 raise FileNotFoundError(f"Input file not found: {path}.")
198 return [
199 self._convert_single_file(input_path, output_dir, format, overwrite)
200 for input_path in input_paths
201 ]
203 def _convert_single_file(
204 self, input_file: Path, output_dir: Path, format: str, overwrite: bool
205 ) -> Path:
206 """Convert a single file using LibreOffice."""
207 output_file = output_dir / f"{input_file.stem}.{format}"
209 if output_file.exists() and not overwrite:
210 raise FileExistsError(
211 f"Output file already exists: {output_file}. "
212 "Use overwrite=True to force."
213 )
215 cmd = [
216 str(self.executable_path),
217 "--invisible",
218 "--headless",
219 "--nologo",
220 "--convert-to",
221 format,
222 "--outdir",
223 str(output_dir),
224 str(input_file),
225 ]
227 try:
228 result = subprocess.run(cmd, capture_output=True, text=True, check=True)
230 if not output_file.exists():
231 raise RuntimeError(
232 f"Conversion failed: Output file not created.\n"
233 f"Command output: {result.stdout}\n"
234 f"Error output: {result.stderr}"
235 )
237 return output_file
239 except subprocess.CalledProcessError as e:
240 raise RuntimeError(
241 f"LibreOffice conversion failed:\n"
242 f"Command output: {e.stdout}\n"
243 f"Error output: {e.stderr}"
244 ) from e