Coverage for src / rtflite / convert.py: 84%
62 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-08 04:50 +0000
import os
import platform
import re
import subprocess
import tempfile
from collections.abc import Sequence
from pathlib import Path

from packaging import version

from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION
13class LibreOfficeConverter:
14 """Convert RTF documents to other formats using LibreOffice.
16 Convert RTF files to various formats including PDF, DOCX, HTML, and others
17 using LibreOffice in headless mode.
19 Requirements:
20 - LibreOffice 7.3 or later must be installed.
21 - Automatically finds LibreOffice in standard installation paths.
22 - For custom installations, provide `executable_path` parameter.
24 Note:
25 The converter runs LibreOffice in headless mode, so no GUI is required.
26 This makes it suitable for server environments and automated workflows.
27 """
29 def __init__(self, executable_path: str | None = None):
30 """Initialize converter with optional executable path.
32 Args:
33 executable_path: Path to LibreOffice executable. If None, searches
34 standard installation locations for each platform.
36 Raises:
37 FileNotFoundError: If LibreOffice executable cannot be found.
38 ValueError: If LibreOffice version is below minimum requirement.
39 """
40 self.executable_path = executable_path or self._find_executable()
41 if not self.executable_path:
42 raise FileNotFoundError("Can't find LibreOffice executable.")
44 self._verify_version()
46 def _find_executable(self) -> str | None:
47 """Find LibreOffice executable in default locations."""
48 system = platform.system()
49 if system not in DEFAULT_PATHS:
50 raise RuntimeError(f"Unsupported operating system: {system}.")
52 for path in DEFAULT_PATHS[system]:
53 if os.path.isfile(path):
54 return path
55 return None
57 def _verify_version(self):
58 """Verify LibreOffice version meets minimum requirement."""
59 try:
60 result = subprocess.run(
61 [self.executable_path, "--version"],
62 capture_output=True,
63 text=True,
64 check=True,
65 )
66 version_str = result.stdout.strip()
67 # Extract version number (for example, "24.8.3.2" from the output)
68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str)
69 if not match:
70 raise ValueError(
71 f"Can't parse LibreOffice version from: {version_str}."
72 )
74 current_version = version.parse(match.group(1))
75 min_version = version.parse(MIN_VERSION)
77 if current_version < min_version:
78 raise RuntimeError(
79 "LibreOffice version "
80 f"{current_version} is below minimum required "
81 f"version {min_version}."
82 )
83 except subprocess.CalledProcessError as e:
84 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e
86 def convert(
87 self,
88 input_files: str | Path | Sequence[str | Path],
89 output_dir: str | Path,
90 format: str = "pdf",
91 overwrite: bool = False,
92 ) -> Path | Sequence[Path]:
93 """Convert RTF file(s) to specified format using LibreOffice.
95 Performs the actual conversion of RTF files to the target format using
96 LibreOffice in headless mode. Supports single file or batch conversion.
98 Args:
99 input_files: Path to input RTF file or list of paths. Can be string
100 or Path object. For batch conversion, provide a list/tuple.
101 output_dir: Directory where converted files will be saved. Created
102 if it doesn't exist. Can be string or Path object.
103 format: Target format for conversion. Supported formats:
105 - `'pdf'`: Portable Document Format (default)
106 - `'docx'`: Microsoft Word (Office Open XML)
107 - `'doc'`: Microsoft Word 97-2003
108 - `'html'`: HTML Document
109 - `'odt'`: OpenDocument Text
110 - `'txt'`: Plain Text
111 overwrite: If `True`, overwrites existing files in output directory.
112 If `False`, raises error if output file already exists.
114 Returns:
115 Path | Sequence[Path]: For single file input, returns Path to the
116 converted file. For multiple files, returns list of Paths.
118 Raises:
119 FileExistsError: If output file exists and overwrite=False.
120 RuntimeError: If LibreOffice conversion fails.
122 Examples:
123 Single file conversion:
124 ```python
125 converter = LibreOfficeConverter()
126 pdf_path = converter.convert(
127 "report.rtf",
128 output_dir="pdfs/",
129 format="pdf"
130 )
131 print(f"Created: {pdf_path}")
132 ```
134 Batch conversion with overwrite:
135 ```python
136 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"]
137 pdf_paths = converter.convert(
138 input_files=rtf_files,
139 output_dir="output/pdfs/",
140 format="pdf",
141 overwrite=True
142 )
143 for path in pdf_paths:
144 print(f"Converted: {path}")
145 ```
146 """
147 output_dir = Path(os.path.expanduser(str(output_dir)))
148 if not output_dir.exists():
149 output_dir.mkdir(parents=True)
151 # Handle single input file
152 if isinstance(input_files, (str, Path)):
153 input_path = Path(os.path.expanduser(str(input_files)))
154 if not input_path.exists():
155 raise FileNotFoundError(f"Input file not found: {input_path}.")
156 return self._convert_single_file(input_path, output_dir, format, overwrite)
158 # Handle multiple input files
159 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files]
160 for path in input_paths:
161 if not path.exists():
162 raise FileNotFoundError(f"Input file not found: {path}.")
164 return [
165 self._convert_single_file(input_path, output_dir, format, overwrite)
166 for input_path in input_paths
167 ]
169 def _convert_single_file(
170 self, input_file: Path, output_dir: Path, format: str, overwrite: bool
171 ) -> Path:
172 """Convert a single file using LibreOffice."""
173 output_file = output_dir / f"{input_file.stem}.{format}"
175 if output_file.exists() and not overwrite:
176 raise FileExistsError(
177 f"Output file already exists: {output_file}. "
178 "Use overwrite=True to force."
179 )
181 # executable_path is guaranteed to be non-None after __init__
182 assert self.executable_path is not None
183 cmd = [
184 self.executable_path,
185 "--invisible",
186 "--headless",
187 "--nologo",
188 "--convert-to",
189 format,
190 "--outdir",
191 str(output_dir),
192 str(input_file),
193 ]
195 try:
196 result = subprocess.run(cmd, capture_output=True, text=True, check=True)
198 if not output_file.exists():
199 raise RuntimeError(
200 f"Conversion failed: Output file not created.\n"
201 f"Command output: {result.stdout}\n"
202 f"Error output: {result.stderr}"
203 )
205 return output_file
207 except subprocess.CalledProcessError as e:
208 raise RuntimeError(
209 f"LibreOffice conversion failed:\n"
210 f"Command output: {e.stdout}\n"
211 f"Error output: {e.stderr}"
212 ) from e