Coverage for src/rtflite/convert.py: 84%
62 statements
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-17 05:16 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2025-11-17 05:16 +0000
1import os
2import platform
3import re
4import subprocess
5from collections.abc import Sequence
6from pathlib import Path
8from packaging import version
10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION
13class LibreOfficeConverter:
14 """Convert RTF documents to other formats using LibreOffice.
16 Convert RTF files to various formats including PDF, DOCX, HTML, and others
17 using LibreOffice in headless mode.
19 Requirements:
20 - LibreOffice 7.3 or later must be installed.
21 - Automatically finds LibreOffice in standard installation paths.
22 - For custom installations, provide `executable_path` parameter.
24 Note:
25 The converter runs LibreOffice in headless mode, so no GUI is required.
26 This makes it suitable for server environments and automated workflows.
27 """
29 def __init__(self, executable_path: str | None = None):
30 """Initialize converter with optional executable path.
32 Args:
33 executable_path: Path to LibreOffice executable. If None, searches
34 standard installation locations for each platform.
36 Raises:
37 FileNotFoundError: If LibreOffice executable cannot be found.
38 ValueError: If LibreOffice version is below minimum requirement.
39 """
40 self.executable_path = executable_path or self._find_executable()
41 if not self.executable_path:
42 raise FileNotFoundError("Can't find LibreOffice executable.")
44 self._verify_version()
46 def _find_executable(self) -> str | None:
47 """Find LibreOffice executable in default locations."""
48 system = platform.system()
49 if system not in DEFAULT_PATHS:
50 raise RuntimeError(f"Unsupported operating system: {system}.")
52 for path in DEFAULT_PATHS[system]:
53 if os.path.isfile(path):
54 return path
55 return None
57 def _verify_version(self):
58 """Verify LibreOffice version meets minimum requirement."""
59 try:
60 result = subprocess.run(
61 [self.executable_path, "--version"],
62 capture_output=True,
63 text=True,
64 check=True,
65 )
66 version_str = result.stdout.strip()
67 # Extract version number (for example, "24.8.3.2" from the output)
68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str)
69 if not match:
70 raise ValueError(
71 f"Can't parse LibreOffice version from: {version_str}."
72 )
74 current_version = version.parse(match.group(1))
75 min_version = version.parse(MIN_VERSION)
77 if current_version < min_version:
78 raise RuntimeError(
79 "LibreOffice version "
80 f"{current_version} is below minimum required "
81 f"version {min_version}."
82 )
83 except subprocess.CalledProcessError as e:
84 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e
86 def convert(
87 self,
88 input_files: str | Path | Sequence[str | Path],
89 output_dir: str | Path,
90 format: str = "pdf",
91 overwrite: bool = False,
92 ) -> Path | Sequence[Path]:
93 """Convert RTF file(s) to specified format using LibreOffice.
95 Performs the actual conversion of RTF files to the target format using
96 LibreOffice in headless mode. Supports single file or batch conversion.
98 Args:
99 input_files: Path to input RTF file or list of paths. Can be string
100 or Path object. For batch conversion, provide a list/tuple.
101 output_dir: Directory where converted files will be saved. Created
102 if it doesn't exist. Can be string or Path object.
103 format: Target format for conversion. Supported formats:
104 - 'pdf': Portable Document Format (default)
105 - 'docx': Microsoft Word (Office Open XML)
106 - 'doc': Microsoft Word 97-2003
107 - 'html': HTML Document
108 - 'odt': OpenDocument Text
109 - 'txt': Plain Text
110 overwrite: If True, overwrites existing files in output directory.
111 If False, raises error if output file already exists.
113 Returns:
114 Path | Sequence[Path]: For single file input, returns Path to the
115 converted file. For multiple files, returns list of Paths.
117 Raises:
118 FileExistsError: If output file exists and overwrite=False.
119 RuntimeError: If LibreOffice conversion fails.
121 Examples:
122 Single file conversion:
123 ```python
124 converter = LibreOfficeConverter()
125 pdf_path = converter.convert(
126 "report.rtf",
127 output_dir="pdfs/",
128 format="pdf"
129 )
130 print(f"Created: {pdf_path}")
131 ```
133 Batch conversion with overwrite:
134 ```python
135 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"]
136 pdf_paths = converter.convert(
137 input_files=rtf_files,
138 output_dir="output/pdfs/",
139 format="pdf",
140 overwrite=True
141 )
142 for path in pdf_paths:
143 print(f"Converted: {path}")
144 ```
145 """
146 output_dir = Path(os.path.expanduser(str(output_dir)))
147 if not output_dir.exists():
148 output_dir.mkdir(parents=True)
150 # Handle single input file
151 if isinstance(input_files, (str, Path)):
152 input_path = Path(os.path.expanduser(str(input_files)))
153 if not input_path.exists():
154 raise FileNotFoundError(f"Input file not found: {input_path}.")
155 return self._convert_single_file(input_path, output_dir, format, overwrite)
157 # Handle multiple input files
158 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files]
159 for path in input_paths:
160 if not path.exists():
161 raise FileNotFoundError(f"Input file not found: {path}.")
163 return [
164 self._convert_single_file(input_path, output_dir, format, overwrite)
165 for input_path in input_paths
166 ]
168 def _convert_single_file(
169 self, input_file: Path, output_dir: Path, format: str, overwrite: bool
170 ) -> Path:
171 """Convert a single file using LibreOffice."""
172 output_file = output_dir / f"{input_file.stem}.{format}"
174 if output_file.exists() and not overwrite:
175 raise FileExistsError(
176 f"Output file already exists: {output_file}. "
177 "Use overwrite=True to force."
178 )
180 # executable_path is guaranteed to be non-None after __init__
181 assert self.executable_path is not None
182 cmd = [
183 self.executable_path,
184 "--invisible",
185 "--headless",
186 "--nologo",
187 "--convert-to",
188 format,
189 "--outdir",
190 str(output_dir),
191 str(input_file),
192 ]
194 try:
195 result = subprocess.run(cmd, capture_output=True, text=True, check=True)
197 if not output_file.exists():
198 raise RuntimeError(
199 f"Conversion failed: Output file not created.\n"
200 f"Command output: {result.stdout}\n"
201 f"Error output: {result.stderr}"
202 )
204 return output_file
206 except subprocess.CalledProcessError as e:
207 raise RuntimeError(
208 f"LibreOffice conversion failed:\n"
209 f"Command output: {e.stdout}\n"
210 f"Error output: {e.stderr}"
211 ) from e