Coverage for src/rtflite/convert.py: 84%
62 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
1import os
2import platform
3import re
4import subprocess
5from collections.abc import Sequence
6from pathlib import Path
8from packaging import version
10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION
13class LibreOfficeConverter:
14 """Convert RTF documents to other formats using LibreOffice.
16 Convert RTF files to various formats including PDF, DOCX, HTML, and others
17 using LibreOffice in headless mode.
19 Requirements:
20 - LibreOffice 7.3 or later must be installed.
21 - Automatically finds LibreOffice in standard installation paths.
22 - For custom installations, provide `executable_path` parameter.
24 Note:
25 The converter runs LibreOffice in headless mode, so no GUI is required.
26 This makes it suitable for server environments and automated workflows.
27 """
29 def __init__(self, executable_path: str | None = None):
30 """Initialize converter with optional executable path.
32 Args:
33 executable_path: Path to LibreOffice executable. If None, searches
34 standard installation locations for each platform.
36 Raises:
37 FileNotFoundError: If LibreOffice executable cannot be found.
38 ValueError: If LibreOffice version is below minimum requirement.
39 """
40 self.executable_path = executable_path or self._find_executable()
41 if not self.executable_path:
42 raise FileNotFoundError("Can't find LibreOffice executable.")
44 self._verify_version()
46 def _find_executable(self) -> str | None:
47 """Find LibreOffice executable in default locations."""
48 system = platform.system()
49 if system not in DEFAULT_PATHS:
50 raise RuntimeError(f"Unsupported operating system: {system}.")
52 for path in DEFAULT_PATHS[system]:
53 if os.path.isfile(path):
54 return path
55 return None
57 def _verify_version(self):
58 """Verify LibreOffice version meets minimum requirement."""
59 try:
60 result = subprocess.run(
61 [self.executable_path, "--version"],
62 capture_output=True,
63 text=True,
64 check=True,
65 )
66 version_str = result.stdout.strip()
67 # Extract version number (for example, "24.8.3.2" from the output)
68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str)
69 if not match:
70 raise ValueError(
71 f"Can't parse LibreOffice version from: {version_str}."
72 )
74 current_version = version.parse(match.group(1))
75 min_version = version.parse(MIN_VERSION)
77 if current_version < min_version:
78 raise RuntimeError(
79 f"LibreOffice version {current_version} is below minimum required version {min_version}."
80 )
81 except subprocess.CalledProcessError as e:
82 raise RuntimeError(f"Failed to get LibreOffice version: {e}.")
84 def convert(
85 self,
86 input_files: str | Path | Sequence[str | Path],
87 output_dir: str | Path,
88 format: str = "pdf",
89 overwrite: bool = False,
90 ) -> Path | Sequence[Path]:
91 """Convert RTF file(s) to specified format using LibreOffice.
93 Performs the actual conversion of RTF files to the target format using
94 LibreOffice in headless mode. Supports single file or batch conversion.
96 Args:
97 input_files: Path to input RTF file or list of paths. Can be string
98 or Path object. For batch conversion, provide a list/tuple.
99 output_dir: Directory where converted files will be saved. Created
100 if it doesn't exist. Can be string or Path object.
101 format: Target format for conversion. Supported formats:
102 - 'pdf': Portable Document Format (default)
103 - 'docx': Microsoft Word (Office Open XML)
104 - 'doc': Microsoft Word 97-2003
105 - 'html': HTML Document
106 - 'odt': OpenDocument Text
107 - 'txt': Plain Text
108 overwrite: If True, overwrites existing files in output directory.
109 If False, raises error if output file already exists.
111 Returns:
112 Path | Sequence[Path]: For single file input, returns Path to the
113 converted file. For multiple files, returns list of Paths.
115 Raises:
116 FileExistsError: If output file exists and overwrite=False.
117 RuntimeError: If LibreOffice conversion fails.
119 Examples:
120 Single file conversion:
121 ```python
122 converter = LibreOfficeConverter()
123 pdf_path = converter.convert(
124 "report.rtf",
125 output_dir="pdfs/",
126 format="pdf"
127 )
128 print(f"Created: {pdf_path}")
129 ```
131 Batch conversion with overwrite:
132 ```python
133 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"]
134 pdf_paths = converter.convert(
135 input_files=rtf_files,
136 output_dir="output/pdfs/",
137 format="pdf",
138 overwrite=True
139 )
140 for path in pdf_paths:
141 print(f"Converted: {path}")
142 ```
143 """
144 output_dir = Path(os.path.expanduser(str(output_dir)))
145 if not output_dir.exists():
146 output_dir.mkdir(parents=True)
148 # Handle single input file
149 if isinstance(input_files, (str, Path)):
150 input_path = Path(os.path.expanduser(str(input_files)))
151 if not input_path.exists():
152 raise FileNotFoundError(f"Input file not found: {input_path}.")
153 return self._convert_single_file(input_path, output_dir, format, overwrite)
155 # Handle multiple input files
156 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files]
157 for path in input_paths:
158 if not path.exists():
159 raise FileNotFoundError(f"Input file not found: {path}.")
161 return [
162 self._convert_single_file(input_path, output_dir, format, overwrite)
163 for input_path in input_paths
164 ]
166 def _convert_single_file(
167 self, input_file: Path, output_dir: Path, format: str, overwrite: bool
168 ) -> Path:
169 """Convert a single file using LibreOffice."""
170 output_file = output_dir / f"{input_file.stem}.{format}"
172 if output_file.exists() and not overwrite:
173 raise FileExistsError(
174 f"Output file already exists: {output_file}. Use overwrite=True to force."
175 )
177 # executable_path is guaranteed to be non-None after __init__
178 assert self.executable_path is not None
179 cmd = [
180 self.executable_path,
181 "--invisible",
182 "--headless",
183 "--nologo",
184 "--convert-to",
185 format,
186 "--outdir",
187 str(output_dir),
188 str(input_file),
189 ]
191 try:
192 result = subprocess.run(cmd, capture_output=True, text=True, check=True)
194 if not output_file.exists():
195 raise RuntimeError(
196 f"Conversion failed: Output file not created.\n"
197 f"Command output: {result.stdout}\n"
198 f"Error output: {result.stderr}"
199 )
201 return output_file
203 except subprocess.CalledProcessError as e:
204 raise RuntimeError(
205 f"LibreOffice conversion failed:\n"
206 f"Command output: {e.stdout}\n"
207 f"Error output: {e.stderr}"
208 )