Coverage for src/rtflite/convert.py: 36%
61 statements
« prev ^ index » next coverage.py v7.6.10, created at 2025-02-03 15:40 +0000
« prev ^ index » next coverage.py v7.6.10, created at 2025-02-03 15:40 +0000
1import os
2import platform
3import re
4import subprocess
5from collections.abc import Sequence
6from pathlib import Path
8from packaging import version
10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION
13class LibreOfficeConverter:
14 """LibreOffice-based document converter."""
16 def __init__(self, executable_path: str | None = None):
17 """Initialize converter with optional executable path."""
18 self.executable_path = executable_path or self._find_executable()
19 if not self.executable_path:
20 raise FileNotFoundError("Can't find LibreOffice executable.")
22 self._verify_version()
24 def _find_executable(self) -> str | None:
25 """Find LibreOffice executable in default locations."""
26 system = platform.system()
27 if system not in DEFAULT_PATHS:
28 raise RuntimeError(f"Unsupported operating system: {system}.")
30 for path in DEFAULT_PATHS[system]:
31 if os.path.isfile(path):
32 return path
33 return None
35 def _verify_version(self):
36 """Verify LibreOffice version meets minimum requirement."""
37 try:
38 result = subprocess.run(
39 [self.executable_path, "--version"],
40 capture_output=True,
41 text=True,
42 check=True,
43 )
44 version_str = result.stdout.strip()
45 # Extract version number (for example, "24.8.3.2" from the output)
46 match = re.search(r"LibreOffice (\d+\.\d+)", version_str)
47 if not match:
48 raise ValueError(
49 f"Can't parse LibreOffice version from: {version_str}."
50 )
52 current_version = version.parse(match.group(1))
53 min_version = version.parse(MIN_VERSION)
55 if current_version < min_version:
56 raise RuntimeError(
57 f"LibreOffice version {current_version} is below minimum required version {min_version}."
58 )
59 except subprocess.CalledProcessError as e:
60 raise RuntimeError(f"Failed to get LibreOffice version: {e}.")
62 def convert(
63 self,
64 input_files: str | Path | Sequence[str | Path],
65 output_dir: str | Path,
66 format: str = "pdf",
67 overwrite: bool = False,
68 ) -> Path | Sequence[Path]:
69 """
70 Convert RTF file(s) to specified format using LibreOffice.
72 Args:
73 input_files: Path to input RTF file or list of paths.
74 output_dir: Directory for output files.
75 format: Output format (`'pdf'`, `'docx'`, or `'html'`).
76 overwrite: Whether to overwrite existing output files.
78 Returns:
79 Path to converted file, or list of paths for multiple files.
80 """
81 output_dir = Path(os.path.expanduser(str(output_dir)))
82 if not output_dir.exists():
83 output_dir.mkdir(parents=True)
85 # Handle single input file
86 if isinstance(input_files, (str, Path)):
87 input_path = Path(os.path.expanduser(str(input_files)))
88 if not input_path.exists():
89 raise FileNotFoundError(f"Input file not found: {input_path}.")
90 return self._convert_single_file(input_path, output_dir, format, overwrite)
92 # Handle multiple input files
93 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files]
94 for path in input_paths:
95 if not path.exists():
96 raise FileNotFoundError(f"Input file not found: {path}.")
98 return [
99 self._convert_single_file(input_path, output_dir, format, overwrite)
100 for input_path in input_paths
101 ]
103 def _convert_single_file(
104 self, input_file: Path, output_dir: Path, format: str, overwrite: bool
105 ) -> Path:
106 """Convert a single file using LibreOffice."""
107 output_file = output_dir / f"{input_file.stem}.{format}"
109 if output_file.exists() and not overwrite:
110 raise FileExistsError(
111 f"Output file already exists: {output_file}. Use overwrite=True to force."
112 )
114 cmd = [
115 self.executable_path,
116 "--invisible",
117 "--headless",
118 "--nologo",
119 "--convert-to",
120 format,
121 "--outdir",
122 str(output_dir),
123 str(input_file),
124 ]
126 try:
127 result = subprocess.run(cmd, capture_output=True, text=True, check=True)
129 if not output_file.exists():
130 raise RuntimeError(
131 f"Conversion failed: Output file not created.\n"
132 f"Command output: {result.stdout}\n"
133 f"Error output: {result.stderr}"
134 )
136 return output_file
138 except subprocess.CalledProcessError as e:
139 raise RuntimeError(
140 f"LibreOffice conversion failed:\n"
141 f"Command output: {e.stdout}\n"
142 f"Error output: {e.stderr}"
143 )