Coverage for src / rtflite / convert.py: 80%

84 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-02 08:02 +0000

1import os 

2import platform 

3import re 

4import shutil 

5import subprocess 

6from collections.abc import Sequence 

7from pathlib import Path 

8 

9from packaging import version 

10 

11from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION 

12 

13 

14class LibreOfficeConverter: 

15 """Convert RTF documents to other formats using LibreOffice. 

16 

17 Convert RTF files to various formats including PDF, DOCX, HTML, and others 

18 using LibreOffice in headless mode. 

19 

20 Requirements: 

21 - LibreOffice 7.1 or later must be installed. 

22 - Automatically finds LibreOffice in standard installation paths. 

23 - For custom installations, provide `executable_path` parameter. 

24 

25 Note: 

26 The converter runs LibreOffice in headless mode, so no GUI is required. 

27 This makes it suitable for server environments and automated workflows. 

28 """ 

29 

30 def __init__(self, executable_path: str | Path | None = None): 

31 """Initialize converter with optional executable path. 

32 

33 Args: 

34 executable_path: Path (or executable name) to LibreOffice. If None, 

35 searches standard installation locations for each platform. 

36 

37 Raises: 

38 FileNotFoundError: If LibreOffice executable cannot be found. 

39 ValueError: If LibreOffice version is below minimum requirement. 

40 """ 

41 self.executable_path = self._resolve_executable_path(executable_path) 

42 

43 self._verify_version() 

44 

45 def _resolve_executable_path(self, executable_path: str | Path | None) -> Path: 

46 """Resolve the LibreOffice executable path.""" 

47 if executable_path is None: 

48 found_executable = self._find_executable() 

49 if found_executable is None: 

50 raise FileNotFoundError("Can't find LibreOffice executable.") 

51 return found_executable 

52 

53 executable = os.fspath(executable_path) 

54 expanded = os.path.expanduser(executable) 

55 candidate = Path(expanded) 

56 candidate_str = str(candidate) 

57 looks_like_path = ( 

58 candidate.is_absolute() 

59 or os.sep in candidate_str 

60 or (os.altsep is not None and os.altsep in candidate_str) 

61 ) 

62 if looks_like_path: 

63 if candidate.is_file(): 

64 return candidate 

65 raise FileNotFoundError( 

66 f"LibreOffice executable not found at: {candidate}." 

67 ) 

68 

69 resolved_executable = shutil.which(executable) 

70 if resolved_executable is None: 

71 raise FileNotFoundError(f"Can't find LibreOffice executable: {executable}.") 

72 return Path(resolved_executable) 

73 

74 def _find_executable(self) -> Path | None: 

75 """Find LibreOffice executable in default locations.""" 

76 for name in ("soffice", "libreoffice"): 

77 resolved = shutil.which(name) 

78 if resolved is not None: 

79 return Path(resolved) 

80 

81 system = platform.system() 

82 if system not in DEFAULT_PATHS: 

83 raise RuntimeError(f"Unsupported operating system: {system}.") 

84 

85 for path in DEFAULT_PATHS[system]: 

86 candidate = Path(path) 

87 if candidate.is_file(): 

88 return candidate 

89 return None 

90 

91 def _verify_version(self): 

92 """Verify LibreOffice version meets minimum requirement.""" 

93 try: 

94 result = subprocess.run( 

95 [str(self.executable_path), "--version"], 

96 capture_output=True, 

97 text=True, 

98 check=True, 

99 ) 

100 version_str = result.stdout.strip() 

101 # Extract version number (for example, "24.8.3.2" from the output) 

102 match = re.search(r"LibreOffice (\d+\.\d+)", version_str) 

103 if not match: 

104 raise ValueError( 

105 f"Can't parse LibreOffice version from: {version_str}." 

106 ) 

107 

108 current_version = version.parse(match.group(1)) 

109 min_version = version.parse(MIN_VERSION) 

110 

111 if current_version < min_version: 

112 raise RuntimeError( 

113 "LibreOffice version " 

114 f"{current_version} is below minimum required " 

115 f"version {min_version}." 

116 ) 

117 except subprocess.CalledProcessError as e: 

118 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e 

119 

120 def convert( 

121 self, 

122 input_files: str | Path | Sequence[str | Path], 

123 output_dir: str | Path, 

124 format: str = "pdf", 

125 overwrite: bool = False, 

126 ) -> Path | Sequence[Path]: 

127 """Convert RTF file(s) to specified format using LibreOffice. 

128 

129 Performs the actual conversion of RTF files to the target format using 

130 LibreOffice in headless mode. Supports single file or batch conversion. 

131 

132 Args: 

133 input_files: Path to input RTF file or list of paths. Can be string 

134 or Path object. For batch conversion, provide a list/tuple. 

135 output_dir: Directory where converted files will be saved. Created 

136 if it doesn't exist. Can be string or Path object. 

137 format: Target format for conversion. Supported formats: 

138 

139 - `'pdf'`: Portable Document Format (default) 

140 - `'docx'`: Microsoft Word (Office Open XML) 

141 - `'doc'`: Microsoft Word 97-2003 

142 - `'html'`: HTML Document 

143 - `'odt'`: OpenDocument Text 

144 - `'txt'`: Plain Text 

145 overwrite: If `True`, overwrites existing files in output directory. 

146 If `False`, raises error if output file already exists. 

147 

148 Returns: 

149 Path | Sequence[Path]: For single file input, returns Path to the 

150 converted file. For multiple files, returns list of Paths. 

151 

152 Raises: 

153 FileExistsError: If output file exists and overwrite=False. 

154 RuntimeError: If LibreOffice conversion fails. 

155 

156 Examples: 

157 Single file conversion: 

158 ```python 

159 converter = LibreOfficeConverter() 

160 pdf_path = converter.convert( 

161 "report.rtf", 

162 output_dir="pdfs/", 

163 format="pdf" 

164 ) 

165 print(f"Created: {pdf_path}") 

166 ``` 

167 

168 Batch conversion with overwrite: 

169 ```python 

170 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"] 

171 pdf_paths = converter.convert( 

172 input_files=rtf_files, 

173 output_dir="output/pdfs/", 

174 format="pdf", 

175 overwrite=True 

176 ) 

177 for path in pdf_paths: 

178 print(f"Converted: {path}") 

179 ``` 

180 """ 

181 output_dir = Path(os.path.expanduser(str(output_dir))) 

182 if not output_dir.exists(): 

183 output_dir.mkdir(parents=True) 

184 

185 # Handle single input file 

186 if isinstance(input_files, (str, Path)): 

187 input_path = Path(os.path.expanduser(str(input_files))) 

188 if not input_path.exists(): 

189 raise FileNotFoundError(f"Input file not found: {input_path}.") 

190 return self._convert_single_file(input_path, output_dir, format, overwrite) 

191 

192 # Handle multiple input files 

193 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files] 

194 for path in input_paths: 

195 if not path.exists(): 

196 raise FileNotFoundError(f"Input file not found: {path}.") 

197 

198 return [ 

199 self._convert_single_file(input_path, output_dir, format, overwrite) 

200 for input_path in input_paths 

201 ] 

202 

203 def _convert_single_file( 

204 self, input_file: Path, output_dir: Path, format: str, overwrite: bool 

205 ) -> Path: 

206 """Convert a single file using LibreOffice.""" 

207 output_file = output_dir / f"{input_file.stem}.{format}" 

208 

209 if output_file.exists() and not overwrite: 

210 raise FileExistsError( 

211 f"Output file already exists: {output_file}. " 

212 "Use overwrite=True to force." 

213 ) 

214 

215 cmd = [ 

216 str(self.executable_path), 

217 "--invisible", 

218 "--headless", 

219 "--nologo", 

220 "--convert-to", 

221 format, 

222 "--outdir", 

223 str(output_dir), 

224 str(input_file), 

225 ] 

226 

227 try: 

228 result = subprocess.run(cmd, capture_output=True, text=True, check=True) 

229 

230 if not output_file.exists(): 

231 raise RuntimeError( 

232 f"Conversion failed: Output file not created.\n" 

233 f"Command output: {result.stdout}\n" 

234 f"Error output: {result.stderr}" 

235 ) 

236 

237 return output_file 

238 

239 except subprocess.CalledProcessError as e: 

240 raise RuntimeError( 

241 f"LibreOffice conversion failed:\n" 

242 f"Command output: {e.stdout}\n" 

243 f"Error output: {e.stderr}" 

244 ) from e