Coverage for src/rtflite/convert.py: 84%

62 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-17 05:16 +0000

1import os 

2import platform 

3import re 

4import subprocess 

5from collections.abc import Sequence 

6from pathlib import Path 

7 

8from packaging import version 

9 

10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION 

11 

12 

13class LibreOfficeConverter: 

14 """Convert RTF documents to other formats using LibreOffice. 

15 

16 Convert RTF files to various formats including PDF, DOCX, HTML, and others 

17 using LibreOffice in headless mode. 

18 

19 Requirements: 

20 - LibreOffice 7.3 or later must be installed. 

21 - Automatically finds LibreOffice in standard installation paths. 

22 - For custom installations, provide `executable_path` parameter. 

23 

24 Note: 

25 The converter runs LibreOffice in headless mode, so no GUI is required. 

26 This makes it suitable for server environments and automated workflows. 

27 """ 

28 

29 def __init__(self, executable_path: str | None = None): 

30 """Initialize converter with optional executable path. 

31 

32 Args: 

33 executable_path: Path to LibreOffice executable. If None, searches 

34 standard installation locations for each platform. 

35 

36 Raises: 

37 FileNotFoundError: If LibreOffice executable cannot be found. 

38 ValueError: If LibreOffice version is below minimum requirement. 

39 """ 

40 self.executable_path = executable_path or self._find_executable() 

41 if not self.executable_path: 

42 raise FileNotFoundError("Can't find LibreOffice executable.") 

43 

44 self._verify_version() 

45 

46 def _find_executable(self) -> str | None: 

47 """Find LibreOffice executable in default locations.""" 

48 system = platform.system() 

49 if system not in DEFAULT_PATHS: 

50 raise RuntimeError(f"Unsupported operating system: {system}.") 

51 

52 for path in DEFAULT_PATHS[system]: 

53 if os.path.isfile(path): 

54 return path 

55 return None 

56 

57 def _verify_version(self): 

58 """Verify LibreOffice version meets minimum requirement.""" 

59 try: 

60 result = subprocess.run( 

61 [self.executable_path, "--version"], 

62 capture_output=True, 

63 text=True, 

64 check=True, 

65 ) 

66 version_str = result.stdout.strip() 

67 # Extract version number (for example, "24.8.3.2" from the output) 

68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str) 

69 if not match: 

70 raise ValueError( 

71 f"Can't parse LibreOffice version from: {version_str}." 

72 ) 

73 

74 current_version = version.parse(match.group(1)) 

75 min_version = version.parse(MIN_VERSION) 

76 

77 if current_version < min_version: 

78 raise RuntimeError( 

79 "LibreOffice version " 

80 f"{current_version} is below minimum required " 

81 f"version {min_version}." 

82 ) 

83 except subprocess.CalledProcessError as e: 

84 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e 

85 

86 def convert( 

87 self, 

88 input_files: str | Path | Sequence[str | Path], 

89 output_dir: str | Path, 

90 format: str = "pdf", 

91 overwrite: bool = False, 

92 ) -> Path | Sequence[Path]: 

93 """Convert RTF file(s) to specified format using LibreOffice. 

94 

95 Performs the actual conversion of RTF files to the target format using 

96 LibreOffice in headless mode. Supports single file or batch conversion. 

97 

98 Args: 

99 input_files: Path to input RTF file or list of paths. Can be string 

100 or Path object. For batch conversion, provide a list/tuple. 

101 output_dir: Directory where converted files will be saved. Created 

102 if it doesn't exist. Can be string or Path object. 

103 format: Target format for conversion. Supported formats: 

104 - 'pdf': Portable Document Format (default) 

105 - 'docx': Microsoft Word (Office Open XML) 

106 - 'doc': Microsoft Word 97-2003 

107 - 'html': HTML Document 

108 - 'odt': OpenDocument Text 

109 - 'txt': Plain Text 

110 overwrite: If True, overwrites existing files in output directory. 

111 If False, raises error if output file already exists. 

112 

113 Returns: 

114 Path | Sequence[Path]: For single file input, returns Path to the 

115 converted file. For multiple files, returns list of Paths. 

116 

117 Raises: 

118 FileExistsError: If output file exists and overwrite=False. 

119 RuntimeError: If LibreOffice conversion fails. 

120 

121 Examples: 

122 Single file conversion: 

123 ```python 

124 converter = LibreOfficeConverter() 

125 pdf_path = converter.convert( 

126 "report.rtf", 

127 output_dir="pdfs/", 

128 format="pdf" 

129 ) 

130 print(f"Created: {pdf_path}") 

131 ``` 

132 

133 Batch conversion with overwrite: 

134 ```python 

135 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"] 

136 pdf_paths = converter.convert( 

137 input_files=rtf_files, 

138 output_dir="output/pdfs/", 

139 format="pdf", 

140 overwrite=True 

141 ) 

142 for path in pdf_paths: 

143 print(f"Converted: {path}") 

144 ``` 

145 """ 

146 output_dir = Path(os.path.expanduser(str(output_dir))) 

147 if not output_dir.exists(): 

148 output_dir.mkdir(parents=True) 

149 

150 # Handle single input file 

151 if isinstance(input_files, (str, Path)): 

152 input_path = Path(os.path.expanduser(str(input_files))) 

153 if not input_path.exists(): 

154 raise FileNotFoundError(f"Input file not found: {input_path}.") 

155 return self._convert_single_file(input_path, output_dir, format, overwrite) 

156 

157 # Handle multiple input files 

158 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files] 

159 for path in input_paths: 

160 if not path.exists(): 

161 raise FileNotFoundError(f"Input file not found: {path}.") 

162 

163 return [ 

164 self._convert_single_file(input_path, output_dir, format, overwrite) 

165 for input_path in input_paths 

166 ] 

167 

168 def _convert_single_file( 

169 self, input_file: Path, output_dir: Path, format: str, overwrite: bool 

170 ) -> Path: 

171 """Convert a single file using LibreOffice.""" 

172 output_file = output_dir / f"{input_file.stem}.{format}" 

173 

174 if output_file.exists() and not overwrite: 

175 raise FileExistsError( 

176 f"Output file already exists: {output_file}. " 

177 "Use overwrite=True to force." 

178 ) 

179 

180 # executable_path is guaranteed to be non-None after __init__ 

181 assert self.executable_path is not None 

182 cmd = [ 

183 self.executable_path, 

184 "--invisible", 

185 "--headless", 

186 "--nologo", 

187 "--convert-to", 

188 format, 

189 "--outdir", 

190 str(output_dir), 

191 str(input_file), 

192 ] 

193 

194 try: 

195 result = subprocess.run(cmd, capture_output=True, text=True, check=True) 

196 

197 if not output_file.exists(): 

198 raise RuntimeError( 

199 f"Conversion failed: Output file not created.\n" 

200 f"Command output: {result.stdout}\n" 

201 f"Error output: {result.stderr}" 

202 ) 

203 

204 return output_file 

205 

206 except subprocess.CalledProcessError as e: 

207 raise RuntimeError( 

208 f"LibreOffice conversion failed:\n" 

209 f"Command output: {e.stdout}\n" 

210 f"Error output: {e.stderr}" 

211 ) from e