Coverage for src / rtflite / convert.py: 84%

62 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-08 04:50 +0000

1import os 

2import platform 

3import re 

4import subprocess 

5from collections.abc import Sequence 

6from pathlib import Path 

7 

8from packaging import version 

9 

10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION 

11 

12 

13class LibreOfficeConverter: 

14 """Convert RTF documents to other formats using LibreOffice. 

15 

16 Convert RTF files to various formats including PDF, DOCX, HTML, and others 

17 using LibreOffice in headless mode. 

18 

19 Requirements: 

20 - LibreOffice 7.3 or later must be installed. 

21 - Automatically finds LibreOffice in standard installation paths. 

22 - For custom installations, provide `executable_path` parameter. 

23 

24 Note: 

25 The converter runs LibreOffice in headless mode, so no GUI is required. 

26 This makes it suitable for server environments and automated workflows. 

27 """ 

28 

29 def __init__(self, executable_path: str | None = None): 

30 """Initialize converter with optional executable path. 

31 

32 Args: 

33 executable_path: Path to LibreOffice executable. If None, searches 

34 standard installation locations for each platform. 

35 

36 Raises: 

37 FileNotFoundError: If LibreOffice executable cannot be found. 

38 ValueError: If LibreOffice version is below minimum requirement. 

39 """ 

40 self.executable_path = executable_path or self._find_executable() 

41 if not self.executable_path: 

42 raise FileNotFoundError("Can't find LibreOffice executable.") 

43 

44 self._verify_version() 

45 

46 def _find_executable(self) -> str | None: 

47 """Find LibreOffice executable in default locations.""" 

48 system = platform.system() 

49 if system not in DEFAULT_PATHS: 

50 raise RuntimeError(f"Unsupported operating system: {system}.") 

51 

52 for path in DEFAULT_PATHS[system]: 

53 if os.path.isfile(path): 

54 return path 

55 return None 

56 

57 def _verify_version(self): 

58 """Verify LibreOffice version meets minimum requirement.""" 

59 try: 

60 result = subprocess.run( 

61 [self.executable_path, "--version"], 

62 capture_output=True, 

63 text=True, 

64 check=True, 

65 ) 

66 version_str = result.stdout.strip() 

67 # Extract version number (for example, "24.8.3.2" from the output) 

68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str) 

69 if not match: 

70 raise ValueError( 

71 f"Can't parse LibreOffice version from: {version_str}." 

72 ) 

73 

74 current_version = version.parse(match.group(1)) 

75 min_version = version.parse(MIN_VERSION) 

76 

77 if current_version < min_version: 

78 raise RuntimeError( 

79 "LibreOffice version " 

80 f"{current_version} is below minimum required " 

81 f"version {min_version}." 

82 ) 

83 except subprocess.CalledProcessError as e: 

84 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") from e 

85 

86 def convert( 

87 self, 

88 input_files: str | Path | Sequence[str | Path], 

89 output_dir: str | Path, 

90 format: str = "pdf", 

91 overwrite: bool = False, 

92 ) -> Path | Sequence[Path]: 

93 """Convert RTF file(s) to specified format using LibreOffice. 

94 

95 Performs the actual conversion of RTF files to the target format using 

96 LibreOffice in headless mode. Supports single file or batch conversion. 

97 

98 Args: 

99 input_files: Path to input RTF file or list of paths. Can be string 

100 or Path object. For batch conversion, provide a list/tuple. 

101 output_dir: Directory where converted files will be saved. Created 

102 if it doesn't exist. Can be string or Path object. 

103 format: Target format for conversion. Supported formats: 

104 

105 - `'pdf'`: Portable Document Format (default) 

106 - `'docx'`: Microsoft Word (Office Open XML) 

107 - `'doc'`: Microsoft Word 97-2003 

108 - `'html'`: HTML Document 

109 - `'odt'`: OpenDocument Text 

110 - `'txt'`: Plain Text 

111 overwrite: If `True`, overwrites existing files in output directory. 

112 If `False`, raises error if output file already exists. 

113 

114 Returns: 

115 Path | Sequence[Path]: For single file input, returns Path to the 

116 converted file. For multiple files, returns list of Paths. 

117 

118 Raises: 

119 FileExistsError: If output file exists and overwrite=False. 

120 RuntimeError: If LibreOffice conversion fails. 

121 

122 Examples: 

123 Single file conversion: 

124 ```python 

125 converter = LibreOfficeConverter() 

126 pdf_path = converter.convert( 

127 "report.rtf", 

128 output_dir="pdfs/", 

129 format="pdf" 

130 ) 

131 print(f"Created: {pdf_path}") 

132 ``` 

133 

134 Batch conversion with overwrite: 

135 ```python 

136 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"] 

137 pdf_paths = converter.convert( 

138 input_files=rtf_files, 

139 output_dir="output/pdfs/", 

140 format="pdf", 

141 overwrite=True 

142 ) 

143 for path in pdf_paths: 

144 print(f"Converted: {path}") 

145 ``` 

146 """ 

147 output_dir = Path(os.path.expanduser(str(output_dir))) 

148 if not output_dir.exists(): 

149 output_dir.mkdir(parents=True) 

150 

151 # Handle single input file 

152 if isinstance(input_files, (str, Path)): 

153 input_path = Path(os.path.expanduser(str(input_files))) 

154 if not input_path.exists(): 

155 raise FileNotFoundError(f"Input file not found: {input_path}.") 

156 return self._convert_single_file(input_path, output_dir, format, overwrite) 

157 

158 # Handle multiple input files 

159 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files] 

160 for path in input_paths: 

161 if not path.exists(): 

162 raise FileNotFoundError(f"Input file not found: {path}.") 

163 

164 return [ 

165 self._convert_single_file(input_path, output_dir, format, overwrite) 

166 for input_path in input_paths 

167 ] 

168 

169 def _convert_single_file( 

170 self, input_file: Path, output_dir: Path, format: str, overwrite: bool 

171 ) -> Path: 

172 """Convert a single file using LibreOffice.""" 

173 output_file = output_dir / f"{input_file.stem}.{format}" 

174 

175 if output_file.exists() and not overwrite: 

176 raise FileExistsError( 

177 f"Output file already exists: {output_file}. " 

178 "Use overwrite=True to force." 

179 ) 

180 

181 # executable_path is guaranteed to be non-None after __init__ 

182 assert self.executable_path is not None 

183 cmd = [ 

184 self.executable_path, 

185 "--invisible", 

186 "--headless", 

187 "--nologo", 

188 "--convert-to", 

189 format, 

190 "--outdir", 

191 str(output_dir), 

192 str(input_file), 

193 ] 

194 

195 try: 

196 result = subprocess.run(cmd, capture_output=True, text=True, check=True) 

197 

198 if not output_file.exists(): 

199 raise RuntimeError( 

200 f"Conversion failed: Output file not created.\n" 

201 f"Command output: {result.stdout}\n" 

202 f"Error output: {result.stderr}" 

203 ) 

204 

205 return output_file 

206 

207 except subprocess.CalledProcessError as e: 

208 raise RuntimeError( 

209 f"LibreOffice conversion failed:\n" 

210 f"Command output: {e.stdout}\n" 

211 f"Error output: {e.stderr}" 

212 ) from e