Coverage for src/rtflite/convert.py: 84%

62 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 16:35 +0000

1import os 

2import platform 

3import re 

4import subprocess 

5from collections.abc import Sequence 

6from pathlib import Path 

7 

8from packaging import version 

9 

10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION 

11 

12 

13class LibreOfficeConverter: 

14 """Convert RTF documents to other formats using LibreOffice. 

15 

16 Convert RTF files to various formats including PDF, DOCX, HTML, and others 

17 using LibreOffice in headless mode. 

18 

19 Requirements: 

20 - LibreOffice 7.3 or later must be installed. 

21 - Automatically finds LibreOffice in standard installation paths. 

22 - For custom installations, provide `executable_path` parameter. 

23 

24 Note: 

25 The converter runs LibreOffice in headless mode, so no GUI is required. 

26 This makes it suitable for server environments and automated workflows. 

27 """ 

28 

29 def __init__(self, executable_path: str | None = None): 

30 """Initialize converter with optional executable path. 

31 

32 Args: 

33 executable_path: Path to LibreOffice executable. If None, searches 

34 standard installation locations for each platform. 

35 

36 Raises: 

37 FileNotFoundError: If LibreOffice executable cannot be found. 

38 ValueError: If LibreOffice version is below minimum requirement. 

39 """ 

40 self.executable_path = executable_path or self._find_executable() 

41 if not self.executable_path: 

42 raise FileNotFoundError("Can't find LibreOffice executable.") 

43 

44 self._verify_version() 

45 

46 def _find_executable(self) -> str | None: 

47 """Find LibreOffice executable in default locations.""" 

48 system = platform.system() 

49 if system not in DEFAULT_PATHS: 

50 raise RuntimeError(f"Unsupported operating system: {system}.") 

51 

52 for path in DEFAULT_PATHS[system]: 

53 if os.path.isfile(path): 

54 return path 

55 return None 

56 

57 def _verify_version(self): 

58 """Verify LibreOffice version meets minimum requirement.""" 

59 try: 

60 result = subprocess.run( 

61 [self.executable_path, "--version"], 

62 capture_output=True, 

63 text=True, 

64 check=True, 

65 ) 

66 version_str = result.stdout.strip() 

67 # Extract version number (for example, "24.8.3.2" from the output) 

68 match = re.search(r"LibreOffice (\d+\.\d+)", version_str) 

69 if not match: 

70 raise ValueError( 

71 f"Can't parse LibreOffice version from: {version_str}." 

72 ) 

73 

74 current_version = version.parse(match.group(1)) 

75 min_version = version.parse(MIN_VERSION) 

76 

77 if current_version < min_version: 

78 raise RuntimeError( 

79 f"LibreOffice version {current_version} is below minimum required version {min_version}." 

80 ) 

81 except subprocess.CalledProcessError as e: 

82 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") 

83 

84 def convert( 

85 self, 

86 input_files: str | Path | Sequence[str | Path], 

87 output_dir: str | Path, 

88 format: str = "pdf", 

89 overwrite: bool = False, 

90 ) -> Path | Sequence[Path]: 

91 """Convert RTF file(s) to specified format using LibreOffice. 

92 

93 Performs the actual conversion of RTF files to the target format using 

94 LibreOffice in headless mode. Supports single file or batch conversion. 

95 

96 Args: 

97 input_files: Path to input RTF file or list of paths. Can be string 

98 or Path object. For batch conversion, provide a list/tuple. 

99 output_dir: Directory where converted files will be saved. Created 

100 if it doesn't exist. Can be string or Path object. 

101 format: Target format for conversion. Supported formats: 

102 - 'pdf': Portable Document Format (default) 

103 - 'docx': Microsoft Word (Office Open XML) 

104 - 'doc': Microsoft Word 97-2003 

105 - 'html': HTML Document 

106 - 'odt': OpenDocument Text 

107 - 'txt': Plain Text 

108 overwrite: If True, overwrites existing files in output directory. 

109 If False, raises error if output file already exists. 

110 

111 Returns: 

112 Path | Sequence[Path]: For single file input, returns Path to the 

113 converted file. For multiple files, returns list of Paths. 

114 

115 Raises: 

116 FileExistsError: If output file exists and overwrite=False. 

117 RuntimeError: If LibreOffice conversion fails. 

118 

119 Examples: 

120 Single file conversion: 

121 ```python 

122 converter = LibreOfficeConverter() 

123 pdf_path = converter.convert( 

124 "report.rtf", 

125 output_dir="pdfs/", 

126 format="pdf" 

127 ) 

128 print(f"Created: {pdf_path}") 

129 ``` 

130 

131 Batch conversion with overwrite: 

132 ```python 

133 rtf_files = ["report1.rtf", "report2.rtf", "report3.rtf"] 

134 pdf_paths = converter.convert( 

135 input_files=rtf_files, 

136 output_dir="output/pdfs/", 

137 format="pdf", 

138 overwrite=True 

139 ) 

140 for path in pdf_paths: 

141 print(f"Converted: {path}") 

142 ``` 

143 """ 

144 output_dir = Path(os.path.expanduser(str(output_dir))) 

145 if not output_dir.exists(): 

146 output_dir.mkdir(parents=True) 

147 

148 # Handle single input file 

149 if isinstance(input_files, (str, Path)): 

150 input_path = Path(os.path.expanduser(str(input_files))) 

151 if not input_path.exists(): 

152 raise FileNotFoundError(f"Input file not found: {input_path}.") 

153 return self._convert_single_file(input_path, output_dir, format, overwrite) 

154 

155 # Handle multiple input files 

156 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files] 

157 for path in input_paths: 

158 if not path.exists(): 

159 raise FileNotFoundError(f"Input file not found: {path}.") 

160 

161 return [ 

162 self._convert_single_file(input_path, output_dir, format, overwrite) 

163 for input_path in input_paths 

164 ] 

165 

166 def _convert_single_file( 

167 self, input_file: Path, output_dir: Path, format: str, overwrite: bool 

168 ) -> Path: 

169 """Convert a single file using LibreOffice.""" 

170 output_file = output_dir / f"{input_file.stem}.{format}" 

171 

172 if output_file.exists() and not overwrite: 

173 raise FileExistsError( 

174 f"Output file already exists: {output_file}. Use overwrite=True to force." 

175 ) 

176 

177 # executable_path is guaranteed to be non-None after __init__ 

178 assert self.executable_path is not None 

179 cmd = [ 

180 self.executable_path, 

181 "--invisible", 

182 "--headless", 

183 "--nologo", 

184 "--convert-to", 

185 format, 

186 "--outdir", 

187 str(output_dir), 

188 str(input_file), 

189 ] 

190 

191 try: 

192 result = subprocess.run(cmd, capture_output=True, text=True, check=True) 

193 

194 if not output_file.exists(): 

195 raise RuntimeError( 

196 f"Conversion failed: Output file not created.\n" 

197 f"Command output: {result.stdout}\n" 

198 f"Error output: {result.stderr}" 

199 ) 

200 

201 return output_file 

202 

203 except subprocess.CalledProcessError as e: 

204 raise RuntimeError( 

205 f"LibreOffice conversion failed:\n" 

206 f"Command output: {e.stdout}\n" 

207 f"Error output: {e.stderr}" 

208 )