Coverage for src/rtflite/convert.py: 36%

61 statements  

« prev     ^ index     » next       coverage.py v7.6.10, created at 2025-02-03 15:40 +0000

1import os 

2import platform 

3import re 

4import subprocess 

5from collections.abc import Sequence 

6from pathlib import Path 

7 

8from packaging import version 

9 

10from .dictionary.libreoffice import DEFAULT_PATHS, MIN_VERSION 

11 

12 

13class LibreOfficeConverter: 

14 """LibreOffice-based document converter.""" 

15 

16 def __init__(self, executable_path: str | None = None): 

17 """Initialize converter with optional executable path.""" 

18 self.executable_path = executable_path or self._find_executable() 

19 if not self.executable_path: 

20 raise FileNotFoundError("Can't find LibreOffice executable.") 

21 

22 self._verify_version() 

23 

24 def _find_executable(self) -> str | None: 

25 """Find LibreOffice executable in default locations.""" 

26 system = platform.system() 

27 if system not in DEFAULT_PATHS: 

28 raise RuntimeError(f"Unsupported operating system: {system}.") 

29 

30 for path in DEFAULT_PATHS[system]: 

31 if os.path.isfile(path): 

32 return path 

33 return None 

34 

35 def _verify_version(self): 

36 """Verify LibreOffice version meets minimum requirement.""" 

37 try: 

38 result = subprocess.run( 

39 [self.executable_path, "--version"], 

40 capture_output=True, 

41 text=True, 

42 check=True, 

43 ) 

44 version_str = result.stdout.strip() 

45 # Extract version number (for example, "24.8.3.2" from the output) 

46 match = re.search(r"LibreOffice (\d+\.\d+)", version_str) 

47 if not match: 

48 raise ValueError( 

49 f"Can't parse LibreOffice version from: {version_str}." 

50 ) 

51 

52 current_version = version.parse(match.group(1)) 

53 min_version = version.parse(MIN_VERSION) 

54 

55 if current_version < min_version: 

56 raise RuntimeError( 

57 f"LibreOffice version {current_version} is below minimum required version {min_version}." 

58 ) 

59 except subprocess.CalledProcessError as e: 

60 raise RuntimeError(f"Failed to get LibreOffice version: {e}.") 

61 

62 def convert( 

63 self, 

64 input_files: str | Path | Sequence[str | Path], 

65 output_dir: str | Path, 

66 format: str = "pdf", 

67 overwrite: bool = False, 

68 ) -> Path | Sequence[Path]: 

69 """ 

70 Convert RTF file(s) to specified format using LibreOffice. 

71 

72 Args: 

73 input_files: Path to input RTF file or list of paths. 

74 output_dir: Directory for output files. 

75 format: Output format (`'pdf'`, `'docx'`, or `'html'`). 

76 overwrite: Whether to overwrite existing output files. 

77 

78 Returns: 

79 Path to converted file, or list of paths for multiple files. 

80 """ 

81 output_dir = Path(os.path.expanduser(str(output_dir))) 

82 if not output_dir.exists(): 

83 output_dir.mkdir(parents=True) 

84 

85 # Handle single input file 

86 if isinstance(input_files, (str, Path)): 

87 input_path = Path(os.path.expanduser(str(input_files))) 

88 if not input_path.exists(): 

89 raise FileNotFoundError(f"Input file not found: {input_path}.") 

90 return self._convert_single_file(input_path, output_dir, format, overwrite) 

91 

92 # Handle multiple input files 

93 input_paths = [Path(os.path.expanduser(str(f))) for f in input_files] 

94 for path in input_paths: 

95 if not path.exists(): 

96 raise FileNotFoundError(f"Input file not found: {path}.") 

97 

98 return [ 

99 self._convert_single_file(input_path, output_dir, format, overwrite) 

100 for input_path in input_paths 

101 ] 

102 

103 def _convert_single_file( 

104 self, input_file: Path, output_dir: Path, format: str, overwrite: bool 

105 ) -> Path: 

106 """Convert a single file using LibreOffice.""" 

107 output_file = output_dir / f"{input_file.stem}.{format}" 

108 

109 if output_file.exists() and not overwrite: 

110 raise FileExistsError( 

111 f"Output file already exists: {output_file}. Use overwrite=True to force." 

112 ) 

113 

114 cmd = [ 

115 self.executable_path, 

116 "--invisible", 

117 "--headless", 

118 "--nologo", 

119 "--convert-to", 

120 format, 

121 "--outdir", 

122 str(output_dir), 

123 str(input_file), 

124 ] 

125 

126 try: 

127 result = subprocess.run(cmd, capture_output=True, text=True, check=True) 

128 

129 if not output_file.exists(): 

130 raise RuntimeError( 

131 f"Conversion failed: Output file not created.\n" 

132 f"Command output: {result.stdout}\n" 

133 f"Error output: {result.stderr}" 

134 ) 

135 

136 return output_file 

137 

138 except subprocess.CalledProcessError as e: 

139 raise RuntimeError( 

140 f"LibreOffice conversion failed:\n" 

141 f"Command output: {e.stdout}\n" 

142 f"Error output: {e.stderr}" 

143 )