Coverage for src / rtflite / services / text_conversion_service.py: 93%

46 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-28 05:09 +0000

1""" 

2Text conversion service for the RTF encoding pipeline. 

3 

4This service provides a clean interface for text conversion operations 

5within the RTF document generation process. It integrates the text 

6conversion functionality with the broader service architecture. 

7""" 

8 

9from collections.abc import Mapping, Sequence 

10 

11from ..text_conversion import LaTeXSymbolMapper, TextConverter 

12 

13 

14class TextConversionService: 

15 """ 

16 Service for handling text conversion operations in RTF documents. 

17 

18 This service provides a unified interface for text conversion that 

19 can be used throughout the RTF encoding pipeline. It handles the 

20 conversion of LaTeX commands to Unicode characters with proper 

21 error handling and logging capabilities. 

22 """ 

23 

24 def __init__(self): 

25 """Initialize the text conversion service.""" 

26 self.converter = TextConverter() 

27 self.symbol_mapper = LaTeXSymbolMapper() 

28 

29 def convert_text_content( 

30 self, text: str | Sequence[str] | None, enable_conversion: bool = True 

31 ) -> str | Sequence[str] | None: 

32 """ 

33 Convert text content with LaTeX commands to Unicode. 

34 

35 This method handles various text input formats commonly found 

36 in RTF components and applies conversion consistently. 

37 

38 Args: 

39 text: Text content to convert (string, list of strings, or None) 

40 enable_conversion: Whether to enable LaTeX to Unicode conversion 

41 

42 Returns: 

43 Converted text in the same format as input 

44 

45 Examples: 

46 >>> service = TextConversionService() 

47 >>> service.convert_text_content("\\alpha test", True) 

48 "\\u03b1 test" 

49 

50 >>> service.convert_text_content(["\\alpha", "\\beta"], True) 

51 ["\\u03b1", "\\u03b2"] 

52 """ 

53 if not enable_conversion or text is None: 

54 return text 

55 

56 if isinstance(text, str): 

57 return self._convert_single_text(text) 

58 elif isinstance(text, list): 

59 return self._convert_text_list(text) 

60 else: 

61 # Handle other types by converting to string first 

62 return self._convert_single_text(str(text)) 

63 

64 def _convert_single_text(self, text: str) -> str: 

65 """ 

66 Convert a single text string. 

67 

68 Args: 

69 text: Text string to convert 

70 

71 Returns: 

72 Converted text string 

73 """ 

74 if not text: 

75 return text 

76 

77 try: 

78 return self.converter.convert_latex_to_unicode(text) 

79 except Exception as e: 

80 # Log the error but don't fail the conversion 

81 # In a production environment, this would use proper logging 

82 print(f"Warning: Text conversion failed for '{text}': {e}") 

83 return text 

84 

85 def _convert_text_list(self, text_list: Sequence[str]) -> list[str]: 

86 """ 

87 Convert a list of text strings. 

88 

89 Args: 

90 text_list: List of text strings to convert 

91 

92 Returns: 

93 List of converted text strings 

94 """ 

95 return [self._convert_single_text(item) for item in text_list] 

96 

97 def get_supported_symbols(self) -> Sequence[str]: 

98 """ 

99 Get a list of all supported LaTeX symbols. 

100 

101 Returns: 

102 List of supported LaTeX commands 

103 """ 

104 return self.symbol_mapper.get_all_supported_commands() 

105 

106 def get_symbol_categories(self) -> Mapping[str, Sequence[str]]: 

107 """ 

108 Get LaTeX symbols organized by category. 

109 

110 Returns: 

111 Dictionary mapping categories to symbol lists 

112 """ 

113 return self.symbol_mapper.get_commands_by_category() 

114 

115 def validate_latex_commands(self, text: str) -> Mapping[str, object]: 

116 """ 

117 Validate LaTeX commands in text and provide feedback. 

118 

119 This method analyzes text for LaTeX commands and reports 

120 which ones will be converted and which ones are unsupported. 

121 

122 Args: 

123 text: Text to validate 

124 

125 Returns: 

126 Dictionary with validation results 

127 """ 

128 if not text: 

129 return { 

130 "valid_commands": [], 

131 "invalid_commands": [], 

132 "validation_status": "empty_text", 

133 } 

134 

135 stats = self.converter.get_conversion_statistics(text) 

136 

137 # Extract valid commands from the stats (need to capture the converted 

138 # commands themselves) 

139 import re 

140 

141 latex_pattern = re.compile(r"\\[a-zA-Z]+(?:\{[^}]*\})?") 

142 all_commands = latex_pattern.findall(text) 

143 

144 valid_commands = [] 

145 for cmd in all_commands: 

146 if self.symbol_mapper.has_mapping(cmd): 

147 valid_commands.append(cmd) 

148 

149 return { 

150 "valid_commands": valid_commands, 

151 "invalid_commands": stats.get("unconverted", []), 

152 "total_commands": stats.get("total_commands", 0), 

153 "conversion_rate": stats.get("conversion_rate", 0), 

154 "validation_status": "analyzed", 

155 } 

156 

157 def convert_with_validation( 

158 self, text: str, enable_conversion: bool = True 

159 ) -> Mapping[str, object]: 

160 """ 

161 Convert text and return both result and validation information. 

162 

163 This method provides comprehensive information about the conversion 

164 process, useful for debugging and quality assurance. 

165 

166 Args: 

167 text: Text to convert 

168 enable_conversion: Whether to enable conversion 

169 

170 Returns: 

171 Dictionary with converted text and validation info 

172 """ 

173 if not enable_conversion: 

174 return { 

175 "original_text": text, 

176 "converted_text": text, 

177 "conversion_enabled": False, 

178 "validation": {"status": "conversion_disabled"}, 

179 } 

180 

181 validation = self.validate_latex_commands(text) 

182 converted_text = self.convert_text_content(text, enable_conversion) 

183 

184 return { 

185 "original_text": text, 

186 "converted_text": converted_text, 

187 "conversion_enabled": True, 

188 "validation": validation, 

189 "conversion_applied": converted_text != text, 

190 }