Coverage for src/rtflite/text_conversion/symbols.py: 100%

29 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 16:35 +0000

1""" 

2LaTeX symbol mapping functionality. 

3 

4This module provides a clean interface for mapping LaTeX commands to Unicode 

5characters. It organizes the symbols into logical categories for better 

6maintainability and readability. 

7""" 

8 

9from typing import Dict 

10 

11from ..dictionary.unicode_latex import latex_to_char, latex_to_unicode, unicode_to_int 

12 

13 

class LaTeXSymbolMapper:
    """
    Manages LaTeX to Unicode symbol mappings.

    This class wraps the imported LaTeX lookup tables and exposes helpers to
    convert a single command, test whether a command is known, list every
    known command, and group the known commands into display categories.
    """

    # Category membership tables. They are constants, so they are built once
    # at class-creation time instead of on every call to
    # get_commands_by_category(); frozenset keeps them immutable and gives
    # constant-time membership tests.
    _GREEK_LETTERS = frozenset(
        {
            "\\alpha",
            "\\beta",
            "\\gamma",
            "\\delta",
            "\\epsilon",
            "\\varepsilon",
            "\\zeta",
            "\\eta",
            "\\theta",
            "\\vartheta",
            "\\iota",
            "\\kappa",
            "\\varkappa",
            "\\lambda",
            "\\mu",
            "\\nu",
            "\\xi",
            "\\pi",
            "\\varpi",
            "\\rho",
            "\\varrho",
            "\\sigma",
            "\\varsigma",
            "\\tau",
            "\\upsilon",
            "\\phi",
            "\\varphi",
            "\\chi",
            "\\psi",
            "\\omega",
            "\\Gamma",
            "\\Delta",
            "\\Theta",
            "\\Lambda",
            "\\Xi",
            "\\Pi",
            "\\Sigma",
            "\\Upsilon",
            "\\Phi",
            "\\Psi",
            "\\Omega",
        }
    )

    _OPERATORS = frozenset(
        {
            "\\pm",
            "\\mp",
            "\\times",
            "\\div",
            "\\cdot",
            "\\sum",
            "\\prod",
            "\\int",
            "\\oint",
            "\\partial",
            "\\nabla",
            "\\infty",
            "\\propto",
            "\\approx",
            "\\equiv",
            "\\neq",
            "\\leq",
            "\\geq",
            "\\ll",
            "\\gg",
            "\\subset",
            "\\supset",
            "\\in",
            "\\notin",
            "\\cup",
            "\\cap",
            "\\setminus",
            "\\oplus",
            "\\otimes",
        }
    )

    _ACCENTS = frozenset(
        {
            "\\hat",
            "\\bar",
            "\\dot",
            "\\ddot",
            "\\dddot",
            "\\ddddot",
            "\\tilde",
            "\\grave",
            "\\acute",
            "\\check",
            "\\breve",
            "\\vec",
            "\\overline",
            "\\underline",
        }
    )

    def __init__(self):
        """Initialize the symbol mapper with the standard LaTeX mappings."""
        # The first two tables are stored on the instance but are not read by
        # any method of this class; they are kept so existing callers that
        # access these attributes keep working.
        self.latex_to_unicode = latex_to_unicode
        self.unicode_to_int = unicode_to_int
        # Direct command -> character table; the only one the methods use.
        self.latex_to_char = latex_to_char

    def get_unicode_char(self, latex_command: str) -> str:
        """
        Convert a single LaTeX command to its Unicode character.

        Args:
            latex_command: LaTeX command (e.g., "\\alpha", "\\pm", "\\mathbb{R}")

        Returns:
            The mapped character if the command is found, otherwise the
            original command string unchanged.

        Examples:
            The outputs below assume the imported table maps these commands
            to their usual Unicode characters.

            >>> mapper = LaTeXSymbolMapper()
            >>> mapper.get_unicode_char("\\alpha")
            'α'
            >>> mapper.get_unicode_char("\\pm")
            '±'
            >>> mapper.get_unicode_char("\\unknown")
            '\\\\unknown'
        """
        # One lookup; unknown commands fall through as-is.
        return self.latex_to_char.get(latex_command, latex_command)

    def has_mapping(self, latex_command: str) -> bool:
        """
        Check if a LaTeX command has a Unicode mapping.

        Args:
            latex_command: LaTeX command to check

        Returns:
            True if the command has a mapping, False otherwise
        """
        return latex_command in self.latex_to_char

    def get_all_supported_commands(self) -> list[str]:
        """
        Get a list of all supported LaTeX commands.

        Returns:
            A new list of every command in the mapping table, in the table's
            iteration order.
        """
        return list(self.latex_to_char)

    def get_commands_by_category(self) -> Dict[str, list[str]]:
        """
        Organize LaTeX commands by category for better understanding.

        Returns:
            A new dictionary with the keys "Greek Letters",
            "Mathematical Operators", "Blackboard Bold", "Accents" and
            "Other" (in that order). Each value lists the supported commands
            in that category, in the mapping table's iteration order.
            Categories with no commands map to an empty list.
        """
        categories: dict[str, list[str]] = {
            "Greek Letters": [],
            "Mathematical Operators": [],
            "Blackboard Bold": [],
            "Accents": [],
            "Other": [],
        }

        for command in self.latex_to_char:
            if command in self._GREEK_LETTERS:
                categories["Greek Letters"].append(command)
            elif command in self._OPERATORS:
                categories["Mathematical Operators"].append(command)
            elif "\\mathbb{" in command:
                # Substring test: any command containing "\mathbb{" counts,
                # whatever letter is inside the braces.
                categories["Blackboard Bold"].append(command)
            elif command in self._ACCENTS:
                categories["Accents"].append(command)
            else:
                categories["Other"].append(command)

        return categories
96 "\\lambda", 

97 "\\mu", 

98 "\\nu", 

99 "\\xi", 

100 "\\pi", 

101 "\\varpi", 

102 "\\rho", 

103 "\\varrho", 

104 "\\sigma", 

105 "\\varsigma", 

106 "\\tau", 

107 "\\upsilon", 

108 "\\phi", 

109 "\\varphi", 

110 "\\chi", 

111 "\\psi", 

112 "\\omega", 

113 "\\Gamma", 

114 "\\Delta", 

115 "\\Theta", 

116 "\\Lambda", 

117 "\\Xi", 

118 "\\Pi", 

119 "\\Sigma", 

120 "\\Upsilon", 

121 "\\Phi", 

122 "\\Psi", 

123 "\\Omega", 

124 } 

125 

126 operators = { 

127 "\\pm", 

128 "\\mp", 

129 "\\times", 

130 "\\div", 

131 "\\cdot", 

132 "\\sum", 

133 "\\prod", 

134 "\\int", 

135 "\\oint", 

136 "\\partial", 

137 "\\nabla", 

138 "\\infty", 

139 "\\propto", 

140 "\\approx", 

141 "\\equiv", 

142 "\\neq", 

143 "\\leq", 

144 "\\geq", 

145 "\\ll", 

146 "\\gg", 

147 "\\subset", 

148 "\\supset", 

149 "\\in", 

150 "\\notin", 

151 "\\cup", 

152 "\\cap", 

153 "\\setminus", 

154 "\\oplus", 

155 "\\otimes", 

156 } 

157 

158 accents = { 

159 "\\hat", 

160 "\\bar", 

161 "\\dot", 

162 "\\ddot", 

163 "\\dddot", 

164 "\\ddddot", 

165 "\\tilde", 

166 "\\grave", 

167 "\\acute", 

168 "\\check", 

169 "\\breve", 

170 "\\vec", 

171 "\\overline", 

172 "\\underline", 

173 } 

174 

175 categories: dict[str, list[str]] = { 

176 "Greek Letters": [], 

177 "Mathematical Operators": [], 

178 "Blackboard Bold": [], 

179 "Accents": [], 

180 "Other": [], 

181 } 

182 

183 # Optimized: use single dictionary and set lookups 

184 for command in self.latex_to_char.keys(): 

185 if command in greek_letters: 

186 categories["Greek Letters"].append(command) 

187 elif command in operators: 

188 categories["Mathematical Operators"].append(command) 

189 elif "\\mathbb{" in command: 

190 categories["Blackboard Bold"].append(command) 

191 elif command in accents: 

192 categories["Accents"].append(command) 

193 else: 

194 categories["Other"].append(command) 

195 

196 return categories