Coverage for src/rtflite/text_conversion/symbols.py: 100%
29 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
1"""
2LaTeX symbol mapping functionality.
4This module provides a clean interface for mapping LaTeX commands to Unicode
5characters. It organizes the symbols into logical categories for better
6maintainability and readability.
7"""
9from typing import Dict
11from ..dictionary.unicode_latex import latex_to_char, latex_to_unicode, unicode_to_int
class LaTeXSymbolMapper:
    """
    Manage LaTeX-to-Unicode symbol mappings.

    The mapper wraps the lookup tables imported from
    ``..dictionary.unicode_latex`` and exposes small query helpers:
    converting one command, testing whether a command is known, listing
    every known command, and grouping the known commands by category.

    All query methods read ``self.latex_to_char`` only.
    """

    # Category membership tables. They never change, so they are built once
    # at class-creation time instead of on every categorisation call.
    _GREEK_LETTERS = frozenset(
        {
            "\\alpha",
            "\\beta",
            "\\gamma",
            "\\delta",
            "\\epsilon",
            "\\varepsilon",
            "\\zeta",
            "\\eta",
            "\\theta",
            "\\vartheta",
            "\\iota",
            "\\kappa",
            "\\varkappa",
            "\\lambda",
            "\\mu",
            "\\nu",
            "\\xi",
            "\\pi",
            "\\varpi",
            "\\rho",
            "\\varrho",
            "\\sigma",
            "\\varsigma",
            "\\tau",
            "\\upsilon",
            "\\phi",
            "\\varphi",
            "\\chi",
            "\\psi",
            "\\omega",
            "\\Gamma",
            "\\Delta",
            "\\Theta",
            "\\Lambda",
            "\\Xi",
            "\\Pi",
            "\\Sigma",
            "\\Upsilon",
            "\\Phi",
            "\\Psi",
            "\\Omega",
        }
    )

    _OPERATORS = frozenset(
        {
            "\\pm",
            "\\mp",
            "\\times",
            "\\div",
            "\\cdot",
            "\\sum",
            "\\prod",
            "\\int",
            "\\oint",
            "\\partial",
            "\\nabla",
            "\\infty",
            "\\propto",
            "\\approx",
            "\\equiv",
            "\\neq",
            "\\leq",
            "\\geq",
            "\\ll",
            "\\gg",
            "\\subset",
            "\\supset",
            "\\in",
            "\\notin",
            "\\cup",
            "\\cap",
            "\\setminus",
            "\\oplus",
            "\\otimes",
        }
    )

    _ACCENTS = frozenset(
        {
            "\\hat",
            "\\bar",
            "\\dot",
            "\\ddot",
            "\\dddot",
            "\\ddddot",
            "\\tilde",
            "\\grave",
            "\\acute",
            "\\check",
            "\\breve",
            "\\vec",
            "\\overline",
            "\\underline",
        }
    )

    def __init__(self):
        """Initialize the symbol mapper with the standard LaTeX mappings."""
        # NOTE(review): the two tables below are not read by any method of
        # this class; presumably kept as public attributes for callers that
        # still do the two-step lookup -- confirm before removing.
        self.latex_to_unicode = latex_to_unicode
        self.unicode_to_int = unicode_to_int
        # Direct command -> character table; every query method uses it.
        self.latex_to_char = latex_to_char

    def get_unicode_char(self, latex_command: str) -> str:
        r"""
        Convert a single LaTeX command to its Unicode character.

        Args:
            latex_command: LaTeX command, e.g. ``\alpha``, ``\pm`` or
                ``\mathbb{R}``.

        Returns:
            The mapped character if the command is known, otherwise the
            original command unchanged.

        Examples:
            The outputs shown for known commands assume the bundled mapping
            table.

            >>> mapper = LaTeXSymbolMapper()
            >>> mapper.get_unicode_char("\\alpha")
            'α'
            >>> mapper.get_unicode_char("\\pm")
            '±'
            >>> mapper.get_unicode_char("\\unknown")
            '\\unknown'
        """
        # Falling back to the input lets callers pass unknown commands
        # through untouched instead of having to handle a KeyError.
        return self.latex_to_char.get(latex_command, latex_command)

    def has_mapping(self, latex_command: str) -> bool:
        """
        Check if a LaTeX command has a Unicode mapping.

        Args:
            latex_command: LaTeX command to check.

        Returns:
            True if the command has a mapping, False otherwise.
        """
        # Same table as get_unicode_char, so the two methods always agree.
        return latex_command in self.latex_to_char

    def get_all_supported_commands(self) -> list[str]:
        """
        Get a list of all supported LaTeX commands.

        Returns:
            A new list holding every key of the mapping table, in the
            table's iteration order.
        """
        return list(self.latex_to_char)

    def get_commands_by_category(self) -> dict[str, list[str]]:
        """
        Organize the supported LaTeX commands by category.

        Returns:
            A new dictionary with the keys "Greek Letters",
            "Mathematical Operators", "Blackboard Bold", "Accents" and
            "Other". Each value lists the matching commands in the mapping
            table's iteration order; a category with no match is an empty
            list.
        """
        # Built fresh on every call so callers may mutate the result freely.
        categories: dict[str, list[str]] = {
            "Greek Letters": [],
            "Mathematical Operators": [],
            "Blackboard Bold": [],
            "Accents": [],
            "Other": [],
        }

        for command in self.latex_to_char:
            categories[self._category_of(command)].append(command)

        return categories

    @classmethod
    def _category_of(cls, command: str) -> str:
        """Return the category name that a single command belongs to."""
        # The order of these checks is part of the behaviour: the first
        # matching rule wins.
        if command in cls._GREEK_LETTERS:
            return "Greek Letters"
        if command in cls._OPERATORS:
            return "Mathematical Operators"
        if "\\mathbb{" in command:
            return "Blackboard Bold"
        if command in cls._ACCENTS:
            return "Accents"
        return "Other"