Coverage for src/rtflite/text_convert.py: 91%
23 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 16:35 +0000
1"""Text conversion utilities for LaTeX to Unicode mapping."""
3import re
5from .dictionary.unicode_latex import latex_to_unicode, unicode_to_int
def latex_to_unicode_char(latex_command: str) -> str:
    """Translate one LaTeX command into its Unicode character.

    Args:
        latex_command: LaTeX command (e.g., "\\alpha", "\\pm")

    Returns:
        The mapped Unicode character, or ``latex_command`` itself when the
        command has no entry in ``latex_to_unicode``.
    """
    # Unknown commands pass through untouched so callers can tell that
    # nothing was converted.
    if latex_command not in latex_to_unicode:
        return latex_command

    # Two-step lookup: command -> intermediate code (presumably a hex code
    # point string; confirm against the dictionary module) -> integer code
    # point accepted by chr().
    code_point = unicode_to_int[latex_to_unicode[latex_command]]
    return chr(code_point)
def convert_latex_to_unicode(text: str) -> str:
    """Convert LaTeX commands in text to Unicode characters.

    Every backslash command (a backslash followed by ASCII letters,
    optionally followed by one ``{...}`` group) is looked up through
    ``latex_to_unicode_char``. The whole match, braces included, is tried
    first so braced entries such as "\\mathbb{R}" keep working. When that
    lookup fails and a brace group is present, the bare command is tried
    on its own and the brace group is kept verbatim after the converted
    character. Commands with no mapping at all are left untouched.

    Args:
        text: Input text potentially containing LaTeX commands

    Returns:
        Text with the recognised LaTeX commands replaced by Unicode
        characters. Falsy input (e.g. an empty string) is returned as-is.
    """
    if not text:
        return text

    # Group 1 is the bare command, group 2 the optional brace group. They
    # are captured separately so the bare command can be retried alone.
    latex_pattern = r"(\\[a-zA-Z]+)(\{[^}]*\})?"

    def replace_latex(match: re.Match) -> str:
        full_cmd = match.group(0)
        brace_group = match.group(2)

        # latex_to_unicode_char returns its argument unchanged when the
        # command is unknown, so inequality means the lookup succeeded.
        converted = latex_to_unicode_char(full_cmd)
        if converted != full_cmd or brace_group is None:
            return converted

        # The braced spelling is not mapped. Without this fallback the
        # swallowed brace group would hide a perfectly valid bare command
        # (e.g. "\\alpha{}" would never become the alpha character).
        base_cmd = match.group(1)
        base_converted = latex_to_unicode_char(base_cmd)
        if base_converted == base_cmd:
            return full_cmd
        return base_converted + brace_group

    return re.sub(latex_pattern, replace_latex, text)
62def text_convert(text: str | None, enable_conversion: bool = True) -> str | None:
63 """Main text conversion function matching r2rtf behavior.
65 Args:
66 text: Input text
67 enable_conversion: Whether to enable LaTeX to Unicode conversion
69 Returns:
70 Converted text
71 """
72 if not enable_conversion or not text:
73 return text
75 return convert_latex_to_unicode(text)