Coverage for src/rtflite/text_convert.py: 91%

23 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 16:35 +0000

1"""Text conversion utilities for LaTeX to Unicode mapping.""" 

2 

3import re 

4 

5from .dictionary.unicode_latex import latex_to_unicode, unicode_to_int 

6 

7 

def latex_to_unicode_char(latex_command: str) -> str:
    """Translate one LaTeX command into its Unicode character.

    Args:
        latex_command: LaTeX command to look up (e.g., "\\alpha", "\\pm").

    Returns:
        The mapped Unicode character when ``latex_command`` is a key of
        ``latex_to_unicode``; otherwise ``latex_command`` unchanged.
    """
    # Guard clause: commands without a table entry pass through untouched.
    if latex_command not in latex_to_unicode:
        return latex_command

    # Two-step lookup: the command maps to a key of ``unicode_to_int``,
    # which in turn yields the integer handed to ``chr``.
    code_point = unicode_to_int[latex_to_unicode[latex_command]]
    return chr(code_point)

22 

23 

def convert_latex_to_unicode(text: str) -> str:
    """Convert LaTeX commands in text to Unicode characters.

    A command is a backslash followed by ASCII letters, optionally followed
    by a single brace group (e.g. ``\\alpha``, ``\\alpha{}``, ``\\mathbb{R}``).
    Each match is resolved through ``latex_to_unicode_char``:

    1. The whole match, braces included, is looked up first, so table
       entries that contain braces take precedence.
    2. If that lookup leaves the text unchanged and a brace group is
       present, the bare command is looked up on its own and the brace
       group is kept verbatim after the result.

    Anything that cannot be resolved is left exactly as written.

    Args:
        text: Input text potentially containing LaTeX commands.

    Returns:
        Text with recognised LaTeX commands replaced by Unicode characters.
        Empty input is returned as-is.
    """
    if not text:
        return text

    # Group 1: the bare command (\name). Group 2: an optional {...} group
    # that contains no closing brace of its own.
    latex_pattern = r"(\\[a-zA-Z]+)(\{[^}]*\})?"

    def replace_latex(match: re.Match[str]) -> str:
        full_cmd = match.group(0)
        brace_group = match.group(2)

        converted = latex_to_unicode_char(full_cmd)
        # The lookup helper returns its argument unchanged when it has no
        # mapping, so a differing result means the full form was resolved.
        if converted != full_cmd or brace_group is None:
            return converted

        # Full form unknown: convert just the command and preserve the
        # brace group so no input characters are silently dropped.
        return latex_to_unicode_char(match.group(1)) + brace_group

    return re.sub(latex_pattern, replace_latex, text)

60 

61 

def text_convert(text: str | None, enable_conversion: bool = True) -> str | None:
    """Top-level text conversion entry point (described as matching r2rtf).

    Args:
        text: Input text; may be ``None`` or empty.
        enable_conversion: When false, ``text`` is returned untouched.

    Returns:
        The result of ``convert_latex_to_unicode(text)`` when conversion is
        enabled and ``text`` is non-empty; otherwise ``text`` itself
        (including ``None`` or an empty string).
    """
    # Only non-empty text with conversion switched on is worth processing.
    if enable_conversion and text:
        return convert_latex_to_unicode(text)

    return text