Skip to content

Commit 3ff5ca2

Browse files
committed
add bidirectional renderMath: MathML → LaTeX conversion
renderMath() now auto-detects input format — pass LaTeX to get MathML, pass MathML to get LaTeX. Also exports mathmlToLatex() for direct use with DOM elements or MathML strings.
1 parent 3fef542 commit 3ff5ca2

File tree

5 files changed

+386
-14
lines changed

5 files changed

+386
-14
lines changed

README.md

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,26 @@ editor.value = "\\frac{1}{2}";
5050

5151
### 2. Headless API
5252

53-
If you just want to render LaTeX strings directly to valid MathML without the UI overhead:
53+
`renderMath` is bidirectional — it auto-detects the input format and converts it to the other one (LaTeX in → MathML out, MathML in → LaTeX out):
5454

5555
```javascript
5656
import { renderMath } from '@lbilharz/accessible-math-editor/core';
5757

58-
// Returns valid <math>...</math> XML string
59-
const xml = renderMath("\\sqrt{x}");
58+
// LaTeX → MathML
59+
const mathml = renderMath("\\sqrt{x}");
60+
// Returns: <math display="block" ...><msqrt><mi>x</mi></msqrt></math>
61+
62+
// MathML → LaTeX
63+
const latex = renderMath('<math><mfrac><mn>1</mn><mn>2</mn></mfrac></math>');
64+
// Returns: \frac{1}{2}
65+
```
66+
67+
You can also use `mathmlToLatex` directly for MathML DOM elements or strings:
68+
69+
```javascript
70+
import { mathmlToLatex } from '@lbilharz/accessible-math-editor';
71+
72+
const latex = mathmlToLatex(document.querySelector('math'));
6073
```
6174

6275
### 3. Engine API
@@ -98,6 +111,7 @@ This structural approach is more reliable than a text cursor in 2D math layout
98111

99112
```
100113
LaTeX string → Tokenizer → Parser (AST) → MathML renderer → Browser native math
114+
MathML string/DOM → mathmlToLatex → LaTeX string
101115
```
102116

103117
- **Framework-Agnostic** — Pure vanilla JS (`src/` compiles entirely decoupled)
@@ -112,7 +126,7 @@ npm run dev
112126
npm test
113127
```
114128

115-
131 tests across 7 test files covering tokenizer, parser, renderer, navigation stops, error detection, validation, and MathML export.
129+
155 tests across 9 test files covering tokenizer, parser, renderer, MathML-to-LaTeX conversion, navigation stops, error detection, validation, and MathML export.
116130

117131

118132

src/core.js

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,28 @@
11
import { tokenize } from './tokenizer.js';
22
import { Parser } from './parser.js';
33
import { toMathML } from './renderer.js';
4+
import { mathmlToLatex } from './mathmlToLatex.js';
45

56
/**
 * Bidirectional rendering wrapper.
 * - LaTeX string in → MathML string out
 * - MathML string in → LaTeX string out
 *
 * Direction is auto-detected on the trimmed input: it is treated as MathML
 * only when it starts with '<' immediately followed by something that can
 * open XML markup (a tag name start, '?' for an XML declaration, or '!' for
 * a comment/doctype). A bare less-than sign such as "< 1" or "<5" is valid
 * LaTeX and must not be handed to the XML parser.
 *
 * @param {string} input — LaTeX source or MathML markup
 * @returns {string} MathML markup for LaTeX input, LaTeX for MathML input,
 *   or '' when the input is empty or whitespace only
 */
export function renderMath(input) {
  if (!input || !input.trim()) return '';

  const trimmed = input.trim();

  if (/^<[A-Za-z_:?!]/.test(trimmed)) {
    // MathML → LaTeX
    return mathmlToLatex(trimmed);
  }

  // LaTeX → MathML
  const tokens = tokenize(trimmed);
  const parser = new Parser(tokens, trimmed.length);
  const ast = parser.parse();
  return `<math display="block" xmlns="http://www.w3.org/1998/Math/MathML">${toMathML(ast)}</math>`;
}

src/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,5 @@ export { toMathCoreXML } from './export.js';
55
export { getNavigableStops } from './cursor.js';
66
export { collectErrors } from './errors.js';
77
export { renderMath } from './core.js';
8+
export { mathmlToLatex } from './mathmlToLatex.js';
89
export { createMathEditor } from './ui/MathEditor.js';

src/mathmlToLatex.js

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
// mathmlToLatex.js — Convert MathML DOM or string to LaTeX
2+
3+
import { SYMBOLS, FUNCTIONS, LARGE_OPS } from './data.js';
4+
5+
// Build reverse lookup table: Unicode char → LaTeX command.
// The table has no prototype because it is indexed with arbitrary element
// text; on a plain {} a lookup such as "constructor" or "__proto__" would
// resolve to an inherited Object.prototype member instead of undefined.
const CHAR_TO_LATEX = Object.create(null);
// SYMBOLS is processed before LARGE_OPS, same as the order of the two
// original loops, so ties between equal-length commands resolve identically.
for (const table of [SYMBOLS, LARGE_OPS]) {
  for (const [cmd, ch] of Object.entries(table)) {
    const known = CHAR_TO_LATEX[ch];
    // Prefer shorter command names when duplicates exist
    if (!known || cmd.length < known.length) {
      CHAR_TO_LATEX[ch] = cmd;
    }
  }
}
18+
19+
/**
 * Public entry point: turn MathML into a LaTeX string.
 *
 * String input is parsed as XML with the global DOMParser; anything else is
 * assumed to already be a DOM node and is converted as-is.
 *
 * @param {string|Element} input — MathML markup string or a DOM <math> element
 * @returns {string} LaTeX string with surrounding whitespace removed
 * @throws {Error} when the markup string does not parse as XML
 */
export function mathmlToLatex(input) {
  if (typeof input !== 'string') {
    return convertNode(input).trim();
  }

  const parsed = new DOMParser().parseFromString(input, 'application/xml');
  const top = parsed.documentElement;

  // The XML parser reports failures in-band as a <parsererror> element,
  // either as the document element itself or nested inside it.
  if (top.tagName === 'parsererror' || top.querySelector('parsererror')) {
    throw new Error(`Invalid MathML: ${top.textContent}`);
  }

  return convertNode(top).trim();
}
39+
40+
/**
 * Convert one DOM node (and its subtree) to LaTeX.
 *
 * Text nodes yield their trimmed text, other non-element nodes yield ''.
 * Elements are dispatched on their lower-cased local name; elements without
 * a dedicated case simply have their children converted and concatenated.
 *
 * @param {Node} node — text node or MathML element
 * @returns {string} LaTeX fragment (may be empty)
 */
function convertNode(node) {
  if (node.nodeType === 3) { // text node
    return node.textContent.trim();
  }
  if (node.nodeType !== 1) return ''; // skip non-element nodes

  const tag = localName(node);

  switch (tag) {
    case 'math':
      return convertChildren(node);

    case 'semantics': {
      // If there's a LaTeX annotation, prefer it
      const annotation = node.querySelector('annotation[encoding="LaTeX"], annotation[encoding="application/x-tex"]');
      if (annotation) return annotation.textContent;
      // Otherwise convert the presentation MathML (first child)
      return node.firstElementChild ? convertNode(node.firstElementChild) : '';
    }

    case 'mrow':
      return convertChildren(node);

    case 'mn':
      return node.textContent.trim();

    case 'mi': {
      const text = node.textContent.trim();
      const variant = node.getAttribute('mathvariant');
      // Function names rendered as upright
      if (variant === 'normal' && FUNCTIONS.has(text)) {
        return '\\' + text;
      }
      // Check for special symbols (Greek, etc.)
      if (text.length === 1 && CHAR_TO_LATEX[text]) {
        return CHAR_TO_LATEX[text];
      }
      return text;
    }

    case 'mo': {
      const text = node.textContent.trim();
      if (CHAR_TO_LATEX[text]) return CHAR_TO_LATEX[text];
      return text;
    }

    case 'mtext':
      return '\\text{' + node.textContent + '}';

    case 'mspace':
      return '';

    case 'mfrac': {
      const children = elements(node);
      if (children.length < 2) return '\\frac{}{}';
      const num = convertNode(children[0]);
      const den = convertNode(children[1]);
      // A fraction drawn without a bar is emitted as a binomial coefficient
      const linethickness = node.getAttribute('linethickness');
      if (linethickness === '0') {
        return '\\binom{' + num + '}{' + den + '}';
      }
      return '\\frac{' + num + '}{' + den + '}';
    }

    case 'msqrt': {
      const body = convertChildren(node);
      return '\\sqrt{' + body + '}';
    }

    case 'mroot': {
      // Child order is radicand first, index second
      const children = elements(node);
      if (children.length < 2) return '\\sqrt{}';
      const body = convertNode(children[0]);
      const index = convertNode(children[1]);
      return '\\sqrt[' + index + ']{' + body + '}';
    }

    case 'msup': {
      const children = elements(node);
      if (children.length < 2) return '';
      const base = convertNode(children[0]);
      const sup = convertNode(children[1]);
      return wrapBase(base) + '^{' + sup + '}';
    }

    case 'msub': {
      const children = elements(node);
      if (children.length < 2) return '';
      const base = convertNode(children[0]);
      const sub = convertNode(children[1]);
      return wrapBase(base) + '_{' + sub + '}';
    }

    case 'msubsup': {
      const children = elements(node);
      if (children.length < 3) return '';
      const base = convertNode(children[0]);
      const sub = convertNode(children[1]);
      const sup = convertNode(children[2]);
      return wrapBase(base) + '_{' + sub + '}^{' + sup + '}';
    }

    case 'mover': {
      const children = elements(node);
      if (children.length < 2) return '';
      const body = convertNode(children[0]);
      const accent = children[1].textContent.trim();
      const cmd = OVER_ACCENTS[accent];
      if (cmd) return cmd + '{' + body + '}';
      // Not a known accent: convert the script itself so symbols inside it
      // become LaTeX commands instead of raw Unicode text.
      return '\\overset{' + convertNode(children[1]) + '}{' + body + '}';
    }

    case 'munder': {
      const children = elements(node);
      if (children.length < 2) return '';
      const body = convertNode(children[0]);
      const accent = children[1].textContent.trim();
      const cmd = UNDER_ACCENTS[accent];
      if (cmd) return cmd + '{' + body + '}';
      // Same reasoning as the mover fallback above
      return '\\underset{' + convertNode(children[1]) + '}{' + body + '}';
    }

    case 'munderover': {
      // Base with both an underscript and an overscript (e.g. a sum with
      // limits). Child order is base, under, over. Emitted in the same
      // sub/sup form as msubsup so the three parts stay distinguishable
      // rather than being flattened by the default branch.
      const children = elements(node);
      if (children.length < 3) return '';
      const base = convertNode(children[0]);
      const under = convertNode(children[1]);
      const over = convertNode(children[2]);
      return wrapBase(base) + '_{' + under + '}^{' + over + '}';
    }

    case 'mtable':
      return convertTable(node);

    case 'mtr':
      return elements(node).map(convertNode).join(' & ');

    case 'mtd':
      return convertChildren(node);

    case 'mpadded':
    case 'mstyle':
    case 'merror':
      return convertChildren(node);

    case 'annotation':
    case 'annotation-xml':
      return ''; // skip annotations (handled in semantics)

    default:
      // Unknown element — just convert children
      return convertChildren(node);
  }
}
186+
187+
// Overscript character → LaTeX accent command, used for <mover>.
// Built without a prototype because it is indexed with arbitrary element
// text; on a plain {} a key like "constructor" would resolve to an inherited
// member and be emitted as if it were a command.
// Each character appears once. The previous literal listed U+005E and U+00AF
// twice; for U+00AF the later '\\overline' entry silently overrode '\\bar',
// so '\\overline' is kept here to preserve runtime behaviour.
const OVER_ACCENTS = Object.freeze(Object.assign(Object.create(null), {
  '\u20D7': '\\vec',      // combining right arrow above
  '\u2192': '\\vec',      // rightwards arrow
  '\u0302': '\\hat',      // combining circumflex
  '\u005E': '\\hat',      // circumflex (^)
  '\u0303': '\\tilde',    // combining tilde
  '~': '\\tilde',
  '\u0304': '\\bar',      // combining macron
  '\u00AF': '\\overline', // macron
  '\u203E': '\\overline', // overline
  '\u0305': '\\overline', // combining overline
  '\u0307': '\\dot',      // combining dot above
  '\u0308': '\\ddot',     // combining diaeresis
}));
202+
203+
// Underscript character → LaTeX command, used for <munder>.
// Built without a prototype because it is indexed with arbitrary element
// text; on a plain {} a key like "constructor" would resolve to an inherited
// member and be emitted as if it were a command.
const UNDER_ACCENTS = Object.freeze(Object.assign(Object.create(null), {
  '\u0332': '\\underline', // combining low line
  '_': '\\underline',
}));
207+
208+
/**
 * Render an <mtable> as a LaTeX matrix environment.
 * Only <mtr> children count as rows and only <mtd> children of a row count
 * as cells; cells are joined with ' & ' and rows with ' \\ '.
 */
function convertTable(node) {
  const lines = [];
  for (const row of elements(node)) {
    if (localName(row) !== 'mtr') continue;

    const cells = [];
    for (const cell of elements(row)) {
      if (localName(cell) === 'mtd') {
        cells.push(convertNode(cell));
      }
    }
    lines.push(cells.join(' & '));
  }
  return `\\begin{matrix} ${lines.join(' \\\\ ')} \\end{matrix}`;
}
216+
217+
/**
 * Convert all child nodes and concatenate the results.
 *
 * A single space is inserted when a fragment that ends in a control word
 * (backslash followed by letters, e.g. \alpha or \sin) is directly followed
 * by a fragment that starts with a letter. Without it the two would fuse
 * into a different, usually undefined, command such as \alphax.
 *
 * @param {Node} node — parent whose childNodes are converted in order
 * @returns {string} concatenated LaTeX
 */
function convertChildren(node) {
  let result = '';
  // True when the text emitted so far ends in a control word
  let endsInControlWord = false;
  for (const child of node.childNodes) {
    const piece = convertNode(child);
    if (piece === '') continue; // empty fragments do not separate neighbours
    if (endsInControlWord && /^[A-Za-z]/.test(piece)) {
      result += ' ';
    }
    result += piece;
    endsInControlWord = /\\[A-Za-z]+$/.test(piece);
  }
  return result;
}
225+
226+
/**
 * Return the element children of a node as a real array.
 * Text and comment nodes are not part of `children`; a node without a
 * `children` collection yields an empty array.
 */
function elements(node) {
  const kids = node.children;
  return kids ? Array.from(kids) : [];
}
230+
231+
/**
 * Lower-cased element name with any namespace prefix removed.
 * Reads `localName` first, then `tagName`, and falls back to ''.
 */
function localName(el) {
  const qualified = el.localName || el.tagName || '';
  const unprefixed = qualified.replace(/^.*:/, '');
  return unprefixed.toLowerCase();
}
235+
236+
/**
 * Brace the base of a superscript/subscript when it is more than one token.
 *
 * In LaTeX a script attaches only to the immediately preceding token, so an
 * unbraced multi-character base such as "12" or "x_{1}" would either bind
 * the script to its last character or produce a double-script error.
 *
 * @param {string} base — already converted LaTeX for the base
 * @returns {string} base unchanged when it is empty, a single character or a
 *   single control sequence; otherwise the base wrapped in { }
 */
function wrapBase(base) {
  // Count code points so one astral-plane symbol still counts as one char
  if ([...base].length <= 1) return base;
  // Exactly one control sequence: a control word (\alpha) or control symbol (\%)
  if (/^\\(?:[A-Za-z]+|[^A-Za-z])$/.test(base)) return base;
  return '{' + base + '}';
}

0 commit comments

Comments
 (0)