-
-
Notifications
You must be signed in to change notification settings - Fork 589
Expand file tree
/
Copy pathstrings.ts
More file actions
168 lines (151 loc) · 4.76 KB
/
strings.ts
File metadata and controls
168 lines (151 loc) · 4.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import type { HighlighterGeneric, Position } from '@shikijs/types'
import { splitLines } from '@shikijs/primitive'
/**
 * Builds a pair of helpers that translate between a flat character offset
 * into `code` and a `{ line, character }` position.
 *
 * Line widths are taken from the strings returned by `splitLines(code, true)`
 * (presumably each line keeps its line ending, so widths sum to `code.length`
 * - confirm against `splitLines`).
 *
 * Neither helper validates its input: overflow/underflow are unchecked.
 *
 * @param code - The source text the offsets and positions refer to
 * @returns The split `lines` plus the `indexToPos` / `posToIndex` converters
 */
export function createPositionConverter(code: string): {
  lines: string[]
  indexToPos: (index: number) => Position
  posToIndex: (line: number, character: number) => number
} {
  const lines = splitLines(code, true).map(entry => entry[0])

  // Offset -> position: walk the lines, consuming each line's width.
  const indexToPos = (index: number): Position => {
    // The end-of-input offset is pinned to the end of the last line.
    if (index === code.length) {
      const last = lines.length - 1
      return { line: last, character: lines[last].length }
    }

    let remaining = index
    for (let row = 0; row < lines.length; row++) {
      const width = lines[row].length
      if (remaining < width)
        return { line: row, character: remaining }
      remaining -= width
    }
    // Ran past every line: report the leftover offset on a line past the end.
    return { line: lines.length, character: remaining }
  }

  // Position -> offset: sum the widths of all preceding lines, then add the column.
  const posToIndex = (line: number, character: number): number => {
    let offset = 0
    let row = 0
    while (row < line) {
      offset += lines[row].length
      row += 1
    }
    return offset + character
  }

  return { lines, indexToPos, posToIndex }
}
/**
 * Maps interpreter names found in a shebang line to the language id that is
 * reported for them. Interpreters without an entry are reported unchanged.
 */
const SHEBANG_INTERPRETER_ALIASES = new Map<string, string>([
  ['node', 'javascript'],
  ['python3', 'python'],
  ['rb', 'ruby'],
  ['sh', 'shell'],
  ['zsh', 'shell'],
])

/**
 * Derives a language id from a leading shebang line.
 * Handles e.g. `#!/usr/bin/env node` and `#!/bin/bash`.
 *
 * @param code - The code string whose first line is inspected
 * @returns The lower-cased (and possibly aliased) interpreter name, or
 * `undefined` when the code has no usable shebang
 */
function guessShebangLanguage(code: string): string | undefined {
  if (!code.startsWith('#!'))
    return undefined

  const firstLine = code.split('\n', 1)[0]
  // `trim()` also drops a trailing `\r` left over from CRLF line endings
  const parts = firstLine.slice(2).trim().split(/\s+/)

  let interpreter = parts[0].split('/').pop()
  if (interpreter === 'env') {
    // `env` only launches the real interpreter: take the first argument
    // that is not an option flag (e.g. skip `-S`)
    const target = parts.slice(1).find(part => part && !part.startsWith('-'))
    interpreter = target ? target.split('/').pop() : undefined
  }
  if (!interpreter)
    return undefined

  const name = interpreter.toLowerCase()
  return SHEBANG_INTERPRETER_ALIASES.get(name) ?? name
}

/**
 * Guess embedded languages from given code and highlighter.
 *
 * The code is scanned, in this order, for: `lang="..."` attributes, markdown
 * fences, LaTeX `\begin{...}` environments, `<script type|lang="...">` tags,
 * a leading shebang line, `language: lang-xxx` comments and `@lang xxx` tags.
 * Every detected name is lower-cased and trimmed; duplicates are dropped and
 * the first-seen order is kept.
 *
 * When highlighter is provided, only bundled languages will be included.
 *
 * @param code - The code string to analyze
 * @param _lang - The primary language of the code (currently unused)
 * @param highlighter - Optional highlighter instance to validate languages
 * @returns Array of detected language identifiers
 *
 * @example
 * ```ts
 * // Detects 'javascript' from Vue SFC
 * guessEmbeddedLanguages('<script lang="javascript">', undefined)
 *
 * // Detects 'python' from markdown code block
 * guessEmbeddedLanguages('```python\nprint("hi")\n```', undefined)
 * ```
 */
export function guessEmbeddedLanguages(
  code: string,
  _lang: string | undefined,
  highlighter?: HighlighterGeneric<any, any>,
): string[] {
  const langs = new Set<string>()

  // Records the first capture group of every match of `pattern`, normalized
  // to trimmed lower case and optionally post-processed by `refine`.
  const collect = (pattern: RegExp, refine?: (name: string) => string | undefined): void => {
    for (const match of code.matchAll(pattern)) {
      const name = match[1].toLowerCase().trim()
      const lang = refine ? refine(name) : name
      if (lang)
        langs.add(lang)
    }
  }

  // For HTML code blocks like Vue SFC, support both single and double quotes
  // Matches: lang="js", lang='ts', :lang="typescript", etc.
  // Spaces inside the quotes are tolerated (removed by the trim above)
  collect(/:?lang=["']([^"']+)["']/g)

  // For markdown code blocks, support both ``` and ~~~ fences
  // Matches: ```typescript, ~~~javascript, etc.
  collect(/(?:```|~~~)([\w-]+)/g)

  // For LaTeX environments
  // Matches: \begin{equation}, \begin{align}, etc.
  collect(/\\begin\{([\w-]+)\}/g)

  // For script tags in HTML/Vue
  // Matches: <script type="text/javascript">, <script lang="ts">, etc.
  // MIME types like 'text/javascript' are reduced to their last segment
  collect(
    /<script\s+(?:type|lang)=["']([^"']+)["']/gi,
    name => name.includes('/') ? name.split('/').pop() : name,
  )

  // For shebangs
  // Matches: #!/usr/bin/env node, #!/bin/bash, etc.
  const shebangLang = guessShebangLanguage(code)
  if (shebangLang)
    langs.add(shebangLang)

  // For common comments
  // Matches: <!-- language: lang-js --> (StackOverflow), @lang javascript (JSDoc), etc.
  collect(/language:\s*lang-([\w-]+)/g)
  collect(/@lang\s+([\w-]+)/g)

  const detected = Array.from(langs)
  if (!highlighter)
    return detected

  // Only include known languages. The own-property check keeps names that
  // merely exist on Object.prototype (e.g. `constructor`, `__proto__`)
  // from being treated as bundled languages.
  const bundle = highlighter.getBundledLanguages()
  return detected.filter(
    lang => Object.prototype.hasOwnProperty.call(bundle, lang) && Boolean(bundle[lang]),
  )
}