Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 18 additions & 4 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,16 @@ export async function run(
process.stdin.on('error', reject)
})

const lang = options.lang || 'text'
const { codeToHtml, guessEmbeddedLanguages } = await import('shiki')
let lang = (options.lang as string | undefined)?.toLowerCase()
if (!lang) {
const guessed = guessEmbeddedLanguages(content, undefined)
if (guessed.length > 0)
lang = guessed[0]
}
lang ||= 'text'

if (options.format === 'html') {
const { codeToHtml } = await import('shiki')
log(await codeToHtml(content, {
lang: lang as BundledLanguage,
theme: options.theme,
Expand All @@ -100,11 +107,18 @@ export async function run(
return
}

const { codeToHtml, guessEmbeddedLanguages } = await import('shiki')
const codes = await Promise.all(files.map(async (path) => {
const { content, ext } = await readSource(path)
const lang = (options.lang || ext).toLowerCase()
let lang = (options.lang || ext)?.toLowerCase()
if (!lang || lang === 'text') {
const guessed = guessEmbeddedLanguages(content, undefined)
if (guessed.length > 0)
lang = guessed[0]
}
lang ||= 'text'

if (options.format === 'html') {
const { codeToHtml } = await import('shiki')
return await codeToHtml(content, {
lang: lang as BundledLanguage,
theme: options.theme,
Expand Down
31 changes: 31 additions & 0 deletions packages/cli/test/cli.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { Buffer } from 'node:buffer'
import fs from 'node:fs/promises'
import path from 'node:path'
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
Expand Down Expand Up @@ -174,4 +175,34 @@ describe('run', () => {
expect(output).toContain('javascript')
expect(output).toContain('python')
})

describe('stdin', () => {
  /**
   * Runs the CLI with no file arguments while `process.stdin` is replaced by
   * a mock stream, and checks that exactly one chunk of output is logged
   * containing the source text.
   */
  it('reads from stdin and guesses language', async () => {
    const output: string[] = []
    const content = '#!/usr/bin/env node\nconsole.log("hi")'

    // Stand-in for a piped stream: as soon as a `data` or `end` listener is
    // registered it is invoked synchronously, replaying one chunk and then
    // end-of-stream. Other events (e.g. `error`) are registered but never fired.
    const mockStdin = {
      isTTY: false,
      on: vi.fn((event, handler) => {
        if (event === 'data')
          handler(Buffer.from(content))
        if (event === 'end')
          handler()
        return mockStdin
      }),
    }

    const originalStdin = process.stdin
    Object.defineProperty(process, 'stdin', { value: mockStdin, configurable: true })

    try {
      await run(['node', 'shiki'], msg => output.push(msg))

      expect(output.length).toBe(1)
      // Colors are not asserted here (that would need more setup); this only
      // verifies that the run did not throw and that the source text made it
      // into the output.
      expect(output[0]).toContain('console')
    }
    finally {
      // Restore the real stdin even when `run` rejects or an assertion fails,
      // so the mock cannot leak into tests that execute afterwards.
      Object.defineProperty(process, 'stdin', { value: originalStdin, configurable: true })
    }
  })
})
})
37 changes: 31 additions & 6 deletions packages/core/src/highlight/code-to-tokens.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import type { CodeOptionsMultipleThemes, CodeToTokensOptions, GrammarState, ShikiPrimitive, StringLiteralUnion, ThemedToken, ThemeRegistrationAny, TokensResult } from '@shikijs/types'
import type { CodeOptionsMultipleThemes, CodeToTokensOptions, GrammarState, ShikiPrimitive, StringLiteralUnion, ThemedToken, ThemedTokenWithVariants, ThemeRegistrationAny, TokensResult } from '@shikijs/types'
import { codeToTokensWithThemes, getLastGrammarStateFromMap, setLastGrammarStateToMap } from '@shikijs/primitive'
import { ShikiError } from '@shikijs/types'
import { applyColorReplacements, flatTokenVariants, resolveColorReplacements } from '../utils'
import { DEFAULT_COLOR_LIGHT_DARK } from '../utils/constants'
import { tokenizeAnsiWithTheme } from './code-to-tokens-ansi'
import { codeToTokensBase } from './code-to-tokens-base'

/**
Expand Down Expand Up @@ -38,11 +39,35 @@ export function codeToTokens(
if (themes.length === 0)
throw new ShikiError('`themes` option must not be empty')

const themeTokens = codeToTokensWithThemes(
primitive,
code,
options,
)
const lang = primitive.resolveLangAlias(options.lang || 'text')
let themeTokens: any[][]
if (lang === 'ansi') {
themeTokens = themes.map((t) => {
const { theme } = primitive.setTheme(t.theme)
return tokenizeAnsiWithTheme(theme, code, options)
})

// Align tokens (they should already be aligned for ANSI, but we use the merged format)
themeTokens = themeTokens[0].map((line, lineIdx) => line.map((_token: ThemedToken, tokenIdx: number) => {
const mergedToken: ThemedTokenWithVariants = {
content: _token.content,
variants: {},
offset: _token.offset,
}
themeTokens.forEach((t, themeIdx) => {
const { content: _, offset: __, ...styles } = t[lineIdx][tokenIdx]
mergedToken.variants[themes[themeIdx].color] = styles
})
return mergedToken
}))
}
else {
themeTokens = codeToTokensWithThemes(
primitive,
code,
options,
)
}

grammarState = getLastGrammarStateFromMap(themeTokens)

Expand Down
54 changes: 53 additions & 1 deletion packages/core/src/utils/strings.test.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@
/* eslint-disable style/no-tabs */
import { describe, expect, it } from 'vitest'
import { guessEmbeddedLanguages, splitLines } from '.'
import { createPositionConverter, guessEmbeddedLanguages, splitLines } from '.'

describe('createPositionConverter', () => {
  // Exercises both directions of the converter on a two-line input that ends
  // with a trailing newline (so a third, empty line is expected).
  it('basic conversion', () => {
    const source = 'abc\ndef\n'
    const positions = createPositionConverter(source)

    expect(positions.lines).toEqual(['abc\n', 'def\n', ''])

    // [absolute index, expected line, expected character]
    const indexCases: [number, number, number][] = [
      [0, 0, 0],
      [2, 0, 2],
      [4, 1, 0],
      [8, 2, 0],
    ]
    for (const [index, line, character] of indexCases)
      expect(positions.indexToPos(index)).toEqual({ line, character })

    // [line, character, expected absolute index]
    const positionCases: [number, number, number][] = [
      [0, 0, 0],
      [0, 2, 2],
      [1, 0, 4],
      [1, 4, 8],
    ]
    for (const [line, character, index] of positionCases)
      expect(positions.posToIndex(line, character)).toBe(index)
  })
})

describe('splitLines', () => {
it('splitLines', () => {
Expand Down Expand Up @@ -197,4 +215,38 @@ print("hello")
expect(langs).toContain('javascript')
expect(langs).toContain('python')
})

it('detects languages from shebangs', () => {
  // [first line of the script, language expected among the guesses]
  const recognised: [string, string][] = [
    ['#!/usr/bin/node', 'javascript'],
    ['#!/bin/bash', 'bash'],
    ['#!/usr/bin/env python3', 'python'],
    ['#!/usr/bin/env -S ts-node --foo', 'ts-node'],
    ['#!/usr/bin/env -S node --inspect', 'javascript'],
    ['#!/bin/zsh', 'shell'],
  ]
  for (const [shebang, expectedLang] of recognised)
    expect(guessEmbeddedLanguages(shebang, undefined)).toContain(expectedLang)

  // Inputs that must yield no guess at all.
  const unrecognised = [
    '#!/usr/bin/env',
    '#!',
    ' #!/bin/bash', // Must be at start
  ]
  for (const shebang of unrecognised)
    expect(guessEmbeddedLanguages(shebang, undefined)).toEqual([])
})

it('filters languages with highlighter', () => {
  // Stub exposing only the one method this test relies on; it reports
  // `javascript` and `python` as the bundled languages.
  function getBundledLanguages() {
    return {
      javascript: {},
      python: {},
    }
  }
  const highlighterStub: any = { getBundledLanguages }

  // Two fenced blocks: one in a bundled language, one (`rust`) that is not.
  const fenced = '```javascript\n```\n```rust\n```'
  const guesses = guessEmbeddedLanguages(fenced, undefined, highlighterStub)

  expect(guesses).toContain('javascript')
  expect(guesses).not.toContain('rust')
})

it('detects languages from comments', () => {
  // [snippet, language expected among the guesses]; `undefined` means the
  // snippet must produce an empty result.
  const cases: [string, string | undefined][] = [
    ['<!-- language: lang-js -->', 'js'],
    ['// @lang typescript', 'typescript'],
    ['/** @lang python */', 'python'],
    ['<!-- language: lang- -->', undefined],
    ['// @lang ', undefined],
    ['<!-- language: lang-JS-variant -->', 'js-variant'],
  ]

  for (const [snippet, expectedLang] of cases) {
    const guesses = guessEmbeddedLanguages(snippet, undefined)
    if (expectedLang === undefined)
      expect(guesses).toEqual([])
    else
      expect(guesses).toContain(expectedLang)
  }
})
})
48 changes: 48 additions & 0 deletions packages/core/src/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,54 @@ export function guessEmbeddedLanguages(
langs.add(lang)
}

// For shebangs
// Matches: #!/usr/bin/env node, #!/bin/bash, etc.
if (code.startsWith('#!')) {
const firstLine = code.split('\n', 1)[0]
const parts = firstLine.slice(2).trim().split(/\s+/)
if (parts.length > 0) {
let lang = parts[0].split('/').pop()
if (lang === 'env') {
lang = undefined
// Find first part that doesn't start with '-'
for (let i = 1; i < parts.length; i++) {
if (parts[i] && !parts[i].startsWith('-')) {
lang = parts[i].split('/').pop()
break
}
}
}

if (lang) {
lang = lang.toLowerCase()
// Map common executable names to Shiki aliases
if (lang === 'node')
lang = 'javascript'
else if (lang === 'python3')
lang = 'python'
else if (lang === 'rb')
lang = 'ruby'
else if (lang === 'sh' || lang === 'zsh')
lang = 'shell'

langs.add(lang)
}
}
}

// For common comments
// Matches: <!-- language: lang-js --> (StackOverflow), @lang javascript (JSDoc), etc.
for (const match of code.matchAll(/language:\s*lang-([\w-]+)/g)) {
const lang = match[1].toLowerCase().trim()
if (lang)
langs.add(lang)
}
for (const match of code.matchAll(/@lang\s+([\w-]+)/g)) {
const lang = match[1].toLowerCase().trim()
if (lang)
langs.add(lang)
}

if (!highlighter)
return Array.from(langs)

Expand Down
43 changes: 43 additions & 0 deletions packages/core/test/tokens.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,3 +197,46 @@ it('colorsRendering none', async () => {
}),
).toMatchSnapshot('colorsRendering none')
})

it('ansi with multiple themes', async () => {
  // No grammars are loaded (`langs: []`); only the two themes are registered.
  using primitive = await createShikiPrimitiveAsync({
    themes: [
      import('@shikijs/themes/vitesse-light'),
      import('@shikijs/themes/vitesse-dark'),
    ],
    langs: [],
    engine: createJavaScriptRegexEngine(),
  })

  // The word "red" wrapped in SGR 31 (set foreground) and SGR 0 (reset).
  const ansiInput = '\x1B[31mred\x1B[0m'
  const dualThemes = {
    light: 'vitesse-light',
    dark: 'vitesse-dark',
  }
  const rendered = codeToHtml(primitive, ansiInput, {
    lang: 'ansi',
    themes: dualThemes,
  })

  // Expect an inline color plus a `--shiki-dark` CSS variable in the markup.
  const expectedFragments = ['<span style="color:#', '--shiki-dark:#']
  for (const fragment of expectedFragments)
    expect(rendered).toContain(fragment)
})

it('throws on empty themes', async () => {
  using primitive = await createShikiPrimitiveAsync({
    themes: [],
    langs: [],
    engine: createJavaScriptRegexEngine(),
  })

  // Passing a `themes` map with no entries must be rejected.
  const tokenizeWithNoThemes = () => codeToTokens(primitive, 'code', { themes: {} })

  expect(tokenizeWithNoThemes).toThrowError('`themes` option must not be empty')
})

it('throws on invalid options', async () => {
  using primitive = await createShikiPrimitiveAsync({
    themes: [import('@shikijs/themes/vitesse-dark')],
    langs: [],
    engine: createJavaScriptRegexEngine(),
  })

  // An options object with neither `theme` nor `themes` must be rejected;
  // the cast is needed to get the malformed options past the type checker.
  const tokenizeWithoutTheme = () => codeToTokens(primitive, 'code', {} as any)

  expect(tokenizeWithoutTheme).toThrowError('Invalid options, either `theme` or `themes` must be provided')
})
2 changes: 1 addition & 1 deletion scripts/report-engine-js-compat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ async function run() {
'',
createTable(reportOk),
'',
'###### Table Field Explanations',
'### Table Field Explanations',
'',
'- **Highlight Match**: Whether the highlighting results matched with the WASM engine for the [sample snippet](https://github.com/shikijs/textmate-grammars-themes/tree/main/samples).',
'- **Patterns Parsable**: Number of regex patterns that can be parsed by the JavaScript RegExp engine.',
Expand Down