diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 0a3cb48ba..adfee17f1 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -82,9 +82,16 @@ export async function run( process.stdin.on('error', reject) }) - const lang = options.lang || 'text' + const { codeToHtml, guessEmbeddedLanguages } = await import('shiki') + let lang = (options.lang as string | undefined)?.toLowerCase() + if (!lang) { + const guessed = guessEmbeddedLanguages(content, undefined) + if (guessed.length > 0) + lang = guessed[0] + } + lang ||= 'text' + if (options.format === 'html') { - const { codeToHtml } = await import('shiki') log(await codeToHtml(content, { lang: lang as BundledLanguage, theme: options.theme, @@ -100,11 +107,18 @@ export async function run( return } + const { codeToHtml, guessEmbeddedLanguages } = await import('shiki') const codes = await Promise.all(files.map(async (path) => { const { content, ext } = await readSource(path) - const lang = (options.lang || ext).toLowerCase() + let lang = (options.lang || ext)?.toLowerCase() + if (!lang || lang === 'text') { + const guessed = guessEmbeddedLanguages(content, undefined) + if (guessed.length > 0) + lang = guessed[0] + } + lang ||= 'text' + if (options.format === 'html') { - const { codeToHtml } = await import('shiki') return await codeToHtml(content, { lang: lang as BundledLanguage, theme: options.theme, diff --git a/packages/cli/test/cli.test.ts b/packages/cli/test/cli.test.ts index 907ce4709..5774521d7 100644 --- a/packages/cli/test/cli.test.ts +++ b/packages/cli/test/cli.test.ts @@ -1,3 +1,4 @@ +import { Buffer } from 'node:buffer' import fs from 'node:fs/promises' import path from 'node:path' import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' @@ -174,4 +175,34 @@ describe('run', () => { expect(output).toContain('javascript') expect(output).toContain('python') }) + + describe('stdin', () => { + it('reads from stdin and guesses language', async () => { + const output: 
string[] = [] + const content = '#!/usr/bin/env node\nconsole.log("hi")' + + const mockStdin = { + isTTY: false, + on: vi.fn((event, handler) => { + if (event === 'data') + handler(Buffer.from(content)) + if (event === 'end') + handler() + return mockStdin + }), + } + + const originalStdin = process.stdin + Object.defineProperty(process, 'stdin', { value: mockStdin, configurable: true }) + + await run(['node', 'shiki'], msg => output.push(msg)) + + expect(output.length).toBe(1) + expect(output[0]).toContain('console') // Should be highlighted + // Since we can't easily check for colors in this test environment without more setup, + // we at least verify it didn't throw and produced output. + + Object.defineProperty(process, 'stdin', { value: originalStdin, configurable: true }) + }) + }) }) diff --git a/packages/core/src/highlight/code-to-tokens.ts b/packages/core/src/highlight/code-to-tokens.ts index ea52bd249..725c92cf9 100644 --- a/packages/core/src/highlight/code-to-tokens.ts +++ b/packages/core/src/highlight/code-to-tokens.ts @@ -1,8 +1,9 @@ -import type { CodeOptionsMultipleThemes, CodeToTokensOptions, GrammarState, ShikiPrimitive, StringLiteralUnion, ThemedToken, ThemeRegistrationAny, TokensResult } from '@shikijs/types' +import type { CodeOptionsMultipleThemes, CodeToTokensOptions, GrammarState, ShikiPrimitive, StringLiteralUnion, ThemedToken, ThemedTokenWithVariants, ThemeRegistrationAny, TokensResult } from '@shikijs/types' import { codeToTokensWithThemes, getLastGrammarStateFromMap, setLastGrammarStateToMap } from '@shikijs/primitive' import { ShikiError } from '@shikijs/types' import { applyColorReplacements, flatTokenVariants, resolveColorReplacements } from '../utils' import { DEFAULT_COLOR_LIGHT_DARK } from '../utils/constants' +import { tokenizeAnsiWithTheme } from './code-to-tokens-ansi' import { codeToTokensBase } from './code-to-tokens-base' /** @@ -38,11 +39,35 @@ export function codeToTokens( if (themes.length === 0) throw new 
ShikiError('`themes` option must not be empty') - const themeTokens = codeToTokensWithThemes( - primitive, - code, - options, - ) + const lang = primitive.resolveLangAlias(options.lang || 'text') + let themeTokens: any[][] + if (lang === 'ansi') { + themeTokens = themes.map((t) => { + const { theme } = primitive.setTheme(t.theme) + return tokenizeAnsiWithTheme(theme, code, options) + }) + + // Align tokens (they should already be aligned for ANSI, but we use the merged format) + themeTokens = themeTokens[0].map((line, lineIdx) => line.map((_token: ThemedToken, tokenIdx: number) => { + const mergedToken: ThemedTokenWithVariants = { + content: _token.content, + variants: {}, + offset: _token.offset, + } + themeTokens.forEach((t, themeIdx) => { + const { content: _, offset: __, ...styles } = t[lineIdx][tokenIdx] + mergedToken.variants[themes[themeIdx].color] = styles + }) + return mergedToken + })) + } + else { + themeTokens = codeToTokensWithThemes( + primitive, + code, + options, + ) + } grammarState = getLastGrammarStateFromMap(themeTokens) diff --git a/packages/core/src/utils/strings.test.ts b/packages/core/src/utils/strings.test.ts index 303748e7c..a443e8710 100644 --- a/packages/core/src/utils/strings.test.ts +++ b/packages/core/src/utils/strings.test.ts @@ -1,6 +1,24 @@ /* eslint-disable style/no-tabs */ import { describe, expect, it } from 'vitest' -import { guessEmbeddedLanguages, splitLines } from '.' +import { createPositionConverter, guessEmbeddedLanguages, splitLines } from '.' 
+ +describe('createPositionConverter', () => { + it('basic conversion', () => { + const code = 'abc\ndef\n' + const converter = createPositionConverter(code) + expect(converter.lines).toEqual(['abc\n', 'def\n', '']) + + expect(converter.indexToPos(0)).toEqual({ line: 0, character: 0 }) + expect(converter.indexToPos(2)).toEqual({ line: 0, character: 2 }) + expect(converter.indexToPos(4)).toEqual({ line: 1, character: 0 }) + expect(converter.indexToPos(8)).toEqual({ line: 2, character: 0 }) + + expect(converter.posToIndex(0, 0)).toBe(0) + expect(converter.posToIndex(0, 2)).toBe(2) + expect(converter.posToIndex(1, 0)).toBe(4) + expect(converter.posToIndex(1, 4)).toBe(8) + }) +}) describe('splitLines', () => { it('splitLines', () => { @@ -197,4 +215,38 @@ print("hello") expect(langs).toContain('javascript') expect(langs).toContain('python') }) + + it('detects languages from shebangs', () => { + expect(guessEmbeddedLanguages('#!/usr/bin/node', undefined)).toContain('javascript') + expect(guessEmbeddedLanguages('#!/bin/bash', undefined)).toContain('bash') + expect(guessEmbeddedLanguages('#!/usr/bin/env python3', undefined)).toContain('python') + expect(guessEmbeddedLanguages('#!/usr/bin/env -S ts-node --foo', undefined)).toContain('ts-node') + expect(guessEmbeddedLanguages('#!/usr/bin/env -S node --inspect', undefined)).toContain('javascript') + expect(guessEmbeddedLanguages('#!/bin/zsh', undefined)).toContain('shell') + expect(guessEmbeddedLanguages('#!/usr/bin/env', undefined)).toEqual([]) + expect(guessEmbeddedLanguages('#!', undefined)).toEqual([]) + expect(guessEmbeddedLanguages(' #!/bin/bash', undefined)).toEqual([]) // Must be at start + }) + + it('filters languages with highlighter', () => { + const mockHighlighter: any = { + getBundledLanguages: () => ({ + javascript: {}, + python: {}, + }), + } + const code = '```javascript\n```\n```rust\n```' + const detected = guessEmbeddedLanguages(code, undefined, mockHighlighter) + expect(detected).toContain('javascript') 
+ expect(detected).not.toContain('rust') + }) + + it('detects languages from comments', () => { + expect(guessEmbeddedLanguages('', undefined)).toContain('js') + expect(guessEmbeddedLanguages('// @lang typescript', undefined)).toContain('typescript') + expect(guessEmbeddedLanguages('/** @lang python */', undefined)).toContain('python') + expect(guessEmbeddedLanguages('', undefined)).toEqual([]) + expect(guessEmbeddedLanguages('// @lang ', undefined)).toEqual([]) + expect(guessEmbeddedLanguages('', undefined)).toContain('js-variant') + }) }) diff --git a/packages/core/src/utils/strings.ts b/packages/core/src/utils/strings.ts index 45fc352c0..683825018 100644 --- a/packages/core/src/utils/strings.ts +++ b/packages/core/src/utils/strings.ts @@ -110,6 +110,54 @@ export function guessEmbeddedLanguages( langs.add(lang) } + // For shebangs + // Matches: #!/usr/bin/env node, #!/bin/bash, etc. + if (code.startsWith('#!')) { + const firstLine = code.split('\n', 1)[0] + const parts = firstLine.slice(2).trim().split(/\s+/) + if (parts.length > 0) { + let lang = parts[0].split('/').pop() + if (lang === 'env') { + lang = undefined + // Find first part that doesn't start with '-' + for (let i = 1; i < parts.length; i++) { + if (parts[i] && !parts[i].startsWith('-')) { + lang = parts[i].split('/').pop() + break + } + } + } + + if (lang) { + lang = lang.toLowerCase() + // Map common executable names to Shiki aliases + if (lang === 'node') + lang = 'javascript' + else if (lang === 'python3') + lang = 'python' + else if (lang === 'rb') + lang = 'ruby' + else if (lang === 'sh' || lang === 'zsh') + lang = 'shell' + + langs.add(lang) + } + } + } + + // For common comments + // Matches: <!-- language: lang-js --> (StackOverflow), @lang javascript (JSDoc), etc. 
+ for (const match of code.matchAll(/language:\s*lang-([\w-]+)/g)) { + const lang = match[1].toLowerCase().trim() + if (lang) + langs.add(lang) + } + for (const match of code.matchAll(/@lang\s+([\w-]+)/g)) { + const lang = match[1].toLowerCase().trim() + if (lang) + langs.add(lang) + } + if (!highlighter) return Array.from(langs) diff --git a/packages/core/test/tokens.test.ts b/packages/core/test/tokens.test.ts index 2c30cbd02..bd52fefa0 100644 --- a/packages/core/test/tokens.test.ts +++ b/packages/core/test/tokens.test.ts @@ -197,3 +197,46 @@ it('colorsRendering none', async () => { }), ).toMatchSnapshot('colorsRendering none') }) + +it('ansi with multiple themes', async () => { + using engine = await createShikiPrimitiveAsync({ + themes: [ + import('@shikijs/themes/vitesse-light'), + import('@shikijs/themes/vitesse-dark'), + ], + langs: [], + engine: createJavaScriptRegexEngine(), + }) + + const code = '\x1B[31mred\x1B[0m' + const html = codeToHtml(engine, code, { + lang: 'ansi', + themes: { + light: 'vitesse-light', + dark: 'vitesse-dark', + }, + }) + + expect(html).toContain('