diff --git a/sample_files/compare.expected b/sample_files/compare.expected index 1f9691e9d0..20aa1f2736 100644 --- a/sample_files/compare.expected +++ b/sample_files/compare.expected @@ -71,7 +71,7 @@ sample_files/elixir_1.ex sample_files/elixir_2.ex 5ab1a845f7cb3ca6db1fdef7bc4a042f - sample_files/elm_1.elm sample_files/elm_2.elm -8a67269f2dd2e4913f885b3b6e6d6a07 - +384f1695fb6c512c9510e9eae47b82fc - sample_files/elvish_1.elv sample_files/elvish_2.elv 93af1d46752d57db84011ca7482ae842 - @@ -80,7 +80,7 @@ sample_files/erlang_1.erl sample_files/erlang_2.erl 77c259baf4751716db4da1503088e742 - sample_files/f_sharp_1.fs sample_files/f_sharp_2.fs -63defe290bfc6754b51457a6c374838b - +eb2a87f6e7d0cd4a545c3a6860eee448 - sample_files/fortran_1.f90 sample_files/fortran_2.f90 b5fae2c0a3e0755ca8182df1f728400b - @@ -119,7 +119,16 @@ sample_files/identical_1.scala sample_files/identical_2.scala 15c5a789e644348cb7e0de051ff4b63e - sample_files/if_1.py sample_files/if_2.py -afcc7b60dd5c267ae183a496e0af511b - +224c74aaee642194766600a680cd48a0 - + +sample_files/if_multiline_1.py sample_files/if_multiline_2.py +f135f2c5b6b7a12668c8859e888d19b6 - + +sample_files/if_nested_1.py sample_files/if_nested_2.py +37bc29631193efba13845783688f9c7f - + +sample_files/indented_string_1.py sample_files/indented_string_2.py +044539323b209aa31042ebee5de034d8 - sample_files/insert_blank_1.txt sample_files/insert_blank_2.txt a5fd75afcc99aa7b2b285f1f9ced8607 - @@ -326,7 +335,7 @@ sample_files/xml_1.xml sample_files/xml_2.xml 38600945878fdea4b06c5da026e1fb78 - sample_files/yaml_1.yaml sample_files/yaml_2.yaml -3a4a35bf75297de64796eafb1feb4f97 - +77efbb4ab9adfec62f8d5187f3ca2737 - sample_files/zig_1.zig sample_files/zig_2.zig ee5a604e604d310066deeff30e28f9d7 - diff --git a/sample_files/if_multiline_1.py b/sample_files/if_multiline_1.py new file mode 100644 index 0000000000..7eba63b15d --- /dev/null +++ b/sample_files/if_multiline_1.py @@ -0,0 +1,3 @@ +if x: + foo + bar diff --git a/sample_files/if_multiline_2.py b/sample_files/if_multiline_2.py new file mode 100644 index 0000000000..3bd1f0e297 --- /dev/null +++ b/sample_files/if_multiline_2.py @@ -0,0 +1,2 @@ +foo +bar diff --git a/sample_files/if_nested_1.py b/sample_files/if_nested_1.py new file mode 100644 index 0000000000..a5df73093a --- /dev/null +++ b/sample_files/if_nested_1.py @@ -0,0 +1,4 @@ +if a: + foo + if b: + bar diff --git a/sample_files/if_nested_2.py b/sample_files/if_nested_2.py new file mode 100644 index 0000000000..15e5c67ce6 --- /dev/null +++ b/sample_files/if_nested_2.py @@ -0,0 +1,4 @@ +if a: + foo +if b: + bar diff --git a/sample_files/indented_string_1.py b/sample_files/indented_string_1.py new file mode 100644 index 0000000000..3d85d1e4b9 --- /dev/null +++ b/sample_files/indented_string_1.py @@ -0,0 +1,5 @@ +t = ( + "Hello" + " " + "World" + ) diff --git a/sample_files/indented_string_2.py b/sample_files/indented_string_2.py new file mode 100644 index 0000000000..b6ce4878a4 --- /dev/null +++ b/sample_files/indented_string_2.py @@ -0,0 +1,5 @@ +t = ( + "Hello" + " " + "World" + ) diff --git a/src/display/json.rs b/src/display/json.rs index d1391ce9be..501196474e 100644 --- a/src/display/json.rs +++ b/src/display/json.rs @@ -285,7 +285,7 @@ impl Highlight { AtomKind::Keyword => Self::Keyword, AtomKind::Comment => Self::Comment, AtomKind::Type => Self::Type, - AtomKind::Normal => Self::Normal, + AtomKind::Normal | AtomKind::Indent => Self::Normal, AtomKind::CanIgnore => Self::Normal, AtomKind::TreeSitterError => Self::TreeSitterError, }, diff --git a/src/display/side_by_side.rs b/src/display/side_by_side.rs index d196da4602..1a83d6f6c0 100644 --- a/src/display/side_by_side.rs +++ b/src/display/side_by_side.rs @@ -3,14 +3,14 @@ use std::cmp::{max, min}; use line_numbers::{LineNumber, SingleLineSpan}; -use owo_colors::{OwoColorize, Style}; +use owo_colors::OwoColorize; use crate::constants::Side; use crate::display::context::all_matched_lines_filled; use crate::display::hunks::{matched_lines_indexes_for_hunk, Hunk}; use crate::display::style::{ self, apply_colors, apply_line_number_color, color_positions, novel_style, replace_tabs, - split_and_apply, width_respecting_tabs, BackgroundColor, + split_and_apply, width_respecting_tabs, BackgroundColor, DftStyle, }; use crate::hash::{DftHashMap, DftHashSet}; use crate::lines::{format_line_num, split_on_newlines}; @@ -46,7 +46,7 @@ fn format_missing_line_num( Side::Right => prev_num >= source_dims.rhs_max_line_in_file, }; - let mut style = Style::new(); + let mut style = DftStyle::new(); if use_color { style = style.dimmed(); } @@ -70,7 +70,7 @@ fn format_missing_line_num( c.repeat(num_digits), width = column_width - 1 ) - .style(style) + .style(style.into_owo_style()) .to_string() } @@ -99,16 +99,16 @@ fn display_single_column( header_line.push('\n'); formatted_lines.push(header_line); - let mut style = Style::new(); + let mut style = DftStyle::new(); if display_options.use_color { - style = novel_style(Style::new(), side, display_options.background_color); + style = novel_style(DftStyle::new(), side, display_options.background_color); } for (i, line) in src_lines.iter().enumerate() { let mut formatted_line = String::with_capacity(line.len()); formatted_line.push_str( &format_line_num_padded((i as u32).into(), column_width) - .style(style) + .style(style.into_owo_style()) .to_string(), ); formatted_line.push_str(line); @@ -281,8 +281,8 @@ fn highlight_positions( lhs_mps: &[MatchedPos], rhs_mps: &[MatchedPos], ) -> ( - DftHashMap>, - DftHashMap>, + DftHashMap>, + DftHashMap>, ) { let lhs_positions = color_positions( Side::Left, @@ -292,7 +292,7 @@ fn highlight_positions( lhs_mps, ); // Preallocate the hashmap assuming the average line will have 2 items on it. - let mut lhs_styles: DftHashMap> = + let mut lhs_styles: DftHashMap> = DftHashMap::default(); for (span, style) in lhs_positions { let styles = lhs_styles.entry(span.line).or_insert_with(Vec::new); @@ -306,7 +306,7 @@ fn highlight_positions( file_format, rhs_mps, ); - let mut rhs_styles: DftHashMap> = + let mut rhs_styles: DftHashMap> = DftHashMap::default(); for (span, style) in rhs_positions { let styles = rhs_styles.entry(span.line).or_insert_with(Vec::new); diff --git a/src/display/style.rs b/src/display/style.rs index 6a4536844d..a6fc914c7d 100644 --- a/src/display/style.rs +++ b/src/display/style.rs @@ -1,6 +1,8 @@ //! Apply colours and styling to strings. +use std::borrow::Cow; use std::cmp::{max, min}; +use std::env; use line_numbers::{LineNumber, SingleLineSpan}; use owo_colors::{OwoColorize, Style}; @@ -25,6 +27,69 @@ impl BackgroundColor { } } +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) struct DftStyle { + style: Style, + replace_whitespace: bool, +} + +impl DftStyle { + pub(crate) fn new() -> Self { + Self { + style: Style::new(), + replace_whitespace: false, + } + } + + fn with_style(self, style: Style) -> Self { + Self { style, ..self } + } + + fn with_replace_whitespace(self, replace_whitespace: bool) -> Self { + Self { + replace_whitespace, + ..self + } + } + + pub(crate) fn into_owo_style(self) -> Style { + self.style + } +} + +/// Pass through calls to underlying Style +macro_rules! dft_style_methods { + ($($name:ident),+ $(,)?) => { + impl DftStyle { + $( + pub(crate) fn $name(self) -> Self { + self.with_style(self.style.$name()) + } + )+ + } + }; +} + +dft_style_methods!( + red, + green, + magenta, + bright_red, + bright_green, + bright_blue, + bright_magenta, + on_bright_red, + on_bright_green, + on_red, + on_green, + italic, + bold, + blue, + purple, + underline, + dimmed, +); + /// Find the largest byte offset in `s` that gives the longest /// starting substring whose display width does not exceed `width`. /// @@ -128,6 +193,24 @@ pub(crate) fn replace_tabs(src: &str, tab_width: usize) -> String { src.replace('\t', &tab_as_spaces) } +fn replace_styled_whitespace(s: &str) -> Cow<'_, str> { + Cow::Owned( + s.chars() + .map(|ch| if ch.is_whitespace() { '·' } else { ch }) + .collect(), + ) +} + +fn apply_dft_style(s: &str, style: DftStyle) -> String { + let s = if style.replace_whitespace { + replace_styled_whitespace(s) + } else { + Cow::Borrowed(s) + }; + + s.as_ref().style(style.into_owo_style()).to_string() +} + /// Split `line` (from the source code) into multiple lines of /// `max_len` (i.e. word wrapping), and apply `styles` to each part /// according to its original position in `line`. @@ -135,7 +218,7 @@ pub(crate) fn split_and_apply( line: &str, max_len: usize, tab_width: usize, - styles: &[(SingleLineSpan, Style)], + styles: &[(SingleLineSpan, DftStyle)], side: Side, ) -> Vec { assert!( @@ -201,7 +284,7 @@ pub(crate) fn split_and_apply( min(byte_len(line_part), end_col - part_start), tab_width, ); - res.push_str(&span_s.style(*style).to_string()); + res.push_str(&apply_dft_style(&span_s, *style)); } prev_style_end = end_col; } @@ -236,7 +319,7 @@ pub(crate) fn split_and_apply( /// Return a copy of `line` with styles applied to all the spans /// specified. -fn apply_line(line: &str, styles: &[(SingleLineSpan, Style)]) -> String { +fn apply_line(line: &str, styles: &[(SingleLineSpan, DftStyle)]) -> String { let line_bytes = byte_len(line); let mut styled_line = String::with_capacity(line.len()); let mut i = 0; @@ -257,7 +340,7 @@ fn apply_line(line: &str, styles: &[(SingleLineSpan, Style)]) -> String { // Apply style to the substring in this span. let span_s = substring_by_byte(line, start_col, min(line_bytes, end_col)); - styled_line.push_str(&span_s.style(*style).to_string()); + styled_line.push_str(&apply_dft_style(span_s, *style)); i = end_col; } @@ -270,8 +353,8 @@ fn apply_line(line: &str, styles: &[(SingleLineSpan, Style)]) -> String { } fn group_by_line( - ranges: &[(SingleLineSpan, Style)], -) -> DftHashMap> { + ranges: &[(SingleLineSpan, DftStyle)], +) -> DftHashMap> { let mut ranges_by_line: DftHashMap<_, Vec<_>> = DftHashMap::default(); for range in ranges { if let Some(matching_ranges) = ranges_by_line.get_mut(&range.0.line) { @@ -284,11 +367,11 @@ fn group_by_line( ranges_by_line } -/// Apply the `Style`s to the spans specified. Return a vec of the +/// Apply the `DftStyle`s to the spans specified. Return a vec of the /// styled strings, including trailing newlines. /// /// Tolerant against lines in `s` being shorter than the spans. -fn style_lines(lines: &[&str], styles: &[(SingleLineSpan, Style)]) -> Vec { +fn style_lines(lines: &[&str], styles: &[(SingleLineSpan, DftStyle)]) -> Vec { let mut ranges_by_line = group_by_line(styles); let mut styled_lines = Vec::with_capacity(lines.len()); @@ -305,7 +388,7 @@ fn style_lines(lines: &[&str], styles: &[(SingleLineSpan, Style)]) -> Vec Style { +pub(crate) fn novel_style(style: DftStyle, side: Side, background: BackgroundColor) -> DftStyle { if background.is_dark() { match side { Side::Left => style.bright_red(), @@ -319,6 +402,20 @@ pub(crate) fn novel_style(style: Style, side: Side, background: BackgroundColor) } } +fn novel_background_style(style: DftStyle, side: Side, background: BackgroundColor) -> DftStyle { + if background.is_dark() { + match side { + Side::Left => style.on_bright_red(), + Side::Right => style.on_bright_green(), + } + } else { + match side { + Side::Left => style.on_red(), + Side::Right => style.on_green(), + } + } +} + /// Merge spans where the end of one span matches the start of the /// next span. /// @@ -330,14 +427,15 @@ pub(crate) fn novel_style(style: Style, side: Side, background: BackgroundColor) /// considers `ab` to be distinct from /// `ab`. Merging the spans normalises /// the output to `ab`. -fn merge_adjacent(items: &[(SingleLineSpan, Style)]) -> Vec<(SingleLineSpan, Style)> { - let mut merged: Vec<(SingleLineSpan, Style)> = vec![]; - let mut prev_item: Option<(SingleLineSpan, Style)> = None; +fn merge_adjacent(items: &[(SingleLineSpan, DftStyle)]) -> Vec<(SingleLineSpan, DftStyle)> { + let mut merged: Vec<(SingleLineSpan, DftStyle)> = vec![]; + let mut prev_item: Option<(SingleLineSpan, DftStyle)> = None; for (span, style) in items.iter().copied() { match prev_item.take() { Some((mut prev_span, prev_style)) => { - if prev_style == style + if prev_style.style == style.style + && prev_style.replace_whitespace == style.replace_whitespace && prev_span.line == span.line && prev_span.end_col == span.start_col { @@ -367,10 +465,11 @@ pub(crate) fn color_positions( syntax_highlight: bool, file_format: &FileFormat, mps: &[MatchedPos], -) -> Vec<(SingleLineSpan, Style)> { +) -> Vec<(SingleLineSpan, DftStyle)> { let mut styles = vec![]; + let highlight_indents_with_background = env::var("DFT_WHITESPACE") == Ok("bg".to_string()); for mp in mps { - let mut style = Style::new(); + let mut style = DftStyle::new(); match mp.kind { MatchKind::UnchangedToken { highlight, .. } | MatchKind::Ignored { highlight } => { if syntax_highlight { @@ -396,13 +495,24 @@ pub(crate) fn color_positions( style = style.bold(); } AtomKind::TreeSitterError => style = style.purple(), - AtomKind::Normal | AtomKind::CanIgnore => {} + AtomKind::Normal | AtomKind::Indent | AtomKind::CanIgnore => {} } } } } MatchKind::Novel { highlight, .. } => { - style = novel_style(style, side, background); + let is_indent = matches!(highlight, TokenKind::Atom(AtomKind::Indent)); + style = if is_indent && highlight_indents_with_background { + novel_background_style(style, side, background) + } else { + let style = novel_style(style, side, background); + if is_indent { + style.with_replace_whitespace(true) + } else { + style + } + }; + if syntax_highlight && matches!( highlight, @@ -515,7 +625,7 @@ pub(crate) fn apply_line_number_color( display_options: &DisplayOptions, ) -> String { if display_options.use_color { - let mut style = Style::new(); + let mut style = DftStyle::new(); // The goal here is to choose a style for line numbers that is // visually distinct from content. @@ -530,7 +640,7 @@ pub(crate) fn apply_line_number_color( style = style.dimmed() } - s.style(style).to_string() + s.style(style.into_owo_style()).to_string() } else { s.to_owned() } @@ -637,7 +747,7 @@ mod tests { start_col: 0, end_col: 3, }, - Style::new(), + DftStyle::new(), )], Side::Left, ); @@ -656,7 +766,7 @@ mod tests { start_col: 0, end_col: 3, }, - Style::new(), + DftStyle::new(), )], Side::Left, ); @@ -676,7 +786,7 @@ mod tests { start_col: 0, end_col: 2, }, - Style::new(), + DftStyle::new(), ), ( SingleLineSpan { @@ -684,7 +794,7 @@ mod tests { start_col: 4, end_col: 6, }, - Style::new(), + DftStyle::new(), ), ], Side::Left, @@ -704,7 +814,7 @@ mod tests { start_col: 0, end_col: 3, }, - Style::new(), + DftStyle::new(), )], Side::Left, ); diff --git a/src/parse/syntax.rs b/src/parse/syntax.rs index 3ecf3a9a79..f533521d1d 100644 --- a/src/parse/syntax.rs +++ b/src/parse/syntax.rs @@ -624,6 +624,9 @@ pub(crate) enum AtomKind { /// it. This is typically a variable, e.g. `foo`, or a literal /// `123`. Note that string literals have a separate kind. Normal, + /// A marker inserted by indentation-aware parsers when a node + /// starts on a new line but not at column 0. + Indent, // TODO: We should either have a AtomWithWords(HighlightKind) or a // separate String, Text and Comment kind. String(StringKind), diff --git a/src/parse/tree_sitter_parser.rs b/src/parse/tree_sitter_parser.rs index 3c33adb9c4..40b6f32b86 100644 --- a/src/parse/tree_sitter_parser.rs +++ b/src/parse/tree_sitter_parser.rs @@ -1,6 +1,6 @@ //! Load and configure parsers written with tree-sitter. -use line_numbers::LinePositions; +use line_numbers::{LinePositions, SingleLineSpan}; use streaming_iterator::StreamingIterator as _; use tree_sitter as ts; use typed_arena::Arena; @@ -77,6 +77,10 @@ pub(crate) struct TreeSitterConfig { /// Sub-languages in use, if any. sub_languages: Vec, + + /// Should difftastic insert synthetic indentation atoms when a + /// node starts on a new line? + indent_aware: bool, } extern "C" { @@ -118,6 +122,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Apex => { @@ -145,6 +150,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Asm => { @@ -159,6 +165,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_asm::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Bash => { @@ -175,6 +182,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_bash::HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } C => { @@ -187,6 +195,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ignore_trailing_tokens: vec![], highlight_query: ts::Query::new(&language, tree_sitter_c::HIGHLIGHT_QUERY).unwrap(), sub_languages: vec![], + indent_aware: false, } } CPlusPlus => { @@ -205,6 +214,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ignore_trailing_tokens: vec![], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), sub_languages: vec![], + indent_aware: false, } } Clojure => { @@ -224,6 +234,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } CMake => { @@ -240,6 +251,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } CommonLisp => { @@ -253,6 +265,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ignore_trailing_tokens: vec![], highlight_query: ts::Query::new(&language, "").unwrap(), sub_languages: vec![], + indent_aware: false, } } CSharp => { @@ -276,6 +289,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Css => { @@ -297,6 +311,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_css::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Dart => { @@ -313,6 +328,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } DeviceTree => { @@ -331,6 +347,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Elixir => { @@ -347,6 +364,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_elixir::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Elm => { @@ -361,6 +379,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_elm::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: true, } } Elvish => { @@ -376,6 +395,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } EmacsLisp => { @@ -395,6 +415,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Erlang => { @@ -409,6 +430,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_erlang::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } FSharp => { @@ -424,6 +446,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { .unwrap(), sub_languages: vec![], + indent_aware: true, } } Fortran => { @@ -440,6 +463,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Gleam => { @@ -453,6 +477,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_gleam::HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Go => { @@ -471,6 +496,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_go::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Hare => { @@ -486,6 +512,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Haskell => { @@ -508,6 +535,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_haskell::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: true, } } Hcl => { @@ -531,6 +559,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Html => { @@ -566,6 +595,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { parse_as: JavaScript, }, ], + indent_aware: false, } } Janet => { @@ -590,6 +620,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Java => { @@ -620,6 +651,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_java::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } JavaScript | JavascriptJsx => { @@ -647,6 +679,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_javascript::HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Json => { @@ -661,6 +694,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_json::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Julia => { @@ -685,6 +719,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Kotlin => { @@ -713,6 +748,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } LaTeX => { @@ -728,6 +764,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Lua => { @@ -744,6 +781,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_lua::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Make => { @@ -762,6 +800,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { .unwrap(), parse_as: Bash, }], + indent_aware: false, } } Newick => { @@ -779,6 +818,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Nix => { @@ -795,6 +835,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_nix::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } ObjC => { @@ -815,6 +856,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_objc::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } OCaml => { @@ -828,6 +870,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_ocaml::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } OCamlInterface => { @@ -841,6 +884,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { // TODO: why doesn't tree_sitter_ocaml::HIGHLIGHTS_QUERY work here? highlight_query: ts::Query::new(&language, "").unwrap(), sub_languages: vec![], + indent_aware: false, } } Pascal => { @@ -858,6 +902,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Perl => { @@ -882,6 +927,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { .unwrap(), ignore_trailing_tokens: vec![], sub_languages: vec![], + indent_aware: false, } } Php => { @@ -896,6 +942,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_php::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Proto => { @@ -912,6 +959,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Python => { @@ -933,6 +981,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_python::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: true, } } Qml => { @@ -950,6 +999,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ignore_trailing_tokens: vec![], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), sub_languages: vec![], + indent_aware: false, } } R => { @@ -963,6 +1013,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_r::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Racket => { @@ -978,6 +1029,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_racket::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Ruby => { @@ -999,6 +1051,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_ruby::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Rust => { @@ -1027,11 +1080,17 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Scala => { let language_fn = tree_sitter_scala::LANGUAGE; let language = tree_sitter::Language::new(language_fn); + let indent_aware = language + .metadata() + .map(|meta| meta.major_version >= 3) + .unwrap_or(true); + TreeSitterConfig { language: language.clone(), atom_nodes: [ @@ -1046,6 +1105,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_scala::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: indent_aware, } } Scheme => { @@ -1059,6 +1119,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_scheme::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Scss => { @@ -1076,6 +1137,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Smali => { @@ -1091,6 +1153,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: Vec::new(), + indent_aware: false, } } Solidity => { @@ -1106,6 +1169,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_solidity::HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Sql => { @@ -1119,6 +1183,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_sequel::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Swift => { @@ -1137,6 +1202,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_swift::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Toml => { @@ -1151,6 +1217,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_toml_ng::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } TypeScript | TypeScriptTsx => { @@ -1178,6 +1245,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ], highlight_query: ts::Query::new(&language, &highlight_query).unwrap(), sub_languages: vec![], + indent_aware: false, } } Xml => { @@ -1195,6 +1263,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_xml::XML_HIGHLIGHT_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Yaml => { @@ -1216,6 +1285,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_yaml::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: true, } } Verilog => { @@ -1232,6 +1302,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { ) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Vhdl => { @@ -1246,6 +1317,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_vhdl::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } Zig => { @@ -1262,6 +1334,7 @@ pub(crate) fn from_language(language: guess::Language) -> TreeSitterConfig { highlight_query: ts::Query::new(&language, tree_sitter_zig::HIGHLIGHTS_QUERY) .unwrap(), sub_languages: vec![], + indent_aware: false, } } } @@ -1552,6 +1625,7 @@ pub(crate) fn to_syntax<'a>( // each top level syntax item. cursor.goto_first_child(); + let mut last_line = None; let nodes = all_syntaxes_from_cursor( arena, src, @@ -1562,6 +1636,7 @@ pub(crate) fn to_syntax<'a>( &highlights, &subtrees, ignore_comments, + &mut last_line, ); (nodes, error_count) } @@ -1648,6 +1723,7 @@ fn all_syntaxes_from_cursor<'a>( highlights: &HighlightedNodeIds, subtrees: &DftHashMap, ignore_comments: bool, + last_line: &mut Option, ) -> Vec<&'a Syntax<'a>> { let mut nodes: Vec<&Syntax> = vec![]; @@ -1662,6 +1738,7 @@ fn all_syntaxes_from_cursor<'a>( highlights, subtrees, ignore_comments, + last_line, )); if !cursor.goto_next_sibling() { @@ -1672,8 +1749,8 @@ fn all_syntaxes_from_cursor<'a>( nodes } -/// Convert the tree-sitter node at `cursor` to a difftastic syntax -/// node. +/// Convert the tree-sitter node at `cursor` to one or more +/// difftastic syntax nodes. fn syntax_from_cursor<'a>( arena: &'a Arena>, src: &str, @@ -1684,7 +1761,8 @@ fn syntax_from_cursor<'a>( highlights: &HighlightedNodeIds, subtrees: &DftHashMap, ignore_comments: bool, -) -> Option<&'a Syntax<'a>> { + last_line: &mut Option, +) -> Vec<&'a Syntax<'a>> { let node = cursor.node(); // See if we should go into a sub-document instead (e.g. embedded JavaScript in HTML). @@ -1700,6 +1778,7 @@ fn syntax_from_cursor<'a>( subhighlights, &DftHashMap::default(), ignore_comments, + last_line, ); } @@ -1713,14 +1792,32 @@ fn syntax_from_cursor<'a>( // // Also, if this node is highlighted as a comment, treat it as // an atom unconditionally. - atom_from_cursor(arena, src, nl_pos, cursor, highlights, ignore_comments) + atom_from_cursor( + arena, + src, + nl_pos, + cursor, + config, + highlights, + ignore_comments, + last_line, + ) } else if highlights.keyword_ids.contains(&node.id()) && node.child_count() == 1 { // If this list has a single child, and the list itself (not // the child) is marked as a keyword, treat it as an atom with // keyword highlighting. - atom_from_cursor(arena, src, nl_pos, cursor, highlights, ignore_comments) + atom_from_cursor( + arena, + src, + nl_pos, + cursor, + config, + highlights, + ignore_comments, + last_line, + ) } else if node.child_count() > 0 { - Some(list_from_cursor( + vec![list_from_cursor( arena, src, nl_pos, @@ -1730,9 +1827,19 @@ fn syntax_from_cursor<'a>( highlights, subtrees, ignore_comments, - )) + last_line, + )] } else { - atom_from_cursor(arena, src, nl_pos, cursor, highlights, ignore_comments) + atom_from_cursor( + arena, + src, + nl_pos, + cursor, + config, + highlights, + ignore_comments, + last_line, + ) } } @@ -1770,6 +1877,7 @@ fn list_from_cursor<'a>( highlights: &HighlightedNodeIds, subtrees: &DftHashMap, ignore_comments: bool, + last_line: &mut Option, ) -> &'a Syntax<'a> { let root_node = cursor.node(); @@ -1824,10 +1932,17 @@ fn list_from_cursor<'a>( highlights, subtrees, ignore_comments, + last_line, )); } else if node_i == i { inner_open_content = &src[node.start_byte()..node.end_byte()]; inner_open_position = nl_pos.from_region(node.start_byte(), node.end_byte()); + before_delim.extend(indent_nodes_before_position( + arena, + config, + &inner_open_position, + last_line, + )); } else if node_i < j { between_delim.extend(syntax_from_cursor( arena, @@ -1839,10 +1954,17 @@ fn list_from_cursor<'a>( highlights, subtrees, ignore_comments, + last_line, )); } else if node_i == j { inner_close_content = &src[node.start_byte()..node.end_byte()]; inner_close_position = nl_pos.from_region(node.start_byte(), node.end_byte()); + between_delim.extend(indent_nodes_before_position( + arena, + config, + &inner_close_position, + last_line, + )); } else if node_i > j { after_delim.extend(syntax_from_cursor( arena, @@ -1854,6 +1976,7 @@ fn list_from_cursor<'a>( highlights, subtrees, ignore_comments, + last_line, )); } @@ -1912,15 +2035,46 @@ fn list_from_cursor<'a>( } } -/// Convert the tree-sitter node at `cursor` to a difftastic atom. +fn indent_nodes_before_position<'a>( + arena: &'a Arena>, + config: &TreeSitterConfig, + position: &[SingleLineSpan], + last_line: &mut Option, +) -> Vec<&'a Syntax<'a>> { + let start_line = position.first().map(|line_pos| line_pos.line.0); + let mut nodes = vec![]; + if config.indent_aware && start_line.is_some() && start_line != *last_line { + *last_line = position.last().map(|line_pos| line_pos.line.0); + let start_col = position.first().unwrap().start_col; + if start_col != 0 { + let indent_position = vec![SingleLineSpan { + line: start_line.unwrap().into(), + start_col: 0, + end_col: start_col, + }]; + nodes.push(Syntax::new_atom( + arena, + indent_position, + "·".repeat(start_col.try_into().unwrap()), + AtomKind::Indent, + )); + } + } + + nodes +} + +/// Convert the tree-sitter node at `cursor` to zero or more difftastic atoms. fn atom_from_cursor<'a>( arena: &'a Arena>, src: &str, nl_pos: &LinePositions, cursor: &mut ts::TreeCursor, + config: &TreeSitterConfig, highlights: &HighlightedNodeIds, ignore_comments: bool, -) -> Option<&'a Syntax<'a>> { + last_line: &mut Option, +) -> Vec<&'a Syntax<'a>> { let node = cursor.node(); let position = nl_pos.from_region(node.start_byte(), node.end_byte()); let mut content = &src[node.start_byte()..node.end_byte()]; @@ -1930,7 +2084,7 @@ fn atom_from_cursor<'a>( // not visible, but leads us to highlight unchanged lines that // happen to have preceding newline node. if node.kind() == "\n" { - return None; + return vec![]; } // JSX trims whitespace at the beginning and end of text nodes. @@ -1954,7 +2108,7 @@ fn atom_from_cursor<'a>( // highlighting. if ignore_comments { - return None; + return vec![]; } AtomKind::Comment @@ -1970,12 +2124,16 @@ fn atom_from_cursor<'a>( AtomKind::Normal }; - Some(Syntax::new_atom( + let mut nodes = indent_nodes_before_position(arena, config, &position, last_line); + + nodes.push(Syntax::new_atom( arena, position, content.to_owned(), highlight, - )) + )); + + nodes } #[cfg(test)] @@ -2027,6 +2185,54 @@ mod tests { }; } + fn collect_indent_atoms(node: &Syntax<'_>, contents: &mut Vec) { + match node { + Syntax::List { children, .. } => { + for child in children { + collect_indent_atoms(child, contents); + } + } + Syntax::Atom { content, kind, .. } => { + if *kind == AtomKind::Indent { + contents.push(content.clone()); + } + } + } + } + + #[test] + fn test_parse_python_indent_atoms() { + let arena = Arena::new(); + let config = from_language(guess::Language::Python); + assert!(config.indent_aware); + + let res = parse(&arena, "if foo:\n bar\n baz\n", &config, false); + + let mut contents = vec![]; + for node in &res { + collect_indent_atoms(node, &mut contents); + } + + assert_eq!(contents, vec!["····".to_owned(), "····".to_owned()]); + } + + #[test] + fn test_parse_python_with_delimiters() { + // TODO: It would be even better if no indent would be generated + // for this case. That would require a more complicated language + // specific logic. + let arena = Arena::new(); + let config = from_language(guess::Language::Python); + let res = parse(&arena, "x = (\n a\n) + b\n", &config, false); + + let mut contents = vec![]; + for node in &res { + collect_indent_atoms(node, &mut contents); + } + + assert_eq!(contents, vec!["····".to_owned()]); + } + /// Ensure that we don't crash when loading any of the /// configs. This can happen on bad highlighting/foo.scm files. #[test]