From b2c06c066cd4df34ec541fad7e3df69f121acea7 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 20 Jul 2023 14:39:19 -0400 Subject: [PATCH 01/42] expose pattern index --- examples/print_match_text.rs | 4 ++-- src/lib.rs | 11 ++++++++--- src/searcher/mod.rs | 21 +++++++++++++-------- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/examples/print_match_text.rs b/examples/print_match_text.rs index 3806033..b1dc3ce 100644 --- a/examples/print_match_text.rs +++ b/examples/print_match_text.rs @@ -3,10 +3,10 @@ use tree_sitter_grep::{run_with_callback, Args}; fn main() { let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]); - run_with_callback(args, |node, file_contents, path| { + run_with_callback(args, |capture_info, file_contents, path| { println!( "Found match in {path:?}: {}", - std::str::from_utf8(&file_contents[node.byte_range()]).unwrap(), + std::str::from_utf8(&file_contents[capture_info.node.byte_range()]).unwrap(), ); }) .unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 6415c2b..b199cc4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -283,9 +283,14 @@ pub fn run_print(args: Args) -> Result { ) } +pub struct CaptureInfo<'node> { + pub node: Node<'node>, + pub pattern_index: usize, +} + pub fn run_with_callback( args: Args, - callback: impl Fn(Node, &[u8], &Path) + Sync, + callback: impl Fn(CaptureInfo, &[u8], &Path) + Sync, ) -> Result { run_for_context( args, @@ -300,8 +305,8 @@ pub fn run_with_callback( .search_path_callback::<_, io::Error>( query_context, path, - |node: Node, file_contents: &[u8], path: &Path| { - callback(node, file_contents, path); + |capture_info: CaptureInfo, file_contents: &[u8], path: &Path| { + callback(capture_info, file_contents, path); matched.store(true, Ordering::SeqCst); }, ) diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 1da30af..6d5bf79 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -9,7 +9,7 @@ use std::{ }; use encoding_rs_io::DecodeReaderBytesBuilder; -use tree_sitter::{Node, QueryCursor}; +use tree_sitter::QueryCursor; pub use self::mmap::MmapChoice; use crate::{ @@ -19,6 +19,7 @@ use crate::{ searcher::glue::MultiLine, sink::{Sink, SinkError}, treesitter::get_parser, + CaptureInfo, }; mod core; @@ -218,7 +219,7 @@ impl Searcher { &mut self, query_context: QueryContext, path: P, - callback: impl Fn(Node, &[u8], &Path), + callback: impl Fn(CaptureInfo, &[u8], &Path), ) -> Result<(), TError> where P: AsRef, @@ -338,7 +339,7 @@ impl Searcher { &mut self, query_context: QueryContext, slice: &[u8], - callback: impl Fn(Node, &[u8], &Path), + callback: impl Fn(CaptureInfo, &[u8], &Path), path: &Path, ) -> Result<(), ConfigError> { self.check_config()?; @@ -353,7 +354,7 @@ impl Searcher { &self, query_context: QueryContext, slice: &[u8], - callback: impl Fn(Node, &[u8], &Path), + callback: impl Fn(CaptureInfo, &[u8], &Path), path: &Path, ) { let mut query_cursor = QueryCursor::new(); @@ -377,10 +378,14 @@ impl Searcher { "I guess .captures() always wraps up the single capture like this?" ); match filter.as_ref() { - None => Some(single_captured_node), - Some(filter) => filter - .call(&single_captured_node) - .then_some(single_captured_node), + None => Some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }), + Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }), } }) .for_each(|node| { From 3d4682c055f50776f85124bdd57bca1b59d8f5d1 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 20 Jul 2023 15:53:11 -0400 Subject: [PATCH 02/42] correct interpretation of capture index --- src/searcher/mod.rs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 6d5bf79..f3e9594 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -366,17 +366,12 @@ impl Searcher { let filter = &query_context.filter; query_cursor .captures(query, tree.root_node(), slice) - .filter_map(|(match_, found_capture_index)| { - let found_capture_index = found_capture_index as u32; - if found_capture_index != capture_index { + .filter_map(|(match_, index_into_query_match_captures)| { + let this_capture = &match_.captures[index_into_query_match_captures]; + if this_capture.index != capture_index { return None; } - let mut nodes_for_this_capture = match_.nodes_for_capture_index(capture_index); - let single_captured_node = nodes_for_this_capture.next().unwrap(); - assert!( - nodes_for_this_capture.next().is_none(), - "I guess .captures() always wraps up the single capture like this?" - ); + let single_captured_node = this_capture.node; match filter.as_ref() { None => Some(CaptureInfo { node: single_captured_node, From 422058dfd81d606626c598aefa4e922626915d6d Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 20 Jul 2023 23:16:55 -0400 Subject: [PATCH 03/42] package name --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 56dbf82..c5fc489 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "tree-sitter-grep" +name = "tree_sitter_lint_tree-sitter-grep" version = "0.1.0" edition = "2021" license = "Unlicense OR MIT" @@ -8,6 +8,7 @@ authors = [ "Peter Stuart " ] description = """ +(not-yet-landed version used by tree-sitter-lint) tree-sitter-grep is a grep-like search tool that recursively searches the current directory for a tree-sitter query pattern. Like ripgrep, it respects From 2118d3cb531212388689a449500065a8ebe11613 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 20 Jul 2023 23:26:05 -0400 Subject: [PATCH 04/42] package name --- src/bin/tree-sitter-grep.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/tree-sitter-grep.rs b/src/bin/tree-sitter-grep.rs index b250691..87bed28 100644 --- a/src/bin/tree-sitter-grep.rs +++ b/src/bin/tree-sitter-grep.rs @@ -1,7 +1,7 @@ use std::process; use clap::Parser; -use tree_sitter_grep::{run_print, Args, RunStatus}; +use tree_sitter_lint_tree_sitter_grep::{run_print, Args, RunStatus}; pub fn main() { let args = Args::parse(); From 54b39ed74abbee9e387f8017fb635398a9632928 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 21 Jul 2023 20:59:02 -0400 Subject: [PATCH 05/42] expose supported language --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index b199cc4..c2cb68b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,8 @@ mod use_printer; mod use_searcher; pub use args::Args; -use language::{BySupportedLanguage, SupportedLanguage}; +use language::BySupportedLanguage; +pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; From b05884b2c3b7a32763d43374295657b5d231ebd2 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 21 Jul 2023 21:19:47 -0400 Subject: [PATCH 06/42] try exposing search slice endpoint --- examples/filter_before_line_number.rs | 2 +- examples/print_match_text.rs | 2 +- src/lib.rs | 46 +++++++++++++++++++++++++++ src/searcher/mod.rs | 46 +++++++++++++++++++++++++-- 4 files changed, 92 insertions(+), 4 deletions(-) diff --git a/examples/filter_before_line_number.rs b/examples/filter_before_line_number.rs index ae05948..3eae9f5 100644 --- a/examples/filter_before_line_number.rs +++ b/examples/filter_before_line_number.rs @@ -5,7 +5,7 @@ use std::{ use libc::c_char; use tree_sitter::Node; -use tree_sitter_grep::PluginInitializeReturn; +use tree_sitter_lint_tree_sitter_grep::PluginInitializeReturn; static ROW_NUMBER: AtomicUsize = AtomicUsize::new(0); diff --git a/examples/print_match_text.rs b/examples/print_match_text.rs index b1dc3ce..90ec8ec 100644 --- a/examples/print_match_text.rs +++ b/examples/print_match_text.rs @@ -1,5 +1,5 @@ use clap::Parser; -use tree_sitter_grep::{run_with_callback, Args}; +use tree_sitter_lint_tree_sitter_grep::{run_with_callback, Args}; fn main() { let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]); diff --git a/src/lib.rs b/src/lib.rs index dd4355c..e06be61 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,6 +75,8 @@ pub enum Error { FilterPluginExpectedArgument, #[error("plugin couldn't parse argument {filter_arg:?}")] FilterPluginCouldntParseArgument { filter_arg: String }, + #[error("language is required when passing a slice")] + LanguageMissingForSlice, } #[derive(Clone, Debug, Error)] @@ -418,6 +420,50 @@ fn run_for_context( }) } +pub fn run_for_slice_with_callback( + slice: &[u8], + args: Args, + callback: impl Fn(CaptureInfo) + Sync, +) -> Result { + let language = args.language.ok_or(Error::LanguageMissingForSlice)?; + let query_text = args.get_loaded_query_text()?; + let filter = args.get_loaded_filter()?; + let cached_queries: CachedQueries = Default::default(); + let capture_index = CaptureIndex::default(); + let matched = AtomicBool::new(false); + let non_fatal_errors: Arc>> = Default::default(); + + let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) { + Some(query) => query, + None => { + return Err(cached_queries + .error_if_no_successful_query_parsing() + .unwrap_err()) + } + }; + let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?; + + let query_context = QueryContext::new(query, capture_index, language.language(), filter); + + get_searcher(&args) + .borrow_mut() + .search_slice_callback_no_path(query_context, slice, |capture_info: CaptureInfo| { + callback(capture_info); + matched.store(true, Ordering::SeqCst); + }) + .unwrap(); + + let non_fatal_errors = non_fatal_errors.lock().unwrap().clone(); + if non_fatal_errors.is_empty() { + cached_queries.error_if_no_successful_query_parsing()?; + } + + Ok(RunStatus { + matched: matched.load(Ordering::SeqCst), + non_fatal_errors, + }) +} + fn for_each_project_file( args: &Args, non_fatal_errors: Arc>>, diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index f3e9594..5033012 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -350,6 +350,48 @@ impl Searcher { Ok(()) } + pub fn search_slice_callback_no_path( + &mut self, + query_context: QueryContext, + slice: &[u8], + callback: impl Fn(CaptureInfo), + ) -> Result<(), ConfigError> { + self.check_config()?; + + log::trace!("slice reader: searching via multiline strategy"); + let mut query_cursor = QueryCursor::new(); + let tree = get_parser(query_context.language) + .parse(slice, None) + .unwrap(); + let query = &query_context.query; + let capture_index = query_context.capture_index; + let filter = &query_context.filter; + query_cursor + .captures(query, tree.root_node(), slice) + .filter_map(|(match_, index_into_query_match_captures)| { + let this_capture = &match_.captures[index_into_query_match_captures]; + if this_capture.index != capture_index { + return None; + } + let single_captured_node = this_capture.node; + match filter.as_ref() { + None => Some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }), + Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }), + } + }) + .for_each(|capture_info| { + callback(capture_info); + }); + + Ok(()) + } + fn run_with_callback( &self, query_context: QueryContext, @@ -383,8 +425,8 @@ impl Searcher { }), } }) - .for_each(|node| { - callback(node, slice, path); + .for_each(|capture_info| { + callback(capture_info, slice, path); }); } From d3417478978326117cdcff1a255a7fd1fcb699ca Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 21 Jul 2023 21:20:50 -0400 Subject: [PATCH 07/42] clone args --- src/args.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/args.rs b/src/args.rs index b625291..4f10b4a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -23,7 +23,7 @@ use crate::{ const ALL_NODES_QUERY: &str = "(_) @node"; -#[derive(Parser)] +#[derive(Clone, Parser)] #[clap(group( ArgGroup::new("query_or_filter") .multiple(true) From 1930ccf3b64ca1f2b020b901ca8f0978253f6edb Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 21 Jul 2023 21:23:14 -0400 Subject: [PATCH 08/42] fn mut --- src/lib.rs | 2 +- src/searcher/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e06be61..10a5038 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -423,7 +423,7 @@ fn run_for_context( pub fn run_for_slice_with_callback( slice: &[u8], args: Args, - callback: impl Fn(CaptureInfo) + Sync, + mut callback: impl FnMut(CaptureInfo) + Sync, ) -> Result { let language = args.language.ok_or(Error::LanguageMissingForSlice)?; let query_text = args.get_loaded_query_text()?; diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 5033012..0937569 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -354,7 +354,7 @@ impl Searcher { &mut self, query_context: QueryContext, slice: &[u8], - callback: impl Fn(CaptureInfo), + mut callback: impl FnMut(CaptureInfo), ) -> Result<(), ConfigError> { self.check_config()?; From 3c2a56786d668f70fc75cc9727bd50c9b1cd2987 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Sat, 22 Jul 2023 08:45:19 -0400 Subject: [PATCH 09/42] handle multiple args instances --- src/use_searcher.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/use_searcher.rs b/src/use_searcher.rs index 10d824a..ac477b5 100644 --- a/src/use_searcher.rs +++ b/src/use_searcher.rs @@ -1,22 +1,16 @@ -use std::{ - cell::{OnceCell, RefCell}, - ptr, - rc::Rc, -}; +use std::{cell::RefCell, collections::HashMap, rc::Rc}; use crate::{searcher::Searcher, Args}; thread_local! { - static SEARCHER: OnceCell<(Rc>, *const Args)> = Default::default(); + static SEARCHER_PER_ARGS_INSTANCE: RefCell>>> = Default::default(); } pub(crate) fn get_searcher(args: &Args) -> Rc> { - SEARCHER.with(|searcher| { - let (searcher, args_when_initialized) = - searcher.get_or_init(|| (Rc::new(RefCell::new(args.get_searcher())), args)); - assert!( - ptr::eq(*args_when_initialized, args), - "Using multiple instances of args not supported" - ); - searcher.clone() + SEARCHER_PER_ARGS_INSTANCE.with(|searcher_per_args_instance| { + searcher_per_args_instance + .borrow_mut() + .entry(args) + .or_insert_with(|| Rc::new(RefCell::new(args.get_searcher()))) + .clone() }) } From 5fe38b3893d63ed76b99076d8327419c4f52c702 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Sun, 23 Jul 2023 09:04:40 -0400 Subject: [PATCH 10/42] per file callback --- src/lib.rs | 110 +++++++++++++++++++++++++++++++++++++++++++ src/query_context.rs | 1 + src/searcher/mod.rs | 6 +-- 3 files changed, 114 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 10a5038..f6f9823 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -464,6 +464,116 @@ pub fn run_for_slice_with_callback( }) } +pub fn run_with_per_file_callback( + args: Args, + per_file_callback: impl Fn(&DirEntry, Box) + '_>) + + Sync, +) -> Result { + let query_text = args.get_loaded_query_text()?; + let filter = args.get_loaded_filter()?; + let cached_queries: CachedQueries = Default::default(); + let capture_index = CaptureIndex::default(); + let matched = AtomicBool::new(false); + let searched = AtomicBool::new(false); + let non_fatal_errors: Arc>> = Default::default(); + + for_each_project_file( + &args, + non_fatal_errors.clone(), + |project_file_dir_entry, matched_languages| { + searched.store(true, Ordering::SeqCst); + let language = match args.language { + Some(specified_language) => { + if !matched_languages.contains(&specified_language) { + return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { + path: project_file_dir_entry.path().to_owned(), + specified_language, + } + .into(); + } + specified_language + } + None => match matched_languages.len() { + 0 => { + return NonFatalError::ExplicitPathArgumentNotOfKnownType { + path: project_file_dir_entry.path().to_owned(), + } + .into(); + } + 1 => matched_languages[0], + _ => { + let successfully_parsed_query_languages = matched_languages + .iter() + .filter_map(|&matched_language| { + cached_queries + .get_and_cache_query_for_language(&query_text, matched_language) + .map(|_| matched_language) + }) + .collect::>(); + match successfully_parsed_query_languages.len() { + 0 => { + return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile); + } + 1 => successfully_parsed_query_languages[0], + _ => { + return NonFatalError::AmbiguousLanguageForFile { + path: project_file_dir_entry.path().to_owned(), + languages: successfully_parsed_query_languages, + } + .into(); + } + } + } + }, + }; + let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) + { + Some(query) => query, + None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), + }; + let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?; + let path = + format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); + + let query_context = + QueryContext::new(query, capture_index, language.language(), filter.clone()); + + per_file_callback( + &project_file_dir_entry, + Box::new(|mut per_match_callback| { + get_searcher(&args) + .borrow_mut() + .search_path_callback::<_, io::Error>( + query_context.clone(), + path, + |capture_info: CaptureInfo, file_contents: &[u8], path: &Path| { + per_match_callback(capture_info, file_contents, path); + matched.store(true, Ordering::SeqCst); + }, + ) + .unwrap(); + }), + ); + + Ok(SingleFileSearchNonFailure::RanQuery) + }, + )?; + + let mut non_fatal_errors = non_fatal_errors.lock().unwrap().clone(); + if non_fatal_errors.is_empty() { + if !searched.load(Ordering::SeqCst) { + non_fatal_errors.push(NonFatalError::NothingSearched); + } else { + cached_queries.error_if_no_successful_query_parsing()?; + } + } + + Ok(RunStatus { + matched: matched.load(Ordering::SeqCst), + non_fatal_errors, + }) +} + fn for_each_project_file( args: &Args, non_fatal_errors: Arc>>, diff --git a/src/query_context.rs b/src/query_context.rs index d788956..c79ff0b 100644 --- a/src/query_context.rs +++ b/src/query_context.rs @@ -4,6 +4,7 @@ use tree_sitter::{Language, Query}; use crate::plugin::Filterer; +#[derive(Clone)] pub struct QueryContext { pub query: Arc, pub capture_index: u32, diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 0937569..7d16ed5 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -219,7 +219,7 @@ impl Searcher { &mut self, query_context: QueryContext, path: P, - callback: impl Fn(CaptureInfo, &[u8], &Path), + callback: impl FnMut(CaptureInfo, &[u8], &Path), ) -> Result<(), TError> where P: AsRef, @@ -339,7 +339,7 @@ impl Searcher { &mut self, query_context: QueryContext, slice: &[u8], - callback: impl Fn(CaptureInfo, &[u8], &Path), + callback: impl FnMut(CaptureInfo, &[u8], &Path), path: &Path, ) -> Result<(), ConfigError> { self.check_config()?; @@ -396,7 +396,7 @@ impl Searcher { &self, query_context: QueryContext, slice: &[u8], - callback: impl Fn(CaptureInfo, &[u8], &Path), + mut callback: impl FnMut(CaptureInfo, &[u8], &Path), path: &Path, ) { let mut query_cursor = QueryCursor::new(); From 5a3ede19ade3dc08afc7c98fa79e1d9b404d22b6 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 25 Jul 2023 18:20:52 -0400 Subject: [PATCH 11/42] take tree, rope --- Cargo.toml | 2 + src/lib.rs | 14 ++++-- src/searcher/mod.rs | 24 +++++++--- src/treesitter.rs | 106 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 134 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index c5fc489..8de842f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,9 +35,11 @@ log = "0.4.5" memchr = "2.1" memmap = { package = "memmap2", version = "0.5.3" } once_cell = "1.18.0" +ouroboros = "0.17.2" proc_macros = { package = "tree_sitter_grep_proc_macros", path = "proc_macros", version = "0.1.0" } rayon = "1.7.0" regex = "1.8.2" +ropey = "1.6.0" serde = { version = "1.0.77", features = ["derive"] } strum_macros = "0.25.1" termcolor = "1.2.0" diff --git a/src/lib.rs b/src/lib.rs index f6f9823..0fe1c0f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ use ignore::DirEntry; use rayon::prelude::*; use termcolor::{BufferWriter, ColorChoice}; use thiserror::Error; -use tree_sitter::{Node, Query, QueryError}; +use tree_sitter::{Node, Query, QueryError, Tree}; mod args; mod language; @@ -37,9 +37,13 @@ pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; +pub use treesitter::{Parseable, RopeOrSlice}; use use_printer::get_printer; use use_searcher::get_searcher; +pub extern crate ropey; +pub extern crate tree_sitter; + #[derive(Debug, Error)] pub enum Error { #[error("couldn't read query file {path_to_query_file:?}")] @@ -420,11 +424,13 @@ fn run_for_context( }) } -pub fn run_for_slice_with_callback( - slice: &[u8], +pub fn run_for_slice_with_callback<'a>( + slice: impl Into>, + tree: Option<&Tree>, args: Args, mut callback: impl FnMut(CaptureInfo) + Sync, ) -> Result { + let slice = slice.into(); let language = args.language.ok_or(Error::LanguageMissingForSlice)?; let query_text = args.get_loaded_query_text()?; let filter = args.get_loaded_filter()?; @@ -447,7 +453,7 @@ pub fn run_for_slice_with_callback( get_searcher(&args) .borrow_mut() - .search_slice_callback_no_path(query_context, slice, |capture_info: CaptureInfo| { + .search_slice_callback_no_path(query_context, slice, tree, |capture_info: CaptureInfo| { callback(capture_info); matched.store(true, Ordering::SeqCst); }) diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 7d16ed5..beb4459 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -1,6 +1,7 @@ // derived from https://github.com/BurntSushi/ripgrep/blob/master/crates/searcher/src/searcher/mod.rs use std::{ + borrow::Cow, cell::RefCell, cmp, fmt, fs::File, @@ -9,7 +10,7 @@ use std::{ }; use encoding_rs_io::DecodeReaderBytesBuilder; -use tree_sitter::QueryCursor; +use tree_sitter::{QueryCursor, Tree}; pub use self::mmap::MmapChoice; use crate::{ @@ -18,8 +19,8 @@ use crate::{ query_context::QueryContext, searcher::glue::MultiLine, sink::{Sink, SinkError}, - treesitter::get_parser, - CaptureInfo, + treesitter::{get_parser, Parseable}, + CaptureInfo, RopeOrSlice, }; mod core; @@ -353,19 +354,28 @@ impl Searcher { pub fn search_slice_callback_no_path( &mut self, query_context: QueryContext, - slice: &[u8], + slice: RopeOrSlice, + // slice: impl TextProvider<'a> + Parseable + 'a, + tree: Option<&Tree>, mut callback: impl FnMut(CaptureInfo), ) -> Result<(), ConfigError> { self.check_config()?; log::trace!("slice reader: searching via multiline strategy"); let mut query_cursor = QueryCursor::new(); - let tree = get_parser(query_context.language) - .parse(slice, None) - .unwrap(); let query = &query_context.query; let capture_index = query_context.capture_index; let filter = &query_context.filter; + let tree: Cow<'_, Tree> = tree.map_or_else( + || { + Cow::Owned( + slice + .parse(&mut get_parser(query_context.language), None) + .unwrap(), + ) + }, + Cow::Borrowed, + ); query_cursor .captures(query, tree.root_node(), slice) .filter_map(|(match_, index_into_query_match_captures)| { diff --git a/src/treesitter.rs b/src/treesitter.rs index 5575a04..cfc3715 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -1,4 +1,8 @@ -use tree_sitter::{Language, Node, Parser, Query, QueryError}; +use std::iter; + +use ouroboros::self_referencing; +use ropey::{iter::Chunks, Rope, RopeSlice}; +use tree_sitter::{Language, Node, Parser, Query, QueryError, TextProvider, Tree}; use crate::matcher::Match; @@ -21,3 +25,103 @@ impl From<&'_ Node<'_>> for Match { Self::new(range.start_byte, range.end_byte) } } + +pub trait Parseable { + fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option; +} + +impl<'a> Parseable for &'a [u8] { + fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option { + parser.parse(self, old_tree) + } +} + +impl<'a> Parseable for &'a Rope { + fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option { + parser.parse_with( + &mut |byte_offset, _| { + let (chunk, chunk_start_byte_index, _, _) = self.chunk_at_byte(byte_offset); + &chunk[byte_offset - chunk_start_byte_index..] + }, + old_tree, + ) + } +} + +#[derive(Copy, Clone)] +pub enum RopeOrSlice<'a> { + Slice(&'a [u8]), + Rope(&'a Rope), +} + +impl<'a> TextProvider<'a> for RopeOrSlice<'a> { + type I = RopeOrSliceTextProviderIterator<'a>; + + fn text(&mut self, node: Node) -> Self::I { + match self { + Self::Slice(slice) => { + RopeOrSliceTextProviderIterator::Slice(iter::once(&slice[node.byte_range()])) + } + Self::Rope(rope) => { + let rope_slice = rope.byte_slice(node.byte_range()); + RopeOrSliceTextProviderIterator::Rope(RopeOrSliceRopeTextProviderIterator::new( + rope_slice, + |rope_slice| rope_slice.chunks(), + )) + } + } + } +} + +impl<'a> Parseable for RopeOrSlice<'a> { + fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option { + match self { + Self::Slice(slice) => slice.parse(parser, old_tree), + Self::Rope(rope) => rope.parse(parser, old_tree), + } + } +} + +impl<'a> From<&'a [u8]> for RopeOrSlice<'a> { + fn from(value: &'a [u8]) -> Self { + Self::Slice(value) + } +} + +impl<'a> From<&'a Rope> for RopeOrSlice<'a> { + fn from(value: &'a Rope) -> Self { + Self::Rope(value) + } +} + +pub enum RopeOrSliceTextProviderIterator<'a> { + Slice(iter::Once<&'a [u8]>), + Rope(RopeOrSliceRopeTextProviderIterator<'a>), +} + +impl<'a> Iterator for RopeOrSliceTextProviderIterator<'a> { + type Item = &'a [u8]; + + fn next(&mut self) -> Option { + match self { + Self::Slice(slice_iterator) => slice_iterator.next(), + Self::Rope(rope_iterator) => rope_iterator.next().map(str::as_bytes), + } + } +} + +#[self_referencing] +pub struct RopeOrSliceRopeTextProviderIterator<'a> { + rope_slice: RopeSlice<'a>, + + #[borrows(rope_slice)] + chunks_iterator: Chunks<'a>, +} + +impl<'a> Iterator for RopeOrSliceRopeTextProviderIterator<'a> { + type Item = &'a str; + + fn next(&mut self) -> Option { + self.with_chunks_iterator_mut(|chunks_iterator| chunks_iterator.next()) + } +} From 0ad0481150b059a9cc3a56a24c40a06fe17d135a Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 04:45:57 -0400 Subject: [PATCH 12/42] use patched tree-sitter --- Cargo.toml | 3 +++ src/searcher/mod.rs | 9 ++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8de842f..a3c6b31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,6 +68,9 @@ tree-sitter-swift = "0.3.6" tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" +[patch.crates-io] +tree-sitter = { path = "../tree-sitter/lib" } + [[bin]] name = "tree-sitter-grep" diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index beb4459..8273616 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -10,7 +10,7 @@ use std::{ }; use encoding_rs_io::DecodeReaderBytesBuilder; -use tree_sitter::{QueryCursor, Tree}; +use tree_sitter::{QueryCursor, TextProvider, Tree}; pub use self::mmap::MmapChoice; use crate::{ @@ -20,7 +20,7 @@ use crate::{ searcher::glue::MultiLine, sink::{Sink, SinkError}, treesitter::{get_parser, Parseable}, - CaptureInfo, RopeOrSlice, + CaptureInfo, }; mod core; @@ -351,11 +351,10 @@ impl Searcher { Ok(()) } - pub fn search_slice_callback_no_path( + pub fn search_slice_callback_no_path<'a>( &mut self, query_context: QueryContext, - slice: RopeOrSlice, - // slice: impl TextProvider<'a> + Parseable + 'a, + slice: impl TextProvider<'a> + Parseable + 'a, tree: Option<&Tree>, mut callback: impl FnMut(CaptureInfo), ) -> Result<(), ConfigError> { From c0ae26e4b3c09da8f3338132dde9bb79dfe09284 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 13:25:02 -0400 Subject: [PATCH 13/42] get_captures() compiling --- Cargo.toml | 1 + rustfmt.toml | 3 +- src/lib.rs | 9 +-- src/searcher/mod.rs | 5 ++ src/treesitter.rs | 183 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 190 insertions(+), 11 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index a3c6b31..bf9b46b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,6 +41,7 @@ rayon = "1.7.0" regex = "1.8.2" ropey = "1.6.0" serde = { version = "1.0.77", features = ["derive"] } +streaming-iterator = "0.1.9" strum_macros = "0.25.1" termcolor = "1.2.0" thiserror = "1.0.43" diff --git a/rustfmt.toml b/rustfmt.toml index cd90377..5528c07 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -3,4 +3,5 @@ format_macro_bodies = true format_macro_matchers = true group_imports = "StdExternalCrate" imports_granularity = "Crate" -wrap_comments = true +edition = "2021" +# wrap_comments = true diff --git a/src/lib.rs b/src/lib.rs index 0fe1c0f..fae128a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ use ignore::DirEntry; use rayon::prelude::*; use termcolor::{BufferWriter, ColorChoice}; use thiserror::Error; -use tree_sitter::{Node, Query, QueryError, Tree}; +use tree_sitter::{Query, QueryError, Tree}; mod args; mod language; @@ -37,7 +37,7 @@ pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; -pub use treesitter::{Parseable, RopeOrSlice}; +pub use treesitter::{CaptureInfo, Parseable, RopeOrSlice}; use use_printer::get_printer; use use_searcher::get_searcher; @@ -297,11 +297,6 @@ pub fn run_print(args: Args) -> Result { ) } -pub struct CaptureInfo<'node> { - pub node: Node<'node>, - pub pattern_index: usize, -} - pub fn run_with_callback( args: Args, callback: impl Fn(CaptureInfo, &[u8], &Path) + Sync, diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 8273616..19a0dde 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -361,6 +361,11 @@ impl Searcher { self.check_config()?; log::trace!("slice reader: searching via multiline strategy"); + // get_captures( + // self.core.query_context().language, + // self.slice, + // &self.core.query_context().query, + // ) let mut query_cursor = QueryCursor::new(); let query = &query_context.query; let capture_index = query_context.capture_index; diff --git a/src/treesitter.rs b/src/treesitter.rs index cfc3715..360a81c 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -1,10 +1,17 @@ -use std::iter; +use std::{ + borrow::Cow, + iter::{self}, + mem, +}; use ouroboros::self_referencing; use ropey::{iter::Chunks, Rope, RopeSlice}; -use tree_sitter::{Language, Node, Parser, Query, QueryError, TextProvider, Tree}; +use streaming_iterator::StreamingIterator; +use tree_sitter::{ + Language, Node, Parser, Query, QueryCaptures, QueryCursor, QueryError, TextProvider, Tree, +}; -use crate::matcher::Match; +use crate::{matcher::Match, plugin::Filterer}; pub(crate) fn get_parser(language: Language) -> Parser { let mut parser = Parser::new(); @@ -73,6 +80,25 @@ impl<'a> TextProvider<'a> for RopeOrSlice<'a> { } } +impl<'a> TextProvider<'a> for &'a RopeOrSlice<'a> { + type I = RopeOrSliceTextProviderIterator<'a>; + + fn text(&mut self, node: Node) -> Self::I { + match self { + RopeOrSlice::Slice(slice) => { + RopeOrSliceTextProviderIterator::Slice(iter::once(&slice[node.byte_range()])) + } + RopeOrSlice::Rope(rope) => { + let rope_slice = rope.byte_slice(node.byte_range()); + RopeOrSliceTextProviderIterator::Rope(RopeOrSliceRopeTextProviderIterator::new( + rope_slice, + |rope_slice| rope_slice.chunks(), + )) + } + } + } +} + impl<'a> Parseable for RopeOrSlice<'a> { fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option { match self { @@ -82,6 +108,15 @@ impl<'a> Parseable for RopeOrSlice<'a> { } } +impl<'a> Parseable for &'a RopeOrSlice<'a> { + fn parse(&self, parser: &mut Parser, old_tree: Option<&Tree>) -> Option { + match self { + RopeOrSlice::Slice(slice) => slice.parse(parser, old_tree), + RopeOrSlice::Rope(rope) => rope.parse(parser, old_tree), + } + } +} + impl<'a> From<&'a [u8]> for RopeOrSlice<'a> { fn from(value: &'a [u8]) -> Self { Self::Slice(value) @@ -125,3 +160,145 @@ impl<'a> Iterator for RopeOrSliceRopeTextProviderIterator<'a> { self.with_chunks_iterator_mut(|chunks_iterator| chunks_iterator.next()) } } + +// I believe this type can't be Copy/Clone in order for the +// `get_captures()` unsafe stuff to be sound +pub struct CaptureInfo<'a> { + pub node: Node<'a>, + pub pattern_index: usize, +} + +#[allow(clippy::too_many_arguments)] +#[self_referencing] +pub struct Captures<'a> { + text: RopeOrSlice<'a>, + query_cursor: QueryCursor, + query: &'a Query, + filter: Option<&'a Filterer>, + tree: Cow<'a, Tree>, + capture_index: u32, + #[borrows(text, mut query_cursor, query, filter, tree)] + #[covariant] + captures_iterator: QueryCaptures<'this, 'this, 'this, RopeOrSlice<'this>>, + #[borrows(tree)] + #[covariant] + next_capture: Option>, +} + +pub fn get_captures<'a>( + language: Language, + // text: impl TextProvider<'a> + Parseable, + text: impl Into>, + query: &'a Query, + capture_index: u32, + filter: Option<&'a Filterer>, + tree: Option<&'a Tree>, +) -> Captures<'a> { + let text = text.into(); + let query_cursor = QueryCursor::new(); + let tree: Cow<'a, Tree> = tree.map_or_else( + || Cow::Owned(text.parse(&mut get_parser(language), None).unwrap()), + Cow::Borrowed, + ); + Captures::new( + text, + query_cursor, + query, + filter, + tree, + capture_index, + |text, query_cursor, query, filter, tree| { + let text = text.clone(); + query_cursor.captures(query, tree.root_node(), text) + // .filter_map(move |(match_, index_into_query_match_captures)| { + // let this_capture = &match_.captures[*index_into_query_match_captures]; + // if this_capture.index != capture_index { + // return None; + // } + // let single_captured_node = this_capture.node; + // match filter.as_ref() { + // None => Some(CaptureInfo { + // node: single_captured_node, + // pattern_index: match_.pattern_index, + // }), + // Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { + // node: single_captured_node, + // pattern_index: match_.pattern_index, + // }), + // } + // }) + }, + |_| None, + ) +} + +// impl<'a> Iterator for Captures<'a> { +// type Item = (QueryMatch<'a, 'a>, usize); + +// fn next(&mut self) -> Option { +// self.with_captures_iterator_mut(|captures_iterator| captures_iterator.next()) +// } +// } + +// impl<'a, TFilterMapCallback: FnMut((QueryMatch<'a, 'a>, usize)) -> Option>> Iterator +// for Captures<'a, TFilterMapCallback> +// { +// type Item = CaptureInfo<'a>; + +// fn next(&mut self) -> Option { +// self.with_filtered_captures_iterator_mut(|filtered_captures_iterator| { +// filtered_captures_iterator.next() +// }) +// } +// } + +// impl<'a> Iterator for Captures<'a> { +// type Item = CaptureInfo<'a>; + +// fn next(&mut self) -> Option { +// self.with_captures_iterator_mut(|captures_iterator| captures_iterator.next()) +// } +// } + +impl<'a> StreamingIterator for Captures<'a> { + type Item = CaptureInfo<'a>; + + fn advance(&mut self) { + self.with_mut(|all_fields| { + while let Some((match_, index_into_query_match_captures)) = + all_fields.captures_iterator.next() + { + let this_capture = &match_.captures[index_into_query_match_captures]; + if this_capture.index != *all_fields.capture_index { + continue; + } + let single_captured_node = this_capture.node; + if all_fields + .filter + .as_ref() + .map_or(true, |filter| filter.call(&single_captured_node)) + { + *all_fields.next_capture = Some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }); + return; + } + } + *all_fields.next_capture = None; + }); + } + + fn get<'this>(&'this self) -> Option<&'this Self::Item> { + let next_capture = self.borrow_next_capture(); + // SAFETY: I think this is ok as long as CaptureInfo isn't + // Copy/Clone? + // Since at that point there's no way for the "inner" + // CaptureInfo's contents to "outlive" the returned reference? + // Did this because otherwise was running into not being able + // to express that the "real" Item type for this trait (I think) + // should be CaptureInfo<'this>, not CaptureInfo<'a> + let next_capture: &'this Option> = unsafe { mem::transmute(next_capture) }; + next_capture.as_ref() + } +} From c73c3da6a7361240d85c1c306b69100aeffbb853 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 13:42:07 -0400 Subject: [PATCH 14/42] use of get_captures() compiling --- src/lib.rs | 4 +-- src/searcher/mod.rs | 66 ++++++++++------------------------ src/treesitter.rs | 88 ++++++++++----------------------------------- 3 files changed, 39 insertions(+), 119 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fae128a..fad05d7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -423,7 +423,7 @@ pub fn run_for_slice_with_callback<'a>( slice: impl Into>, tree: Option<&Tree>, args: Args, - mut callback: impl FnMut(CaptureInfo) + Sync, + mut callback: impl FnMut(&CaptureInfo) + Sync, ) -> Result { let slice = slice.into(); let language = args.language.ok_or(Error::LanguageMissingForSlice)?; @@ -448,7 +448,7 @@ pub fn run_for_slice_with_callback<'a>( get_searcher(&args) .borrow_mut() - .search_slice_callback_no_path(query_context, slice, tree, |capture_info: CaptureInfo| { + .search_slice_callback_no_path(query_context, slice, tree, |capture_info: &CaptureInfo| { callback(capture_info); matched.store(true, Ordering::SeqCst); }) diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 19a0dde..c340cac 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -10,6 +10,7 @@ use std::{ }; use encoding_rs_io::DecodeReaderBytesBuilder; +use streaming_iterator::StreamingIterator; use tree_sitter::{QueryCursor, TextProvider, Tree}; pub use self::mmap::MmapChoice; @@ -19,8 +20,8 @@ use crate::{ query_context::QueryContext, searcher::glue::MultiLine, sink::{Sink, SinkError}, - treesitter::{get_parser, Parseable}, - CaptureInfo, + treesitter::{get_captures, get_parser, Parseable}, + CaptureInfo, RopeOrSlice, }; mod core; @@ -351,57 +352,28 @@ impl Searcher { Ok(()) } - pub fn search_slice_callback_no_path<'a>( + pub fn search_slice_callback_no_path<'a, 'text, 'tree>( &mut self, query_context: QueryContext, - slice: impl TextProvider<'a> + Parseable + 'a, - tree: Option<&Tree>, - mut callback: impl FnMut(CaptureInfo), + // slice: impl TextProvider<'a> + Parseable + 'a, + slice: impl Into>, + tree: Option<&'tree Tree>, + mut callback: impl FnMut(&CaptureInfo), ) -> Result<(), ConfigError> { self.check_config()?; log::trace!("slice reader: searching via multiline strategy"); - // get_captures( - // self.core.query_context().language, - // self.slice, - // &self.core.query_context().query, - // ) - let mut query_cursor = QueryCursor::new(); - let query = &query_context.query; - let capture_index = query_context.capture_index; - let filter = &query_context.filter; - let tree: Cow<'_, Tree> = tree.map_or_else( - || { - Cow::Owned( - slice - .parse(&mut get_parser(query_context.language), None) - .unwrap(), - ) - }, - Cow::Borrowed, - ); - query_cursor - .captures(query, tree.root_node(), slice) - .filter_map(|(match_, index_into_query_match_captures)| { - let this_capture = &match_.captures[index_into_query_match_captures]; - if this_capture.index != capture_index { - return None; - } - let single_captured_node = this_capture.node; - match filter.as_ref() { - None => Some(CaptureInfo { - node: single_captured_node, - pattern_index: match_.pattern_index, - }), - Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { - node: single_captured_node, - pattern_index: match_.pattern_index, - }), - } - }) - .for_each(|capture_info| { - callback(capture_info); - }); + get_captures( + query_context.language, + slice, + &query_context.query, + query_context.capture_index, + query_context.filter.as_deref(), + tree, + ) + .for_each(|capture_info| { + callback(capture_info); + }); Ok(()) } diff --git a/src/treesitter.rs b/src/treesitter.rs index 360a81c..7b3bdb8 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -1,8 +1,6 @@ -use std::{ - borrow::Cow, - iter::{self}, - mem, -}; +#![allow(clippy::too_many_arguments)] + +use std::{borrow::Cow, iter, mem}; use ouroboros::self_referencing; use ropey::{iter::Chunks, Rope, RopeSlice}; @@ -168,16 +166,15 @@ pub struct CaptureInfo<'a> { pub pattern_index: usize, } -#[allow(clippy::too_many_arguments)] #[self_referencing] -pub struct Captures<'a> { - text: RopeOrSlice<'a>, +pub struct Captures<'a, 'text: 'a, 'tree: 'a> { + text: RopeOrSlice<'text>, query_cursor: QueryCursor, query: &'a Query, filter: Option<&'a Filterer>, - tree: Cow<'a, Tree>, + tree: Cow<'tree, Tree>, capture_index: u32, - #[borrows(text, mut query_cursor, query, filter, tree)] + #[borrows(text, mut query_cursor, query, tree)] #[covariant] captures_iterator: QueryCaptures<'this, 'this, 'this, RopeOrSlice<'this>>, #[borrows(tree)] @@ -185,18 +182,18 @@ pub struct Captures<'a> { next_capture: Option>, } -pub fn get_captures<'a>( +pub fn get_captures<'a, 'text, 'tree>( language: Language, // text: impl TextProvider<'a> + Parseable, - text: impl Into>, + text: impl Into>, query: &'a Query, capture_index: u32, filter: Option<&'a Filterer>, - tree: Option<&'a Tree>, -) -> Captures<'a> { + tree: Option<&'tree Tree>, +) -> Captures<'a, 'text, 'tree> { let text = text.into(); let query_cursor = QueryCursor::new(); - let tree: Cow<'a, Tree> = tree.map_or_else( + let tree: Cow<'tree, Tree> = tree.map_or_else( || Cow::Owned(text.parse(&mut get_parser(language), None).unwrap()), Cow::Borrowed, ); @@ -207,67 +204,17 @@ pub fn get_captures<'a>( filter, tree, capture_index, - |text, query_cursor, query, filter, tree| { - let text = text.clone(); - query_cursor.captures(query, tree.root_node(), text) - // .filter_map(move |(match_, index_into_query_match_captures)| { - // let this_capture = &match_.captures[*index_into_query_match_captures]; - // if this_capture.index != capture_index { - // return None; - // } - // let single_captured_node = this_capture.node; - // match filter.as_ref() { - // None => Some(CaptureInfo { - // node: single_captured_node, - // pattern_index: match_.pattern_index, - // }), - // Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { - // node: single_captured_node, - // pattern_index: match_.pattern_index, - // }), - // } - // }) - }, + |text, query_cursor, query, tree| query_cursor.captures(query, tree.root_node(), *text), |_| None, ) } -// impl<'a> Iterator for Captures<'a> { -// type Item = (QueryMatch<'a, 'a>, usize); - -// fn next(&mut self) -> Option { -// self.with_captures_iterator_mut(|captures_iterator| captures_iterator.next()) -// } -// } - -// impl<'a, TFilterMapCallback: FnMut((QueryMatch<'a, 'a>, usize)) -> Option>> Iterator -// for Captures<'a, TFilterMapCallback> -// { -// type Item = CaptureInfo<'a>; - -// fn next(&mut self) -> Option { -// self.with_filtered_captures_iterator_mut(|filtered_captures_iterator| { -// filtered_captures_iterator.next() -// }) -// } -// } - -// impl<'a> Iterator for Captures<'a> { -// type Item = CaptureInfo<'a>; - -// fn next(&mut self) -> Option { -// self.with_captures_iterator_mut(|captures_iterator| captures_iterator.next()) -// } -// } - -impl<'a> StreamingIterator for Captures<'a> { - type Item = CaptureInfo<'a>; +impl<'a, 'text, 'tree> StreamingIterator for Captures<'a, 'text, 'tree> { + type Item = CaptureInfo<'tree>; fn advance(&mut self) { self.with_mut(|all_fields| { - while let Some((match_, index_into_query_match_captures)) = - all_fields.captures_iterator.next() - { + for (match_, index_into_query_match_captures) in all_fields.captures_iterator.by_ref() { let this_capture = &match_.captures[index_into_query_match_captures]; if this_capture.index != *all_fields.capture_index { continue; @@ -298,7 +245,8 @@ impl<'a> StreamingIterator for Captures<'a> { // Did this because otherwise was running into not being able // to express that the "real" Item type for this trait (I think) // should be CaptureInfo<'this>, not CaptureInfo<'a> - let next_capture: &'this Option> = unsafe { mem::transmute(next_capture) }; + let next_capture: &'this Option> = + unsafe { mem::transmute(next_capture) }; next_capture.as_ref() } } From d8f21b058ab4e458ef842d5e83b3309a0607c8bc Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 13:52:15 -0400 Subject: [PATCH 15/42] use everywhere --- src/lib.rs | 8 +++---- src/searcher/glue.rs | 53 ++++++++++++++++++-------------------------- src/searcher/mod.rs | 46 ++++++++++++-------------------------- 3 files changed, 39 insertions(+), 68 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fad05d7..f6c9392 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -299,7 +299,7 @@ pub fn run_print(args: Args) -> Result { pub fn run_with_callback( args: Args, - callback: impl Fn(CaptureInfo, &[u8], &Path) + Sync, + callback: impl Fn(&CaptureInfo, &[u8], &Path) + Sync, ) -> Result { run_for_context( args, @@ -314,7 +314,7 @@ pub fn run_with_callback( .search_path_callback::<_, io::Error>( query_context, path, - |capture_info: CaptureInfo, file_contents: &[u8], path: &Path| { + |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { callback(capture_info, file_contents, path); matched.store(true, Ordering::SeqCst); }, @@ -467,7 +467,7 @@ pub fn run_for_slice_with_callback<'a>( pub fn run_with_per_file_callback( args: Args, - per_file_callback: impl Fn(&DirEntry, Box) + '_>) + per_file_callback: impl Fn(&DirEntry, Box) + '_>) + Sync, ) -> Result { let query_text = args.get_loaded_query_text()?; @@ -547,7 +547,7 @@ pub fn run_with_per_file_callback( .search_path_callback::<_, io::Error>( query_context.clone(), path, - |capture_info: CaptureInfo, file_contents: &[u8], path: &Path| { + |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { per_match_callback(capture_info, file_contents, path); matched.store(true, Ordering::SeqCst); }, diff --git a/src/searcher/glue.rs b/src/searcher/glue.rs index 3b0c7f6..701ff4d 100644 --- a/src/searcher/glue.rs +++ b/src/searcher/glue.rs @@ -1,5 +1,6 @@ // derived from https://github.com/BurntSushi/ripgrep/blob/master/crates/searcher/src/searcher/glue.rs +use streaming_iterator::StreamingIterator; use tree_sitter::{Node, QueryCursor}; use crate::{ @@ -7,7 +8,8 @@ use crate::{ query_context::QueryContext, searcher::{core::Core, Config, Range, Searcher}, sink::Sink, - treesitter::get_parser, + treesitter::{get_captures, get_parser}, + CaptureInfo, }; #[derive(Debug, Default)] @@ -75,33 +77,17 @@ impl<'s, S: Sink> MultiLine<'s, S> { pub fn run(mut self) -> Result<(), S::Error> { if self.core.begin()? { let mut keepgoing = true; - let mut query_cursor = QueryCursor::new(); - let tree = get_parser(self.core.query_context().language) - .parse(self.slice, None) - .unwrap(); - let query = self.core.query_context().query.clone(); - let capture_index = self.core.query_context().capture_index; - let filter = self.core.query_context().filter.clone(); - let mut matches = query_cursor - .captures(&query, tree.root_node(), self.slice) - .filter_map(|(match_, found_capture_index)| { - let found_capture_index = found_capture_index as u32; - if found_capture_index != capture_index { - return None; - } - let mut nodes_for_this_capture = match_.nodes_for_capture_index(capture_index); - let single_captured_node = nodes_for_this_capture.next().unwrap(); - assert!( - nodes_for_this_capture.next().is_none(), - "I guess .captures() always wraps up the single capture like this?" - ); - match filter.as_ref() { - None => Some(single_captured_node), - Some(filter) => filter - .call(&single_captured_node) - .then_some(single_captured_node), - } - }); + let query_context = self.core.query_context(); + let query = query_context.query.clone(); + let filter = query_context.filter.clone(); + let mut matches = get_captures( + query_context.language, + self.slice, + &query, + query_context.capture_index, + filter.as_deref(), + None, + ); while !self.slice[self.core.pos()..].is_empty() && keepgoing { keepgoing = self.sink(&mut matches)?; } @@ -132,7 +118,7 @@ impl<'s, S: Sink> MultiLine<'s, S> { fn sink<'tree>( &mut self, - matches: &mut impl Iterator>, + matches: &mut impl StreamingIterator>, ) -> Result { if self.config.invert_match { return self.sink_matched_inverted(matches); @@ -173,7 +159,7 @@ impl<'s, S: Sink> MultiLine<'s, S> { fn sink_matched_inverted<'tree>( &mut self, - matches: &mut impl Iterator>, + matches: &mut impl StreamingIterator>, ) -> Result { assert!(self.config.invert_match); @@ -241,9 +227,12 @@ impl<'s, S: Sink> MultiLine<'s, S> { fn find<'tree>( &mut self, - matches: &mut impl Iterator>, + matches: &mut impl StreamingIterator>, ) -> Result, S::Error> { - Ok(matches.next().as_ref().map(Into::into)) + Ok(matches + .next() + .as_ref() + .map(|capture_info| (&capture_info.node).into())) } fn advance(&mut self, range: &Range) { diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index c340cac..f81417b 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -221,7 +221,7 @@ impl Searcher { &mut self, query_context: QueryContext, path: P, - callback: impl FnMut(CaptureInfo, &[u8], &Path), + callback: impl FnMut(&CaptureInfo, &[u8], &Path), ) -> Result<(), TError> where P: AsRef, @@ -341,7 +341,7 @@ impl Searcher { &mut self, query_context: QueryContext, slice: &[u8], - callback: impl FnMut(CaptureInfo, &[u8], &Path), + callback: impl FnMut(&CaptureInfo, &[u8], &Path), path: &Path, ) -> Result<(), ConfigError> { self.check_config()?; @@ -382,38 +382,20 @@ impl Searcher { &self, query_context: QueryContext, slice: &[u8], - mut callback: impl FnMut(CaptureInfo, &[u8], &Path), + mut callback: impl FnMut(&CaptureInfo, &[u8], &Path), path: &Path, ) { - let mut query_cursor = QueryCursor::new(); - let tree = get_parser(query_context.language) - .parse(slice, None) - .unwrap(); - let query = &query_context.query; - let capture_index = query_context.capture_index; - let filter = &query_context.filter; - query_cursor - .captures(query, tree.root_node(), slice) - .filter_map(|(match_, index_into_query_match_captures)| { - let this_capture = &match_.captures[index_into_query_match_captures]; - if this_capture.index != capture_index { - return None; - } - let single_captured_node = this_capture.node; - match filter.as_ref() { - None => Some(CaptureInfo { - node: single_captured_node, - pattern_index: match_.pattern_index, - }), - Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo { - node: single_captured_node, - pattern_index: match_.pattern_index, - }), - } - }) - .for_each(|capture_info| { - callback(capture_info, slice, path); - }); + get_captures( + query_context.language, + slice, + &query_context.query, + query_context.capture_index, + query_context.filter.as_deref(), + None, + ) + .for_each(|capture_info| { + callback(capture_info, slice, path); + }); } fn check_config(&self) -> Result<(), ConfigError> { From 3a039da9f7edb3369d97667dd48e9f0462b6683c Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 13:53:26 -0400 Subject: [PATCH 16/42] rm unused --- src/searcher/glue.rs | 3 +-- src/searcher/mod.rs | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/searcher/glue.rs b/src/searcher/glue.rs index 701ff4d..1a1eba1 100644 --- a/src/searcher/glue.rs +++ b/src/searcher/glue.rs @@ -1,14 +1,13 @@ // derived from https://github.com/BurntSushi/ripgrep/blob/master/crates/searcher/src/searcher/glue.rs use streaming_iterator::StreamingIterator; -use tree_sitter::{Node, QueryCursor}; use crate::{ lines::{self, LineStep}, query_context::QueryContext, searcher::{core::Core, Config, Range, Searcher}, sink::Sink, - treesitter::{get_captures, get_parser}, + treesitter::get_captures, CaptureInfo, }; diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index f81417b..0111651 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -1,7 +1,6 @@ // derived from https://github.com/BurntSushi/ripgrep/blob/master/crates/searcher/src/searcher/mod.rs use std::{ - borrow::Cow, cell::RefCell, cmp, fmt, fs::File, @@ -11,7 +10,7 @@ use std::{ use encoding_rs_io::DecodeReaderBytesBuilder; use streaming_iterator::StreamingIterator; -use tree_sitter::{QueryCursor, TextProvider, Tree}; +use tree_sitter::Tree; pub use self::mmap::MmapChoice; use crate::{ @@ -20,7 +19,7 @@ use crate::{ query_context::QueryContext, searcher::glue::MultiLine, sink::{Sink, SinkError}, - treesitter::{get_captures, get_parser, Parseable}, + treesitter::get_captures, CaptureInfo, RopeOrSlice, }; From debbbc00af7f465086a2b301a75c7821c736292f Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 14:13:35 -0400 Subject: [PATCH 17/42] non-local tree-sitter dependency --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index bf9b46b..eeb00a3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,7 +70,7 @@ tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" [patch.crates-io] -tree-sitter = { path = "../tree-sitter/lib" } +tree-sitter = { git = "https://github.com/helixbass/tree-sitter", rev = "57e98fb0" } [[bin]] name = "tree-sitter-grep" From 30a1c71f2b6df7db111339476171c4621d8bbf5e Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 16:01:39 -0400 Subject: [PATCH 18/42] get captures for enclosing node --- src/lib.rs | 5 ++- src/treesitter.rs | 82 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f6c9392..73b4407 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -37,11 +37,14 @@ pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; -pub use treesitter::{CaptureInfo, Parseable, RopeOrSlice}; +pub use treesitter::{ + get_captures, get_captures_for_enclosing_node, CaptureInfo, Parseable, RopeOrSlice, +}; use use_printer::get_printer; use use_searcher::get_searcher; pub extern crate ropey; +pub extern crate streaming_iterator; pub extern crate tree_sitter; #[derive(Debug, Error)] diff --git a/src/treesitter.rs b/src/treesitter.rs index 7b3bdb8..c949ab1 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -250,3 +250,85 @@ impl<'a, 'text, 'tree> StreamingIterator for Captures<'a, 'text, 'tree> { next_capture.as_ref() } } + +#[self_referencing] +pub struct CapturesForEnclosingNode<'a, 'text: 'a, 'tree: 'a> { + text: RopeOrSlice<'text>, + query_cursor: QueryCursor, + query: &'a Query, + filter: Option<&'a Filterer>, + enclosing_node: Node<'tree>, + capture_index: u32, + #[borrows(text, mut query_cursor, query, enclosing_node)] + #[covariant] + captures_iterator: QueryCaptures<'this, 'this, 'this, RopeOrSlice<'this>>, + #[borrows(enclosing_node)] + #[covariant] + next_capture: Option>, +} + +pub fn get_captures_for_enclosing_node<'a, 'text, 'tree>( + // text: impl TextProvider<'a> + Parseable, + text: impl Into>, + query: &'a Query, + capture_index: u32, + filter: Option<&'a Filterer>, + enclosing_node: Node<'tree>, +) -> CapturesForEnclosingNode<'a, 'text, 'tree> { + let text = text.into(); + let query_cursor = QueryCursor::new(); + CapturesForEnclosingNode::new( + text, + query_cursor, + query, + filter, + enclosing_node, + capture_index, + |text, query_cursor, query, enclosing_node| { + query_cursor.captures(query, *enclosing_node, *text) + }, + |_| None, + ) +} + +impl<'a, 'text, 'tree> StreamingIterator for CapturesForEnclosingNode<'a, 'text, 'tree> { + type Item = CaptureInfo<'tree>; + + fn advance(&mut self) { + self.with_mut(|all_fields| { + for (match_, index_into_query_match_captures) in all_fields.captures_iterator.by_ref() { + let this_capture = &match_.captures[index_into_query_match_captures]; + if this_capture.index != *all_fields.capture_index { + continue; + } + let single_captured_node = this_capture.node; + if all_fields + .filter + .as_ref() + .map_or(true, |filter| filter.call(&single_captured_node)) + { + *all_fields.next_capture = Some(CaptureInfo { + node: single_captured_node, + pattern_index: match_.pattern_index, + }); + return; + } + } + *all_fields.next_capture = None; + }); + } + + fn get<'this>(&'this self) -> Option<&'this Self::Item> { + let next_capture = self.borrow_next_capture(); + // SAFETY: I think this is ok as long as CaptureInfo isn't + // Copy/Clone? + // Since at that point there's no way for the "inner" + // CaptureInfo's contents to "outlive" the returned reference? + // Did this because otherwise was running into not being able + // to express that the "real" Item type for this trait (I think) + // should be CaptureInfo<'this>, not CaptureInfo<'a> + let next_capture: &'this Option> = + unsafe { mem::transmute(next_capture) }; + next_capture.as_ref() + } +} From c0bf4ca47b9fb9a9ae51206ad8a0fcd4849708e8 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 19:25:30 -0400 Subject: [PATCH 19/42] wiring up query text per language --- proc_macros/src/lib.rs | 2 +- src/args.rs | 61 +++++++++++++++++++++++++++++++++++++++--- src/lib.rs | 48 ++++++++++++++++++++++++--------- 3 files changed, 93 insertions(+), 18 deletions(-) diff --git a/proc_macros/src/lib.rs b/proc_macros/src/lib.rs index 40fce98..31e6c3b 100644 --- a/proc_macros/src/lib.rs +++ b/proc_macros/src/lib.rs @@ -133,7 +133,7 @@ fn get_token_enum_definition( variants_with_attributes: &[ExprPath], ) -> proc_macro2::TokenStream { quote! { - #[derive(Copy, Clone, Debug, Eq, PartialEq, clap::ValueEnum, strum_macros::Display)] + #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, clap::ValueEnum, strum_macros::Display)] pub enum #name { #(#variants_with_attributes),* } diff --git a/src/args.rs b/src/args.rs index 4f10b4a..ed1f453 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,4 +1,5 @@ use std::{ + collections::HashMap, fs, path::{Path, PathBuf}, sync::{Arc, Mutex}, @@ -8,6 +9,7 @@ use clap::{ArgGroup, Parser}; use ignore::{types::Types, WalkBuilder, WalkParallel}; use rayon::iter::IterBridge; use termcolor::BufferWriter; +use tree_sitter::Query; use crate::{ language::SupportedLanguage, @@ -199,18 +201,69 @@ impl Args { Ok(get_loaded_filter(self.filter.as_deref(), self.filter_arg.as_deref())?.map(Arc::new)) } - pub(crate) fn get_loaded_query_text(&self) -> Result { + pub(crate) fn get_loaded_query_text_per_language( + &self, + ) -> Result { Ok( match (self.path_to_query_file.as_ref(), self.query_text.as_ref()) { (Some(path_to_query_file), None) => fs::read_to_string(path_to_query_file) .map_err(|source| Error::QueryFileReadError { source, path_to_query_file: path_to_query_file.clone(), - })?, - (None, Some(query_text)) => query_text.clone(), - (None, None) => ALL_NODES_QUERY.to_owned(), + })? + .into(), + (None, Some(query_text)) => query_text.clone().into(), + (None, None) => ALL_NODES_QUERY.to_owned().into(), _ => unreachable!(), }, ) } } + +pub enum QueryOrQueryTextPerLanguage { + SingleQueryText(String), + PerLanguage(HashMap>), +} + +impl From for QueryOrQueryTextPerLanguage { + fn from(value: String) -> Self { + Self::SingleQueryText(value) + } +} + +impl From>> for QueryOrQueryTextPerLanguage { + fn from(value: HashMap>) -> Self { + Self::PerLanguage(value) + } +} + +impl QueryOrQueryTextPerLanguage { + pub fn get_query_or_query_text_for_language( + &self, + language: SupportedLanguage, + ) -> QueryOrQueryText { + match self { + QueryOrQueryTextPerLanguage::SingleQueryText(query_text) => (&**query_text).into(), + QueryOrQueryTextPerLanguage::PerLanguage(per_language) => { + per_language.get(&language).unwrap().clone().into() + } + } + } +} + +pub enum QueryOrQueryText<'a> { + QueryText(&'a str), + Query(Arc), +} + +impl<'a> From<&'a str> for QueryOrQueryText<'a> { + fn from(value: &'a str) -> Self { + Self::QueryText(value) + } +} + +impl<'a> From> for QueryOrQueryText<'a> { + fn from(value: Arc) -> Self { + Self::Query(value) + } +} diff --git a/src/lib.rs b/src/lib.rs index 73b4407..294f52e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ use std::{ }, }; +use args::QueryOrQueryText; use ignore::DirEntry; use rayon::prelude::*; use termcolor::{BufferWriter, ColorChoice}; @@ -176,13 +177,19 @@ fn join_with_or(list: &[TItem]) -> String { struct CachedQueries(BySupportedLanguage, QueryError>>>); impl CachedQueries { - fn get_and_cache_query_for_language( + fn get_and_cache_query_for_language<'a>( &self, - query_text: &str, + query_or_query_text: impl Into>, language: SupportedLanguage, ) -> Option> { + let query_or_query_text = query_or_query_text.into(); self.0[language] - .get_or_init(|| maybe_get_query(query_text, language.language()).map(Arc::new)) + .get_or_init(|| match query_or_query_text { + QueryOrQueryText::QueryText(query_text) => { + maybe_get_query(query_text, language.language()).map(Arc::new) + } + QueryOrQueryText::Query(query) => Ok(query), + }) .as_ref() .ok() .cloned() @@ -332,7 +339,7 @@ fn run_for_context( context: TContext, search_file: impl Fn(&TContext, &Args, &Path, QueryContext, &AtomicBool) + Sync, ) -> Result { - let query_text = args.get_loaded_query_text()?; + let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); let capture_index = CaptureIndex::default(); @@ -369,7 +376,11 @@ fn run_for_context( .iter() .filter_map(|&matched_language| { cached_queries - .get_and_cache_query_for_language(&query_text, matched_language) + .get_and_cache_query_for_language( + query_text_per_language + .get_query_or_query_text_for_language(matched_language), + matched_language, + ) .map(|_| matched_language) }) .collect::>(); @@ -389,8 +400,10 @@ fn run_for_context( } }, }; - let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) - { + let query = match cached_queries.get_and_cache_query_for_language( + query_text_per_language.get_query_or_query_text_for_language(language), + language, + ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; @@ -430,14 +443,17 @@ pub fn run_for_slice_with_callback<'a>( ) -> Result { let slice = slice.into(); let language = args.language.ok_or(Error::LanguageMissingForSlice)?; - let query_text = args.get_loaded_query_text()?; + let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); let capture_index = CaptureIndex::default(); let matched = AtomicBool::new(false); let non_fatal_errors: Arc>> = Default::default(); - let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) { + let query = match cached_queries.get_and_cache_query_for_language( + query_text_per_language.get_query_or_query_text_for_language(language), + language, + ) { Some(query) => query, None => { return Err(cached_queries @@ -473,7 +489,7 @@ pub fn run_with_per_file_callback( per_file_callback: impl Fn(&DirEntry, Box) + '_>) + Sync, ) -> Result { - let query_text = args.get_loaded_query_text()?; + let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); let capture_index = CaptureIndex::default(); @@ -510,7 +526,11 @@ pub fn run_with_per_file_callback( .iter() .filter_map(|&matched_language| { cached_queries - .get_and_cache_query_for_language(&query_text, matched_language) + .get_and_cache_query_for_language( + query_text_per_language + .get_query_or_query_text_for_language(matched_language), + matched_language, + ) .map(|_| matched_language) }) .collect::>(); @@ -530,8 +550,10 @@ pub fn run_with_per_file_callback( } }, }; - let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) - { + let query = match cached_queries.get_and_cache_query_for_language( + query_text_per_language.get_query_or_query_text_for_language(language), + language, + ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; From 2eba2c5bfaa18a2845a5d4195e3ad6f044b1be51 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 20:59:54 -0400 Subject: [PATCH 20/42] expose per-language queries --- Cargo.toml | 1 + src/args.rs | 61 +++++++++++++++++++++++++------------- src/lib.rs | 10 +++++-- src/project_file_walker.rs | 10 +++++-- 4 files changed, 56 insertions(+), 26 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index eeb00a3..ad7055c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ rust-version = "1.70" bstr = "1.1.0" bytecount = "0.6" clap = { version = "4.3.0", features = ["derive", "wrap_help"] } +derive_builder = "0.12.0" encoding_rs = "0.8.14" encoding_rs_io = "0.1.6" ignore = { package = "tree_sitter_grep_ignore", git = "https://github.com/helixbass/ripgrep", rev = "669ebd3", version = "0.4.20-dev.0" } diff --git a/src/args.rs b/src/args.rs index ed1f453..24dc5d5 100644 --- a/src/args.rs +++ b/src/args.rs @@ -6,6 +6,7 @@ use std::{ }; use clap::{ArgGroup, Parser}; +use derive_builder::Builder; use ignore::{types::Types, WalkBuilder, WalkParallel}; use rayon::iter::IterBridge; use termcolor::BufferWriter; @@ -25,7 +26,8 @@ use crate::{ const ALL_NODES_QUERY: &str = "(_) @node"; -#[derive(Clone, Parser)] +#[derive(Builder, Clone, Default, Parser)] +#[builder(default, setter(strip_option, into))] #[clap(group( ArgGroup::new("query_or_filter") .multiple(true) @@ -39,13 +41,16 @@ pub struct Args { /// /// This conflicts with the --query option. #[arg(short = 'Q', long = "query-file", conflicts_with = "query_text")] - pub path_to_query_file: Option, + path_to_query_file: Option, /// The source text of a tree-sitter query. /// /// This conflicts with the --query-file option. #[arg(short, long = "query", conflicts_with = "path_to_query_file")] - pub query_text: Option, + query_text: Option, + + #[clap(skip)] + query_per_language: Option>>, /// The name of the tree-sitter query capture (without leading "@") whose /// matching nodes will be output. @@ -176,7 +181,11 @@ impl Args { } pub(crate) fn get_project_file_walker_types(&self) -> Types { - get_project_file_walker_types(self.language) + get_project_file_walker_types(self.language.map(|language| vec![language]).or_else(|| { + self.query_per_language + .as_ref() + .map(|query_per_language| query_per_language.keys().cloned().collect()) + })) } pub(crate) fn get_project_file_walker(&self) -> WalkParallel { @@ -205,36 +214,36 @@ impl Args { &self, ) -> Result { Ok( - match (self.path_to_query_file.as_ref(), self.query_text.as_ref()) { - (Some(path_to_query_file), None) => fs::read_to_string(path_to_query_file) + match ( + self.path_to_query_file.as_ref(), + self.query_text.as_ref(), + self.query_per_language.as_ref(), + ) { + (Some(path_to_query_file), None, None) => fs::read_to_string(path_to_query_file) .map_err(|source| Error::QueryFileReadError { source, path_to_query_file: path_to_query_file.clone(), })? .into(), - (None, Some(query_text)) => query_text.clone().into(), - (None, None) => ALL_NODES_QUERY.to_owned().into(), + (None, Some(query_text), None) => query_text.clone().into(), + (None, None, Some(query_per_language)) => query_per_language.clone().into(), + (None, None, None) => ALL_NODES_QUERY.to_owned().into(), _ => unreachable!(), }, ) } } -pub enum QueryOrQueryTextPerLanguage { - SingleQueryText(String), - PerLanguage(HashMap>), -} - -impl From for QueryOrQueryTextPerLanguage { - fn from(value: String) -> Self { - Self::SingleQueryText(value) +impl ArgsBuilder { + pub fn maybe_language(&mut self, language: Option) -> &mut Self { + self.language = Some(language); + self } } -impl From>> for QueryOrQueryTextPerLanguage { - fn from(value: HashMap>) -> Self { - Self::PerLanguage(value) - } +pub enum QueryOrQueryTextPerLanguage { + SingleQueryText(String), + PerLanguage(HashMap>), } impl QueryOrQueryTextPerLanguage { @@ -251,6 +260,18 @@ impl QueryOrQueryTextPerLanguage { } } +impl From for QueryOrQueryTextPerLanguage { + fn from(value: String) -> Self { + Self::SingleQueryText(value) + } +} + +impl From>> for QueryOrQueryTextPerLanguage { + fn from(value: HashMap>) -> Self { + Self::PerLanguage(value) + } +} + pub enum QueryOrQueryText<'a> { QueryText(&'a str), Query(Arc), diff --git a/src/lib.rs b/src/lib.rs index 294f52e..2f1b7b6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,7 +32,7 @@ mod treesitter; mod use_printer; mod use_searcher; -pub use args::Args; +pub use args::{Args, ArgsBuilder}; use language::BySupportedLanguage; pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; @@ -486,8 +486,11 @@ pub fn run_for_slice_with_callback<'a>( pub fn run_with_per_file_callback( args: Args, - per_file_callback: impl Fn(&DirEntry, Box) + '_>) - + Sync, + per_file_callback: impl Fn( + &DirEntry, + SupportedLanguage, + Box) + '_>, + ) + Sync, ) -> Result { let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; @@ -566,6 +569,7 @@ pub fn run_with_per_file_callback( per_file_callback( &project_file_dir_entry, + language, Box::new(|mut per_match_callback| { get_searcher(&args) .borrow_mut() diff --git a/src/project_file_walker.rs b/src/project_file_walker.rs index 1294f10..79289fa 100644 --- a/src/project_file_walker.rs +++ b/src/project_file_walker.rs @@ -95,11 +95,15 @@ impl Iterator for WalkParallelIterator { } } -pub(crate) fn get_project_file_walker_types(language: Option) -> Types { +pub(crate) fn get_project_file_walker_types( + languages: Option>, +) -> Types { let mut types_builder = TypesBuilder::new(); types_builder.add_defaults(); - if let Some(language) = language { - types_builder.select(language.name_for_ignore_select()); + if let Some(languages) = languages { + for language in languages { + types_builder.select(language.name_for_ignore_select()); + } } else { for language in ALL_SUPPORTED_LANGUAGES.values() { types_builder.select(language.name_for_ignore_select()); From e471b0d2ef34f284e9d7a1b5b31290eb85907e02 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 22:27:14 -0400 Subject: [PATCH 21/42] capture index per language --- src/lib.rs | 142 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 100 insertions(+), 42 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2f1b7b6..f4ebc3e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -118,7 +118,7 @@ pub enum NonFatalError { }, } -#[derive(Clone)] +#[derive(Clone, Debug)] enum CaptureIndexError { NoCaptureInQuery, InvalidCaptureName { capture_name: String }, @@ -135,31 +135,6 @@ impl From for Error { } } -#[derive(Default)] -struct CaptureIndex(OnceLock>); - -impl CaptureIndex { - pub fn get_or_init( - &self, - query: &Query, - capture_name: Option<&str>, - ) -> Result { - self.0 - .get_or_init(|| match capture_name { - None => match query.capture_names().len() { - 0 => Err(CaptureIndexError::NoCaptureInQuery), - _ => Ok(0), - }, - Some(capture_name) => query.capture_index_for_name(capture_name).ok_or_else(|| { - CaptureIndexError::InvalidCaptureName { - capture_name: capture_name.to_owned(), - } - }), - }) - .clone() - } -} - fn join_with_or(list: &[TItem]) -> String { let mut ret: String = Default::default(); for (index, item) in list.iter().enumerate() { @@ -173,22 +148,69 @@ fn join_with_or(list: &[TItem]) -> String { ret } +type CaptureIndex = u32; + +#[derive(Debug)] +enum QueryOrCaptureIndexError { + QueryError(QueryError), + CaptureIndexError(CaptureIndexError), +} + +impl From for QueryOrCaptureIndexError { + fn from(value: QueryError) -> Self { + Self::QueryError(value) + } +} + +impl From for QueryOrCaptureIndexError { + fn from(value: CaptureIndexError) -> Self { + Self::CaptureIndexError(value) + } +} + +#[allow(clippy::type_complexity)] #[derive(Default)] -struct CachedQueries(BySupportedLanguage, QueryError>>>); +struct CachedQueries( + BySupportedLanguage, CaptureIndex), QueryOrCaptureIndexError>>>, +); impl CachedQueries { fn get_and_cache_query_for_language<'a>( &self, query_or_query_text: impl Into>, language: SupportedLanguage, - ) -> Option> { + capture_name: Option<&str>, + ) -> Option<(Arc, CaptureIndex)> { let query_or_query_text = query_or_query_text.into(); self.0[language] - .get_or_init(|| match query_or_query_text { - QueryOrQueryText::QueryText(query_text) => { - maybe_get_query(query_text, language.language()).map(Arc::new) + .get_or_init(|| { + match query_or_query_text { + QueryOrQueryText::QueryText(query_text) => { + maybe_get_query(query_text, language.language()) + .map(Arc::new) + .map_err(Into::into) + } + QueryOrQueryText::Query(query) => Ok(query), } - QueryOrQueryText::Query(query) => Ok(query), + .and_then( + |query| -> Result<(Arc, CaptureIndex), QueryOrCaptureIndexError> { + match capture_name { + None => match query.capture_names().len() { + 0 => Err(CaptureIndexError::NoCaptureInQuery.into()), + _ => Ok(0), + }, + Some(capture_name) => { + query.capture_index_for_name(capture_name).ok_or_else(|| { + CaptureIndexError::InvalidCaptureName { + capture_name: capture_name.to_owned(), + } + .into() + }) + } + } + .map(|capture_index| (query, capture_index)) + }, + ) }) .as_ref() .ok() @@ -217,13 +239,44 @@ impl CachedQueries { !attempted_parsings.is_empty(), "Should've tried to parse in at least one language or else should've already failed on no candidate files" ); - return Err(Error::NoSuccessfulQueryParsing(attempted_parsings)); + if let Some((_, capture_index_error)) = + attempted_parsings + .iter() + .find(|(_, query_or_capture_index_error)| { + matches!( + query_or_capture_index_error, + QueryOrCaptureIndexError::CaptureIndexError(_) + ) + }) + { + match capture_index_error { + QueryOrCaptureIndexError::CaptureIndexError(capture_index_error) => { + return Err(capture_index_error.clone().into()) + } + _ => unreachable!(), + } + } + return Err(Error::NoSuccessfulQueryParsing( + attempted_parsings + .into_iter() + .map(|(language, query_or_capture_index_error)| { + ( + language, + match query_or_capture_index_error { + QueryOrCaptureIndexError::QueryError(query_error) => query_error, + _ => unreachable!(), + }, + ) + }) + .collect(), + )); } Ok(()) } } +#[derive(Debug)] pub struct RunStatus { pub matched: bool, pub non_fatal_errors: Vec, @@ -342,7 +395,6 @@ fn run_for_context( let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); - let capture_index = CaptureIndex::default(); let matched = AtomicBool::new(false); let searched = AtomicBool::new(false); let non_fatal_errors: Arc>> = Default::default(); @@ -380,6 +432,7 @@ fn run_for_context( query_text_per_language .get_query_or_query_text_for_language(matched_language), matched_language, + args.capture_name.as_deref(), ) .map(|_| matched_language) }) @@ -400,14 +453,14 @@ fn run_for_context( } }, }; - let query = match cached_queries.get_and_cache_query_for_language( + let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( query_text_per_language.get_query_or_query_text_for_language(language), language, + args.capture_name.as_deref(), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?; let path = format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); @@ -446,13 +499,13 @@ pub fn run_for_slice_with_callback<'a>( let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); - let capture_index = CaptureIndex::default(); let matched = AtomicBool::new(false); let non_fatal_errors: Arc>> = Default::default(); - let query = match cached_queries.get_and_cache_query_for_language( + let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( query_text_per_language.get_query_or_query_text_for_language(language), language, + args.capture_name.as_deref(), ) { Some(query) => query, None => { @@ -461,7 +514,6 @@ pub fn run_for_slice_with_callback<'a>( .unwrap_err()) } }; - let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?; let query_context = QueryContext::new(query, capture_index, language.language(), filter); @@ -495,7 +547,6 @@ pub fn run_with_per_file_callback( let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; let cached_queries: CachedQueries = Default::default(); - let capture_index = CaptureIndex::default(); let matched = AtomicBool::new(false); let searched = AtomicBool::new(false); let non_fatal_errors: Arc>> = Default::default(); @@ -533,6 +584,7 @@ pub fn run_with_per_file_callback( query_text_per_language .get_query_or_query_text_for_language(matched_language), matched_language, + args.capture_name.as_deref(), ) .map(|_| matched_language) }) @@ -553,14 +605,14 @@ pub fn run_with_per_file_callback( } }, }; - let query = match cached_queries.get_and_cache_query_for_language( + let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( query_text_per_language.get_query_or_query_text_for_language(language), language, + args.capture_name.as_deref(), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?; let path = format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); @@ -571,12 +623,18 @@ pub fn run_with_per_file_callback( &project_file_dir_entry, language, Box::new(|mut per_match_callback| { + if language == SupportedLanguage::Toml { + println!("in toml callback"); + } get_searcher(&args) .borrow_mut() .search_path_callback::<_, io::Error>( query_context.clone(), path, |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { + if language == SupportedLanguage::Toml { + println!("in toml match callback"); + } per_match_callback(capture_info, file_contents, path); matched.store(true, Ordering::SeqCst); }, From 672ac418ac562da602118ba318a3d6db13667624 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 27 Jul 2023 22:45:53 -0400 Subject: [PATCH 22/42] rm debugging --- src/lib.rs | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f4ebc3e..67c9348 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -623,18 +623,12 @@ pub fn run_with_per_file_callback( &project_file_dir_entry, language, Box::new(|mut per_match_callback| { - if language == SupportedLanguage::Toml { - println!("in toml callback"); - } get_searcher(&args) .borrow_mut() .search_path_callback::<_, io::Error>( query_context.clone(), path, |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { - if language == SupportedLanguage::Toml { - println!("in toml match callback"); - } per_match_callback(capture_info, file_contents, path); matched.store(true, Ordering::SeqCst); }, From c5f3fa2aec3941bbff023603c81a3a74df7ec0bf Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 28 Jul 2023 10:50:48 -0400 Subject: [PATCH 23/42] per match --- examples/print_match_text.rs | 11 +++++-- src/args.rs | 10 ++++--- src/lib.rs | 22 +++++++------- src/searcher/mod.rs | 42 +++++++++----------------- src/treesitter.rs | 57 +++++++++++++++++++++++++++++++++++- 5 files changed, 96 insertions(+), 46 deletions(-) diff --git a/examples/print_match_text.rs b/examples/print_match_text.rs index 90ec8ec..08c5ed5 100644 --- a/examples/print_match_text.rs +++ b/examples/print_match_text.rs @@ -3,10 +3,17 @@ use tree_sitter_lint_tree_sitter_grep::{run_with_callback, Args}; fn main() { let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]); - run_with_callback(args, |capture_info, file_contents, path| { + run_with_callback(args, |query_match, file_contents, path| { println!( "Found match in {path:?}: {}", - std::str::from_utf8(&file_contents[capture_info.node.byte_range()]).unwrap(), + std::str::from_utf8( + &file_contents[query_match + .nodes_for_capture_index(0) + .next() + .unwrap() + .byte_range()] + ) + .unwrap(), ); }) .unwrap(); diff --git a/src/args.rs b/src/args.rs index 24dc5d5..7bab0c0 100644 --- a/src/args.rs +++ b/src/args.rs @@ -50,7 +50,7 @@ pub struct Args { query_text: Option, #[clap(skip)] - query_per_language: Option>>, + query_per_language: Option, /// The name of the tree-sitter query capture (without leading "@") whose /// matching nodes will be output. @@ -241,9 +241,11 @@ impl ArgsBuilder { } } +pub type QueryPerLanguage = HashMap>; + pub enum QueryOrQueryTextPerLanguage { SingleQueryText(String), - PerLanguage(HashMap>), + PerLanguage(QueryPerLanguage), } impl QueryOrQueryTextPerLanguage { @@ -266,8 +268,8 @@ impl From for QueryOrQueryTextPerLanguage { } } -impl From>> for QueryOrQueryTextPerLanguage { - fn from(value: HashMap>) -> Self { +impl From for QueryOrQueryTextPerLanguage { + fn from(value: QueryPerLanguage) -> Self { Self::PerLanguage(value) } } diff --git a/src/lib.rs b/src/lib.rs index 67c9348..c5db901 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,7 +14,7 @@ use ignore::DirEntry; use rayon::prelude::*; use termcolor::{BufferWriter, ColorChoice}; use thiserror::Error; -use tree_sitter::{Query, QueryError, Tree}; +use tree_sitter::{Query, QueryError, QueryMatch, Tree}; mod args; mod language; @@ -39,7 +39,7 @@ pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; pub use treesitter::{ - get_captures, get_captures_for_enclosing_node, CaptureInfo, Parseable, RopeOrSlice, + get_captures, get_captures_for_enclosing_node, get_matches, CaptureInfo, Parseable, RopeOrSlice, }; use use_printer::get_printer; use use_searcher::get_searcher; @@ -362,7 +362,7 @@ pub fn run_print(args: Args) -> Result { pub fn run_with_callback( args: Args, - callback: impl Fn(&CaptureInfo, &[u8], &Path) + Sync, + callback: impl Fn(&QueryMatch, &[u8], &Path) + Sync, ) -> Result { run_for_context( args, @@ -377,8 +377,8 @@ pub fn run_with_callback( .search_path_callback::<_, io::Error>( query_context, path, - |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { - callback(capture_info, file_contents, path); + |query_match: &QueryMatch, file_contents: &[u8], path: &Path| { + callback(query_match, file_contents, path); matched.store(true, Ordering::SeqCst); }, ) @@ -492,7 +492,7 @@ pub fn run_for_slice_with_callback<'a>( slice: impl Into>, tree: Option<&Tree>, args: Args, - mut callback: impl FnMut(&CaptureInfo) + Sync, + mut callback: impl FnMut(&QueryMatch) + Sync, ) -> Result { let slice = slice.into(); let language = args.language.ok_or(Error::LanguageMissingForSlice)?; @@ -519,8 +519,8 @@ pub fn run_for_slice_with_callback<'a>( get_searcher(&args) .borrow_mut() - .search_slice_callback_no_path(query_context, slice, tree, |capture_info: &CaptureInfo| { - callback(capture_info); + .search_slice_callback_no_path(query_context, slice, tree, |query_match: &QueryMatch| { + callback(query_match); matched.store(true, Ordering::SeqCst); }) .unwrap(); @@ -541,7 +541,7 @@ pub fn run_with_per_file_callback( per_file_callback: impl Fn( &DirEntry, SupportedLanguage, - Box) + '_>, + Box) + '_>, ) + Sync, ) -> Result { let query_text_per_language = args.get_loaded_query_text_per_language()?; @@ -628,8 +628,8 @@ pub fn run_with_per_file_callback( .search_path_callback::<_, io::Error>( query_context.clone(), path, - |capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| { - per_match_callback(capture_info, file_contents, path); + |query_match: &QueryMatch, file_contents: &[u8], path: &Path| { + per_match_callback(query_match, file_contents, path); matched.store(true, Ordering::SeqCst); }, ) diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 0111651..48e28a5 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -10,17 +10,17 @@ use std::{ use encoding_rs_io::DecodeReaderBytesBuilder; use streaming_iterator::StreamingIterator; -use tree_sitter::Tree; +use tree_sitter::{QueryMatch, Tree}; pub use self::mmap::MmapChoice; use crate::{ + get_matches, line_buffer::{alloc_error, DEFAULT_BUFFER_CAPACITY}, matcher::{LineTerminator, Match}, query_context::QueryContext, searcher::glue::MultiLine, sink::{Sink, SinkError}, - treesitter::get_captures, - CaptureInfo, RopeOrSlice, + RopeOrSlice, }; mod core; @@ -220,7 +220,7 @@ impl Searcher { &mut self, query_context: QueryContext, path: P, - callback: impl FnMut(&CaptureInfo, &[u8], &Path), + callback: impl FnMut(&QueryMatch, &[u8], &Path), ) -> Result<(), TError> where P: AsRef, @@ -340,7 +340,7 @@ impl Searcher { &mut self, query_context: QueryContext, slice: &[u8], - callback: impl FnMut(&CaptureInfo, &[u8], &Path), + callback: impl FnMut(&QueryMatch, &[u8], &Path), path: &Path, ) -> Result<(), ConfigError> { self.check_config()?; @@ -357,22 +357,16 @@ impl Searcher { // slice: impl TextProvider<'a> + Parseable + 'a, slice: impl Into>, tree: Option<&'tree Tree>, - mut callback: impl FnMut(&CaptureInfo), + mut callback: impl FnMut(&QueryMatch), ) -> Result<(), ConfigError> { self.check_config()?; log::trace!("slice reader: searching via multiline strategy"); - get_captures( - query_context.language, - slice, - &query_context.query, - query_context.capture_index, - query_context.filter.as_deref(), - tree, - ) - .for_each(|capture_info| { - callback(capture_info); - }); + get_matches(query_context.language, slice, &query_context.query, tree).for_each( + |query_match| { + callback(query_match); + }, + ); Ok(()) } @@ -381,19 +375,11 @@ impl Searcher { &self, query_context: QueryContext, slice: &[u8], - mut callback: impl FnMut(&CaptureInfo, &[u8], &Path), + mut callback: impl FnMut(&QueryMatch, &[u8], &Path), path: &Path, ) { - get_captures( - query_context.language, - slice, - &query_context.query, - query_context.capture_index, - query_context.filter.as_deref(), - None, - ) - .for_each(|capture_info| { - callback(capture_info, slice, path); + get_matches(query_context.language, slice, &query_context.query, None).for_each(|match_| { + callback(match_, slice, path); }); } diff --git a/src/treesitter.rs b/src/treesitter.rs index c949ab1..08faf7b 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -6,7 +6,8 @@ use ouroboros::self_referencing; use ropey::{iter::Chunks, Rope, RopeSlice}; use streaming_iterator::StreamingIterator; use tree_sitter::{ - Language, Node, Parser, Query, QueryCaptures, QueryCursor, QueryError, TextProvider, Tree, + Language, Node, Parser, Query, QueryCaptures, QueryCursor, QueryError, QueryMatch, + QueryMatches, TextProvider, Tree, }; use crate::{matcher::Match, plugin::Filterer}; @@ -332,3 +333,57 @@ impl<'a, 'text, 'tree> StreamingIterator for CapturesForEnclosingNode<'a, 'text, next_capture.as_ref() } } + +#[self_referencing] +pub struct Matches<'a, 'text: 'a, 'tree: 'a> { + text: RopeOrSlice<'text>, + query_cursor: QueryCursor, + query: &'a Query, + tree: Cow<'tree, Tree>, + #[borrows(text, mut query_cursor, query, tree)] + #[covariant] + matches_iterator: QueryMatches<'this, 'this, 'this, RopeOrSlice<'this>>, + #[borrows(tree)] + #[covariant] + next_match: Option>, +} + +pub fn get_matches<'a, 'text, 'tree>( + language: Language, + text: impl Into>, + query: &'a Query, + tree: Option<&'tree Tree>, +) -> Matches<'a, 'text, 'tree> { + let text = text.into(); + let query_cursor = QueryCursor::new(); + let tree: Cow<'tree, Tree> = tree.map_or_else( + || Cow::Owned(text.parse(&mut get_parser(language), None).unwrap()), + Cow::Borrowed, + ); + Matches::new( + text, + query_cursor, + query, + tree, + |text, query_cursor, query, tree| query_cursor.matches(query, tree.root_node(), *text), + |_| None, + ) +} + +impl<'a, 'text, 'tree> StreamingIterator for Matches<'a, 'text, 'tree> { + type Item = QueryMatch<'a, 'tree>; + + fn advance(&mut self) { + self.with_mut(|all_fields| { + *all_fields.next_match = all_fields.matches_iterator.next(); + }); + } + + fn get<'this>(&'this self) -> Option<&'this Self::Item> { + let next_match = self.borrow_next_match(); + // SAFETY: Not as sure on this one? + let next_match: &'this Option> = + unsafe { mem::transmute(next_match) }; + next_match.as_ref() + } +} From c371ae9788068184e457e725e0708940f1ad1256 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Mon, 31 Jul 2023 17:36:47 -0400 Subject: [PATCH 24/42] update tree-sitter dependency --- Cargo.toml | 2 +- src/treesitter.rs | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ad7055c..2aa7659 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,7 +71,7 @@ tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" [patch.crates-io] -tree-sitter = { git = "https://github.com/helixbass/tree-sitter", rev = "57e98fb0" } +tree-sitter = { git = "https://github.com/tree-sitter/tree-sitter", rev = "c16b90d" } [[bin]] name = "tree-sitter-grep" diff --git a/src/treesitter.rs b/src/treesitter.rs index 08faf7b..edafc7f 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -60,7 +60,7 @@ pub enum RopeOrSlice<'a> { Rope(&'a Rope), } -impl<'a> TextProvider<'a> for RopeOrSlice<'a> { +impl<'a> TextProvider<&'a [u8]> for RopeOrSlice<'a> { type I = RopeOrSliceTextProviderIterator<'a>; fn text(&mut self, node: Node) -> Self::I { @@ -79,7 +79,7 @@ impl<'a> TextProvider<'a> for RopeOrSlice<'a> { } } -impl<'a> TextProvider<'a> for &'a RopeOrSlice<'a> { +impl<'a> TextProvider<&'a [u8]> for &'a RopeOrSlice<'a> { type I = RopeOrSliceTextProviderIterator<'a>; fn text(&mut self, node: Node) -> Self::I { @@ -177,7 +177,7 @@ pub struct Captures<'a, 'text: 'a, 'tree: 'a> { capture_index: u32, #[borrows(text, mut query_cursor, query, tree)] #[covariant] - captures_iterator: QueryCaptures<'this, 'this, 'this, RopeOrSlice<'this>>, + captures_iterator: QueryCaptures<'this, 'this, RopeOrSlice<'this>, &'this [u8]>, #[borrows(tree)] #[covariant] next_capture: Option>, @@ -262,7 +262,7 @@ pub struct CapturesForEnclosingNode<'a, 'text: 'a, 'tree: 'a> { capture_index: u32, #[borrows(text, mut query_cursor, query, enclosing_node)] #[covariant] - captures_iterator: QueryCaptures<'this, 'this, 'this, RopeOrSlice<'this>>, + captures_iterator: QueryCaptures<'this, 'this, RopeOrSlice<'this>, &'this [u8]>, #[borrows(enclosing_node)] #[covariant] next_capture: Option>, @@ -342,7 +342,7 @@ pub struct Matches<'a, 'text: 'a, 'tree: 'a> { tree: Cow<'tree, Tree>, #[borrows(text, mut query_cursor, query, tree)] #[covariant] - matches_iterator: QueryMatches<'this, 'this, 'this, RopeOrSlice<'this>>, + matches_iterator: QueryMatches<'this, 'this, RopeOrSlice<'this>, &'this [u8]>, #[borrows(tree)] #[covariant] next_match: Option>, From 53e844fdd1c0c1489bd22ace838defd95781b143 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 1 Aug 2023 23:31:11 -0400 Subject: [PATCH 25/42] don't require sync --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c5db901..6a24a1c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -492,7 +492,7 @@ pub fn run_for_slice_with_callback<'a>( slice: impl Into>, tree: Option<&Tree>, args: Args, - mut callback: impl FnMut(&QueryMatch) + Sync, + mut callback: impl FnMut(&QueryMatch), ) -> Result { let slice = slice.into(); let language = args.language.ok_or(Error::LanguageMissingForSlice)?; From 57495f8857c30c9ea0065d6ac96e098600265f7d Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Wed, 2 Aug 2023 08:25:58 -0400 Subject: [PATCH 26/42] run with single per file callback --- src/lib.rs | 108 +++++++++++++++++++++++++++++++++++++++++++- src/searcher/mod.rs | 51 ++++++++++++++++++++- src/treesitter.rs | 2 +- 3 files changed, 158 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6a24a1c..9a3f956 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,7 +39,8 @@ pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; pub use treesitter::{ - get_captures, get_captures_for_enclosing_node, get_matches, CaptureInfo, Parseable, RopeOrSlice, + get_captures, get_captures_for_enclosing_node, get_matches, get_parser, CaptureInfo, Parseable, + RopeOrSlice, }; use use_printer::get_printer; use use_searcher::get_searcher; @@ -656,6 +657,111 @@ pub fn run_with_per_file_callback( }) } +pub fn run_with_single_per_file_callback( + args: Args, + per_file_callback: impl Fn(&DirEntry, SupportedLanguage, &[u8], &Tree, &Arc) + Sync, +) -> Result { + let query_text_per_language = args.get_loaded_query_text_per_language()?; + let filter = args.get_loaded_filter()?; + let cached_queries: CachedQueries = Default::default(); + let non_fatal_errors: Arc>> = Default::default(); + + for_each_project_file( + &args, + non_fatal_errors.clone(), + |project_file_dir_entry, matched_languages| { + let language = match args.language { + Some(specified_language) => { + if !matched_languages.contains(&specified_language) { + return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { + path: project_file_dir_entry.path().to_owned(), + specified_language, + } + .into(); + } + specified_language + } + None => match matched_languages.len() { + 0 => { + return NonFatalError::ExplicitPathArgumentNotOfKnownType { + path: project_file_dir_entry.path().to_owned(), + } + .into(); + } + 1 => matched_languages[0], + _ => { + let successfully_parsed_query_languages = matched_languages + .iter() + .filter_map(|&matched_language| { + cached_queries + .get_and_cache_query_for_language( + query_text_per_language + .get_query_or_query_text_for_language(matched_language), + matched_language, + args.capture_name.as_deref(), + ) + .map(|_| matched_language) + }) + .collect::>(); + match successfully_parsed_query_languages.len() { + 0 => { + return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile); + } + 1 => successfully_parsed_query_languages[0], + _ => { + return NonFatalError::AmbiguousLanguageForFile { + path: project_file_dir_entry.path().to_owned(), + languages: successfully_parsed_query_languages, + } + .into(); + } + } + } + }, + }; + let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( + query_text_per_language.get_query_or_query_text_for_language(language), + language, + args.capture_name.as_deref(), + ) { + Some(query) => query, + None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), + }; + let path = + format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); + + let query_context = + QueryContext::new(query, capture_index, language.language(), filter.clone()); + + let searcher = get_searcher(&args); + let mut searcher = searcher.borrow_mut(); + let file_contents = searcher.load_file_contents::<_, io::Error>(path).unwrap(); + let tree = (&*file_contents) + .parse(&mut get_parser(language.language()), None) + .unwrap(); + per_file_callback( + &project_file_dir_entry, + language, + &file_contents, + &tree, + &query_context.query, + ); + + Ok(SingleFileSearchNonFailure::RanQuery) + }, + )?; + + let non_fatal_errors = non_fatal_errors.lock().unwrap().clone(); + if non_fatal_errors.is_empty() { + cached_queries.error_if_no_successful_query_parsing()?; + } + + Ok(RunStatus { + matched: false, + non_fatal_errors, + }) +} + fn for_each_project_file( args: &Args, non_fatal_errors: Arc>>, diff --git a/src/searcher/mod.rs b/src/searcher/mod.rs index 48e28a5..a1a5f29 100644 --- a/src/searcher/mod.rs +++ b/src/searcher/mod.rs @@ -1,14 +1,16 @@ // derived from https://github.com/BurntSushi/ripgrep/blob/master/crates/searcher/src/searcher/mod.rs use std::{ - cell::RefCell, + cell::{Ref, RefCell}, cmp, fmt, fs::File, io::{self, Read}, + ops, path::Path, }; use encoding_rs_io::DecodeReaderBytesBuilder; +use memmap::Mmap; use streaming_iterator::StreamingIterator; use tree_sitter::{QueryMatch, Tree}; @@ -216,6 +218,25 @@ impl Searcher { self.search_file_maybe_path(query_context, Some(path), &file, write_to) } + pub fn load_file_contents( + &mut self, + path: P, + ) -> Result + where + P: AsRef, + { + let path = path.as_ref(); + let file = File::open(path).map_err(TError::error_io)?; + + if let Some(mmap) = self.config.mmap.open(&file, Some(path)) { + return Ok(mmap.into()); + } + + self.fill_multi_line_buffer_from_file(&file) + .map_err(TError::error_io)?; + return Ok(self.multi_line_buffer.borrow().into()); + } + pub fn search_path_callback( &mut self, query_context: QueryContext, @@ -486,3 +507,31 @@ impl Searcher { } } } + +pub enum MmapOrRefByteVec<'a> { + Mmap(Mmap), + RefByteVec(Ref<'a, Vec>), +} + +impl<'a> From for MmapOrRefByteVec<'a> { + fn from(value: Mmap) -> Self { + Self::Mmap(value) + } +} + +impl<'a> From>> for MmapOrRefByteVec<'a> { + fn from(value: Ref<'a, Vec>) -> Self { + Self::RefByteVec(value) + } +} + +impl<'a> ops::Deref for MmapOrRefByteVec<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + match self { + Self::Mmap(value) => value, + Self::RefByteVec(value) => value, + } + } +} diff --git a/src/treesitter.rs b/src/treesitter.rs index edafc7f..ad6fea9 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -12,7 +12,7 @@ use tree_sitter::{ use crate::{matcher::Match, plugin::Filterer}; -pub(crate) fn get_parser(language: Language) -> Parser { +pub fn get_parser(language: Language) -> Parser { let mut parser = Parser::new(); parser .set_language(language) From 31959d14cadcd832e3063dc5c6eb30a9180a0182 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Wed, 2 Aug 2023 16:48:13 -0400 Subject: [PATCH 27/42] expose language comment kinds --- src/language.rs | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/language.rs b/src/language.rs index d4cf908..f660b22 100644 --- a/src/language.rs +++ b/src/language.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, ops::{Deref, Index}, }; @@ -50,6 +50,10 @@ impl SupportedLanguage { pub fn name_for_ignore_select(&self) -> &'static str { SUPPORTED_LANGUAGE_NAMES_FOR_IGNORE_SELECT[*self] } + + pub fn comment_kinds(&self) -> &'static HashSet<&'static str> { + &SUPPORTED_LANGUAGE_COMMENT_KINDS[*self] + } } static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { @@ -117,3 +121,31 @@ pub static ALL_SUPPORTED_LANGUAGES_BY_NAME_FOR_IGNORE_SELECT: Lazy< }) .collect() }); + +static SUPPORTED_LANGUAGE_COMMENT_KINDS: Lazy>> = + Lazy::new(|| { + by_supported_language!( + Rust => ["comment"].into(), + Typescript => ["comment"].into(), + Javascript => ["comment"].into(), + Swift => ["comment"].into(), + ObjectiveC => ["comment"].into(), + Toml => ["comment"].into(), + Python => ["comment"].into(), + Ruby => ["comment"].into(), + C => ["comment"].into(), + Cpp => ["comment"].into(), + Go => ["comment"].into(), + Java => ["comment"].into(), + CSharp => ["comment"].into(), + Kotlin => ["comment"].into(), + Elisp => ["comment"].into(), + Elm => ["comment"].into(), + Dockerfile => ["comment"].into(), + Html => ["comment"].into(), + TreeSitterQuery => ["comment"].into(), + Json => ["comment"].into(), + Css => ["comment"].into(), + Lua => ["comment"].into(), + ) + }); From 2061904a1ab6515b8f72f88fcdd6c3ea6ffcf657 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Wed, 2 Aug 2023 19:03:02 -0400 Subject: [PATCH 28/42] comment type --- src/language.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/language.rs b/src/language.rs index f660b22..56bc205 100644 --- a/src/language.rs +++ b/src/language.rs @@ -125,7 +125,7 @@ pub static ALL_SUPPORTED_LANGUAGES_BY_NAME_FOR_IGNORE_SELECT: Lazy< static SUPPORTED_LANGUAGE_COMMENT_KINDS: Lazy>> = Lazy::new(|| { by_supported_language!( - Rust => ["comment"].into(), + Rust => ["line_comment", "block_comment"].into(), Typescript => ["comment"].into(), Javascript => ["comment"].into(), Swift => ["comment"].into(), From 6daf28af5072d4b3a9268a44f2f44e2de4ae6bcf Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 11 Aug 2023 09:00:44 -0400 Subject: [PATCH 29/42] use git dependency for js grammar --- Cargo.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2aa7659..f0f2791 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,7 +57,8 @@ tree-sitter-elm = "5.6.4" tree-sitter-go = "0.19.1" tree-sitter-html = "0.19.0" tree-sitter-java = "0.20.0" -tree-sitter-javascript = "0.20.0" +# tree-sitter-javascript = "0.20.0" +tree-sitter-javascript = { git = "https://github.com/tree-sitter/tree-sitter-javascript", rev = "f772967", version = "0.20.0" } tree-sitter-json = "0.19.0" tree-sitter-kotlin = "0.2.11" tree-sitter-lua = "0.0.18" From 6bb81e1b2a41ff92a9dde98311bfdf338873a873 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 15 Aug 2023 09:35:12 -0400 Subject: [PATCH 30/42] debug rope or slice --- src/treesitter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/treesitter.rs b/src/treesitter.rs index ad6fea9..1ee958a 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -54,7 +54,7 @@ impl<'a> Parseable for &'a Rope { } } -#[derive(Copy, Clone)] +#[derive(Copy, Clone, Debug)] pub enum RopeOrSlice<'a> { Slice(&'a [u8]), Rope(&'a Rope), From 325d3e09cb01b1c7ac89cc8a439e7dea1b2c8856 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Wed, 16 Aug 2023 17:56:54 -0400 Subject: [PATCH 31/42] bump tree-sitter-javascript version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f0f2791..f7b4d94 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ tree-sitter-go = "0.19.1" tree-sitter-html = "0.19.0" tree-sitter-java = "0.20.0" # tree-sitter-javascript = "0.20.0" -tree-sitter-javascript = { git = "https://github.com/tree-sitter/tree-sitter-javascript", rev = "f772967", version = "0.20.0" } +tree-sitter-javascript = { git = "https://github.com/tree-sitter/tree-sitter-javascript", rev = "c69aaba", version = "0.20.0" } tree-sitter-json = "0.19.0" tree-sitter-kotlin = "0.2.11" tree-sitter-lua = "0.0.18" From 2e842ebec0ad9c41a2a90cb3a405106bf93aadea Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Thu, 24 Aug 2023 16:46:08 -0400 Subject: [PATCH 32/42] bump tree-sitter-rust dependency --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index f7b4d94..ff3cab2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ tree-sitter-objc = "1.1.0" tree-sitter-python = "0.20.2" tree-sitter-query = "0.1.0" tree-sitter-ruby = "0.20.0" -tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "781a8d9", version = "0.20.3-dev.0" } +tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "90bfca4", version = "0.20.3-dev.0" } tree-sitter-swift = "0.3.6" tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" From 3b1230c82f52f9ef2e7ffd210adbe8f9b1058f1c Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 25 Aug 2023 07:41:18 -0400 Subject: [PATCH 33/42] bump tree-sitter-javascript dependency --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index ff3cab2..6f5d424 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -58,7 +58,7 @@ tree-sitter-go = "0.19.1" tree-sitter-html = "0.19.0" tree-sitter-java = "0.20.0" # tree-sitter-javascript = "0.20.0" -tree-sitter-javascript = { git = "https://github.com/tree-sitter/tree-sitter-javascript", rev = "c69aaba", version = "0.20.0" } +tree-sitter-javascript = { git = "https://github.com/tree-sitter/tree-sitter-javascript", rev = "f1e5a09b", version = "0.20.1" } tree-sitter-json = "0.19.0" tree-sitter-kotlin = "0.2.11" tree-sitter-lua = "0.0.18" From c5a8c85dba0a802b6b539a06f1e469b31dafcf22 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Sat, 26 Aug 2023 09:39:32 -0400 Subject: [PATCH 34/42] from str --- src/treesitter.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/treesitter.rs b/src/treesitter.rs index 1ee958a..2f435a6 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -128,6 +128,12 @@ impl<'a> From<&'a Rope> for RopeOrSlice<'a> { } } +impl<'a> From<&'a str> for RopeOrSlice<'a> { + fn from(value: &'a str) -> Self { + Self::Slice(value.as_bytes()) + } +} + pub enum RopeOrSliceTextProviderIterator<'a> { Slice(iter::Once<&'a [u8]>), Rope(RopeOrSliceRopeTextProviderIterator<'a>), From d250c7c9dcfa280e7daff171e78c50c7b55d6327 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Sat, 26 Aug 2023 20:18:03 -0400 Subject: [PATCH 35/42] debug slice --- src/treesitter.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/treesitter.rs b/src/treesitter.rs index 2f435a6..d17c80e 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -1,6 +1,6 @@ #![allow(clippy::too_many_arguments)] -use std::{borrow::Cow, iter, mem}; +use std::{borrow::Cow, fmt, iter, mem}; use ouroboros::self_referencing; use ropey::{iter::Chunks, Rope, RopeSlice}; @@ -54,7 +54,7 @@ impl<'a> Parseable for &'a Rope { } } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone)] pub enum RopeOrSlice<'a> { Slice(&'a [u8]), Rope(&'a Rope), @@ -116,6 +116,18 @@ impl<'a> Parseable for &'a RopeOrSlice<'a> { } } +impl<'a> fmt::Debug for RopeOrSlice<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Slice(arg0) => f + .debug_tuple("Slice") + .field(&std::str::from_utf8(arg0)) + .finish(), + Self::Rope(arg0) => f.debug_tuple("Rope").field(arg0).finish(), + } + } +} + impl<'a> From<&'a [u8]> for RopeOrSlice<'a> { fn from(value: &'a [u8]) -> Self { Self::Slice(value) From c5b2e8884944957b0d8ac008df84c7cfcd15d31b Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 6 Oct 2023 11:19:54 -0400 Subject: [PATCH 36/42] bump tree-sitter-rust version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 6f5d424..a0af5bc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ tree-sitter-objc = "1.1.0" tree-sitter-python = "0.20.2" tree-sitter-query = "0.1.0" tree-sitter-ruby = "0.20.0" -tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "90bfca4", version = "0.20.3-dev.0" } +tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "9582f43", version = "0.20.3-dev.0" } tree-sitter-swift = "0.3.6" tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" From 7626af820d406508b42b4de19070f9de4b9f5773 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Fri, 6 Oct 2023 13:55:04 -0400 Subject: [PATCH 37/42] bump tree-sitter-rust version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a0af5bc..685bbc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,7 +66,7 @@ tree-sitter-objc = "1.1.0" tree-sitter-python = "0.20.2" tree-sitter-query = "0.1.0" tree-sitter-ruby = "0.20.0" -tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "9582f43", version = "0.20.3-dev.0" } +tree-sitter-rust = { package = "tree_sitter_grep_tree-sitter-rust", git = "https://github.com/helixbass/tree-sitter-rust", rev = "6146443", version = "0.20.3-dev.0" } tree-sitter-swift = "0.3.6" tree-sitter-toml = "0.20.0" tree-sitter-typescript = "0.20.2" From 526d11063a87c02220a3b05a42d70a5e88deda1c Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 19 Dec 2023 10:20:03 -0500 Subject: [PATCH 38/42] rm unused entry points; language from path --- src/language.rs | 82 +++++++++++++------ src/lib.rs | 207 ++++++------------------------------------------ 2 files changed, 79 insertions(+), 210 deletions(-) diff --git a/src/language.rs b/src/language.rs index 56bc205..4ceb037 100644 --- a/src/language.rs +++ b/src/language.rs @@ -1,6 +1,6 @@ use std::{ collections::{HashMap, HashSet}, - ops::{Deref, Index}, + ops::{Deref, Index}, path::Path, }; use once_cell::sync::Lazy; @@ -43,8 +43,11 @@ fixed_map! { } impl SupportedLanguage { - pub fn language(&self) -> Language { - SUPPORTED_LANGUAGE_LANGUAGES[*self] + pub fn language(&self, path: Option<&Path>) -> Language { + match &SUPPORTED_LANGUAGE_LANGUAGES[*self] { + SingleLanguageOrLanguageFromPath::SingleLanguage(language) => *language, + SingleLanguageOrLanguageFromPath::LanguageFromPath(language_from_path) => language_from_path.from_path(path), + } } pub fn name_for_ignore_select(&self) -> &'static str { @@ -56,30 +59,57 @@ impl SupportedLanguage { } } -static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { +enum SingleLanguageOrLanguageFromPath { + SingleLanguage(Language), + LanguageFromPath(Box), +} + +impl From for SingleLanguageOrLanguageFromPath { + fn from(value: Language) -> Self { + Self::SingleLanguage(value) + } +} + +trait LanguageFromPath: Send + Sync { + #[allow(clippy::wrong_self_convention)] + fn from_path(&self, path: Option<&Path>) -> Language; +} + +struct TypescriptLanguageFromPath; + +impl LanguageFromPath for TypescriptLanguageFromPath { + fn from_path(&self, path: Option<&Path>) -> Language { + match path.and_then(|path| path.extension()) { + Some(extension) if "tsx" == extension => tree_sitter_typescript::language_tsx(), + _ => tree_sitter_typescript::language_typescript(), + } + } +} + +static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { by_supported_language!( - Rust => tree_sitter_rust::language(), - Typescript => tree_sitter_typescript::language_tsx(), - Javascript => tree_sitter_javascript::language(), - Swift => tree_sitter_swift::language(), - ObjectiveC => tree_sitter_objc::language(), - Toml => tree_sitter_toml::language(), - Python => tree_sitter_python::language(), - Ruby => tree_sitter_ruby::language(), - C => tree_sitter_c::language(), - Cpp => tree_sitter_cpp::language(), - Go => tree_sitter_go::language(), - Java => tree_sitter_java::language(), - CSharp => tree_sitter_c_sharp::language(), - Kotlin => tree_sitter_kotlin::language(), - Elisp => tree_sitter_elisp::language(), - Elm => tree_sitter_elm::language(), - Dockerfile => tree_sitter_dockerfile::language(), - Html => tree_sitter_html::language(), - TreeSitterQuery => tree_sitter_query::language(), - Json => tree_sitter_json::language(), - Css => tree_sitter_css::language(), - Lua => tree_sitter_lua::language(), + Rust => tree_sitter_rust::language().into(), + Typescript => SingleLanguageOrLanguageFromPath::LanguageFromPath(Box::new(TypescriptLanguageFromPath)), + Javascript => tree_sitter_javascript::language().into(), + Swift => tree_sitter_swift::language().into(), + ObjectiveC => tree_sitter_objc::language().into(), + Toml => tree_sitter_toml::language().into(), + Python => tree_sitter_python::language().into(), + Ruby => tree_sitter_ruby::language().into(), + C => tree_sitter_c::language().into(), + Cpp => tree_sitter_cpp::language().into(), + Go => tree_sitter_go::language().into(), + Java => tree_sitter_java::language().into(), + CSharp => tree_sitter_c_sharp::language().into(), + Kotlin => tree_sitter_kotlin::language().into(), + Elisp => tree_sitter_elisp::language().into(), + Elm => tree_sitter_elm::language().into(), + Dockerfile => tree_sitter_dockerfile::language().into(), + Html => tree_sitter_html::language().into(), + TreeSitterQuery => tree_sitter_query::language().into(), + Json => tree_sitter_json::language().into(), + Css => tree_sitter_css::language().into(), + Lua => tree_sitter_lua::language().into(), ) }); diff --git a/src/lib.rs b/src/lib.rs index 9a3f956..4c33e63 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -181,13 +181,14 @@ impl CachedQueries { query_or_query_text: impl Into>, language: SupportedLanguage, capture_name: Option<&str>, + path: Option<&Path>, ) -> Option<(Arc, CaptureIndex)> { let query_or_query_text = query_or_query_text.into(); self.0[language] .get_or_init(|| { match query_or_query_text { QueryOrQueryText::QueryText(query_text) => { - maybe_get_query(query_text, language.language()) + maybe_get_query(query_text, language.language(path)) .map(Arc::new) .map_err(Into::into) } @@ -405,11 +406,12 @@ fn run_for_context( non_fatal_errors.clone(), |project_file_dir_entry, matched_languages| { searched.store(true, Ordering::SeqCst); + let path = project_file_dir_entry.path(); let language = match args.language { Some(specified_language) => { if !matched_languages.contains(&specified_language) { return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), specified_language, } .into(); @@ -419,7 +421,7 @@ fn run_for_context( None => match matched_languages.len() { 0 => { return NonFatalError::ExplicitPathArgumentNotOfKnownType { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), } .into(); } @@ -434,6 +436,7 @@ fn run_for_context( .get_query_or_query_text_for_language(matched_language), matched_language, args.capture_name.as_deref(), + Some(path), ) .map(|_| matched_language) }) @@ -445,7 +448,7 @@ fn run_for_context( 1 => successfully_parsed_query_languages[0], _ => { return NonFatalError::AmbiguousLanguageForFile { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), languages: successfully_parsed_query_languages, } .into(); @@ -458,185 +461,18 @@ fn run_for_context( query_text_per_language.get_query_or_query_text_for_language(language), language, args.capture_name.as_deref(), + Some(path), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let path = - format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); + let relative_path = + format_relative_path(path, args.is_using_default_paths()); let query_context = - QueryContext::new(query, capture_index, language.language(), filter.clone()); + QueryContext::new(query, capture_index, language.language(Some(path)), filter.clone()); - search_file(&context, &args, path, query_context, &matched); - - Ok(SingleFileSearchNonFailure::RanQuery) - }, - )?; - - let mut non_fatal_errors = non_fatal_errors.lock().unwrap().clone(); - if non_fatal_errors.is_empty() { - if !searched.load(Ordering::SeqCst) { - non_fatal_errors.push(NonFatalError::NothingSearched); - } else { - cached_queries.error_if_no_successful_query_parsing()?; - } - } - - Ok(RunStatus { - matched: matched.load(Ordering::SeqCst), - non_fatal_errors, - }) -} - -pub fn run_for_slice_with_callback<'a>( - slice: impl Into>, - tree: Option<&Tree>, - args: Args, - mut callback: impl FnMut(&QueryMatch), -) -> Result { - let slice = slice.into(); - let language = args.language.ok_or(Error::LanguageMissingForSlice)?; - let query_text_per_language = args.get_loaded_query_text_per_language()?; - let filter = args.get_loaded_filter()?; - let cached_queries: CachedQueries = Default::default(); - let matched = AtomicBool::new(false); - let non_fatal_errors: Arc>> = Default::default(); - - let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( - query_text_per_language.get_query_or_query_text_for_language(language), - language, - args.capture_name.as_deref(), - ) { - Some(query) => query, - None => { - return Err(cached_queries - .error_if_no_successful_query_parsing() - .unwrap_err()) - } - }; - - let query_context = QueryContext::new(query, capture_index, language.language(), filter); - - get_searcher(&args) - .borrow_mut() - .search_slice_callback_no_path(query_context, slice, tree, |query_match: &QueryMatch| { - callback(query_match); - matched.store(true, Ordering::SeqCst); - }) - .unwrap(); - - let non_fatal_errors = non_fatal_errors.lock().unwrap().clone(); - if non_fatal_errors.is_empty() { - cached_queries.error_if_no_successful_query_parsing()?; - } - - Ok(RunStatus { - matched: matched.load(Ordering::SeqCst), - non_fatal_errors, - }) -} - -pub fn run_with_per_file_callback( - args: Args, - per_file_callback: impl Fn( - &DirEntry, - SupportedLanguage, - Box) + '_>, - ) + Sync, -) -> Result { - let query_text_per_language = args.get_loaded_query_text_per_language()?; - let filter = args.get_loaded_filter()?; - let cached_queries: CachedQueries = Default::default(); - let matched = AtomicBool::new(false); - let searched = AtomicBool::new(false); - let non_fatal_errors: Arc>> = Default::default(); - - for_each_project_file( - &args, - non_fatal_errors.clone(), - |project_file_dir_entry, matched_languages| { - searched.store(true, Ordering::SeqCst); - let language = match args.language { - Some(specified_language) => { - if !matched_languages.contains(&specified_language) { - return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { - path: project_file_dir_entry.path().to_owned(), - specified_language, - } - .into(); - } - specified_language - } - None => match matched_languages.len() { - 0 => { - return NonFatalError::ExplicitPathArgumentNotOfKnownType { - path: project_file_dir_entry.path().to_owned(), - } - .into(); - } - 1 => matched_languages[0], - _ => { - let successfully_parsed_query_languages = matched_languages - .iter() - .filter_map(|&matched_language| { - cached_queries - .get_and_cache_query_for_language( - query_text_per_language - .get_query_or_query_text_for_language(matched_language), - matched_language, - args.capture_name.as_deref(), - ) - .map(|_| matched_language) - }) - .collect::>(); - match successfully_parsed_query_languages.len() { - 0 => { - return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile); - } - 1 => successfully_parsed_query_languages[0], - _ => { - return NonFatalError::AmbiguousLanguageForFile { - path: project_file_dir_entry.path().to_owned(), - languages: successfully_parsed_query_languages, - } - .into(); - } - } - } - }, - }; - let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( - query_text_per_language.get_query_or_query_text_for_language(language), - language, - args.capture_name.as_deref(), - ) { - Some(query) => query, - None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), - }; - let path = - format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); - - let query_context = - QueryContext::new(query, capture_index, language.language(), filter.clone()); - - per_file_callback( - &project_file_dir_entry, - language, - Box::new(|mut per_match_callback| { - get_searcher(&args) - .borrow_mut() - .search_path_callback::<_, io::Error>( - query_context.clone(), - path, - |query_match: &QueryMatch, file_contents: &[u8], path: &Path| { - per_match_callback(query_match, file_contents, path); - matched.store(true, Ordering::SeqCst); - }, - ) - .unwrap(); - }), - ); + search_file(&context, &args, relative_path, query_context, &matched); Ok(SingleFileSearchNonFailure::RanQuery) }, @@ -670,11 +506,12 @@ pub fn run_with_single_per_file_callback( &args, non_fatal_errors.clone(), |project_file_dir_entry, matched_languages| { + let path = project_file_dir_entry.path(); let language = match args.language { Some(specified_language) => { if !matched_languages.contains(&specified_language) { return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), specified_language, } .into(); @@ -684,7 +521,7 @@ pub fn run_with_single_per_file_callback( None => match matched_languages.len() { 0 => { return NonFatalError::ExplicitPathArgumentNotOfKnownType { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), } .into(); } @@ -699,6 +536,7 @@ pub fn run_with_single_per_file_callback( .get_query_or_query_text_for_language(matched_language), matched_language, args.capture_name.as_deref(), + Some(path), ) .map(|_| matched_language) }) @@ -710,7 +548,7 @@ pub fn run_with_single_per_file_callback( 1 => successfully_parsed_query_languages[0], _ => { return NonFatalError::AmbiguousLanguageForFile { - path: project_file_dir_entry.path().to_owned(), + path: path.to_owned(), languages: successfully_parsed_query_languages, } .into(); @@ -723,21 +561,22 @@ pub fn run_with_single_per_file_callback( query_text_per_language.get_query_or_query_text_for_language(language), language, args.capture_name.as_deref(), + Some(path), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let path = - format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths()); + let relative_path = + format_relative_path(path, args.is_using_default_paths()); let query_context = - QueryContext::new(query, capture_index, language.language(), filter.clone()); + QueryContext::new(query, capture_index, language.language(Some(path)), filter.clone()); let searcher = get_searcher(&args); let mut searcher = searcher.borrow_mut(); - let file_contents = searcher.load_file_contents::<_, io::Error>(path).unwrap(); + let file_contents = searcher.load_file_contents::<_, io::Error>(relative_path).unwrap(); let tree = (&*file_contents) - .parse(&mut get_parser(language.language()), None) + .parse(&mut get_parser(language.language(Some(path))), None) .unwrap(); per_file_callback( &project_file_dir_entry, From d93d175bb730afb86f588691e5950a406ed2a397 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 19 Dec 2023 10:25:36 -0500 Subject: [PATCH 39/42] tests --- Cargo.toml | 1 + src/language.rs | 85 +++++++++++++++++++++++++++++++++---------------- 2 files changed, 58 insertions(+), 28 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 685bbc1..35ba177 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,6 +82,7 @@ assert_cmd = "2.0.11" escargot = "0.5.7" predicates = "3.0.3" shlex = "1.1.0" +speculoos = "0.11.0" [features] default = ["bytecount/runtime-dispatch-simd"] diff --git a/src/language.rs b/src/language.rs index 4ceb037..2818766 100644 --- a/src/language.rs +++ b/src/language.rs @@ -1,6 +1,7 @@ use std::{ collections::{HashMap, HashSet}, - ops::{Deref, Index}, path::Path, + ops::{Deref, Index}, + path::Path, }; use once_cell::sync::Lazy; @@ -46,7 +47,9 @@ impl SupportedLanguage { pub fn language(&self, path: Option<&Path>) -> Language { match &SUPPORTED_LANGUAGE_LANGUAGES[*self] { SingleLanguageOrLanguageFromPath::SingleLanguage(language) => *language, - SingleLanguageOrLanguageFromPath::LanguageFromPath(language_from_path) => language_from_path.from_path(path), + SingleLanguageOrLanguageFromPath::LanguageFromPath(language_from_path) => { + language_from_path.from_path(path) + } } } @@ -86,32 +89,33 @@ impl LanguageFromPath for TypescriptLanguageFromPath { } } -static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { - by_supported_language!( - Rust => tree_sitter_rust::language().into(), - Typescript => SingleLanguageOrLanguageFromPath::LanguageFromPath(Box::new(TypescriptLanguageFromPath)), - Javascript => tree_sitter_javascript::language().into(), - Swift => tree_sitter_swift::language().into(), - ObjectiveC => tree_sitter_objc::language().into(), - Toml => tree_sitter_toml::language().into(), - Python => tree_sitter_python::language().into(), - Ruby => tree_sitter_ruby::language().into(), - C => tree_sitter_c::language().into(), - Cpp => tree_sitter_cpp::language().into(), - Go => tree_sitter_go::language().into(), - Java => tree_sitter_java::language().into(), - CSharp => tree_sitter_c_sharp::language().into(), - Kotlin => tree_sitter_kotlin::language().into(), - Elisp => tree_sitter_elisp::language().into(), - Elm => tree_sitter_elm::language().into(), - Dockerfile => tree_sitter_dockerfile::language().into(), - Html => tree_sitter_html::language().into(), - TreeSitterQuery => tree_sitter_query::language().into(), - Json => tree_sitter_json::language().into(), - Css => tree_sitter_css::language().into(), - Lua => tree_sitter_lua::language().into(), - ) -}); +static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = + Lazy::new(|| { + by_supported_language!( + Rust => tree_sitter_rust::language().into(), + Typescript => SingleLanguageOrLanguageFromPath::LanguageFromPath(Box::new(TypescriptLanguageFromPath)), + Javascript => tree_sitter_javascript::language().into(), + Swift => tree_sitter_swift::language().into(), + ObjectiveC => tree_sitter_objc::language().into(), + Toml => tree_sitter_toml::language().into(), + Python => tree_sitter_python::language().into(), + Ruby => tree_sitter_ruby::language().into(), + C => tree_sitter_c::language().into(), + Cpp => tree_sitter_cpp::language().into(), + Go => tree_sitter_go::language().into(), + Java => tree_sitter_java::language().into(), + CSharp => tree_sitter_c_sharp::language().into(), + Kotlin => tree_sitter_kotlin::language().into(), + Elisp => tree_sitter_elisp::language().into(), + Elm => tree_sitter_elm::language().into(), + Dockerfile => tree_sitter_dockerfile::language().into(), + Html => tree_sitter_html::language().into(), + TreeSitterQuery => tree_sitter_query::language().into(), + Json => tree_sitter_json::language().into(), + Css => tree_sitter_css::language().into(), + Lua => tree_sitter_lua::language().into(), + ) + }); static SUPPORTED_LANGUAGE_NAMES_FOR_IGNORE_SELECT: BySupportedLanguage<&'static str> = by_supported_language!( Rust => "rust", @@ -179,3 +183,28 @@ static SUPPORTED_LANGUAGE_COMMENT_KINDS: Lazy ["comment"].into(), ) }); + +#[cfg(test)] +mod tests { + use speculoos::prelude::*; + + use super::*; + + #[test] + fn test_supported_language_language_simple() { + assert_that!(&SupportedLanguage::Rust.language(Some("foo.rs".as_ref()))) + .is_equal_to(tree_sitter_rust::language()); + assert_that!(&SupportedLanguage::Rust.language(None)) + .is_equal_to(tree_sitter_rust::language()); + } + + #[test] + fn test_supported_language_language_typescript() { + assert_that!(&SupportedLanguage::Typescript.language(Some("foo.tsx".as_ref()))) + .is_equal_to(tree_sitter_typescript::language_tsx()); + assert_that!(&SupportedLanguage::Typescript.language(Some("foo.ts".as_ref()))) + .is_equal_to(tree_sitter_typescript::language_typescript()); + assert_that!(&SupportedLanguage::Typescript.language(None)) + .is_equal_to(tree_sitter_typescript::language_typescript()); + } +} From db220fe5699d5fece3d2b61f650976b038e43371 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 19 Dec 2023 11:11:06 -0500 Subject: [PATCH 40/42] test typescript --- proc_macros/src/lib.rs | 4 +- src/language.rs | 124 ++++++++++++++---- src/lib.rs | 11 +- .../foo.tsx | 1 + .../hello.ts | 1 + tests/languages.rs | 66 ++++++++++ tests/output.rs | 4 +- 7 files changed, 174 insertions(+), 37 deletions(-) create mode 100644 tests/fixtures/typescript_project_with_tsx_and_ts/foo.tsx create mode 100644 tests/fixtures/typescript_project_with_tsx_and_ts/hello.ts diff --git a/proc_macros/src/lib.rs b/proc_macros/src/lib.rs index 31e6c3b..1854090 100644 --- a/proc_macros/src/lib.rs +++ b/proc_macros/src/lib.rs @@ -335,8 +335,8 @@ fn get_all_variants_collection_definition( ) -> proc_macro2::TokenStream { quote! { pub static #all_variants_collection_name: #collection_type_name<#name> = { - use SupportedLanguage::*; - BySupportedLanguage([ + use #name::*; + #collection_type_name([ #(#variants),* ]) }; diff --git a/src/language.rs b/src/language.rs index 2818766..fff9729 100644 --- a/src/language.rs +++ b/src/language.rs @@ -43,8 +43,41 @@ fixed_map! { ], } +fixed_map! { + name => SupportedLanguageLanguage, + variants => [ + C, + Cpp, + CSharp, + Css, + Dockerfile, + Elisp, + Elm, + Go, + Html, + Java, + Javascript, + Json, + Kotlin, + Lua, + ObjectiveC, + Python, + Ruby, + Rust, + Swift, + Toml, + TreeSitterQuery, + Tsx, + Typescript, + ], +} + impl SupportedLanguage { pub fn language(&self, path: Option<&Path>) -> Language { + self.supported_language_language(path).language() + } + + pub fn supported_language_language(&self, path: Option<&Path>) -> SupportedLanguageLanguage { match &SUPPORTED_LANGUAGE_LANGUAGES[*self] { SingleLanguageOrLanguageFromPath::SingleLanguage(language) => *language, SingleLanguageOrLanguageFromPath::LanguageFromPath(language_from_path) => { @@ -62,29 +95,35 @@ impl SupportedLanguage { } } +impl SupportedLanguageLanguage { + pub fn language(&self) -> Language { + SUPPORTED_LANGUAGE_LANGUAGE_LANGUAGES[*self] + } +} + enum SingleLanguageOrLanguageFromPath { - SingleLanguage(Language), + SingleLanguage(SupportedLanguageLanguage), LanguageFromPath(Box), } -impl From for SingleLanguageOrLanguageFromPath { - fn from(value: Language) -> Self { +impl From for SingleLanguageOrLanguageFromPath { + fn from(value: SupportedLanguageLanguage) -> Self { Self::SingleLanguage(value) } } trait LanguageFromPath: Send + Sync { #[allow(clippy::wrong_self_convention)] - fn from_path(&self, path: Option<&Path>) -> Language; + fn from_path(&self, path: Option<&Path>) -> SupportedLanguageLanguage; } struct TypescriptLanguageFromPath; impl LanguageFromPath for TypescriptLanguageFromPath { - fn from_path(&self, path: Option<&Path>) -> Language { + fn from_path(&self, path: Option<&Path>) -> SupportedLanguageLanguage { match path.and_then(|path| path.extension()) { - Some(extension) if "tsx" == extension => tree_sitter_typescript::language_tsx(), - _ => tree_sitter_typescript::language_typescript(), + Some(extension) if "tsx" == extension => SupportedLanguageLanguage::Tsx, + _ => SupportedLanguageLanguage::Typescript, } } } @@ -92,28 +131,57 @@ impl LanguageFromPath for TypescriptLanguageFromPath { static SUPPORTED_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { by_supported_language!( - Rust => tree_sitter_rust::language().into(), + Rust => SupportedLanguageLanguage::Rust.into(), Typescript => SingleLanguageOrLanguageFromPath::LanguageFromPath(Box::new(TypescriptLanguageFromPath)), - Javascript => tree_sitter_javascript::language().into(), - Swift => tree_sitter_swift::language().into(), - ObjectiveC => tree_sitter_objc::language().into(), - Toml => tree_sitter_toml::language().into(), - Python => tree_sitter_python::language().into(), - Ruby => tree_sitter_ruby::language().into(), - C => tree_sitter_c::language().into(), - Cpp => tree_sitter_cpp::language().into(), - Go => tree_sitter_go::language().into(), - Java => tree_sitter_java::language().into(), - CSharp => tree_sitter_c_sharp::language().into(), - Kotlin => tree_sitter_kotlin::language().into(), - Elisp => tree_sitter_elisp::language().into(), - Elm => tree_sitter_elm::language().into(), - Dockerfile => tree_sitter_dockerfile::language().into(), - Html => tree_sitter_html::language().into(), - TreeSitterQuery => tree_sitter_query::language().into(), - Json => tree_sitter_json::language().into(), - Css => tree_sitter_css::language().into(), - Lua => tree_sitter_lua::language().into(), + Javascript => SupportedLanguageLanguage::Javascript.into(), + Swift => SupportedLanguageLanguage::Swift.into(), + ObjectiveC => SupportedLanguageLanguage::ObjectiveC.into(), + Toml => SupportedLanguageLanguage::Toml.into(), + Python => SupportedLanguageLanguage::Python.into(), + Ruby => SupportedLanguageLanguage::Ruby.into(), + C => SupportedLanguageLanguage::C.into(), + Cpp => SupportedLanguageLanguage::Cpp.into(), + Go => SupportedLanguageLanguage::Go.into(), + Java => SupportedLanguageLanguage::Java.into(), + CSharp => SupportedLanguageLanguage::CSharp.into(), + Kotlin => SupportedLanguageLanguage::Kotlin.into(), + Elisp => SupportedLanguageLanguage::Elisp.into(), + Elm => SupportedLanguageLanguage::Elm.into(), + Dockerfile => SupportedLanguageLanguage::Dockerfile.into(), + Html => SupportedLanguageLanguage::Html.into(), + TreeSitterQuery => SupportedLanguageLanguage::TreeSitterQuery.into(), + Json => SupportedLanguageLanguage::Json.into(), + Css => SupportedLanguageLanguage::Css.into(), + Lua => SupportedLanguageLanguage::Lua.into(), + ) + }); + +static SUPPORTED_LANGUAGE_LANGUAGE_LANGUAGES: Lazy> = + Lazy::new(|| { + by_supported_language_language!( + Rust => tree_sitter_rust::language(), + Typescript => tree_sitter_typescript::language_typescript(), + Tsx => tree_sitter_typescript::language_tsx(), + Javascript => tree_sitter_javascript::language(), + Swift => tree_sitter_swift::language(), + ObjectiveC => tree_sitter_objc::language(), + Toml => tree_sitter_toml::language(), + Python => tree_sitter_python::language(), + Ruby => tree_sitter_ruby::language(), + C => tree_sitter_c::language(), + Cpp => tree_sitter_cpp::language(), + Go => tree_sitter_go::language(), + Java => tree_sitter_java::language(), + CSharp => tree_sitter_c_sharp::language(), + Kotlin => tree_sitter_kotlin::language(), + Elisp => tree_sitter_elisp::language(), + Elm => tree_sitter_elm::language(), + Dockerfile => tree_sitter_dockerfile::language(), + Html => tree_sitter_html::language(), + TreeSitterQuery => tree_sitter_query::language(), + Json => tree_sitter_json::language(), + Css => tree_sitter_css::language(), + Lua => tree_sitter_lua::language(), ) }); diff --git a/src/lib.rs b/src/lib.rs index 4c33e63..f0412e5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ mod use_printer; mod use_searcher; pub use args::{Args, ArgsBuilder}; -use language::BySupportedLanguage; +use language::{BySupportedLanguageLanguage, SupportedLanguageLanguage}; pub use language::SupportedLanguage; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; @@ -75,7 +75,7 @@ pub enum Error { } } )] - NoSuccessfulQueryParsing(Vec<(SupportedLanguage, QueryError)>), + NoSuccessfulQueryParsing(Vec<(SupportedLanguageLanguage, QueryError)>), #[error("query must include at least one capture (\"@whatever\")")] NoCaptureInQuery, #[error("invalid capture name '{capture_name}'")] @@ -172,7 +172,7 @@ impl From for QueryOrCaptureIndexError { #[allow(clippy::type_complexity)] #[derive(Default)] struct CachedQueries( - BySupportedLanguage, CaptureIndex), QueryOrCaptureIndexError>>>, + BySupportedLanguageLanguage, CaptureIndex), QueryOrCaptureIndexError>>>, ); impl CachedQueries { @@ -184,11 +184,12 @@ impl CachedQueries { path: Option<&Path>, ) -> Option<(Arc, CaptureIndex)> { let query_or_query_text = query_or_query_text.into(); - self.0[language] + let supported_language_language = language.supported_language_language(path); + self.0[supported_language_language] .get_or_init(|| { match query_or_query_text { QueryOrQueryText::QueryText(query_text) => { - maybe_get_query(query_text, language.language(path)) + maybe_get_query(query_text, supported_language_language.language()) .map(Arc::new) .map_err(Into::into) } diff --git a/tests/fixtures/typescript_project_with_tsx_and_ts/foo.tsx b/tests/fixtures/typescript_project_with_tsx_and_ts/foo.tsx new file mode 100644 index 0000000..4f7bbdd --- /dev/null +++ b/tests/fixtures/typescript_project_with_tsx_and_ts/foo.tsx @@ -0,0 +1 @@ +const a =
whee
; diff --git a/tests/fixtures/typescript_project_with_tsx_and_ts/hello.ts b/tests/fixtures/typescript_project_with_tsx_and_ts/hello.ts new file mode 100644 index 0000000..867813e --- /dev/null +++ b/tests/fixtures/typescript_project_with_tsx_and_ts/hello.ts @@ -0,0 +1 @@ +const x =
3; diff --git a/tests/languages.rs b/tests/languages.rs index 11e305d..772ab59 100644 --- a/tests/languages.rs +++ b/tests/languages.rs @@ -438,3 +438,69 @@ fn test_lua_auto_language() { "#, ); } + +#[test] +fn test_typescript_tsx_specific_query() { + assert_sorted_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(jsx_element) @c' --language typescript + foo.tsx:1:const a =
whee
; + "#, + ); +} + +#[test] +fn test_typescript_tsx_specific_query_auto_language() { + assert_sorted_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(jsx_element) @c' + foo.tsx:1:const a =
whee
; + "#, + ); +} + +#[test] +fn test_typescript_only_should_match_ts_parsing() { + assert_sorted_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(type_assertion) @c' --language typescript + hello.ts:1:const x =
3; + "#, + ); +} + +#[test] +fn test_typescript_only_should_match_ts_parsing_auto_language() { + assert_sorted_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(type_assertion) @c' + hello.ts:1:const x =
3; + "#, + ); +} + +#[test] +fn test_typescript_invalid_query_for_ts_or_tsx() { + assert_failure_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(foo) @c' --language typescript + error: couldn't parse query for Tsx or Typescript + "#, + ); +} + +#[test] +fn test_typescript_invalid_query_for_ts_or_tsx_auto_language() { + assert_failure_output( + "typescript_project_with_tsx_and_ts", + r#" + $ tree-sitter-grep -q '(foo) @c' + error: couldn't parse query for Tsx or Typescript + "#, + ); +} diff --git a/tests/output.rs b/tests/output.rs index ef7b911..d1c1add 100644 --- a/tests/output.rs +++ b/tests/output.rs @@ -997,7 +997,7 @@ fn test_couldnt_parse_more_than_two_candidate_auto_detected_languages() { "mixed_project", r#" $ tree-sitter-grep -q '(function_itemz) @f' - error: couldn't parse query for Javascript, Rust, or Typescript + error: couldn't parse query for Javascript, Rust, or Tsx "#, ); } @@ -1008,7 +1008,7 @@ fn test_couldnt_parse_two_candidate_auto_detected_languages() { "mixed_project", r#" $ tree-sitter-grep -q '(function_itemz) @f' javascript_src/ typescript_src/ - error: couldn't parse query for Javascript or Typescript + error: couldn't parse query for Javascript or Tsx "#, ); } From 7543d9ddfcb88ae3cbf4112b9f434f7e94b1b2f4 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Tue, 19 Dec 2023 12:03:01 -0500 Subject: [PATCH 41/42] supported language all supported language languages --- src/args.rs | 17 ++++++---- src/language.rs | 73 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 87 +++++++++++++++++++++++++++---------------------- 3 files changed, 132 insertions(+), 45 deletions(-) diff --git a/src/args.rs b/src/args.rs index 7bab0c0..2064bc8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -21,7 +21,7 @@ use crate::{ }, searcher::{Searcher, SearcherBuilder}, use_printer::Printer, - Error, NonFatalError, + Error, NonFatalError, SupportedLanguageLanguage, }; const ALL_NODES_QUERY: &str = "(_) @node"; @@ -182,9 +182,14 @@ impl Args { pub(crate) fn get_project_file_walker_types(&self) -> Types { get_project_file_walker_types(self.language.map(|language| vec![language]).or_else(|| { - self.query_per_language - .as_ref() - .map(|query_per_language| query_per_language.keys().cloned().collect()) + self.query_per_language.as_ref().map(|query_per_language| { + query_per_language + .keys() + .map(|supported_language_language| { + supported_language_language.supported_language() + }) + .collect() + }) })) } @@ -241,7 +246,7 @@ impl ArgsBuilder { } } -pub type QueryPerLanguage = HashMap>; +pub type QueryPerLanguage = HashMap>; pub enum QueryOrQueryTextPerLanguage { SingleQueryText(String), @@ -251,7 +256,7 @@ pub enum QueryOrQueryTextPerLanguage { impl QueryOrQueryTextPerLanguage { pub fn get_query_or_query_text_for_language( &self, - language: SupportedLanguage, + language: SupportedLanguageLanguage, ) -> QueryOrQueryText { match self { QueryOrQueryTextPerLanguage::SingleQueryText(query_text) => (&**query_text).into(), diff --git a/src/language.rs b/src/language.rs index fff9729..3d2a187 100644 --- a/src/language.rs +++ b/src/language.rs @@ -47,19 +47,26 @@ fixed_map! { name => SupportedLanguageLanguage, variants => [ C, + #[value(name = "c++")] + #[strum(serialize = "C++")] Cpp, + #[strum(serialize = "C#")] CSharp, + #[strum(serialize = "CSS")] Css, Dockerfile, Elisp, Elm, Go, + #[strum(serialize = "HTML")] Html, Java, Javascript, + #[strum(serialize = "JSON")] Json, Kotlin, Lua, + #[strum(serialize = "Objective-C")] ObjectiveC, Python, Ruby, @@ -86,6 +93,10 @@ impl SupportedLanguage { } } + pub fn all_supported_language_languages(&self) -> &'static [SupportedLanguageLanguage] { + &SUPPORTED_LANGUAGE_ALL_LANGUAGES[*self] + } + pub fn name_for_ignore_select(&self) -> &'static str { SUPPORTED_LANGUAGE_NAMES_FOR_IGNORE_SELECT[*self] } @@ -99,6 +110,10 @@ impl SupportedLanguageLanguage { pub fn language(&self) -> Language { SUPPORTED_LANGUAGE_LANGUAGE_LANGUAGES[*self] } + + pub fn supported_language(&self) -> SupportedLanguage { + SUPPORTED_LANGUAGE_LANGUAGE_SUPPORTED_LANGUAGES[*self] + } } enum SingleLanguageOrLanguageFromPath { @@ -156,6 +171,34 @@ static SUPPORTED_LANGUAGE_LANGUAGES: Lazy>> = + Lazy::new(|| { + by_supported_language!( + Rust => vec![SupportedLanguageLanguage::Rust], + Typescript => vec![SupportedLanguageLanguage::Tsx, SupportedLanguageLanguage::Typescript], + Javascript => vec![SupportedLanguageLanguage::Javascript], + Swift => vec![SupportedLanguageLanguage::Swift], + ObjectiveC => vec![SupportedLanguageLanguage::ObjectiveC], + Toml => vec![SupportedLanguageLanguage::Toml], + Python => vec![SupportedLanguageLanguage::Python], + Ruby => vec![SupportedLanguageLanguage::Ruby], + C => vec![SupportedLanguageLanguage::C], + Cpp => vec![SupportedLanguageLanguage::Cpp], + Go => vec![SupportedLanguageLanguage::Go], + Java => vec![SupportedLanguageLanguage::Java], + CSharp => vec![SupportedLanguageLanguage::CSharp], + Kotlin => vec![SupportedLanguageLanguage::Kotlin], + Elisp => vec![SupportedLanguageLanguage::Elisp], + Elm => vec![SupportedLanguageLanguage::Elm], + Dockerfile => vec![SupportedLanguageLanguage::Dockerfile], + Html => vec![SupportedLanguageLanguage::Html], + TreeSitterQuery => vec![SupportedLanguageLanguage::TreeSitterQuery], + Json => vec![SupportedLanguageLanguage::Json], + Css => vec![SupportedLanguageLanguage::Css], + Lua => vec![SupportedLanguageLanguage::Lua], + ) + }); + static SUPPORTED_LANGUAGE_LANGUAGE_LANGUAGES: Lazy> = Lazy::new(|| { by_supported_language_language!( @@ -185,6 +228,36 @@ static SUPPORTED_LANGUAGE_LANGUAGE_LANGUAGES: Lazy> = + Lazy::new(|| { + by_supported_language_language!( + Rust => SupportedLanguage::Rust, + Typescript => SupportedLanguage::Typescript, + Tsx => SupportedLanguage::Typescript, + Javascript => SupportedLanguage::Javascript, + Swift => SupportedLanguage::Swift, + ObjectiveC => SupportedLanguage::ObjectiveC, + Toml => SupportedLanguage::Toml, + Python => SupportedLanguage::Python, + Ruby => SupportedLanguage::Ruby, + C => SupportedLanguage::C, + Cpp => SupportedLanguage::Cpp, + Go => SupportedLanguage::Go, + Java => SupportedLanguage::Java, + CSharp => SupportedLanguage::CSharp, + Kotlin => SupportedLanguage::Kotlin, + Elisp => SupportedLanguage::Elisp, + Elm => SupportedLanguage::Elm, + Dockerfile => SupportedLanguage::Dockerfile, + Html => SupportedLanguage::Html, + TreeSitterQuery => SupportedLanguage::TreeSitterQuery, + Json => SupportedLanguage::Json, + Css => SupportedLanguage::Css, + Lua => SupportedLanguage::Lua, + ) + }); + static SUPPORTED_LANGUAGE_NAMES_FOR_IGNORE_SELECT: BySupportedLanguage<&'static str> = by_supported_language!( Rust => "rust", Typescript => "ts", diff --git a/src/lib.rs b/src/lib.rs index f0412e5..2a2f781 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,8 +33,8 @@ mod use_printer; mod use_searcher; pub use args::{Args, ArgsBuilder}; -use language::{BySupportedLanguageLanguage, SupportedLanguageLanguage}; -pub use language::SupportedLanguage; +use language::BySupportedLanguageLanguage; +pub use language::{SupportedLanguage, SupportedLanguageLanguage}; pub use plugin::PluginInitializeReturn; use query_context::QueryContext; use treesitter::maybe_get_query; @@ -108,7 +108,7 @@ pub enum NonFatalError { )] AmbiguousLanguageForFile { path: PathBuf, - languages: Vec, + languages: Vec, }, #[error("No files were searched")] NothingSearched, @@ -172,19 +172,19 @@ impl From for QueryOrCaptureIndexError { #[allow(clippy::type_complexity)] #[derive(Default)] struct CachedQueries( - BySupportedLanguageLanguage, CaptureIndex), QueryOrCaptureIndexError>>>, + BySupportedLanguageLanguage< + OnceLock, CaptureIndex), QueryOrCaptureIndexError>>, + >, ); impl CachedQueries { fn get_and_cache_query_for_language<'a>( &self, query_or_query_text: impl Into>, - language: SupportedLanguage, + supported_language_language: SupportedLanguageLanguage, capture_name: Option<&str>, - path: Option<&Path>, ) -> Option<(Arc, CaptureIndex)> { let query_or_query_text = query_or_query_text.into(); - let supported_language_language = language.supported_language_language(path); self.0[supported_language_language] .get_or_init(|| { match query_or_query_text { @@ -408,7 +408,7 @@ fn run_for_context( |project_file_dir_entry, matched_languages| { searched.store(true, Ordering::SeqCst); let path = project_file_dir_entry.path(); - let language = match args.language { + let supported_language_language = match args.language { Some(specified_language) => { if !matched_languages.contains(&specified_language) { return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { @@ -417,7 +417,7 @@ fn run_for_context( } .into(); } - specified_language + specified_language.supported_language_language(Some(path)) } None => match matched_languages.len() { 0 => { @@ -426,20 +426,23 @@ fn run_for_context( } .into(); } - 1 => matched_languages[0], + 1 => matched_languages[0].supported_language_language(Some(path)), _ => { let successfully_parsed_query_languages = matched_languages .iter() .filter_map(|&matched_language| { + let matched_supported_language_language = + matched_language.supported_language_language(Some(path)); cached_queries .get_and_cache_query_for_language( query_text_per_language - .get_query_or_query_text_for_language(matched_language), - matched_language, + .get_query_or_query_text_for_language( + matched_supported_language_language, + ), + matched_supported_language_language, args.capture_name.as_deref(), - Some(path), ) - .map(|_| matched_language) + .map(|_| matched_supported_language_language) }) .collect::>(); match successfully_parsed_query_languages.len() { @@ -459,19 +462,21 @@ fn run_for_context( }, }; let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( - query_text_per_language.get_query_or_query_text_for_language(language), - language, + query_text_per_language.get_query_or_query_text_for_language(supported_language_language), + supported_language_language, args.capture_name.as_deref(), - Some(path), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let relative_path = - format_relative_path(path, args.is_using_default_paths()); + let relative_path = format_relative_path(path, args.is_using_default_paths()); - let query_context = - QueryContext::new(query, capture_index, language.language(Some(path)), filter.clone()); + let query_context = QueryContext::new( + query, + capture_index, + supported_language_language.language(), + filter.clone(), + ); search_file(&context, &args, relative_path, query_context, &matched); @@ -496,7 +501,7 @@ fn run_for_context( pub fn run_with_single_per_file_callback( args: Args, - per_file_callback: impl Fn(&DirEntry, SupportedLanguage, &[u8], &Tree, &Arc) + Sync, + per_file_callback: impl Fn(&DirEntry, SupportedLanguageLanguage, &[u8], &Tree, &Arc) + Sync, ) -> Result { let query_text_per_language = args.get_loaded_query_text_per_language()?; let filter = args.get_loaded_filter()?; @@ -508,7 +513,7 @@ pub fn run_with_single_per_file_callback( non_fatal_errors.clone(), |project_file_dir_entry, matched_languages| { let path = project_file_dir_entry.path(); - let language = match args.language { + let supported_language_language = match args.language { Some(specified_language) => { if !matched_languages.contains(&specified_language) { return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType { @@ -517,7 +522,7 @@ pub fn run_with_single_per_file_callback( } .into(); } - specified_language + specified_language.supported_language_language(Some(path)) } None => match matched_languages.len() { 0 => { @@ -526,20 +531,20 @@ pub fn run_with_single_per_file_callback( } .into(); } - 1 => matched_languages[0], + 1 => matched_languages[0].supported_language_language(Some(path)), _ => { let successfully_parsed_query_languages = matched_languages .iter() .filter_map(|&matched_language| { + let matched_supported_language_language = matched_language.supported_language_language(Some(path)); cached_queries .get_and_cache_query_for_language( query_text_per_language - .get_query_or_query_text_for_language(matched_language), - matched_language, + .get_query_or_query_text_for_language(matched_supported_language_language), + matched_supported_language_language, args.capture_name.as_deref(), - Some(path), ) - .map(|_| matched_language) + .map(|_| matched_supported_language_language) }) .collect::>(); match successfully_parsed_query_languages.len() { @@ -559,29 +564,33 @@ pub fn run_with_single_per_file_callback( }, }; let (query, capture_index) = match cached_queries.get_and_cache_query_for_language( - query_text_per_language.get_query_or_query_text_for_language(language), - language, + query_text_per_language.get_query_or_query_text_for_language(supported_language_language), + supported_language_language, args.capture_name.as_deref(), - Some(path), ) { Some(query) => query, None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile), }; - let relative_path = - format_relative_path(path, args.is_using_default_paths()); + let relative_path = format_relative_path(path, args.is_using_default_paths()); - let query_context = - QueryContext::new(query, capture_index, language.language(Some(path)), filter.clone()); + let query_context = QueryContext::new( + query, + capture_index, + supported_language_language.language(), + filter.clone(), + ); let searcher = get_searcher(&args); let mut searcher = searcher.borrow_mut(); - let file_contents = searcher.load_file_contents::<_, io::Error>(relative_path).unwrap(); + let file_contents = searcher + .load_file_contents::<_, io::Error>(relative_path) + .unwrap(); let tree = (&*file_contents) - .parse(&mut get_parser(language.language(Some(path))), None) + .parse(&mut get_parser(supported_language_language.language()), None) .unwrap(); per_file_callback( &project_file_dir_entry, - language, + supported_language_language, &file_contents, &tree, &query_context.query, From 045b98dca28f1e76a066e3143e961dd61991dc83 Mon Sep 17 00:00:00 2001 From: Julian Rosse Date: Mon, 8 Jan 2024 11:21:17 -0500 Subject: [PATCH 42/42] rope or slice --- src/treesitter.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/treesitter.rs b/src/treesitter.rs index d17c80e..97c15d7 100644 --- a/src/treesitter.rs +++ b/src/treesitter.rs @@ -146,6 +146,17 @@ impl<'a> From<&'a str> for RopeOrSlice<'a> { } } +impl<'a> From> for String { + fn from(value: RopeOrSlice<'a>) -> Self { + match value { + // TODO: should this use TryFrom instead to expose + // this fallibility? + RopeOrSlice::Slice(value) => std::str::from_utf8(value).unwrap().to_owned(), + RopeOrSlice::Rope(value) => value.into(), + } + } +} + pub enum RopeOrSliceTextProviderIterator<'a> { Slice(iter::Once<&'a [u8]>), Rope(RopeOrSliceRopeTextProviderIterator<'a>),