Skip to content
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b2c06c0
expose pattern index
helixbass Jul 20, 2023
551e376
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 20, 2023
3d4682c
correct interpretation of capture index
helixbass Jul 20, 2023
422058d
package name
helixbass Jul 21, 2023
2118d3c
package name
helixbass Jul 21, 2023
54b39ed
expose supported language
helixbass Jul 22, 2023
0602f3c
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 22, 2023
b05884b
try exposing search slice endpoint
helixbass Jul 22, 2023
d341747
clone args
helixbass Jul 22, 2023
1930ccf
fn mut
helixbass Jul 22, 2023
3c2a567
handle multiple args instances
helixbass Jul 22, 2023
5fe38b3
per file callback
helixbass Jul 23, 2023
5a3ede1
take tree, rope
helixbass Jul 25, 2023
0ad0481
use patched tree-sitter
helixbass Jul 27, 2023
c0ae26e
get_captures() compiling
helixbass Jul 27, 2023
c73c3da
use of get_captures() compiling
helixbass Jul 27, 2023
d8f21b0
use everywhere
helixbass Jul 27, 2023
3a039da
rm unused
helixbass Jul 27, 2023
debbbc0
non-local tree-sitter dependency
helixbass Jul 27, 2023
30a1c71
get captures for enclosing node
helixbass Jul 27, 2023
c0bf4ca
wiring up query text per language
helixbass Jul 27, 2023
2eba2c5
expose per-language queries
helixbass Jul 28, 2023
e471b0d
capture index per language
helixbass Jul 28, 2023
672ac41
rm debugging
helixbass Jul 28, 2023
c5f3fa2
per match
helixbass Jul 28, 2023
c371ae9
update tree-sitter dependency
helixbass Jul 31, 2023
53e844f
don't require sync
helixbass Aug 2, 2023
57495f8
run with single per file callback
helixbass Aug 2, 2023
31959d1
expose language comment kinds
helixbass Aug 2, 2023
2061904
comment type
helixbass Aug 2, 2023
6daf28a
use git dependency for js grammar
helixbass Aug 11, 2023
6bb81e1
debug rope or slice
helixbass Aug 15, 2023
325d3e0
bump tree-sitter-javascript version
helixbass Aug 16, 2023
355ea2b
Merge branch 'run-context-tree-sitter-lint' of github.com:helixbass/t…
helixbass Aug 16, 2023
2e842eb
bump tree-sitter-rust dependency
helixbass Aug 24, 2023
3b1230c
bump tree-sitter-javascript dependency
helixbass Aug 25, 2023
c5a8c85
from str
helixbass Aug 26, 2023
d250c7c
debug slice
helixbass Aug 27, 2023
c5b2e88
bump tree-sitter-rust version
helixbass Oct 6, 2023
7626af8
bump tree-sitter-rust version
helixbass Oct 6, 2023
526d110
rm unused entry points; language from path
helixbass Dec 19, 2023
d93d175
tests
helixbass Dec 19, 2023
db220fe
test typescript
helixbass Dec 19, 2023
7543d9d
supported language all supported language languages
helixbass Dec 19, 2023
045b98d
rope or slice
helixbass Jan 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "tree-sitter-grep"
name = "tree_sitter_lint_tree-sitter-grep"
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per helixbass/tree-sitter-lint#4, updating this to allow publishing that crate while this hasn't "landed"

version = "0.1.0"
edition = "2021"
license = "Unlicense OR MIT"
Expand All @@ -8,6 +8,7 @@ authors = [
"Peter Stuart <peter@peterstuart.org>"
]
description = """
(not-yet-landed version used by tree-sitter-lint)
tree-sitter-grep is a grep-like search tool that
recursively searches the current directory for a
tree-sitter query pattern. Like ripgrep, it respects
Expand Down
2 changes: 1 addition & 1 deletion examples/filter_before_line_number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{

use libc::c_char;
use tree_sitter::Node;
use tree_sitter_grep::PluginInitializeReturn;
use tree_sitter_lint_tree_sitter_grep::PluginInitializeReturn;

static ROW_NUMBER: AtomicUsize = AtomicUsize::new(0);

Expand Down
6 changes: 3 additions & 3 deletions examples/print_match_text.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use clap::Parser;
use tree_sitter_grep::{run_with_callback, Args};
use tree_sitter_lint_tree_sitter_grep::{run_with_callback, Args};

fn main() {
let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]);
run_with_callback(args, |node, file_contents, path| {
run_with_callback(args, |capture_info, file_contents, path| {
println!(
"Found match in {path:?}: {}",
std::str::from_utf8(&file_contents[node.byte_range()]).unwrap(),
std::str::from_utf8(&file_contents[capture_info.node.byte_range()]).unwrap(),
);
})
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::{

const ALL_NODES_QUERY: &str = "(_) @node";

#[derive(Parser)]
#[derive(Clone, Parser)]
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if the need to do args.clone() is a "smell" that the crate-level API entry points should be taking an &Args instead?

#[clap(group(
ArgGroup::new("query_or_filter")
.multiple(true)
Expand Down
2 changes: 1 addition & 1 deletion src/bin/tree-sitter-grep.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::process;

use clap::Parser;
use tree_sitter_grep::{run_print, Args, RunStatus};
use tree_sitter_lint_tree_sitter_grep::{run_print, Args, RunStatus};

pub fn main() {
let args = Args::parse();
Expand Down
60 changes: 56 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ mod use_printer;
mod use_searcher;

pub use args::Args;
use language::{BySupportedLanguage, SupportedLanguage};
use language::BySupportedLanguage;
pub use language::SupportedLanguage;
pub use plugin::PluginInitializeReturn;
use query_context::QueryContext;
use treesitter::maybe_get_query;
Expand Down Expand Up @@ -74,6 +75,8 @@ pub enum Error {
FilterPluginExpectedArgument,
#[error("plugin couldn't parse argument {filter_arg:?}")]
FilterPluginCouldntParseArgument { filter_arg: String },
#[error("language is required when passing a slice")]
LanguageMissingForSlice,
}

#[derive(Clone, Debug, Error)]
Expand Down Expand Up @@ -290,9 +293,14 @@ pub fn run_print(args: Args) -> Result<RunStatus, Error> {
)
}

pub struct CaptureInfo<'node> {
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naming?

pub node: Node<'node>,
pub pattern_index: usize,
}

pub fn run_with_callback(
args: Args,
callback: impl Fn(Node, &[u8], &Path) + Sync,
callback: impl Fn(CaptureInfo, &[u8], &Path) + Sync,
) -> Result<RunStatus, Error> {
run_for_context(
args,
Expand All @@ -307,8 +315,8 @@ pub fn run_with_callback(
.search_path_callback::<_, io::Error>(
query_context,
path,
|node: Node, file_contents: &[u8], path: &Path| {
callback(node, file_contents, path);
|capture_info: CaptureInfo, file_contents: &[u8], path: &Path| {
callback(capture_info, file_contents, path);
matched.store(true, Ordering::SeqCst);
},
)
Expand Down Expand Up @@ -412,6 +420,50 @@ fn run_for_context<TContext: Sync>(
})
}

pub fn run_for_slice_with_callback(
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This entry point is needed for the use case where tree-sitter-lint itself has an in-memory "slice" that it wants to lint (specifically it's using that for its rule-testing helpers)

slice: &[u8],
args: Args,
mut callback: impl FnMut(CaptureInfo) + Sync,
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Vs the things that get passed to the callback in the run_with_callback() entry point) having the callback get passed the slice and a path didn't seem to make sense for this use case so it just gets passed the "capture info"

) -> Result<RunStatus, Error> {
let language = args.language.ok_or(Error::LanguageMissingForSlice)?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically Args has different "validation" for this entry point (specifically we need to know which language you want this slice to be treated as)

I started thinking that maybe the slice itself could/should be a field on Args and then that would be something that Args could "always validate" (ie that if its eg .slice_to_search field is present then its .language field must also be present)?

But that might be weird because then you'd have to sort of assert that "for this entry point we expect args.slice_to_search to be set" and vice-versa?

let query_text = args.get_loaded_query_text()?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't know if there are great ways to DRY this up more wrt other entry points, didn't worry about that too much at the moment

let filter = args.get_loaded_filter()?;
let cached_queries: CachedQueries = Default::default();
let capture_index = CaptureIndex::default();
let matched = AtomicBool::new(false);
let non_fatal_errors: Arc<Mutex<Vec<NonFatalError>>> = Default::default();

let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) {
Some(query) => query,
None => {
return Err(cached_queries
.error_if_no_successful_query_parsing()
.unwrap_err())
}
};
let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?;

let query_context = QueryContext::new(query, capture_index, language.language(), filter);

get_searcher(&args)
.borrow_mut()
.search_slice_callback_no_path(query_context, slice, |capture_info: CaptureInfo| {
callback(capture_info);
matched.store(true, Ordering::SeqCst);
})
.unwrap();

let non_fatal_errors = non_fatal_errors.lock().unwrap().clone();
if non_fatal_errors.is_empty() {
cached_queries.error_if_no_successful_query_parsing()?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may not make any sense/be redundant since above we already checked that "the query for the definitely-specified language is parseable"

}

Ok(RunStatus {
matched: matched.load(Ordering::SeqCst),
non_fatal_errors,
})
}

fn for_each_project_file(
args: &Args,
non_fatal_errors: Arc<Mutex<Vec<NonFatalError>>>,
Expand Down
80 changes: 61 additions & 19 deletions src/searcher/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use std::{
};

use encoding_rs_io::DecodeReaderBytesBuilder;
use tree_sitter::{Node, QueryCursor};
use tree_sitter::QueryCursor;

pub use self::mmap::MmapChoice;
use crate::{
Expand All @@ -19,6 +19,7 @@ use crate::{
searcher::glue::MultiLine,
sink::{Sink, SinkError},
treesitter::get_parser,
CaptureInfo,
};

mod core;
Expand Down Expand Up @@ -218,7 +219,7 @@ impl Searcher {
&mut self,
query_context: QueryContext,
path: P,
callback: impl Fn(Node, &[u8], &Path),
callback: impl Fn(CaptureInfo, &[u8], &Path),
) -> Result<(), TError>
where
P: AsRef<Path>,
Expand Down Expand Up @@ -338,7 +339,7 @@ impl Searcher {
&mut self,
query_context: QueryContext,
slice: &[u8],
callback: impl Fn(Node, &[u8], &Path),
callback: impl Fn(CaptureInfo, &[u8], &Path),
path: &Path,
) -> Result<(), ConfigError> {
self.check_config()?;
Expand All @@ -349,11 +350,53 @@ impl Searcher {
Ok(())
}

pub fn search_slice_callback_no_path(
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The "organization" of these searcher methods is probably weird, but it's not like I 100% understand them in the first place

Here there was already a .search_slice_callback() method (used internally by the .search_path_callback() "entry point") but it expected a Path to be passed and per above comment its callback argument took the additional file contents/path arguments

So just made a separate .search_slice_callback_no_path() "entry point"

&mut self,
query_context: QueryContext,
slice: &[u8],
mut callback: impl FnMut(CaptureInfo),
) -> Result<(), ConfigError> {
self.check_config()?;

log::trace!("slice reader: searching via multiline strategy");
let mut query_cursor = QueryCursor::new();
let tree = get_parser(query_context.language)
.parse(slice, None)
.unwrap();
let query = &query_context.query;
let capture_index = query_context.capture_index;
let filter = &query_context.filter;
query_cursor
.captures(query, tree.root_node(), slice)
.filter_map(|(match_, index_into_query_match_captures)| {
let this_capture = &match_.captures[index_into_query_match_captures];
if this_capture.index != capture_index {
return None;
}
let single_captured_node = this_capture.node;
match filter.as_ref() {
None => Some(CaptureInfo {
node: single_captured_node,
pattern_index: match_.pattern_index,
}),
Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo {
node: single_captured_node,
pattern_index: match_.pattern_index,
}),
}
})
.for_each(|capture_info| {
callback(capture_info);
});

Ok(())
}

fn run_with_callback(
&self,
query_context: QueryContext,
slice: &[u8],
callback: impl Fn(Node, &[u8], &Path),
callback: impl Fn(CaptureInfo, &[u8], &Path),
path: &Path,
) {
let mut query_cursor = QueryCursor::new();
Expand All @@ -365,26 +408,25 @@ impl Searcher {
let filter = &query_context.filter;
query_cursor
.captures(query, tree.root_node(), slice)
.filter_map(|(match_, found_capture_index)| {
let found_capture_index = found_capture_index as u32;
if found_capture_index != capture_index {
.filter_map(|(match_, index_into_query_match_captures)| {
let this_capture = &match_.captures[index_into_query_match_captures];
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These changes (up through let single_captured_node = ...) are "their own thing"

What I ran into was that my understanding of the usize returned by the .captures() API (what was previously being called found_capture_index) was incorrect

Based on a little sanity-checking, it appears to be an index into the .captures on the other "match" (tree_sitter::QueryMatch) that gets returned

(not a "capture index" for the query as a whole) (maybe the usize -> u32 casting should've been a tip-off?)

But so this should get merged into master as its own little PR presumably (along with some test cases of queries with multiple "top-level patterns" (which I guess is the only time this issue was happening?))

if this_capture.index != capture_index {
return None;
}
let mut nodes_for_this_capture = match_.nodes_for_capture_index(capture_index);
let single_captured_node = nodes_for_this_capture.next().unwrap();
assert!(
nodes_for_this_capture.next().is_none(),
"I guess .captures() always wraps up the single capture like this?"
);
let single_captured_node = this_capture.node;
match filter.as_ref() {
None => Some(single_captured_node),
Some(filter) => filter
.call(&single_captured_node)
.then_some(single_captured_node),
None => Some(CaptureInfo {
node: single_captured_node,
pattern_index: match_.pattern_index,
}),
Some(filter) => filter.call(&single_captured_node).then_some(CaptureInfo {
node: single_captured_node,
pattern_index: match_.pattern_index,
}),
}
})
.for_each(|node| {
callback(node, slice, path);
.for_each(|capture_info| {
callback(capture_info, slice, path);
});
}

Expand Down
22 changes: 8 additions & 14 deletions src/use_searcher.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,16 @@
use std::{
cell::{OnceCell, RefCell},
ptr,
rc::Rc,
};
use std::{cell::RefCell, collections::HashMap, rc::Rc};

use crate::{searcher::Searcher, Args};

thread_local! {
static SEARCHER: OnceCell<(Rc<RefCell<Searcher>>, *const Args)> = Default::default();
static SEARCHER_PER_ARGS_INSTANCE: RefCell<HashMap<*const Args, Rc<RefCell<Searcher>>>> = Default::default();
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd forgotten that we have now invalidated this invariant (but glad I was asserting it!) (that there's only ever one instance of Args used during a program execution)

So just made this a per-Args-instance map instead (yes in theory then you should probably be more "legitimately cache-like" and eg evict once you get past a certain size but not worrying about that for now)

This same change should be made to the printer which is using the same pattern

}
pub(crate) fn get_searcher(args: &Args) -> Rc<RefCell<Searcher>> {
SEARCHER.with(|searcher| {
let (searcher, args_when_initialized) =
searcher.get_or_init(|| (Rc::new(RefCell::new(args.get_searcher())), args));
assert!(
ptr::eq(*args_when_initialized, args),
"Using multiple instances of args not supported"
);
searcher.clone()
SEARCHER_PER_ARGS_INSTANCE.with(|searcher_per_args_instance| {
searcher_per_args_instance
.borrow_mut()
.entry(args)
.or_insert_with(|| Rc::new(RefCell::new(args.get_searcher())))
.clone()
})
}