Skip to content
Open
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b2c06c0
expose pattern index
helixbass Jul 20, 2023
551e376
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 20, 2023
3d4682c
correct interpretation of capture index
helixbass Jul 20, 2023
422058d
package name
helixbass Jul 21, 2023
2118d3c
package name
helixbass Jul 21, 2023
54b39ed
expose supported language
helixbass Jul 22, 2023
0602f3c
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 22, 2023
b05884b
try exposing search slice endpoint
helixbass Jul 22, 2023
d341747
clone args
helixbass Jul 22, 2023
1930ccf
fn mut
helixbass Jul 22, 2023
3c2a567
handle multiple args instances
helixbass Jul 22, 2023
5fe38b3
per file callback
helixbass Jul 23, 2023
5a3ede1
take tree, rope
helixbass Jul 25, 2023
0ad0481
use patched tree-sitter
helixbass Jul 27, 2023
c0ae26e
get_captures() compiling
helixbass Jul 27, 2023
c73c3da
use of get_captures() compiling
helixbass Jul 27, 2023
d8f21b0
use everywhere
helixbass Jul 27, 2023
3a039da
rm unused
helixbass Jul 27, 2023
debbbc0
non-local tree-sitter dependency
helixbass Jul 27, 2023
30a1c71
get captures for enclosing node
helixbass Jul 27, 2023
c0bf4ca
wiring up query text per language
helixbass Jul 27, 2023
2eba2c5
expose per-language queries
helixbass Jul 28, 2023
e471b0d
capture index per language
helixbass Jul 28, 2023
672ac41
rm debugging
helixbass Jul 28, 2023
c5f3fa2
per match
helixbass Jul 28, 2023
c371ae9
update tree-sitter dependency
helixbass Jul 31, 2023
53e844f
don't require sync
helixbass Aug 2, 2023
57495f8
run with single per file callback
helixbass Aug 2, 2023
31959d1
expose language comment kinds
helixbass Aug 2, 2023
2061904
comment type
helixbass Aug 2, 2023
6daf28a
use git dependency for js grammar
helixbass Aug 11, 2023
6bb81e1
debug rope or slice
helixbass Aug 15, 2023
325d3e0
bump tree-sitter-javascript version
helixbass Aug 16, 2023
355ea2b
Merge branch 'run-context-tree-sitter-lint' of github.com:helixbass/t…
helixbass Aug 16, 2023
2e842eb
bump tree-sitter-rust dependency
helixbass Aug 24, 2023
3b1230c
bump tree-sitter-javascript dependency
helixbass Aug 25, 2023
c5a8c85
from str
helixbass Aug 26, 2023
d250c7c
debug slice
helixbass Aug 27, 2023
c5b2e88
bump tree-sitter-rust version
helixbass Oct 6, 2023
7626af8
bump tree-sitter-rust version
helixbass Oct 6, 2023
526d110
rm unused entry points; language from path
helixbass Dec 19, 2023
d93d175
tests
helixbass Dec 19, 2023
db220fe
test typescript
helixbass Dec 19, 2023
7543d9d
supported language all supported language languages
helixbass Dec 19, 2023
045b98d
rope or slice
helixbass Jan 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "tree-sitter-grep"
name = "tree_sitter_lint_tree-sitter-grep"
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per helixbass/tree-sitter-lint#4, updating this to allow publishing that crate while this hasn't "landed"

version = "0.1.0"
edition = "2021"
license = "Unlicense OR MIT"
Expand All @@ -8,6 +8,7 @@ authors = [
"Peter Stuart <peter@peterstuart.org>"
]
description = """
(not-yet-landed version used by tree-sitter-lint)
tree-sitter-grep is a grep-like search tool that
recursively searches the current directory for a
tree-sitter query pattern. Like ripgrep, it respects
Expand All @@ -34,10 +35,13 @@ log = "0.4.5"
memchr = "2.1"
memmap = { package = "memmap2", version = "0.5.3" }
once_cell = "1.18.0"
ouroboros = "0.17.2"
proc_macros = { package = "tree_sitter_grep_proc_macros", path = "proc_macros", version = "0.1.0" }
rayon = "1.7.0"
regex = "1.8.2"
ropey = "1.6.0"
serde = { version = "1.0.77", features = ["derive"] }
streaming-iterator = "0.1.9"
strum_macros = "0.25.1"
termcolor = "1.2.0"
thiserror = "1.0.43"
Expand Down Expand Up @@ -65,6 +69,9 @@ tree-sitter-swift = "0.3.6"
tree-sitter-toml = "0.20.0"
tree-sitter-typescript = "0.20.2"

[patch.crates-io]
tree-sitter = { git = "https://github.com/helixbass/tree-sitter", rev = "57e98fb0" }
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read up on how to do this kind of "force dependencies to use this version" override.

What I guess I wasn't anticipating is that I also had to do this in any "outer" crates (tree-sitter-lint and its project-local crates), even though they don't have their own explicit dependency on tree-sitter (they use tree-sitter-grep's re-export).


[[bin]]
name = "tree-sitter-grep"

Expand Down
2 changes: 1 addition & 1 deletion examples/filter_before_line_number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{

use libc::c_char;
use tree_sitter::Node;
use tree_sitter_grep::PluginInitializeReturn;
use tree_sitter_lint_tree_sitter_grep::PluginInitializeReturn;

static ROW_NUMBER: AtomicUsize = AtomicUsize::new(0);

Expand Down
6 changes: 3 additions & 3 deletions examples/print_match_text.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use clap::Parser;
use tree_sitter_grep::{run_with_callback, Args};
use tree_sitter_lint_tree_sitter_grep::{run_with_callback, Args};

fn main() {
let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]);
run_with_callback(args, |node, file_contents, path| {
run_with_callback(args, |capture_info, file_contents, path| {
println!(
"Found match in {path:?}: {}",
std::str::from_utf8(&file_contents[node.byte_range()]).unwrap(),
std::str::from_utf8(&file_contents[capture_info.node.byte_range()]).unwrap(),
);
})
.unwrap();
Expand Down
3 changes: 2 additions & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ format_macro_bodies = true
format_macro_matchers = true
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
wrap_comments = true
edition = "2021"
# wrap_comments = true
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rustfmt often decided to "wrap" code that I had commented out, which made me decide to turn this off.

2 changes: 1 addition & 1 deletion src/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::{

const ALL_NODES_QUERY: &str = "(_) @node";

#[derive(Parser)]
#[derive(Clone, Parser)]
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether the need to call args.clone() is a "smell" suggesting that the crate-level API entry points should be taking an &Args instead.

#[clap(group(
ArgGroup::new("query_or_filter")
.multiple(true)
Expand Down
2 changes: 1 addition & 1 deletion src/bin/tree-sitter-grep.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::process;

use clap::Parser;
use tree_sitter_grep::{run_print, Args, RunStatus};
use tree_sitter_lint_tree_sitter_grep::{run_print, Args, RunStatus};

pub fn main() {
let args = Args::parse();
Expand Down
176 changes: 171 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use ignore::DirEntry;
use rayon::prelude::*;
use termcolor::{BufferWriter, ColorChoice};
use thiserror::Error;
use tree_sitter::{Node, Query, QueryError};
use tree_sitter::{Query, QueryError, Tree};

mod args;
mod language;
Expand All @@ -32,13 +32,21 @@ mod use_printer;
mod use_searcher;

pub use args::Args;
use language::{BySupportedLanguage, SupportedLanguage};
use language::BySupportedLanguage;
pub use language::SupportedLanguage;
pub use plugin::PluginInitializeReturn;
use query_context::QueryContext;
use treesitter::maybe_get_query;
pub use treesitter::{
get_captures, get_captures_for_enclosing_node, CaptureInfo, Parseable, RopeOrSlice,
};
use use_printer::get_printer;
use use_searcher::get_searcher;

pub extern crate ropey;
pub extern crate streaming_iterator;
pub extern crate tree_sitter;

#[derive(Debug, Error)]
pub enum Error {
#[error("couldn't read query file {path_to_query_file:?}")]
Expand Down Expand Up @@ -74,6 +82,8 @@ pub enum Error {
FilterPluginExpectedArgument,
#[error("plugin couldn't parse argument {filter_arg:?}")]
FilterPluginCouldntParseArgument { filter_arg: String },
#[error("language is required when passing a slice")]
LanguageMissingForSlice,
}

#[derive(Clone, Debug, Error)]
Expand Down Expand Up @@ -292,7 +302,7 @@ pub fn run_print(args: Args) -> Result<RunStatus, Error> {

pub fn run_with_callback(
args: Args,
callback: impl Fn(Node, &[u8], &Path) + Sync,
callback: impl Fn(&CaptureInfo, &[u8], &Path) + Sync,
) -> Result<RunStatus, Error> {
run_for_context(
args,
Expand All @@ -307,8 +317,8 @@ pub fn run_with_callback(
.search_path_callback::<_, io::Error>(
query_context,
path,
|node: Node, file_contents: &[u8], path: &Path| {
callback(node, file_contents, path);
|capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| {
callback(capture_info, file_contents, path);
matched.store(true, Ordering::SeqCst);
},
)
Expand Down Expand Up @@ -412,6 +422,162 @@ fn run_for_context<TContext: Sync>(
})
}

pub fn run_for_slice_with_callback<'a>(
slice: impl Into<RopeOrSlice<'a>>,
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I think the idea here is that the slice argument could eventually be generic, something like impl TextProvider<'a> + Parseable (so that nothing at the tree_sitter_grep level needs any "specific" awareness of ropey).

But that is blocked by running into lifetime issues at the point of calling into tree_sitter (per tree-sitter/tree-sitter#2432)

(although as discussed maybe we could reasonably temporarily depend on a forked version of tree_sitter if there is in fact a way to tell all of the individual language-grammar crates "hey use this version of that dependency instead"?)

So for now made it take the "concrete" type of RopeOrSlice

(The point of this is to try to be more efficient: in tree-sitter-lint-lsp (and maybe later in the tree-sitter-lint "fixing loop" as well) we're storing file text in a ropey::Rope, so it's presumably expensive to convert that to "one big" &[u8] vs. letting it be parsed/queried "in chunks".)

tree: Option<&Tree>,
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather than adding, e.g., a separate "entry point" for when you already have a pre-parsed tree_sitter::Tree, I just added it as an optional param to this existing one.

args: Args,
mut callback: impl FnMut(&CaptureInfo) + Sync,
) -> Result<RunStatus, Error> {
let slice = slice.into();
let language = args.language.ok_or(Error::LanguageMissingForSlice)?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basically Args has different "validation" for this entry point (specifically we need to know which language you want this slice to be treated as)

I started thinking that maybe the slice itself could/should be a field on Args and then that would be something that Args could "always validate" (ie that if its eg .slice_to_search field is present then its .language field must also be present)?

But that might be weird because then you'd have to sort of assert that "for this entry point we expect args.slice_to_search to be set" and vice-versa?

let query_text = args.get_loaded_query_text()?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know whether there are good ways to DRY this up further with respect to the other entry points; I didn't worry about that too much for now.

let filter = args.get_loaded_filter()?;
let cached_queries: CachedQueries = Default::default();
let capture_index = CaptureIndex::default();
let matched = AtomicBool::new(false);
let non_fatal_errors: Arc<Mutex<Vec<NonFatalError>>> = Default::default();

let query = match cached_queries.get_and_cache_query_for_language(&query_text, language) {
Some(query) => query,
None => {
return Err(cached_queries
.error_if_no_successful_query_parsing()
.unwrap_err())
}
};
let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?;

let query_context = QueryContext::new(query, capture_index, language.language(), filter);

get_searcher(&args)
.borrow_mut()
.search_slice_callback_no_path(query_context, slice, tree, |capture_info: &CaptureInfo| {
callback(capture_info);
matched.store(true, Ordering::SeqCst);
})
.unwrap();

let non_fatal_errors = non_fatal_errors.lock().unwrap().clone();
if non_fatal_errors.is_empty() {
cached_queries.error_if_no_successful_query_parsing()?;
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check may be redundant (or may not make sense at all), since above we already checked that "the query for the definitely-specified language is parseable".

}

Ok(RunStatus {
matched: matched.load(Ordering::SeqCst),
non_fatal_errors,
})
}

pub fn run_with_per_file_callback(
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I kept running into wanting callbacks in tree-sitter-lint that run in the context of a single file (e.g. rule-listener callbacks) to be able to have mutable access to things (it seemed silly to wrap all of the "per-file-run rule state" in e.g. Mutexes when we know that, once we're in the context of a single file, things are happening "serially").

So by adding an extra layer of "callback indirection" we can accomplish that and have the "per-match" callback be an FnMut

args: Args,
per_file_callback: impl Fn(&DirEntry, Box<dyn FnMut(Box<dyn FnMut(&CaptureInfo, &[u8], &Path) + '_>) + '_>)
+ Sync,
) -> Result<RunStatus, Error> {
let query_text = args.get_loaded_query_text()?;
let filter = args.get_loaded_filter()?;
let cached_queries: CachedQueries = Default::default();
let capture_index = CaptureIndex::default();
let matched = AtomicBool::new(false);
let searched = AtomicBool::new(false);
let non_fatal_errors: Arc<Mutex<Vec<NonFatalError>>> = Default::default();

for_each_project_file(
&args,
non_fatal_errors.clone(),
|project_file_dir_entry, matched_languages| {
searched.store(true, Ordering::SeqCst);
let language = match args.language {
Some(specified_language) => {
if !matched_languages.contains(&specified_language) {
return NonFatalError::ExplicitPathArgumentNotOfSpecifiedType {
path: project_file_dir_entry.path().to_owned(),
specified_language,
}
.into();
}
specified_language
}
None => match matched_languages.len() {
0 => {
return NonFatalError::ExplicitPathArgumentNotOfKnownType {
path: project_file_dir_entry.path().to_owned(),
}
.into();
}
1 => matched_languages[0],
_ => {
let successfully_parsed_query_languages = matched_languages
.iter()
.filter_map(|&matched_language| {
cached_queries
.get_and_cache_query_for_language(&query_text, matched_language)
.map(|_| matched_language)
})
.collect::<Vec<_>>();
match successfully_parsed_query_languages.len() {
0 => {
return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile);
}
1 => successfully_parsed_query_languages[0],
_ => {
return NonFatalError::AmbiguousLanguageForFile {
path: project_file_dir_entry.path().to_owned(),
languages: successfully_parsed_query_languages,
}
.into();
}
}
}
},
};
let query = match cached_queries.get_and_cache_query_for_language(&query_text, language)
{
Some(query) => query,
None => return Ok(SingleFileSearchNonFailure::QueryNotParseableForFile),
};
let capture_index = capture_index.get_or_init(&query, args.capture_name.as_deref())?;
let path =
format_relative_path(project_file_dir_entry.path(), args.is_using_default_paths());

let query_context =
QueryContext::new(query, capture_index, language.language(), filter.clone());

per_file_callback(
&project_file_dir_entry,
Box::new(|mut per_match_callback| {
get_searcher(&args)
.borrow_mut()
.search_path_callback::<_, io::Error>(
query_context.clone(),
path,
|capture_info: &CaptureInfo, file_contents: &[u8], path: &Path| {
per_match_callback(capture_info, file_contents, path);
matched.store(true, Ordering::SeqCst);
},
)
.unwrap();
}),
);

Ok(SingleFileSearchNonFailure::RanQuery)
},
)?;

let mut non_fatal_errors = non_fatal_errors.lock().unwrap().clone();
if non_fatal_errors.is_empty() {
if !searched.load(Ordering::SeqCst) {
non_fatal_errors.push(NonFatalError::NothingSearched);
} else {
cached_queries.error_if_no_successful_query_parsing()?;
}
}

Ok(RunStatus {
matched: matched.load(Ordering::SeqCst),
non_fatal_errors,
})
}

fn for_each_project_file(
args: &Args,
non_fatal_errors: Arc<Mutex<Vec<NonFatalError>>>,
Expand Down
1 change: 1 addition & 0 deletions src/query_context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use tree_sitter::{Language, Query};

use crate::plugin::Filterer;

#[derive(Clone)]
pub struct QueryContext {
pub query: Arc<Query>,
pub capture_index: u32,
Expand Down
Loading