Skip to content
Open
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
b2c06c0
expose pattern index
helixbass Jul 20, 2023
551e376
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 20, 2023
3d4682c
correct interpretation of capture index
helixbass Jul 20, 2023
422058d
package name
helixbass Jul 21, 2023
2118d3c
package name
helixbass Jul 21, 2023
54b39ed
expose supported language
helixbass Jul 22, 2023
0602f3c
Merge branch 'run-context' into run-context-tree-sitter-lint
helixbass Jul 22, 2023
b05884b
try exposing search slice endpoint
helixbass Jul 22, 2023
d341747
clone args
helixbass Jul 22, 2023
1930ccf
fn mut
helixbass Jul 22, 2023
3c2a567
handle multiple args instances
helixbass Jul 22, 2023
5fe38b3
per file callback
helixbass Jul 23, 2023
5a3ede1
take tree, rope
helixbass Jul 25, 2023
0ad0481
use patched tree-sitter
helixbass Jul 27, 2023
c0ae26e
get_captures() compiling
helixbass Jul 27, 2023
c73c3da
use of get_captures() compiling
helixbass Jul 27, 2023
d8f21b0
use everywhere
helixbass Jul 27, 2023
3a039da
rm unused
helixbass Jul 27, 2023
debbbc0
non-local tree-sitter dependency
helixbass Jul 27, 2023
30a1c71
get captures for enclosing node
helixbass Jul 27, 2023
c0bf4ca
wiring up query text per language
helixbass Jul 27, 2023
2eba2c5
expose per-language queries
helixbass Jul 28, 2023
e471b0d
capture index per language
helixbass Jul 28, 2023
672ac41
rm debugging
helixbass Jul 28, 2023
c5f3fa2
per match
helixbass Jul 28, 2023
c371ae9
update tree-sitter dependency
helixbass Jul 31, 2023
53e844f
don't require sync
helixbass Aug 2, 2023
57495f8
run with single per file callback
helixbass Aug 2, 2023
31959d1
expose language comment kinds
helixbass Aug 2, 2023
2061904
comment type
helixbass Aug 2, 2023
6daf28a
use git dependency for js grammar
helixbass Aug 11, 2023
6bb81e1
debug rope or slice
helixbass Aug 15, 2023
325d3e0
bump tree-sitter-javascript version
helixbass Aug 16, 2023
355ea2b
Merge branch 'run-context-tree-sitter-lint' of github.com:helixbass/t…
helixbass Aug 16, 2023
2e842eb
bump tree-sitter-rust dependency
helixbass Aug 24, 2023
3b1230c
bump tree-sitter-javascript dependency
helixbass Aug 25, 2023
c5a8c85
from str
helixbass Aug 26, 2023
d250c7c
debug slice
helixbass Aug 27, 2023
c5b2e88
bump tree-sitter-rust version
helixbass Oct 6, 2023
7626af8
bump tree-sitter-rust version
helixbass Oct 6, 2023
526d110
rm unused entry points; language from path
helixbass Dec 19, 2023
d93d175
tests
helixbass Dec 19, 2023
db220fe
test typescript
helixbass Dec 19, 2023
7543d9d
supported language all supported language languages
helixbass Dec 19, 2023
045b98d
rope or slice
helixbass Jan 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "tree-sitter-grep"
name = "tree_sitter_lint_tree-sitter-grep"
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per helixbass/tree-sitter-lint#4, I'm renaming the package so that the tree-sitter-lint crate can be published while these changes haven't "landed" yet.

version = "0.1.0"
edition = "2021"
license = "Unlicense OR MIT"
Expand All @@ -8,6 +8,7 @@ authors = [
"Peter Stuart <peter@peterstuart.org>"
]
description = """
(not-yet-landed version used by tree-sitter-lint)
tree-sitter-grep is a grep-like search tool that
recursively searches the current directory for a
tree-sitter query pattern. Like ripgrep, it respects
Expand All @@ -25,6 +26,7 @@ rust-version = "1.70"
bstr = "1.1.0"
bytecount = "0.6"
clap = { version = "4.3.0", features = ["derive", "wrap_help"] }
derive_builder = "0.12.0"
encoding_rs = "0.8.14"
encoding_rs_io = "0.1.6"
ignore = { package = "tree_sitter_grep_ignore", git = "https://github.com/helixbass/ripgrep", rev = "669ebd3", version = "0.4.20-dev.0" }
Expand All @@ -34,10 +36,13 @@ log = "0.4.5"
memchr = "2.1"
memmap = { package = "memmap2", version = "0.5.3" }
once_cell = "1.18.0"
ouroboros = "0.17.2"
proc_macros = { package = "tree_sitter_grep_proc_macros", path = "proc_macros", version = "0.1.0" }
rayon = "1.7.0"
regex = "1.8.2"
ropey = "1.6.0"
serde = { version = "1.0.77", features = ["derive"] }
streaming-iterator = "0.1.9"
strum_macros = "0.25.1"
termcolor = "1.2.0"
thiserror = "1.0.43"
Expand Down Expand Up @@ -65,6 +70,9 @@ tree-sitter-swift = "0.3.6"
tree-sitter-toml = "0.20.0"
tree-sitter-typescript = "0.20.2"

[patch.crates-io]
tree-sitter = { git = "https://github.com/helixbass/tree-sitter", rev = "57e98fb0" }
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I read up on how to "force all dependencies to use this version", which is what this `[patch.crates-io]` section does.

What I hadn't anticipated is that I also had to do this in every "outer" crate (tree-sitter-lint and its project-local crates), even though they don't declare their own explicit dependency on tree-sitter (they use tree-sitter-grep's re-export).


[[bin]]
name = "tree-sitter-grep"

Expand Down
2 changes: 1 addition & 1 deletion examples/filter_before_line_number.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{

use libc::c_char;
use tree_sitter::Node;
use tree_sitter_grep::PluginInitializeReturn;
use tree_sitter_lint_tree_sitter_grep::PluginInitializeReturn;

static ROW_NUMBER: AtomicUsize = AtomicUsize::new(0);

Expand Down
6 changes: 3 additions & 3 deletions examples/print_match_text.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use clap::Parser;
use tree_sitter_grep::{run_with_callback, Args};
use tree_sitter_lint_tree_sitter_grep::{run_with_callback, Args};

fn main() {
let args = Args::parse_from(["tree_sitter_grep", "-q", "(function_item) @f"]);
run_with_callback(args, |node, file_contents, path| {
run_with_callback(args, |capture_info, file_contents, path| {
println!(
"Found match in {path:?}: {}",
std::str::from_utf8(&file_contents[node.byte_range()]).unwrap(),
std::str::from_utf8(&file_contents[capture_info.node.byte_range()]).unwrap(),
);
})
.unwrap();
Expand Down
2 changes: 1 addition & 1 deletion proc_macros/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ fn get_token_enum_definition(
variants_with_attributes: &[ExprPath],
) -> proc_macro2::TokenStream {
quote! {
#[derive(Copy, Clone, Debug, Eq, PartialEq, clap::ValueEnum, strum_macros::Display)]
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, clap::ValueEnum, strum_macros::Display)]
pub enum #name {
#(#variants_with_attributes),*
}
Expand Down
3 changes: 2 additions & 1 deletion rustfmt.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ format_macro_bodies = true
format_macro_matchers = true
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
wrap_comments = true
edition = "2021"
# wrap_comments = true
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rustfmt often decided to "wrap" code that I had commented out, so I decided to turn this option off.

94 changes: 84 additions & 10 deletions src/args.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
use std::{
collections::HashMap,
fs,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};

use clap::{ArgGroup, Parser};
use derive_builder::Builder;
use ignore::{types::Types, WalkBuilder, WalkParallel};
use rayon::iter::IterBridge;
use termcolor::BufferWriter;
use tree_sitter::Query;

use crate::{
language::SupportedLanguage,
Expand All @@ -23,7 +26,8 @@ use crate::{

const ALL_NODES_QUERY: &str = "(_) @node";

#[derive(Parser)]
#[derive(Builder, Clone, Default, Parser)]
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exposed an ArgsBuilder for tree-sitter-lint to use (vs. clunkily building Args by passing "command-line args").

Presumably there should be some additional validation, e.g. that the invariants clap checks when parsing command-line args are not violated when using the builder, but I haven't done that yet.

#[builder(default, setter(strip_option, into))]
#[clap(group(
ArgGroup::new("query_or_filter")
.multiple(true)
Expand All @@ -37,13 +41,16 @@ pub struct Args {
///
/// This conflicts with the --query option.
#[arg(short = 'Q', long = "query-file", conflicts_with = "query_text")]
pub path_to_query_file: Option<PathBuf>,
path_to_query_file: Option<PathBuf>,

/// The source text of a tree-sitter query.
///
/// This conflicts with the --query-file option.
#[arg(short, long = "query", conflicts_with = "path_to_query_file")]
pub query_text: Option<String>,
query_text: Option<String>,

#[clap(skip)]
query_per_language: Option<HashMap<SupportedLanguage, Arc<Query>>>,
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is "killing two birds with one stone": first, it allows passing in pre-parsed queries; second, it supports "query-per-language" (vs. "just one query").

Query-per-language lets tree-sitter-lint have a single "run" of tree-sitter-grep "drive" everything, which seems to make sense (so that we only walk the project files once).


/// The name of the tree-sitter query capture (without leading "@") whose
/// matching nodes will be output.
Expand Down Expand Up @@ -174,7 +181,11 @@ impl Args {
}

pub(crate) fn get_project_file_walker_types(&self) -> Types {
get_project_file_walker_types(self.language)
get_project_file_walker_types(self.language.map(|language| vec![language]).or_else(|| {
self.query_per_language
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is basically asserting that if you pass query_per_language, those are "all" of the languages that you're interested in.

Some of this is probably still a little conceptually unresolved, but that seems more or less like the right idea.

.as_ref()
.map(|query_per_language| query_per_language.keys().cloned().collect())
}))
}

pub(crate) fn get_project_file_walker(&self) -> WalkParallel {
Expand All @@ -199,18 +210,81 @@ impl Args {
Ok(get_loaded_filter(self.filter.as_deref(), self.filter_arg.as_deref())?.map(Arc::new))
}

pub(crate) fn get_loaded_query_text(&self) -> Result<String, Error> {
pub(crate) fn get_loaded_query_text_per_language(
&self,
) -> Result<QueryOrQueryTextPerLanguage, Error> {
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may seem like a weird "half-step" (toward Args just going ahead and actually parsing the query text), but I think it makes sense.

The reason is that we do want to fail fast if, e.g., we were passed a path to a query file that we can't read.

But, e.g., in the auto-language case (when query_per_language wasn't passed), we want to let CachedQueries lazily resolve the query text to a parsed query.

Ok(
match (self.path_to_query_file.as_ref(), self.query_text.as_ref()) {
(Some(path_to_query_file), None) => fs::read_to_string(path_to_query_file)
match (
self.path_to_query_file.as_ref(),
self.query_text.as_ref(),
self.query_per_language.as_ref(),
) {
(Some(path_to_query_file), None, None) => fs::read_to_string(path_to_query_file)
.map_err(|source| Error::QueryFileReadError {
source,
path_to_query_file: path_to_query_file.clone(),
})?,
(None, Some(query_text)) => query_text.clone(),
(None, None) => ALL_NODES_QUERY.to_owned(),
})?
.into(),
(None, Some(query_text), None) => query_text.clone().into(),
(None, None, Some(query_per_language)) => query_per_language.clone().into(),
(None, None, None) => ALL_NODES_QUERY.to_owned().into(),
_ => unreachable!(),
},
)
}
}

impl ArgsBuilder {
/// Stores an already-optional language on the builder and returns the
/// builder so calls can be chained.
///
/// The value is wrapped as `Some(language)`, so passing `None` leaves the
/// builder's `language` field set to `Some(None)`.
// NOTE(review): presumably this complements the derive_builder-generated
// `language` setter, which (given `strip_option`) takes the bare value —
// confirm against the generated code.
pub fn maybe_language(&mut self, language: Option<SupportedLanguage>) -> &mut Self {
self.language = Some(language);
self
}
}

/// The query input in one of two shapes: a single query source text, or a
/// map of already-built queries keyed by language.
pub enum QueryOrQueryTextPerLanguage {
/// One query source text; the same text is handed out for every language.
SingleQueryText(String),
/// Shared, already-built queries, one per language key.
PerLanguage(HashMap<SupportedLanguage, Arc<Query>>),
}

impl QueryOrQueryTextPerLanguage {
pub fn get_query_or_query_text_for_language(
&self,
language: SupportedLanguage,
) -> QueryOrQueryText {
match self {
QueryOrQueryTextPerLanguage::SingleQueryText(query_text) => (&**query_text).into(),
QueryOrQueryTextPerLanguage::PerLanguage(per_language) => {
per_language.get(&language).unwrap().clone().into()
}
}
}
}

impl From<String> for QueryOrQueryTextPerLanguage {
fn from(value: String) -> Self {
Self::SingleQueryText(value)
}
}

impl From<HashMap<SupportedLanguage, Arc<Query>>> for QueryOrQueryTextPerLanguage {
fn from(value: HashMap<SupportedLanguage, Arc<Query>>) -> Self {
Self::PerLanguage(value)
}
}

/// Either borrowed query source text or a shared, already-built query.
pub enum QueryOrQueryText<'a> {
/// Query source text borrowed for `'a`.
QueryText(&'a str),
/// A shared handle to an already-built query.
Query(Arc<Query>),
}

impl<'a> From<&'a str> for QueryOrQueryText<'a> {
fn from(value: &'a str) -> Self {
Self::QueryText(value)
}
}

impl<'a> From<Arc<Query>> for QueryOrQueryText<'a> {
fn from(value: Arc<Query>) -> Self {
Self::Query(value)
}
}
2 changes: 1 addition & 1 deletion src/bin/tree-sitter-grep.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::process;

use clap::Parser;
use tree_sitter_grep::{run_print, Args, RunStatus};
use tree_sitter_lint_tree_sitter_grep::{run_print, Args, RunStatus};

pub fn main() {
let args = Args::parse();
Expand Down
Loading