Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions rust/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ const_format = "0.2.34"
ruff_python_parser = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" }
ruff_python_ast = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" }
ruff_source_file = { git = "https://github.com/astral-sh/ruff.git", tag = "v0.4.10" }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.9"
unindent = "0.2.4"

[dependencies.pyo3]
version = "0.24.1"
Expand Down
177 changes: 177 additions & 0 deletions rust/src/filesystem.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
use pyo3::exceptions::PyFileNotFoundError;
use pyo3::prelude::*;
use std::collections::HashMap;
use unindent::unindent;

type FileSystemContents = HashMap<String, String>;

// Implements BasicFileSystem (defined in grimp.application.ports.filesystem.BasicFileSystem).
#[pyclass]
pub struct FakeBasicFileSystem {
contents: FileSystemContents,
}

#[pymethods]
impl FakeBasicFileSystem {
#[pyo3(signature = (contents=None, content_map=None))]
#[new]
fn new(contents: Option<&str>, content_map: Option<HashMap<String, String>>) -> PyResult<Self> {
let mut parsed_contents = match contents {
Some(contents) => parse_indented_file_system_string(contents),
None => HashMap::new(),
};
if let Some(content_map) = content_map {
let unindented_map: HashMap<String, String> = content_map
.into_iter()
.map(|(key, val)| (key, unindent(&val).trim().to_string()))
.collect();
parsed_contents.extend(unindented_map);
};
Ok(FakeBasicFileSystem {
contents: parsed_contents,
})
}

#[getter]
fn sep(&self) -> String {
"/".to_string()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be worth interning this with intern! for efficiency if it's called a lot from Python.

}

#[pyo3(signature = (*components))]
fn join(&self, components: Vec<String>) -> String {
let sep = self.sep();
components
.into_iter()
.map(|c| c.trim_end_matches(&sep).to_string())
.collect::<Vec<String>>()
.join(&sep)
Comment on lines +193 to +198

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the performance is important, it might make sense to define const SEP = "/" as a module constant to avoid creating a String every time join is called. This could still be referred to from self.sep() for Python code.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also wonder if we can avoid the Vec and/or the String in the collect?

}

fn split(&self, file_name: &str) -> (String, String) {
let components: Vec<&str> = file_name.split('/').collect();

if components.is_empty() {
return ("".to_string(), "".to_string());
}

let tail = components.last().unwrap_or(&""); // Last component, or empty if components is empty (shouldn't happen from split)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit reference here should be unnecessary because "" is already a &str:

Suggested change
let tail = components.last().unwrap_or(&""); // Last component, or empty if components is empty (shouldn't happen from split)
let tail = components.last().unwrap_or(""); // Last component, or empty if components is empty (shouldn't happen from split)


let head_components = &components[..components.len() - 1]; // All components except the last
Comment on lines +202 to +210

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

split returns a DoubleEndedIterator, so it can be iterated from both ends:

Suggested change
let components: Vec<&str> = file_name.split('/').collect();
if components.is_empty() {
return ("".to_string(), "".to_string());
}
let tail = components.last().unwrap_or(&""); // Last component, or empty if components is empty (shouldn't happen from split)
let head_components = &components[..components.len() - 1]; // All components except the last
let mut components = file_name.split('/');
let tail = match components.next_back() {
Some(tail) => tail,
None => return ("".to_string(), "".to_string());
};
let head_components: Vec<&str> = components.collect();


let head = if head_components.is_empty() {
// Case for single component paths like "filename.txt" or empty string ""
"".to_string()
} else if file_name.starts_with('/')
&& head_components.len() == 1
&& head_components[0].is_empty()
{
// Special handling for paths starting with '/', e.g., "/" or "/filename.txt"
// If components were ["", ""], head_components is [""] -> should be "/"
// If components were ["", "file.txt"], head_components is [""] -> should be "/"
"/".to_string()
} else {
// Default joining for multiple components
head_components.join("/")
};

(head, tail.to_string())
}

/// Checks if a file or directory exists within the file system.
fn exists(&self, file_name: &str) -> bool {
self.contents.contains_key(file_name)
}

fn read(&self, file_name: &str) -> PyResult<String> {
match self.contents.get(file_name) {
Some(file_name) => Ok(file_name.clone()),
None => Err(PyFileNotFoundError::new_err("")),

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd include the file_name in the error message:

Suggested change
None => Err(PyFileNotFoundError::new_err("")),
None => Err(PyFileNotFoundError::new_err(format!("No such file: {file_name}"))),

}
}
}

/// Parses an indented string representing a file system structure
/// into a HashMap where keys are full file paths.
/// See tests.adaptors.filesystem.FakeFileSystem for the API.
pub fn parse_indented_file_system_string(file_system_string: &str) -> HashMap<String, String> {
let mut file_paths_map: HashMap<String, String> = HashMap::new();
let mut path_stack: Vec<String> = Vec::new(); // Stores current directory path components
let mut first_line = true; // Flag to handle the very first path component

// Normalize newlines and split into lines
let buffer = file_system_string.replace("\r\n", "\n");
let lines: Vec<&str> = buffer.split('\n').collect();

for line_raw in lines.clone() {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think pulling out this variable before the loop avoids the need to clone lines:

Suggested change
for line_raw in lines.clone() {
let first_line_starts_with_slash = lines[0].trim().starts_with('/');
for line_raw in lines {

let line = line_raw.trim_end(); // Remove trailing whitespace
if line.is_empty() {
continue; // Skip empty lines
}

let current_indent = line.chars().take_while(|&c| c.is_whitespace()).count();

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this will compile:

Suggested change
let current_indent = line.chars().take_while(|&c| c.is_whitespace()).count();
let current_indent = line.chars().take_while(char::is_whitespace).count();

let trimmed_line = line.trim_start();

// Assuming 4 spaces per indentation level for calculating depth
// Adjust this if your indentation standard is different (e.g., 2 spaces, tabs)
let current_depth = current_indent / 4;

if first_line {
// The first non-empty line sets the base path.
// It might be absolute (/a/b/) or relative (a/b/).
let root_component = trimmed_line.trim_end_matches('/').to_string();
path_stack.push(root_component);
first_line = false;
} else {
// Adjust the path_stack based on indentation level
// Pop elements from the stack until we reach the correct parent directory depth
while path_stack.len() > current_depth {
path_stack.pop();
}

// If the current line is a file, append it to the path for inserting into map,
// then pop it off so that subsequent siblings are correctly handled.
// If it's a directory, append it and it stays on the stack for its children.
let component_to_add = trimmed_line.trim_end_matches('/').to_string();
if !component_to_add.is_empty() {
// Avoid pushing empty strings due to lines like just "/"
path_stack.push(component_to_add);
}
}

// Construct the full path
// Join components on the stack. If the first component started with '/',
// ensure the final path also starts with '/'.
let full_path = if !path_stack.is_empty() {
let mut joined = path_stack.join("/");
// If the original root started with a slash, ensure the final path does too.
// But be careful not to double-slash if a component is e.g. "/root"
if lines[0].trim().starts_with('/') && !joined.starts_with('/') {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd pull lines[0].trim().starts_with('/') into a variable outside the for loop:

Suggested change
if lines[0].trim().starts_with('/') && !joined.starts_with('/') {
if first_line_starts_with_slash && !joined.starts_with('/') {

joined = format!("/{joined}");
}
joined
} else {
"".to_string()
};

// If it's a file (doesn't end with '/'), add it to the map
// A file is not a directory, so its name should be removed from the stack after processing
// so that sibling items are at the correct level.
if !trimmed_line.ends_with('/') {
file_paths_map.insert(full_path, String::new()); // Value can be empty or actual content
if !path_stack.is_empty() {
path_stack.pop(); // Pop the file name off the stack
}
}
}

// Edge case: If the very first line was a file and it ended up on the stack, it needs to be processed.
// This handles single-file inputs like "myfile.txt"
if !path_stack.is_empty()
&& !path_stack.last().unwrap().ends_with('/')
&& !file_paths_map.contains_key(&path_stack.join("/"))
{
file_paths_map.insert(path_stack.join("/"), String::new());
}

file_paths_map
}
3 changes: 3 additions & 0 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod exceptions;
pub mod graph;
pub mod import_parsing;
pub mod module_expressions;
mod filesystem;

use crate::errors::{GrimpError, GrimpResult};
use crate::exceptions::{InvalidModuleExpression, ModuleNotPresent, NoSuchContainer, ParseError};
Expand All @@ -18,11 +19,13 @@ use pyo3::types::{IntoPyDict, PyDict, PyFrozenSet, PyList, PySet, PyString, PyTu
use rayon::prelude::*;
use rustc_hash::FxHashSet;
use std::collections::HashSet;
use crate::filesystem::FakeBasicFileSystem;

#[pymodule]
fn _rustgrimp(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(parse_imported_objects_from_code))?;
m.add_class::<GraphWrapper>()?;
m.add_class::<FakeBasicFileSystem>()?;
m.add("ModuleNotPresent", py.get_type::<ModuleNotPresent>())?;
m.add("NoSuchContainer", py.get_type::<NoSuchContainer>())?;
m.add(
Expand Down
Loading