Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
bdeedfa
:memo: Add implementation plan for Windows junction support
ChanTsune Apr 17, 2026
b18001a
:memo: Rework junction plan after grill review
ChanTsune Apr 18, 2026
87112b5
:memo: Align junction plan with existing Windows module tree
ChanTsune Apr 18, 2026
5aafee6
:memo: Close junction external-target mutation hole in plan
ChanTsune Apr 18, 2026
1415ff4
:memo: Translate HRESULT to Win32 code in reparse error wrapping
ChanTsune Apr 18, 2026
ee00271
:memo: Formalize Windows junction support design spec
ChanTsune Apr 19, 2026
aca7a3d
:memo: Slim junction plan to reference the spec
ChanTsune Apr 19, 2026
e8c4214
:sparkles: Add Windows reparse buffer parser in CLI
ChanTsune Apr 19, 2026
87cce03
:art: Sort windows feature list alphabetically
ChanTsune Apr 19, 2026
04d0af4
:white_check_mark: Cover symlink reparse buffer parsing
ChanTsune Apr 19, 2026
b7b3750
:sparkles: Read NTFS reparse points via DeviceIoControl
ChanTsune Apr 19, 2026
36c08f0
:recycle: Align read_reparse_point with CLI FFI conventions
ChanTsune Apr 19, 2026
bc2e048
:sparkles: Create NTFS junctions via FSCTL_SET_REPARSE_POINT
ChanTsune Apr 19, 2026
a1d2c1e
:lock: Reject oversized junction targets before mutating filesystem
ChanTsune Apr 19, 2026
b7d7634
:sparkles: Add detect_junction helper for CLI
ChanTsune Apr 19, 2026
17cbfdf
:sparkles: Add PathnameEditor::edit_junction delegating to shared helper
ChanTsune Apr 19, 2026
f9e247c
:art: Align edit_junction test names and doc comment with conventions
ChanTsune Apr 19, 2026
01a7378
:bug: Terminate junction reparse names with UTF-16 NUL
ChanTsune Apr 19, 2026
e55c25a
:sparkles: Detect and archive Windows junctions as HardLink entries
ChanTsune Apr 20, 2026
362a6d1
:recycle: Route junction target through PathnameEditor::edit_junction
ChanTsune Apr 20, 2026
762bc75
:sparkles: Extract HardLink+fLTP=Directory as junction or symlink fal…
ChanTsune Apr 20, 2026
aede98e
:art: Polish Task 4.1 extract branch per code-quality review
ChanTsune Apr 20, 2026
7f0d976
:white_check_mark: Cover junction extract round-trip across platforms
ChanTsune Apr 20, 2026
41600fa
:bug: Gate junction round-trip test off WASM
ChanTsune Apr 20, 2026
838e618
:white_check_mark: End-to-end junction round trip via CLI
ChanTsune Apr 20, 2026
3f76715
:art: Hoist file_type() call in junction round-trip test
ChanTsune Apr 20, 2026
c746bc0
:white_check_mark: Pin junction extract does-not-mutate-target invariant
ChanTsune Apr 20, 2026
075e191
:bug: Load-bearing I2 fence: stamp Permission chunk in junction fixture
ChanTsune Apr 20, 2026
00bfc1c
:bug: Pass --unstable alongside --keep-permission on Windows
ChanTsune Apr 20, 2026
86ca0da
:memo: Document junction handling in --allow-unsafe-links
ChanTsune Apr 21, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 7 additions & 2 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,14 @@ nix = { version = "0.31.1", features = ["user", "fs", "ioctl"] }

[target.'cfg(windows)'.dependencies]
windows = { version = "0.62.2", features = [
"Win32_Storage_FileSystem",
"Win32_Foundation",
"Win32_Security",
"Win32_Security_Authorization",
"Win32_System_WindowsProgramming",
"Win32_Storage_FileSystem",
"Win32_System_IO",
"Win32_System_Ioctl",
"Win32_System_Threading",
"Win32_System_WindowsProgramming",
] }
field-offset = { version = "0.3.6", optional = true }

Expand All @@ -78,6 +82,7 @@ maplit = "1.0.2"
path-slash = "0.2.1"
rust-embed = { version = "8.11.0", features = ["debug-embed"] }
scopeguard = "1.2.0"
tempfile = "3"
walkdir = "2.5.0"
criterion = { version = "0.8.2", default-features = false, features = ["cargo_bench_support", "plotters"] }

Expand Down
4 changes: 2 additions & 2 deletions cli/src/command/bsdtar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,12 +592,12 @@ pub(crate) struct BsdtarCommand {
to_stdout: bool,
#[arg(
long,
help = "Allow extracting symbolic links and hard links that contain root or parent paths (default)"
help = "Allow extracting symbolic links, hard links, or Windows junctions whose target points outside the extraction root (default)"
)]
allow_unsafe_links: bool,
#[arg(
long,
help = "Do not allow extracting symbolic links and hard links that contain root or parent paths"
help = "Do not allow extracting symbolic links, hard links, or Windows junctions whose target points outside the extraction root"
)]
no_allow_unsafe_links: bool,
#[arg(
Expand Down
45 changes: 43 additions & 2 deletions cli/src/command/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,11 @@ pub(crate) enum StoreAs {
Dir,
Symlink(LinkTargetType),
Hardlink(PathBuf),
/// Windows NTFS junction. The inner `PathBuf` is the **external** target
/// path (typically absolute). This variant is only produced on Windows
/// but is declared unconditionally so that `match` arms remain exhaustive
/// on every platform.
Junction(PathBuf),
}

/// Source of an archive to include (file path or stdin).
Expand Down Expand Up @@ -737,8 +742,12 @@ pub(crate) fn collect_items_with_state(
// Classify entry and maybe add it to output
let store = if is_symlink {
let meta = fs::symlink_metadata(path)?;
let link_target_type = detect_symlink_target_type(path, &meta)?;
Some((StoreAs::Symlink(link_target_type), meta))
if let Some(target) = classify_junction(path)? {
Some((StoreAs::Junction(target), meta))
} else {
let link_target_type = detect_symlink_target_type(path, &meta)?;
Some((StoreAs::Symlink(link_target_type), meta))
}
} else if is_file {
if let Some(linked) = hardlink_resolver.resolve(path).ok().flatten() {
Some((StoreAs::Hardlink(linked), fs::symlink_metadata(path)?))
Expand Down Expand Up @@ -839,6 +848,26 @@ fn detect_symlink_target_type(
}
}

/// Probe `path` for a Windows junction and return its target when it is one.
///
/// `Ok(None)` is returned for every path that is not a junction (regular
/// directories, symlinks, unknown reparse tags). A failing probe is not
/// propagated: it is written to the debug log and reported as `Ok(None)`, so
/// the caller still falls through to its ordinary symlink handling.
#[cfg(windows)]
fn classify_junction(path: &Path) -> io::Result<Option<PathBuf>> {
    let probed = crate::utils::os::windows::fs::junction::detect_junction(path);
    // Best-effort classification: downgrade any probe error to "not a junction".
    Ok(probed.unwrap_or_else(|e| {
        log::debug!("Failed to inspect reparse point {}: {}", path.display(), e);
        None
    }))
}

/// Non-Windows stand-in for the junction probe.
///
/// This variant ignores its argument, performs no filesystem access, and
/// always reports "not a junction" so callers continue with their regular
/// symlink classification.
#[cfg(not(windows))]
fn classify_junction(_path: &Path) -> io::Result<Option<PathBuf>> {
    let not_a_junction: Option<PathBuf> = None;
    Ok(not_a_junction)
}

pub(crate) fn collect_split_archives(first: impl AsRef<Path>) -> io::Result<Vec<fs::File>> {
let first = first.as_ref();
let mut archives = Vec::new();
Expand Down Expand Up @@ -936,6 +965,18 @@ pub(crate) fn create_entry(
let entry = EntryBuilder::new_dir(entry_name);
apply_metadata(entry, path, keep_options, metadata)?.build()
}
StoreAs::Junction(target) => {
// Route the target through PathnameEditor::edit_junction so user-
// specified `-s` / `--transform` substitutions apply to junction
// targets on the same footing as symlink targets. Invariant I1
// (spec §7) guarantees the target is valid UTF-8, so the shared
// helper's `from_path_lossy_preserve_root` is effectively
// lossless for real inputs.
let reference = pathname_editor.edit_junction(target);
let mut entry = EntryBuilder::new_hard_link(entry_name, reference)?;
entry.link_target_type(LinkTargetType::Directory);
apply_metadata(entry, path, keep_options, metadata)?.build()
}
}
.map(Some)
}
Expand Down
61 changes: 56 additions & 5 deletions cli/src/command/core/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,11 @@ impl PathnameEditor {
Some((sanitized, had_root))
}

/// Edit a symlink target path.
///
/// Only user-specified substitutions (`-s`) are applied.
/// Leading `/` and `--strip-components` are NOT applied, matching bsdtar.
pub(crate) fn edit_symlink(&self, target: &Path) -> EntryReference {
/// Apply user-specified substitutions to a link target while preserving
/// absolute path components and skipping `--strip-components`, matching
/// bsdtar symlink semantics. Shared between [`edit_symlink`](Self::edit_symlink)
/// and [`edit_junction`](Self::edit_junction).
fn transform_link_target_preserving_root(&self, target: &Path) -> EntryReference {
let transformed: Cow<'_, Path> = if let Some(t) = &self.transformers {
Cow::Owned(PathBuf::from(t.apply(
target.to_string_lossy(),
Expand All @@ -94,6 +94,28 @@ impl PathnameEditor {
EntryReference::from_path_lossy_preserve_root(&transformed)
}

/// Edit a symlink target path.
///
/// Only user-specified substitutions (`-s`) are applied.
/// Leading `/` and `--strip-components` are NOT applied, matching bsdtar.
pub(crate) fn edit_symlink(&self, target: &Path) -> EntryReference {
self.transform_link_target_preserving_root(target)
}

/// Edit a Windows-junction target path.
///
/// Only user-specified substitutions (`-s`) are applied.
/// Leading `/` and `--strip-components` are NOT applied, matching bsdtar
/// symlink semantics.
///
/// Semantically identical to [`edit_symlink`](Self::edit_symlink) for the
/// moment. A separate public method is introduced so that any future
/// divergence between symlink-target and junction-target handling can be
/// added without touching every call site.
pub(crate) fn edit_junction(&self, target: &Path) -> EntryReference {
self.transform_link_target_preserving_root(target)
}

/// Apply substitution transforms and strip leading components.
///
/// Returns `None` when the path becomes empty after transformation or stripping.
Expand Down Expand Up @@ -1156,4 +1178,33 @@ mod tests {
// NOT parent dir: "..." is a normal component
assert!(!has_parent_dir_component("a/.../b"));
}

#[test]
fn editor_junction_preserves_unix_absolute() {
    // A rooted Unix-style target must come back verbatim when the editor is
    // built with `None`/`false` for every option.
    let target = "/abs/target";
    let editor = PathnameEditor::new(None, None, false, false);
    let reference = editor.edit_junction(Path::new(target));
    assert_eq!(reference.as_str(), target);
}

#[test]
fn editor_junction_preserves_windows_absolute() {
    // A drive-letter target with backslash separators must be returned
    // character-for-character.
    let target = "C:\\abs\\target";
    let editor = PathnameEditor::new(None, None, false, false);
    let reference = editor.edit_junction(Path::new(target));
    assert_eq!(reference.as_str(), target);
}

#[test]
fn editor_junction_preserves_relative_unchanged() {
    // A relative target is expected to pass through without modification.
    let target = "rel/target";
    let editor = PathnameEditor::new(None, None, false, false);
    let reference = editor.edit_junction(Path::new(target));
    assert_eq!(reference.as_str(), target);
}

#[test]
fn editor_junction_does_not_apply_strip_components() {
    // Even with a strip-components count of 1 configured, junction targets
    // keep every component, matching symlink semantics.
    let target = "/abs/target";
    let editor = PathnameEditor::new(Some(1), None, false, false);
    let reference = editor.edit_junction(Path::new(target));
    assert_eq!(reference.as_str(), target);
}
}
140 changes: 138 additions & 2 deletions cli/src/command/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,12 +375,12 @@
chroot: bool,
#[arg(
long,
help = "Allow extracting symbolic links and hard links that contain root or parent paths"
help = "Allow extracting symbolic links, hard links, or Windows junctions whose target points outside the extraction root"
)]
allow_unsafe_links: bool,
#[arg(
long,
help = "Do not allow extracting symbolic links and hard links that contain root or parent paths (default)"
help = "Do not allow extracting symbolic links, hard links, or Windows junctions whose target points outside the extraction root (default)"
)]
no_allow_unsafe_links: bool,
#[arg(
Expand Down Expand Up @@ -599,7 +599,7 @@
pub(crate) filter: PathFilter<'a>,
pub(crate) keep_options: KeepOptions,
pub(crate) pathname_editor: PathnameEditor,
pub(crate) ordered_path_locks: Arc<OrderedPathLocks>,

Check warning on line 602 in cli/src/command/extract.rs

View workflow job for this annotation

GitHub Actions / Test WebAssembly (nightly, wasm32-unknown-emscripten)

field `ordered_path_locks` is never read
pub(crate) unlink_first: bool,
pub(crate) time_filters: TimeFilters,
pub(crate) safe_writes: bool,
Expand All @@ -609,7 +609,7 @@
}

pub(crate) fn run_extract_archive_reader<'a, 'p, Provider>(
reader: impl IntoIterator<Item = impl Read> + Send,

Check warning on line 612 in cli/src/command/extract.rs

View workflow job for this annotation

GitHub Actions / Test WebAssembly (nightly, wasm32-unknown-emscripten)

field `ordered_path_locks` is never read
files: Vec<String>,
mut password_provider: Provider,
args: OutputOption<'a>,
Expand Down Expand Up @@ -1426,6 +1426,41 @@
DataKind::HardLink => {
let reader = item.reader(ReadOptions::with_password(password))?;
let original = io::read_to_string(reader)?;
let is_directory_link = matches!(
item.metadata().link_target_type(),
Some(LinkTargetType::Directory)
);

if is_directory_link {
// Encoded junction: apply user transforms but preserve absolute
// paths and do NOT sanitize (no edit_hardlink).
let transformed = pathname_editor.edit_junction(Path::new(original.as_str()));
let target_str = transformed.as_str();

if !allow_unsafe_links {
log::warn!(
"Skipped extracting a Windows junction. If you need to extract it, use `--allow-unsafe-links`."
);
return Ok(());
}
if *safe_writes || remove_existing {
utils::io::ignore_not_found(utils::fs::remove_path(&path))?;
}
create_junction_or_fallback(&path, target_str)?;

// SAFETY (Invariant I2): the default `restore_metadata()` call
// at the end of this function would apply chmod/chown/ACL/
// xattr/fflags to the junction via follow-link syscalls,
// mutating the EXTERNAL directory the junction points at
// (outside the extraction root). For the MVP we bypass the
// full metadata restore and only apply no-follow timestamp
// restoration. See `restore_link_timestamps_no_follow` for
// the follow-up that would properly restore junction-owned
// metadata.
restore_link_timestamps_no_follow(&path, item.metadata(), keep_options)?;
return Ok(());
}

let Some((original, had_root)) = pathname_editor.edit_hardlink(original.as_ref())
else {
log::warn!(
Expand Down Expand Up @@ -1613,7 +1648,7 @@
MacMetadataStrategy::Always
) && item.mac_metadata().is_some();
#[cfg(not(target_os = "macos"))]
let skip_xattr_acl = false;

Check warning on line 1651 in cli/src/command/extract.rs

View workflow job for this annotation

GitHub Actions / Test WebAssembly (nightly, wasm32-wasip1)

unused variable: `skip_xattr_acl`

#[cfg(unix)]
if !skip_xattr_acl && matches!(keep_options.xattr_strategy, XattrStrategy::Always) {
Expand All @@ -1625,7 +1660,7 @@
path.display(),
e
);
}

Check warning on line 1663 in cli/src/command/extract.rs

View workflow job for this annotation

GitHub Actions / Test WebAssembly (nightly, wasm32-wasip1)

unused variable: `skip_xattr_acl`
Err(e) => return Err(e),
}
}
Expand Down Expand Up @@ -2005,3 +2040,104 @@
) -> io::Result<()> {
pna::fs::symlink(original, link)
}

/// Create the link for a HardLink+fLTP=Directory entry.
///
/// On Windows this builds a real junction. On non-Windows it falls back to a
/// symbolic link, per the MAY clause of the PNA spec
/// (`chunk_specifications/index.md:332-336`). Both absolute and relative
/// stored targets are accepted:
///
/// - On Windows an absolute target is used as-is. A relative target is joined
///   onto `link`'s parent and then made absolute with `std::path::absolute`,
///   which does not require the target to exist on disk. The absolute path is
///   canonicalized when possible; if canonicalization fails (e.g. the target
///   has not been extracted yet) the absolute, uncanonicalized path is used
///   instead. This keeps a relative `link` (including a bare file name, whose
///   parent is the empty path) from leaking a relative target into
///   `create_junction`. Only if `std::path::absolute` itself fails is the raw
///   join passed through.
/// - On non-Windows the raw stored string is passed to `symlink` verbatim so
///   the resulting symlink is identical to what the archive encoded.
///
/// # Errors
///
/// Returns the I/O error reported by the underlying junction or symlink
/// creation call.
fn create_junction_or_fallback(link: &Path, target: &str) -> io::Result<()> {
    #[cfg(windows)]
    {
        let raw = Path::new(target);
        let absolute = if raw.is_absolute() {
            raw.to_path_buf()
        } else {
            let base = link.parent().unwrap_or_else(|| Path::new("."));
            let joined = base.join(raw);
            // Resolve against the current directory without touching the
            // filesystem, so a not-yet-existing target still ends up absolute.
            let lexical = std::path::absolute(&joined).unwrap_or_else(|e| {
                log::debug!(
                    "Failed to make junction target {} absolute: {}; using raw join",
                    joined.display(),
                    e
                );
                joined
            });
            // NOTE(review): on Windows `canonicalize` yields an extended-length
            // (`\\?\`) path; confirm `create_junction` accepts that form.
            match std::fs::canonicalize(&lexical) {
                Ok(canon) => canon,
                Err(e) => {
                    log::debug!(
                        "Failed to canonicalize junction target {}: {}; using absolute path",
                        lexical.display(),
                        e
                    );
                    lexical
                }
            }
        };
        crate::utils::os::windows::fs::reparse::create_junction(link, &absolute)
    }
    #[cfg(not(windows))]
    {
        log::debug!(
            "Creating symbolic link instead of Windows junction: {} -> {}",
            link.display(),
            target
        );
        pna::fs::symlink(target, link)
    }
}

/// Restore timestamps, and nothing else, on a junction or fallback-symlink
/// entry, operating on the link itself rather than on what it points to.
///
/// # Why the full `restore_metadata()` path is bypassed
///
/// `DataKind::HardLink + fLTP=Directory` encodes a Windows junction (or a
/// fallback symlink on non-Windows). The default `restore_metadata()` relies
/// on `chmod`, `chown`, `set_facl(follow_links=true)`, `setxattr`,
/// `set_flags`, and macOS `copyfile`, all of which follow links. Running them
/// on the junction path would therefore mutate the **external** directory the
/// junction points at, which lies outside the extraction root. That is a
/// security hole (Invariant I2, spec §7).
///
/// For the MVP, junction entries only get their timestamps restored, via the
/// existing `restore_path_timestamps` helper. That helper already goes through
/// `filetime::set_symlink_file_times`, i.e. it opens the reparse point itself
/// with `FILE_FLAG_OPEN_REPARSE_POINT` on Windows and uses
/// `utimensat(AT_SYMLINK_NOFOLLOW)` / `lutimes` on Unix, so plain delegation
/// is enough and no dedicated `utils::fs` helper is needed.
///
/// # TODO: junction-aware no-follow metadata (deferred follow-up, spec §11)
///
/// A complete implementation should restore mode/owner/ACL/xattr/fflags on
/// the junction itself through no-follow APIs:
/// - Unix: `lchmod` (BSD), `lchown`, `lsetxattr`, `lremovexattr`.
/// - Linux: the kernel does not support a mode on symlinks; either skip
///   silently or gate behind `#[cfg(target_os = "linux")]` with a `warn!`.
/// - Windows: open the reparse point with `FILE_FLAG_OPEN_REPARSE_POINT` and
///   apply ACL/security info with `SetSecurityInfo` on that handle; mode is
///   expressed through the Windows security descriptor, not `chmod`.
/// - ACL restoration must pass `follow_links = false` into `restore_acls`.
///
/// Once that exists, this helper should give way to a junction-aware branch
/// of `restore_metadata` that upholds Invariant I2 while preserving all
/// attributes.
#[cfg(not(target_family = "wasm"))]
#[inline]
fn restore_link_timestamps_no_follow(
    path: &Path,
    metadata: &pna::Metadata,
    keep_options: &KeepOptions,
) -> io::Result<()> {
    restore_path_timestamps(path, metadata, keep_options)?;
    Ok(())
}

/// WebAssembly stand-in for the no-follow timestamp restore.
///
/// On `wasm` targets this is a no-op: all arguments are ignored and the call
/// always succeeds.
#[cfg(target_family = "wasm")]
#[inline]
fn restore_link_timestamps_no_follow(
    _path: &Path,
    _metadata: &pna::Metadata,
    _keep_options: &KeepOptions,
) -> io::Result<()> {
    io::Result::Ok(())
}
2 changes: 2 additions & 0 deletions cli/src/utils/os/windows/fs.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
pub(crate) mod junction;
pub(crate) mod owner;
pub(crate) mod reparse;

use super::security::{Sid, apply_security_info};
use crate::utils::str::encode_wide;
Expand Down
Loading
Loading