diff --git a/README.md b/README.md index 8a74329..9592203 100644 --- a/README.md +++ b/README.md @@ -230,6 +230,68 @@ let path = path.with_unix_encoding(); assert_eq!(path, "/path/to/file.txt"); ``` +### Converting to `std::path` + +There are times when you need to hand a `typed-path` back to `std`, for +example to pass it to `std::fs::File::open`. Because `std::path::Path` +silently means "Windows-style" on Windows and "Unix-style" on Unix, this +library is careful about *which* typed paths it lets you convert directly, +to prevent code that compiles on one host but produces garbage on the +other. + +The conversions are available on the types whose encoding is guaranteed +to match (or be resolved against) the host platform: + +- **[`Utf8PlatformPath`][Utf8PlatformPath] / [`Utf8PlatformPathBuf`][Utf8PlatformPathBuf]** — + infallible, since the encoding matches the host and the bytes are + valid UTF-8. + + ```rust + use std::path::PathBuf; + use typed_path::Utf8PlatformPathBuf; + + let platform_path_buf = Utf8PlatformPathBuf::from("some_file.txt"); + let std_path_buf: PathBuf = platform_path_buf.into_std_path_buf(); + assert_eq!(std_path_buf, PathBuf::from("some_file.txt")); + ``` + +- **[`PlatformPath`][PlatformPath] / [`PlatformPathBuf`][PlatformPathBuf]** — + bytes are reinterpreted via the host's `OsStr` on Unix-family targets + (lossless), or routed through `to_string_lossy` on Windows. + + ```rust + use typed_path::PlatformPath; + + let platform_path = PlatformPath::new(b"some_file.txt"); + let std_path_buf = platform_path.to_std_path_buf_lossy(); + assert_eq!(std_path_buf, std::path::PathBuf::from("some_file.txt")); + ``` + +- **[`TypedPath`][TypedPath] / [`TypedPathBuf`][TypedPathBuf] / + [`Utf8TypedPath`][Utf8TypedPath] / [`Utf8TypedPathBuf`][Utf8TypedPathBuf]** — + fallible: succeed only when the runtime variant matches the host platform + (and, for `TypedPath` / `TypedPathBuf`, only when the bytes are valid UTF-8). 
+ + ```rust + use typed_path::Utf8TypedPathBuf; + + let native_path_buf = if cfg!(windows) { + Utf8TypedPathBuf::from(r"C:\some\path") + } else { + Utf8TypedPathBuf::from("/some/path") + }; + assert!(native_path_buf.into_std_path_buf().is_ok()); + ``` + +If you're holding a [`Utf8WindowsPath`][Utf8WindowsPath] / +[`Utf8UnixPath`][Utf8UnixPath] (or their non-UTF-8 counterparts) and need +to reach `std::path`, the intentional path is to first move through +[`Utf8PlatformPath`][Utf8PlatformPath] (with +`with_platform_encoding`) or through [`Utf8TypedPath`][Utf8TypedPath]. +This is a deliberate two-step process: cross-encoding conversion isn't +generally safe — a `Utf8WindowsPath` on a Linux host has no meaningful +representation as a Linux `std::path::PathBuf`. + ### Normalization Alongside implementing the standard methods associated with [`Path`][StdPath] @@ -374,4 +436,8 @@ Apache License, Version 2.0, (LICENSE-APACHE or [Utf8TypedPathBuf]: https://docs.rs/typed-path/latest/typed_path/enum.Utf8TypedPathBuf.html [NativePathBuf]: https://docs.rs/typed-path/latest/typed_path/type.NativePathBuf.html [Utf8NativePathBuf]: https://docs.rs/typed-path/latest/typed_path/type.Utf8NativePathBuf.html +[PlatformPath]: https://docs.rs/typed-path/latest/typed_path/type.PlatformPath.html +[PlatformPathBuf]: https://docs.rs/typed-path/latest/typed_path/type.PlatformPathBuf.html +[Utf8PlatformPath]: https://docs.rs/typed-path/latest/typed_path/type.Utf8PlatformPath.html +[Utf8PlatformPathBuf]: https://docs.rs/typed-path/latest/typed_path/type.Utf8PlatformPathBuf.html [utils]: https://docs.rs/typed-path/latest/typed_path/utils/index.html diff --git a/src/platform.rs b/src/platform.rs index c16c890..307dae8 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -148,6 +148,47 @@ mod non_utf8 { self.with_encoding_checked() } } + + #[cfg(feature = "std")] + impl PlatformPath { + /// Converts a [`PlatformPath`] into a [`std::path::PathBuf`], performing a lossy + /// conversion only when 
the underlying bytes are not representable. + /// + /// On Unix-family hosts, this is a lossless byte-for-byte conversion (the bytes are + /// reinterpreted via [`std::os::unix::ffi::OsStrExt`]). On any other host it falls back to + /// [`Path::to_string_lossy`], which replaces invalid UTF-8 sequences with + /// `U+FFFD REPLACEMENT CHARACTER`. + /// + /// For a fallible conversion that preserves the bytes exactly on every platform, use + /// the [`TryFrom for std::path::PathBuf`] impl (which errors when the + /// bytes are not valid UTF-8). + /// + /// [`TryFrom for std::path::PathBuf`]: std::convert::TryFrom + /// + /// # Examples + /// + /// ``` + /// use typed_path::PlatformPath; + /// + /// let path = PlatformPath::new("some/path"); + /// let std_path_buf = path.to_std_path_buf_lossy(); + /// assert_eq!(std_path_buf, std::path::PathBuf::from("some/path")); + /// ``` + pub fn to_std_path_buf_lossy(&self) -> std::path::PathBuf { + #[cfg(unix)] + { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + std::path::PathBuf::from( + <OsStr as OsStrExt>::from_bytes(self.as_bytes()).to_owned(), + ) + } + #[cfg(not(unix))] + { + std::path::PathBuf::from(self.to_string_lossy().into_owned()) + } + } + } } mod utf8 { @@ -354,4 +395,68 @@ mod utf8 { StdPathBuf::from(utf8_platform_path_buf.into_string()) } } + + #[cfg(all(feature = "std", not(target_family = "wasm")))] + impl Utf8PlatformPath { + /// Borrows this [`Utf8PlatformPath`] as a [`std::path::Path`]. + /// + /// Because the underlying bytes are guaranteed to be valid UTF-8 *and* in the host + /// platform's encoding, this conversion is infallible and zero-copy. 
+ /// + /// # Examples + /// + /// ``` + /// use typed_path::Utf8PlatformPath; + /// use std::path::Path; + /// + /// let platform_path = Utf8PlatformPath::new("some_file.txt"); + /// let std_path = platform_path.as_std_path(); + /// assert_eq!(std_path, Path::new("some_file.txt")); + /// ``` + pub fn as_std_path(&self) -> &StdPath { + StdPath::new(self.as_str()) + } + + /// Copies this [`Utf8PlatformPath`] into a new, owned [`std::path::PathBuf`]. + /// + /// Because the underlying bytes are guaranteed to be valid UTF-8 *and* in the host + /// platform's encoding, this conversion is infallible. + /// + /// # Examples + /// + /// ``` + /// use typed_path::Utf8PlatformPath; + /// use std::path::PathBuf; + /// + /// let platform_path = Utf8PlatformPath::new("some_file.txt"); + /// let std_path_buf = platform_path.to_std_path_buf(); + /// assert_eq!(std_path_buf, PathBuf::from("some_file.txt")); + /// ``` + pub fn to_std_path_buf(&self) -> StdPathBuf { + StdPathBuf::from(self.as_str()) + } + } + + #[cfg(all(feature = "std", not(target_family = "wasm")))] + impl Utf8PlatformPathBuf { + /// Consumes this [`Utf8PlatformPathBuf`] and converts it into a + /// [`std::path::PathBuf`]. + /// + /// Because the underlying bytes are guaranteed to be valid UTF-8 *and* in the host + /// platform's encoding, this conversion is infallible. 
+ /// + /// # Examples + /// + /// ``` + /// use typed_path::Utf8PlatformPathBuf; + /// use std::path::PathBuf; + /// + /// let platform_path_buf = Utf8PlatformPathBuf::from("some_file.txt"); + /// let std_path_buf = platform_path_buf.into_std_path_buf(); + /// assert_eq!(std_path_buf, PathBuf::from("some_file.txt")); + /// ``` + pub fn into_std_path_buf(self) -> StdPathBuf { + StdPathBuf::from(self.into_string()) + } + } } diff --git a/src/typed/non_utf8/path.rs b/src/typed/non_utf8/path.rs index 09cba92..f2e6580 100644 --- a/src/typed/non_utf8/path.rs +++ b/src/typed/non_utf8/path.rs @@ -797,6 +797,57 @@ impl<'a> TypedPath<'a> { Self::Windows(p) => TypedPathBuf::Windows(p.with_windows_encoding_checked()?), }) } + + /// Converts this [`TypedPath`] into an owned [`std::path::PathBuf`], returning [`None`] + /// if the path's encoding does not match the host platform or if the bytes are not valid + /// UTF-8. + /// + /// Conversion is only attempted when the underlying variant matches the compilation target + /// (`TypedPath::Unix` on Unix-family hosts, `TypedPath::Windows` on Windows). A + /// `TypedPath::Windows` on a Unix host (or vice versa) does not have a meaningful + /// representation as a host `std::path::PathBuf`, and would silently produce a path that + /// fails at the filesystem layer; this method returns [`None`] for those cases. + /// + /// This method never takes a lossy or byte-preserving fallback: non-UTF-8 byte paths yield + /// [`None`] even on Unix-family hosts, where [`OsStrExt`] could carry the bytes losslessly. 
+ /// + /// [`OsStrExt`]: std::os::unix::ffi::OsStrExt + /// + /// # Examples + /// + /// ``` + /// use typed_path::TypedPath; + /// + /// // Succeeds when the path's encoding matches the host platform and bytes are valid UTF-8 + /// let native_path = if cfg!(windows) { + /// TypedPath::derive(br"C:\some\path") + /// } else { + /// TypedPath::derive(b"/some/path") + /// }; + /// assert!(native_path.to_std_path_buf().is_some()); + /// + /// // Returns None for the mismatched encoding + /// let foreign_path = if cfg!(windows) { + /// TypedPath::derive(b"/some/path") + /// } else { + /// TypedPath::derive(br"C:\some\path") + /// }; + /// assert_eq!(foreign_path.to_std_path_buf(), None); + /// ``` + #[cfg(all(feature = "std", not(target_family = "wasm")))] + pub fn to_std_path_buf(&self) -> Option<std::path::PathBuf> { + match self { + #[cfg(unix)] + Self::Unix(p) => std::str::from_utf8(p.as_bytes()) + .ok() + .map(std::path::PathBuf::from), + #[cfg(windows)] + Self::Windows(p) => std::str::from_utf8(p.as_bytes()) + .ok() + .map(std::path::PathBuf::from), + _ => None, + } + } } impl<'a> From<&'a [u8]> for TypedPath<'a> { diff --git a/src/typed/non_utf8/pathbuf.rs b/src/typed/non_utf8/pathbuf.rs index 9fad919..afea47a 100644 --- a/src/typed/non_utf8/pathbuf.rs +++ b/src/typed/non_utf8/pathbuf.rs @@ -1096,6 +1096,14 @@ impl TryFrom<TypedPathBuf> for WindowsPathBuf { impl TryFrom<TypedPathBuf> for PathBuf { type Error = TypedPathBuf; + /// Attempts to convert a [`TypedPathBuf`] into a [`std::path::PathBuf`], succeeding only + /// when the runtime variant matches the host platform's encoding *and* the bytes are + /// valid UTF-8. + /// + /// Cross-encoding conversion (e.g. a [`TypedPathBuf::Windows`] on a Unix host) is + /// intentionally rejected; the bytes would otherwise produce a host path that fails at + /// the filesystem layer. The original [`TypedPathBuf`] is returned on failure so the + /// caller can recover it. 
fn try_from(path: TypedPathBuf) -> Result<Self, Self::Error> { match path { #[cfg(unix)] @@ -1107,6 +1115,42 @@ impl TryFrom<TypedPathBuf> for PathBuf { } } +impl TypedPathBuf { + /// Consumes this [`TypedPathBuf`] and returns the underlying [`std::path::PathBuf`], or + /// the original [`TypedPathBuf`] if the path's encoding does not match the host platform + /// or if the bytes are not valid UTF-8. + /// + /// See [`TryFrom<TypedPathBuf> for std::path::PathBuf`] for the rationale behind the + /// host-match requirement. + /// + /// [`TryFrom<TypedPathBuf> for std::path::PathBuf`]: std::convert::TryFrom + /// + /// # Examples + /// + /// ``` + /// use typed_path::TypedPathBuf; + /// + /// let native_path_buf = if cfg!(windows) { + /// TypedPathBuf::from(br"C:\some\path".to_vec()) + /// } else { + /// TypedPathBuf::from(b"/some/path".to_vec()) + /// }; + /// assert!(native_path_buf.into_std_path_buf().is_ok()); + /// + /// // The mismatched encoding is returned untouched + /// let foreign_path_buf = if cfg!(windows) { + /// TypedPathBuf::from(b"/some/path".to_vec()) + /// } else { + /// TypedPathBuf::from(br"C:\some\path".to_vec()) + /// }; + /// assert!(foreign_path_buf.into_std_path_buf().is_err()); + /// ``` + #[cfg(all(feature = "std", not(target_family = "wasm")))] + pub fn into_std_path_buf(self) -> Result<PathBuf, TypedPathBuf> { + PathBuf::try_from(self) + } +} + impl PartialEq<TypedPath<'_>> for TypedPathBuf { fn eq(&self, path: &TypedPath<'_>) -> bool { path.eq(&self.to_path()) diff --git a/src/typed/utf8/path.rs b/src/typed/utf8/path.rs index f5afa5c..1a0574c 100644 --- a/src/typed/utf8/path.rs +++ b/src/typed/utf8/path.rs @@ -723,6 +723,47 @@ impl<'a> Utf8TypedPath<'a> { Self::Windows(p) => Utf8TypedPathBuf::Windows(p.with_windows_encoding_checked()?), }) } + + /// Converts this [`Utf8TypedPath`] into an owned [`std::path::PathBuf`], returning [`None`] + /// if the path's encoding does not match the host platform. 
+ /// + /// Conversion is only attempted when the underlying variant matches the compilation target + /// (`Utf8TypedPath::Unix` on Unix-family hosts, `Utf8TypedPath::Windows` on Windows). A + /// `Utf8TypedPath::Windows` on a Unix host (or vice versa) does not have a meaningful + /// representation as a host `std::path::PathBuf`, and would silently produce a path that + /// fails at the filesystem layer; this method returns [`None`] for those cases. + /// + /// # Examples + /// + /// ``` + /// use typed_path::Utf8TypedPath; + /// + /// // Succeeds when the path's encoding matches the host platform + /// let native_path = if cfg!(windows) { + /// Utf8TypedPath::derive(r"C:\some\path") + /// } else { + /// Utf8TypedPath::derive("/some/path") + /// }; + /// assert!(native_path.to_std_path_buf().is_some()); + /// + /// // Returns None for the mismatched encoding + /// let foreign_path = if cfg!(windows) { + /// Utf8TypedPath::derive("/some/path") + /// } else { + /// Utf8TypedPath::derive(r"C:\some\path") + /// }; + /// assert_eq!(foreign_path.to_std_path_buf(), None); + /// ``` + #[cfg(all(feature = "std", not(target_family = "wasm")))] + pub fn to_std_path_buf(&self) -> Option<std::path::PathBuf> { + match self { + #[cfg(unix)] + Self::Unix(p) => Some(std::path::PathBuf::from(p.as_str())), + #[cfg(windows)] + Self::Windows(p) => Some(std::path::PathBuf::from(p.as_str())), + _ => None, + } + } } impl fmt::Display for Utf8TypedPath<'_> { diff --git a/src/typed/utf8/pathbuf.rs b/src/typed/utf8/pathbuf.rs index 825402b..e9d8d9c 100644 --- a/src/typed/utf8/pathbuf.rs +++ b/src/typed/utf8/pathbuf.rs @@ -1045,6 +1045,79 @@ impl TryFrom<Utf8TypedPathBuf> for Utf8WindowsPathBuf { } } +#[cfg(all(feature = "std", not(target_family = "wasm")))] +impl TryFrom<Utf8TypedPathBuf> for std::path::PathBuf { + type Error = Utf8TypedPathBuf; + + /// Attempts to convert a [`Utf8TypedPathBuf`] into a [`std::path::PathBuf`], succeeding + /// only when the runtime variant matches the host platform's encoding. 
+ /// + /// Cross-encoding conversion (e.g. a [`Utf8TypedPathBuf::Windows`] on a Unix host) is + /// intentionally rejected; the bytes would otherwise produce a host path that fails at the + /// filesystem layer. The original [`Utf8TypedPathBuf`] is returned on mismatch so the + /// caller can recover it. + /// + /// # Examples + /// + /// ``` + /// use std::convert::TryFrom; + /// use std::path::PathBuf; + /// use typed_path::Utf8TypedPathBuf; + /// + /// let native_path_buf = if cfg!(windows) { + /// Utf8TypedPathBuf::from(r"C:\some\path") + /// } else { + /// Utf8TypedPathBuf::from("/some/path") + /// }; + /// assert!(PathBuf::try_from(native_path_buf).is_ok()); + /// + /// // The mismatched encoding is returned untouched + /// let foreign_path_buf = if cfg!(windows) { + /// Utf8TypedPathBuf::from("/some/path") + /// } else { + /// Utf8TypedPathBuf::from(r"C:\some\path") + /// }; + /// assert!(PathBuf::try_from(foreign_path_buf).is_err()); + /// ``` + fn try_from(path: Utf8TypedPathBuf) -> Result<Self, Self::Error> { + match path { + #[cfg(unix)] + Utf8TypedPathBuf::Unix(path) => Ok(std::path::PathBuf::from(path.into_string())), + #[cfg(windows)] + Utf8TypedPathBuf::Windows(path) => Ok(std::path::PathBuf::from(path.into_string())), + path => Err(path), + } + } +} + +impl Utf8TypedPathBuf { + /// Consumes this [`Utf8TypedPathBuf`] and returns the underlying [`std::path::PathBuf`], + /// or the original [`Utf8TypedPathBuf`] if the path's encoding does not match the host + /// platform. + /// + /// See [`TryFrom for std::path::PathBuf`] for the rationale behind the + /// host-match requirement. 
+ /// + /// [`TryFrom for std::path::PathBuf`]: std::convert::TryFrom + /// + /// # Examples + /// + /// ``` + /// use typed_path::Utf8TypedPathBuf; + /// + /// let native_path_buf = if cfg!(windows) { + /// Utf8TypedPathBuf::from(r"C:\some\path") + /// } else { + /// Utf8TypedPathBuf::from("/some/path") + /// }; + /// assert!(native_path_buf.into_std_path_buf().is_ok()); + /// ``` + #[cfg(all(feature = "std", not(target_family = "wasm")))] + pub fn into_std_path_buf(self) -> Result<std::path::PathBuf, Utf8TypedPathBuf> { + std::path::PathBuf::try_from(self) + } +} + impl PartialEq<Utf8TypedPath<'_>> for Utf8TypedPathBuf { fn eq(&self, path: &Utf8TypedPath<'_>) -> bool { path.eq(&self.to_path())