diff --git a/psl/psl/tests/reformat/reformat.rs b/psl/psl/tests/reformat/reformat.rs
index 81f6565b620d..9e044f06832e 100644
--- a/psl/psl/tests/reformat/reformat.rs
+++ b/psl/psl/tests/reformat/reformat.rs
@@ -1231,3 +1231,91 @@ fn attribute_arguments_reformatting_is_idempotent() {
     expected.assert_eq(&reformatted);
     assert_eq!(reformatted, reformat(&reformatted)); // it's idempotent
 }
+
+// Regression: https://github.com/prisma/prisma/issues/8548
+//
+// `prisma format` used to emit LF for every line except the last, which
+// ended with CRLF whenever the original input used CRLF anywhere.
+// The reformatter must now preserve the input's line-ending style natively
+// (instead of post-processing the output) so the output is internally
+// consistent.
+mod line_endings {
+    fn reformat(input: &str) -> String {
+        psl::reformat(input, 2).unwrap_or_else(|| input.to_owned())
+    }
+
+    const SCHEMA_LINES: &[&str] = &[
+        "model User {",
+        " id Int @id",
+        " name String",
+        "}",
+        "",
+    ];
+
+    fn join(sep: &str) -> String {
+        SCHEMA_LINES.join(sep)
+    }
+
+    #[test]
+    fn lf_input_stays_lf() {
+        let input = join("\n");
+        let out = reformat(&input);
+        assert!(!out.contains('\r'), "LF input must not gain any CR: {out:?}");
+        assert!(out.ends_with('\n'));
+    }
+
+    #[test]
+    fn crlf_input_stays_crlf() {
+        let input = join("\r\n");
+        let out = reformat(&input);
+        assert!(out.ends_with("\r\n"), "CRLF input must end with CRLF: {out:?}");
+        // Every LF in the output must be preceded by a CR.
+        let bytes = out.as_bytes();
+        for (i, &b) in bytes.iter().enumerate() {
+            if b == b'\n' {
+                assert!(
+                    i > 0 && bytes[i - 1] == b'\r',
+                    "found a bare LF at byte {i} in CRLF output: {out:?}",
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn mixed_input_defaults_to_lf() {
+        // First line uses LF, later lines use CRLF: any bare LF in the input
+        // forces the reformatter to fall back to LF for the whole output.
+        let input = format!(
+            "{}\n{}\r\n{}\r\n{}\r\n",
+            SCHEMA_LINES[0], SCHEMA_LINES[1], SCHEMA_LINES[2], SCHEMA_LINES[3]
+        );
+        let out = reformat(&input);
+        assert!(!out.contains('\r'), "LF-first mixed input should normalize to LF: {out:?}");
+    }
+
+    #[test]
+    fn mixed_input_crlf_first_then_lf_defaults_to_lf() {
+        // First line uses CRLF, later line uses bare LF: the bare LF still
+        // forces the LF fallback so the contract is symmetric.
+        let input = format!(
+            "{}\r\n{}\n{}\r\n{}\r\n",
+            SCHEMA_LINES[0], SCHEMA_LINES[1], SCHEMA_LINES[2], SCHEMA_LINES[3]
+        );
+        let out = reformat(&input);
+        assert!(
+            !out.contains('\r'),
+            "CRLF-first mixed input should still normalize to LF: {out:?}"
+        );
+    }
+
+    #[test]
+    fn no_trailing_crlf_after_lf_body() {
+        // Specifically the bug from issue #8548: the body was LF but the file
+        // ended with CRLF. Make sure a pure-LF input does NOT terminate with
+        // CRLF.
+        let input = "model A {\n id Int @id\n}\n";
+        let out = reformat(input);
+        assert!(out.ends_with('\n') && !out.ends_with("\r\n"));
+        assert!(!out.contains('\r'));
+    }
+}
diff --git a/psl/schema-ast/src/ast/newline_type.rs b/psl/schema-ast/src/ast/newline_type.rs
index 061f77c24e14..20647fcd1efd 100644
--- a/psl/schema-ast/src/ast/newline_type.rs
+++ b/psl/schema-ast/src/ast/newline_type.rs
@@ -24,3 +24,35 @@ impl AsRef<str> for NewlineType {
         }
     }
 }
+
+impl NewlineType {
+    /// Detect the line-ending style used in the given input string.
+    ///
+    /// Scans the full input: if any bare `\n` (not preceded by `\r`) appears
+    /// the result is `Unix` (LF), even when other newlines in the same input
+    /// are CRLF. Only an input whose every newline is CRLF returns `Windows`.
+    /// An input with no newline at all returns the default (`Unix`).
+    ///
+    /// This mirrors the maintainer's guidance on prisma/prisma#8548: mixed-ending
+    /// inputs fall through to the LF default rather than guessing, regardless
+    /// of which style appears first.
+    pub fn detect(input: &str) -> NewlineType {
+        let bytes = input.as_bytes();
+        let mut saw_crlf = false;
+        for (i, &b) in bytes.iter().enumerate() {
+            if b == b'\n' {
+                if i > 0 && bytes[i - 1] == b'\r' {
+                    saw_crlf = true;
+                } else {
+                    // A bare LF anywhere in the input forces LF output.
+                    return NewlineType::Unix;
+                }
+            }
+        }
+        if saw_crlf {
+            NewlineType::Windows
+        } else {
+            NewlineType::Unix
+        }
+    }
+}
diff --git a/psl/schema-ast/src/lib.rs b/psl/schema-ast/src/lib.rs
index dfa6d76f02f3..dfe6850352ab 100644
--- a/psl/schema-ast/src/lib.rs
+++ b/psl/schema-ast/src/lib.rs
@@ -3,7 +3,11 @@
 #![deny(rust_2018_idioms, unsafe_code)]
 #![allow(clippy::derive_partial_eq_without_eq)]
 
-pub use self::{parser::parse_schema, reformat::reformat, source_file::SourceFile};
+pub use self::{
+    parser::parse_schema,
+    reformat::{reformat, reformat_with_line_ending},
+    source_file::SourceFile,
+};
 
 /// The AST data structure. It aims to faithfully represent the syntax of a Prisma Schema, with
 /// source span information.
diff --git a/psl/schema-ast/src/reformat.rs b/psl/schema-ast/src/reformat.rs
index 46c1ac9c7294..722192df78cb 100644
--- a/psl/schema-ast/src/reformat.rs
+++ b/psl/schema-ast/src/reformat.rs
@@ -1,4 +1,5 @@
 use crate::{
+    ast::NewlineType,
     parser::{PrismaDatamodelParser, Rule},
     renderer::{LineWriteable, Renderer, TableFormat},
 };
@@ -8,15 +9,29 @@ use std::iter::Peekable;
 type Pair<'a> = pest::iterators::Pair<'a, Rule>;
 
 /// Reformat a PSL string.
+///
+/// The output preserves the line-ending style detected from `input` (LF or
+/// CRLF). Mixed-ending inputs fall back to LF. See `NewlineType::detect`.
 pub fn reformat(input: &str, indent_width: usize) -> Option<String> {
+    reformat_with_line_ending(input, indent_width, NewlineType::detect(input))
+}
+
+/// Reformat a PSL string, emitting `line_ending` as the line separator for
+/// every line of output (including the trailing newline).
+pub fn reformat_with_line_ending(
+    input: &str,
+    indent_width: usize,
+    line_ending: NewlineType,
+) -> Option<String> {
     let mut ast = PrismaDatamodelParser::parse(Rule::schema, input).ok()?;
-    let mut renderer = Renderer::new(indent_width);
+    let mut renderer = Renderer::new(indent_width, line_ending);
     renderer.stream.reserve(input.len() / 2);
     reformat_top(&mut renderer, ast.next().unwrap());
 
     // all schemas must end with a newline
-    if !renderer.stream.ends_with('\n') {
-        renderer.stream.push('\n');
+    let ending = line_ending.as_ref();
+    if !renderer.stream.ends_with(ending) {
+        renderer.stream.push_str(ending);
     }
 
     // TODO: why do we need to use a `Some` here?
diff --git a/psl/schema-ast/src/renderer.rs b/psl/schema-ast/src/renderer.rs
index dc6bbc6cc9c0..847b8e2359f0 100644
--- a/psl/schema-ast/src/renderer.rs
+++ b/psl/schema-ast/src/renderer.rs
@@ -2,6 +2,8 @@ mod table;
 
 pub(crate) use table::TableFormat;
 
+use crate::ast::NewlineType;
+
 pub(crate) trait LineWriteable {
     fn write(&mut self, param: &str);
     fn end_line(&mut self);
@@ -11,14 +13,16 @@ pub(crate) struct Renderer {
     pub stream: String,
     indent: usize,
     indent_width: usize,
+    line_ending: NewlineType,
 }
 
 impl Renderer {
-    pub(crate) fn new(indent_width: usize) -> Renderer {
+    pub(crate) fn new(indent_width: usize, line_ending: NewlineType) -> Renderer {
         Renderer {
             stream: String::new(),
             indent: 0,
             indent_width,
+            line_ending,
         }
     }
 
@@ -46,7 +50,7 @@ impl LineWriteable for Renderer {
     }
 
     fn end_line(&mut self) {
-        self.stream.push('\n');
+        self.stream.push_str(self.line_ending.as_ref());
     }
 }
 