From 88eb1d7fdc9dd302a49011074e89c6fd5d5c93c8 Mon Sep 17 00:00:00 2001 From: Matt Farmer Date: Wed, 25 Feb 2026 22:06:08 -0500 Subject: [PATCH 1/6] test: add characterization tests for HTML parsing pipeline Adds comprehensive characterization tests for Html5Parser, Html5Writer, PCDataXmlParser, AltXML, and MarkdownParser to lock in current behavior before refactoring the scala-xml internal API dependencies. New test coverage includes: AutoInsertedBody unwrapping, void/non-void tag rendering, CDATA round-trips as Lift PCData nodes, HTML entity resolution, namespace prefix handling, script/style verbatim content, unicode, malformed input failure cases, and full parse+serialize round-trips for both parser paths. Co-Authored-By: Claude Sonnet 4.6 --- .../net/liftweb/util/Html5ParserSpec.scala | 89 ++++++++++- .../net/liftweb/util/HtmlRoundTripSpec.scala | 151 ++++++++++++++++++ .../net/liftweb/util/MarkdownParserSpec.scala | 84 ++++++++++ .../liftweb/util/PCDataXmlParserSpec.scala | 111 ++++++++++--- .../net/liftweb/util/XmlParserSpec.scala | 46 +++++- .../net/liftweb/util/Html5ParserSpec.scala | 88 +++++++++- .../net/liftweb/util/HtmlRoundTripSpec.scala | 151 ++++++++++++++++++ .../net/liftweb/util/MarkdownParserSpec.scala | 84 ++++++++++ .../liftweb/util/PCDataXmlParserSpec.scala | 111 ++++++++++--- .../net/liftweb/util/XmlParserSpec.scala | 46 +++++- 10 files changed, 912 insertions(+), 49 deletions(-) create mode 100644 core/util/src/test/scala-2.13/net/liftweb/util/HtmlRoundTripSpec.scala create mode 100644 core/util/src/test/scala-2.13/net/liftweb/util/MarkdownParserSpec.scala create mode 100644 core/util/src/test/scala-3/net/liftweb/util/HtmlRoundTripSpec.scala create mode 100644 core/util/src/test/scala-3/net/liftweb/util/MarkdownParserSpec.scala diff --git a/core/util/src/test/scala-2.13/net/liftweb/util/Html5ParserSpec.scala b/core/util/src/test/scala-2.13/net/liftweb/util/Html5ParserSpec.scala index a0a479f5f..54533654c 100644 --- a/core/util/src/test/scala-2.13/net/liftweb/util/Html5ParserSpec.scala +++ b/core/util/src/test/scala-2.13/net/liftweb/util/Html5ParserSpec.scala @@ -17,7 +17,7 @@ package net.liftweb package util -import scala.xml.Elem +import scala.xml.{ Comment, Elem } import org.specs2.mutable.Specification import org.specs2.execute.PendingUntilFixed @@ -32,7 +32,7 @@ import Helpers._ class Html5ParserSpec extends Specification with PendingUntilFixed with Html5Parser with Html5Writer { "Html5Parser Specification".title - "Htm5 Writer" should { + "Html5 Writer" should { "Write &" in { toString() must_== """""" } @@ -40,6 +40,39 @@ class Html5ParserSpec extends Specification with PendingUntilFixed with Html5Par "ignore attributes that are null" in { toString() must_== """""" } + + "render void tags without a closing tag" in { + toString(
) must_== "
" + } + + "render void tags with attributes" in { + toString(y) must_== """y""" + } + + "render non-void empty tags with a closing tag" in { + toString(
) must_== "
" + } + + "not escape content inside script tags" in { + toString() must_== "" + } + + "not escape content inside style tags" in { + toString() must_== "" + } + + "write PCData as CDATA section" in { + toString(
{PCData("x < y & z")}
) must_== "
" + } + + "write Comment nodes" in { + toString(
{Comment("a comment")}
) must_== "
" + } + + "preserve namespace prefix on elements" in { + toString(
) must_== + """
""" + } } "Html5 Parser" should { @@ -94,7 +127,57 @@ class Html5ParserSpec extends Specification with PendingUntilFixed with Html5Par e.label must_== "div" (parsed.openOrThrowException("Test") \ "@with").text must_== "dog" } + + "unwrap a single fragment element via AutoInsertedBody" in { + val result = parse("
hello
").openOrThrowException("Test") + result.label must_== "div" + result.text must_== "hello" + } + + "not unwrap a full html document" in { + val result = parse("T

X

") + .openOrThrowException("Test") + result.label must_== "html" + } + + "unwrap a single self-closing fragment element" in { + val result = parse("").openOrThrowException("Test") + result.label must_== "span" + } + + "resolve standard HTML entities to their character values" in { + val result = parse("

 

").openOrThrowException("Test") + result.text must_== "\u00A0" + } + + "preserve script tag content verbatim (in head)" in { + // nu.validator places bare ").openOrThrowException("Test") + result.label must_== "html" + (result \\ "script").text must_== "var x = 1 < 2 && true;" + } + + "preserve style tag content verbatim (in head)" in { + // nu.validator places bare ").openOrThrowException("Test") + result.label must_== "html" + (result \\ "style").text must_== "p > span { color: red; }" + } + + "preserve data-* attributes" in { + val result = parse("""
x
""").openOrThrowException("Test") + (result \ "@data-foo").text must_== "bar" + (result \ "@data-baz").text must_== "qux" + } + + "preserve Unicode content" in { + val result = parse("

café naïve 日本語

").openOrThrowException("Test") + result.text must_== "café naïve 日本語" + } + + "return a Full result for empty string input" in { + parse("") must beAnInstanceOf[Full[Elem]] + } } } - diff --git a/core/util/src/test/scala-2.13/net/liftweb/util/HtmlRoundTripSpec.scala b/core/util/src/test/scala-2.13/net/liftweb/util/HtmlRoundTripSpec.scala new file mode 100644 index 000000000..64b40791e --- /dev/null +++ b/core/util/src/test/scala-2.13/net/liftweb/util/HtmlRoundTripSpec.scala @@ -0,0 +1,151 @@ +/* + * Copyright 2026 Lift Committers and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package net.liftweb +package util + +import scala.xml.Elem + +import org.specs2.mutable.Specification + + +/** + * Round-trip tests for HTML parsing and serialization pipelines. + * These lock in the observable behavior of both parser paths so that + * implementation changes can be validated against them. + */ +class HtmlRoundTripSpec extends Specification with Html5Parser with Html5Writer { + "Html5 round-trip" should { + "preserve element label and text content" in { + val result = parse("
hello world
").openOrThrowException("Test") + val output = toString(result) + output must contain("
") + output must contain("hello world") + output must contain("
") + } + + "preserve attribute values" in { + val result = parse("""link""") + .openOrThrowException("Test") + val output = toString(result) + output must contain("http://example.com") + output must contain("nav") + output must contain("link") + } + + "preserve nested structure" in { + val input = "" + val result = parse(input).openOrThrowException("Test") + val output = toString(result) + output must contain("