4 changes: 3 additions & 1 deletion components/core/src/clp_s/log_converter/LogConverter.cpp
@@ -30,7 +30,9 @@ constexpr std::string_view cTimestampSchema{
R"(((Jan(uary){0,1})|(Feb(ruary){0,1})|(Mar(ch){0,1})|(Apr(il){0,1})|(May)|(Jun(e){0,1})|)"
R"((Jul(y){0,1})|(Aug(ust){0,1})|(Sep(tember){0,1})|(Oct(ober){0,1})|(Nov(ember){0,1})|)"
R"((Dec(ember){0,1}))[ /\-]\d{2,4}))[ T:][ 0-9]{2}:[ 0-9]{2}:[ 0-9]{2}([,\.:]\d{1,9}){0,1})"
R"(([ ]{0,1}(UTC){0,1}([\+\-]\d{2}(:{0,1}\d{2}){0,1}){0,1}Z{0,1}){0,1}))"
R"(((( UTC){0,1}([\+\-]\d{2}(:{0,1}\d{2}){0,1}){0,1}Z{0,1})|)"
R"((( [\+\-]\d{2}(:{0,1}\d{2}){0,1}){0,1}Z{0,1})|(( Z){0,1})|)"
R"(((UTC){0,1}([\+\-]\d{2}(:{0,1}\d{2}){0,1}){0,1}Z{0,1})){0,1}))"
Comment on lines +33 to +35
Contributor

🧹 Nitpick | 🔵 Trivial

Add regression tests for the exact timezone edge cases fixed here.

Please add automated coverage for at least: no timezone, lone space, " Z", " UTC", " +05:30", and "UTC+05:30" so this parser/serializer boundary doesn’t regress again.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@components/core/src/clp_s/log_converter/LogConverter.cpp` around lines 33 -
35, add automated regression tests that exercise the LogConverter
parsing/serialization boundary for the exact timezone edge cases mentioned: no
timezone, lone space, " Z", " UTC", " +05:30", and "UTC+05:30". Create unit
tests that call the LogConverter::parseTimestamp (or the equivalent parsing
function) with inputs for each case and then call
LogConverter::serializeTimestamp (or the corresponding serializer) on the parsed
result, asserting round-trip equality (or normalized canonical form) for each
case so future changes to the regex in LogConverter.cpp are covered.
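The suffix cases the reviewer lists can also be smoke-tested outside the C++ build. A minimal sketch, assuming a hand-translated Python rendering of the timezone-suffix alternation; the real `cTimestampSchema` uses log-surgeon syntax, so this regex is an approximation for illustration, not the project code:

```python
import re

# Hypothetical Python approximation of the timezone-suffix alternation added
# in this diff; treat it as a translation, not the shipped schema.
TZ_SUFFIX = re.compile(
    r"(?:"
    r"(?:(?: UTC)?(?:[+\-]\d{2}(?::?\d{2})?)?Z?)"  # " UTC+05:30", " UTC"
    r"|(?:(?: [+\-]\d{2}(?::?\d{2})?)?Z?)"         # " +05:30", " +00Z"
    r"|(?: Z)?"                                    # " Z"
    r"|(?:(?:UTC)?(?:[+\-]\d{2}(?::?\d{2})?)?Z?)"  # "UTC+05:30", "+00Z"
    r")?$"
)

# Edge cases called out in the review, plus a couple from the fixture below.
for suffix in ["", " Z", " UTC", " +05:30", "UTC+05:30", "+00", " +00Z"]:
    assert TZ_SUFFIX.match(suffix) is not None, f"{suffix!r} should match"
```

Each string in the loop corresponds to one of the requested regression cases, so a parametrized pytest over the same list would cover the boundary directly.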

};

constexpr std::string_view cDelimiters{R"(delimiters: \t\r\n[(:)"};
37 changes: 37 additions & 0 deletions integration-tests/tests/fixtures/integration_test_logs.py
@@ -1,6 +1,7 @@
"""Session-scoped test log fixtures shared across integration tests."""

import logging
import pathlib
import subprocess

import pytest
@@ -45,6 +46,42 @@ def postgresql(
)


@pytest.fixture(scope="session")
def simple_unstructured(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
"""Provides a simple unstructured test log."""
name = "simple_unstructured"
integration_test_logs = IntegrationTestLogs(
name=name,
tarball_url=f"{name}.tar.gz",
integration_test_path_config=integration_test_path_config,
num_log_events=11,
)
remove_path(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)

with pathlib.Path.open(integration_test_logs.extraction_dir / f"{name}.log", "w") as f:
f.write(
"2015-03-23 05:48:30,122 TEST1\n"
"2015-03-23 05:48:30,122Z TEST2\n"
"2015-03-23 05:48:30,122 Z TEST3\n"
"2015-03-23 05:48:30,122+00 TEST4\n"
"2015-03-23 05:48:30,122+00Z TEST5\n"
"2015-03-23 05:48:30,122 +00 TEST6\n"
"2015-03-23 05:48:30,122 +00Z TEST7\n"
"2015-03-23 05:48:30,122UTC+00 TEST8\n"
"2015-03-23 05:48:30,122UTC+00Z TEST9\n"
"2015-03-23 05:48:30,122 UTC+00 TEST10\n"
"2015-03-23 05:48:30,122 UTC+00Z TEST11\n"
)

logger.info("Set up logs for dataset `%s`.", name)
request.config.cache.set(name, True)
return integration_test_logs


def _download_and_extract_gzip_dataset(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
81 changes: 81 additions & 0 deletions integration-tests/tests/test_log_converter.py
@@ -0,0 +1,81 @@
"""
Integration tests verifying that CLP core compression binaries perform lossless round-trip
compression and decompression.
"""
Comment on lines +1 to +4
Contributor

⚠️ Potential issue | 🟡 Minor

Module docstring overstates what this test verifies.

The test does not validate decompression; it validates log conversion, clp-s compression, and searchable event count.

Proposed fix
-Integration tests verifying that CLP core compression binaries perform lossless round-trip
-compression and decompression.
+Integration tests verifying log-converter output can be compressed by clp-s and searched
+with expected event counts.
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
"""
Integration tests verifying that CLP core compression binaries perform lossless round-trip
compression and decompression.
"""
"""
Integration tests verifying log-converter output can be compressed by clp-s and searched
with expected event counts.
"""
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@integration-tests/tests/test_log_converter.py` around lines 1 - 4, The module
docstring currently claims the tests verify lossless compression and
decompression which is inaccurate; update the module-level docstring in
test_log_converter.py to state that the integration tests verify CLP core log
conversion, clp-s compression, and searchable event counts (not decompression),
so the description matches the actual assertions and scope of the tests.


import pytest

from tests.utils.config import (
ClpCorePathConfig,
ConversionTestPathConfig,
IntegrationTestLogs,
IntegrationTestPathConfig,
)
from tests.utils.subprocess_utils import run_and_log_subprocess

pytestmark = pytest.mark.core

text_datasets = pytest.mark.parametrize(
"test_logs_fixture",
[
"simple_unstructured",
],
)


@pytest.mark.clp_s
@text_datasets
def test_log_converter_transform(
request: pytest.FixtureRequest,
clp_core_path_config: ClpCorePathConfig,
integration_test_path_config: IntegrationTestPathConfig,
test_logs_fixture: str,
) -> None:
"""
Validate that converted logs from the core binary `log-converter` can be ingested successfully
by `clp-s`.

:param request:
:param clp_core_path_config:
:param integration_test_path_config:
:param test_logs_fixture:
"""
integration_test_logs: IntegrationTestLogs = request.getfixturevalue(test_logs_fixture)
test_logs_name = integration_test_logs.name

test_paths = ConversionTestPathConfig(
test_name=f"clp-s-{test_logs_name}",
logs_source_dir=integration_test_logs.extraction_dir,
num_log_events=integration_test_logs.num_log_events,
integration_test_path_config=integration_test_path_config,
)
_convert_and_compress(clp_core_path_config, test_paths)

test_paths.clear_test_outputs()

Comment on lines +52 to +55
Contributor

🧹 Nitpick | 🔵 Trivial

Ensure cleanup always runs on failure.

If _convert_and_compress raises, post-test cleanup is skipped. Wrap execution in try/finally to avoid leaking artefacts across runs.

Proposed fix
-    _convert_and_compress(clp_core_path_config, test_paths)
-
-    test_paths.clear_test_outputs()
+    try:
+        _convert_and_compress(clp_core_path_config, test_paths)
+    finally:
+        test_paths.clear_test_outputs()
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@integration-tests/tests/test_log_converter.py` around lines 52 - 55, The test
currently calls _convert_and_compress(clp_core_path_config, test_paths) then
test_paths.clear_test_outputs(), but if _convert_and_compress raises the cleanup
is skipped; wrap the call to _convert_and_compress in a try/finally so
test_paths.clear_test_outputs() is invoked in the finally block regardless of
errors. Locate the invocation of _convert_and_compress and wrap it in a
try/finally block that calls test_paths.clear_test_outputs() in the finally
clause, so cleanup is guaranteed.
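The guarantee the reviewer wants is plain try/finally semantics. A stand-alone sketch with stand-in names (not the real test helpers) showing that cleanup runs even when the body raises:

```python
# Stand-ins for the real helpers; only the control flow matters here.
cleanup_ran = False

def clear_test_outputs() -> None:
    global cleanup_ran
    cleanup_ran = True

def run_conversion_test(should_fail: bool) -> None:
    try:
        if should_fail:
            raise RuntimeError("conversion failed")
    finally:
        # Runs whether or not the body raised.
        clear_test_outputs()

try:
    run_conversion_test(should_fail=True)
except RuntimeError:
    pass

assert cleanup_ran  # cleanup ran despite the failure
```

An alternative with the same effect would be a pytest yield fixture whose teardown calls `clear_test_outputs()`, which keeps the cleanup out of the test body entirely.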


def _convert_and_compress(
clp_core_path_config: ClpCorePathConfig,
test_paths: ConversionTestPathConfig,
) -> None:
test_paths.clear_test_outputs()
log_converter_bin_path = str(clp_core_path_config.log_converter_binary_path)
clp_s_bin_path = str(clp_core_path_config.clp_s_binary_path)
src_path = str(test_paths.logs_source_dir)
conversion_path = str(test_paths.conversion_dir)
compression_path = str(test_paths.compression_dir)
run_and_log_subprocess([log_converter_bin_path, src_path, "--output-dir", conversion_path])
run_and_log_subprocess(
[clp_s_bin_path, "c", compression_path, conversion_path, "--timestamp-key", "timestamp"]
)

if test_paths.num_log_events is None:
return

output = run_and_log_subprocess([clp_s_bin_path, "s", compression_path, "timestamp > 0"])
num_events = 0 if output.stdout is None else len(output.stdout.splitlines())
if num_events != test_paths.num_log_events:
pytest.fail(
f"Expected {test_paths.num_log_events} log events after conversion, "
f"but found {num_events}."
)
42 changes: 42 additions & 0 deletions integration-tests/tests/utils/config.py
@@ -61,6 +61,11 @@ def clp_s_binary_path(self) -> Path:
""":return: The absolute path to the core binary `clp-s`."""
return self.clp_core_bins_dir / "clp-s"

@property
def log_converter_binary_path(self) -> Path:
""":return: The absolute path to the core binary `log-converter`."""
return self.clp_core_bins_dir / "log-converter"


@dataclass(frozen=True)
class PackagePathConfig:
@@ -310,6 +315,8 @@ class IntegrationTestLogs:
tarball_path: Path = field(init=False, repr=True)
#:
extraction_dir: Path = field(init=False, repr=True)
#: Optional number of log events in the downloaded logs.
num_log_events: int | None = None
Comment on lines +318 to +319
Contributor

⚠️ Potential issue | 🟡 Minor

Validate num_log_events bounds at construction.

num_log_events currently accepts negative values, which can produce invalid expectations in event-count assertions downstream.

Proposed fix
 def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
     """Initialize and set tarball and extraction paths for integration test logs."""
+    if self.num_log_events is not None and self.num_log_events < 0:
+        err_msg = "`num_log_events` cannot be negative."
+        raise ValueError(err_msg)
+
     name = self.name.strip()
     if 0 == len(name):
         err_msg = "`name` cannot be empty."
         raise ValueError(err_msg)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@integration-tests/tests/utils/config.py` around lines 318 - 319, The field
num_log_events currently allows negative values; add a validation on
construction in the class that declares num_log_events to ensure it is either
None or a non-negative integer. If this is a dataclass, implement a
__post_init__ that raises ValueError when self.num_log_events is not None and
self.num_log_events < 0; if it’s a pydantic model add a `@validator` for
"num_log_events" performing the same check. Update any callers/tests expecting
errors accordingly.


def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
"""Initialize and set tarball and extraction paths for integration test logs."""
@@ -358,3 +365,38 @@ def clear_test_outputs(self) -> None:
"""Remove any existing output directories created by this compression test."""
remove_path(self.compression_dir)
remove_path(self.decompression_dir)


@dataclass(frozen=True)
class ConversionTestPathConfig:
"""Per-test path configuration for conversion workflow artifacts."""

#:
test_name: str
#: Directory containing the original (uncompressed) log files used by this test.
logs_source_dir: Path
integration_test_path_config: InitVar[IntegrationTestPathConfig]
#: Path to store converted kv-ir files generated by the test.
conversion_dir: Path = field(init=False, repr=True)
#: Path to store compressed archives generated by the test.
compression_dir: Path = field(init=False, repr=True)
#: Optional number of log events in the converted logs.
num_log_events: int | None = None

def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
"""Initialize and set required directory paths for conversion tests."""
test_name = self.test_name.strip()
if 0 == len(test_name):
err_msg = "`test_name` cannot be empty."
raise ValueError(err_msg)
test_root_dir = integration_test_path_config.test_root_dir
validate_dir_exists(test_root_dir)

object.__setattr__(self, "test_name", test_name)
object.__setattr__(self, "conversion_dir", test_root_dir / f"{test_name}-converted")
object.__setattr__(self, "compression_dir", test_root_dir / f"{test_name}-archives")

Comment on lines +386 to +398
Contributor

⚠️ Potential issue | 🟡 Minor

Add existence validation for logs_source_dir.

ConversionTestPathConfig stores logs_source_dir but does not validate it. Failing fast here gives clearer diagnostics than letting subprocess calls fail later.

Proposed fix
 def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) -> None:
     """Initialize and set required directory paths for conversion tests."""
     test_name = self.test_name.strip()
     if 0 == len(test_name):
         err_msg = "`test_name` cannot be empty."
         raise ValueError(err_msg)
+    validate_dir_exists(self.logs_source_dir)
     test_root_dir = integration_test_path_config.test_root_dir
     validate_dir_exists(test_root_dir)
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@integration-tests/tests/utils/config.py` around lines 386 - 398, the
__post_init__ of ConversionTestPathConfig does not validate the provided
logs_source_dir, which leads to later subprocess failures; update __post_init__
to call validate_dir_exists on self.logs_source_dir (as is already done for
test_root_dir) so the logs directory's existence is checked early.
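Failing fast on a missing source directory can be sketched with a helper shaped like the project's `validate_dir_exists`; the helper body here is assumed for illustration, not copied from the repo:

```python
import tempfile
from pathlib import Path

def validate_dir_exists(path: Path) -> None:
    # Assumed behaviour of the project's helper: raise with a clear message
    # instead of letting a later subprocess fail opaquely.
    if not path.is_dir():
        raise ValueError(f"Directory does not exist: {path}")

with tempfile.TemporaryDirectory() as tmp:
    validate_dir_exists(Path(tmp))  # exists: passes silently

try:
    validate_dir_exists(Path(tmp) / "missing")  # never created: fails fast
    raise AssertionError("expected ValueError")
except ValueError:
    pass
```

Calling such a check in `__post_init__` surfaces a bad `logs_source_dir` at fixture construction time, with a message naming the path, rather than deep inside `run_and_log_subprocess`.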

def clear_test_outputs(self) -> None:
"""Remove any existing output directories created by this conversion test."""
remove_path(self.conversion_dir)
remove_path(self.compression_dir)