From fb344a71fb10071df23f15765456df665fd55619 Mon Sep 17 00:00:00 2001 From: Johnson George Date: Sun, 31 May 2026 15:48:01 -0700 Subject: [PATCH 1/2] feat: add LISA MCP server with AI-native developer tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Model Context Protocol (MCP) server that gives AI assistants deep knowledge of LISA conventions for test authoring, log analysis, runbook management, debugging, and framework exploration. Package structure (lisa_mcp/): - server.py — FastMCP server with host header validation, CLI with stdio/SSE transport - tools/test_writer.py — 5 tools: write_test (with existing test detection), scaffold suite/case, guidelines, list requirements - tools/runbook.py — 3 tools: generate, validate, fix runbooks - tools/log_analysis.py — log download with Azure AD auth, blob prefix downloads, analyze/summarize logs, explain failures, diagnose bugs, file search/read/list - tools/knowledge.py — 6 tools: explain concepts/errors, API reference, find examples, list tools/features - tools/execution.py — 1 tool: lisa_run placeholder - tools/_repo.py — repo discovery, doc/context loading - docs_index.yaml — manifest mapping tools to .rst/.md docs - context/*.md — curated knowledge base lisa_write_test detects existing test suites and methods matching the query, displaying method names, line numbers, and descriptions so the caller LLM can add to an existing suite instead of creating duplicates. 
Also includes: - Dockerfile using local source with host header validation - pyproject.toml with lisa-mcp entry point and azure dependencies - tests (unit + integration) - README.md with setup, configuration, and architecture docs - .gitignore entries for MCP local files --- .gitignore | 8 +- mcp/Dockerfile | 26 + mcp/README.md | 334 +++++ mcp/lisa_mcp/__init__.py | 2 + mcp/lisa_mcp/__main__.py | 8 + mcp/lisa_mcp/context/__init__.py | 2 + mcp/lisa_mcp/context/concepts.md | 267 ++++ mcp/lisa_mcp/context/error_patterns.md | 168 +++ mcp/lisa_mcp/context/runbook_schema.md | 246 ++++ mcp/lisa_mcp/context/test_patterns.md | 201 +++ mcp/lisa_mcp/docs_index.yaml | 183 +++ mcp/lisa_mcp/server.py | 142 ++ mcp/lisa_mcp/tools/__init__.py | 2 + mcp/lisa_mcp/tools/_repo.py | 155 +++ mcp/lisa_mcp/tools/execution.py | 48 + mcp/lisa_mcp/tools/knowledge.py | 702 ++++++++++ mcp/lisa_mcp/tools/log_analysis.py | 1423 +++++++++++++++++++++ mcp/lisa_mcp/tools/runbook.py | 324 +++++ mcp/lisa_mcp/tools/test_writer.py | 823 ++++++++++++ mcp/pyproject.toml | 32 + mcp/run_tests.py | 139 ++ mcp/server.py | 13 + mcp/tests/__init__.py | 2 + mcp/tests/fixtures/sample_failing_run.log | 38 + mcp/tests/fixtures/sample_passing_run.log | 24 + mcp/tests/fixtures/sample_runbook.yml | 37 + mcp/tests/test_all_tools.py | 789 ++++++++++++ mcp/tests/test_authoring.py | 145 +++ mcp/tests/test_log_analysis.py | 93 ++ mcp/tests/test_mcp_integration.py | 189 +++ 30 files changed, 6564 insertions(+), 1 deletion(-) create mode 100644 mcp/Dockerfile create mode 100644 mcp/README.md create mode 100644 mcp/lisa_mcp/__init__.py create mode 100644 mcp/lisa_mcp/__main__.py create mode 100644 mcp/lisa_mcp/context/__init__.py create mode 100644 mcp/lisa_mcp/context/concepts.md create mode 100644 mcp/lisa_mcp/context/error_patterns.md create mode 100644 mcp/lisa_mcp/context/runbook_schema.md create mode 100644 mcp/lisa_mcp/context/test_patterns.md create mode 100644 mcp/lisa_mcp/docs_index.yaml create mode 100644 
mcp/lisa_mcp/server.py create mode 100644 mcp/lisa_mcp/tools/__init__.py create mode 100644 mcp/lisa_mcp/tools/_repo.py create mode 100644 mcp/lisa_mcp/tools/execution.py create mode 100644 mcp/lisa_mcp/tools/knowledge.py create mode 100644 mcp/lisa_mcp/tools/log_analysis.py create mode 100644 mcp/lisa_mcp/tools/runbook.py create mode 100644 mcp/lisa_mcp/tools/test_writer.py create mode 100644 mcp/pyproject.toml create mode 100644 mcp/run_tests.py create mode 100644 mcp/server.py create mode 100644 mcp/tests/__init__.py create mode 100644 mcp/tests/fixtures/sample_failing_run.log create mode 100644 mcp/tests/fixtures/sample_passing_run.log create mode 100644 mcp/tests/fixtures/sample_runbook.yml create mode 100644 mcp/tests/test_all_tools.py create mode 100644 mcp/tests/test_authoring.py create mode 100644 mcp/tests/test_log_analysis.py create mode 100644 mcp/tests/test_mcp_integration.py diff --git a/.gitignore b/.gitignore index b9fcac91ee..7af9439adf 100644 --- a/.gitignore +++ b/.gitignore @@ -36,4 +36,10 @@ dist # Local working file notes -selftests/test_runbook.yml \ No newline at end of file +selftests/test_runbook.yml + +# MCP local/temp files +mcp/tmp/ +mcp/.env +mcp/**/*.log +mcp/test-results/ \ No newline at end of file diff --git a/mcp/Dockerfile b/mcp/Dockerfile new file mode 100644 index 0000000000..d8180863cc --- /dev/null +++ b/mcp/Dockerfile @@ -0,0 +1,26 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install git (needed for repo clone) +RUN apt-get update && apt-get install -y --no-install-recommends git \ + && rm -rf /var/lib/apt/lists/* + +# Clone the LISA repo +ARG LISA_BRANCH=main +RUN git clone --depth 1 --branch ${LISA_BRANCH} \ + https://github.com/microsoft/lisa.git /app/lisa + +# Overlay local MCP source so unpushed changes are included in the image +COPY . 
/app/lisa/mcp + +# Install the MCP server package (with Azure blob download support) +RUN pip install --no-cache-dir "/app/lisa/mcp[azure]" + +# Point the MCP server at the cloned repo +ENV LISA_REPO_ROOT=/app/lisa + +EXPOSE 8080 + +ENTRYPOINT ["lisa-mcp"] +CMD ["--transport", "sse", "--port", "8080"] diff --git a/mcp/README.md b/mcp/README.md new file mode 100644 index 0000000000..97ebf45fec --- /dev/null +++ b/mcp/README.md @@ -0,0 +1,334 @@ +# LISA MCP Server + +An [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) server that provides AI-native developer tools for the [LISA](https://github.com/microsoft/lisa) test automation framework. + +## What It Does + +The LISA MCP server gives AI assistants (Claude, GitHub Copilot, etc.) deep knowledge of LISA's conventions, enabling them to: + +- **Write LISA tests** — scaffold test suites and cases with correct decorators, metadata, and structure +- **Generate runbooks** — produce valid YAML runbooks from natural language descriptions +- **Analyze logs** — parse LISA run logs, extract failures, and classify error types +- **Debug failures** — diagnose test failures with source correlation and root cause analysis +- **Explain concepts** — answer questions about LISA architecture, APIs, and patterns + +## Tools + +All tools follow the `lisa_{verb}_{noun}` naming convention to prevent collisions +when multiple MCP servers are connected simultaneously. 
+ +### Test Authoring (`test_writer.py`) +| Tool | Description | +|------|-------------| +| `lisa_get_test_writer_guidelines` | Return the full lisa_test_writer prompt — the authoritative reference for writing LISA tests | +| `lisa_write_test` | **Primary tool** — follows the mandatory Gather → Research → Design Plan workflow; returns structured metadata for agent-to-agent use | +| `lisa_scaffold_test_suite` | Generate a complete test suite skeleton (use after design plan is confirmed) | +| `lisa_scaffold_test_case` | Generate a single test case method (use after design plan is confirmed) | +| `lisa_list_test_requirements` | Show requirements for a test method | + +### Runbook (`runbook.py`) +| Tool | Description | +|------|-------------| +| `lisa_generate_runbook` | Produce a valid YAML runbook from natural language parameters | +| `lisa_validate_runbook` | Check a runbook for structural issues | +| `lisa_fix_runbook` | Validate a runbook YAML and return a corrected version with explanation | + +### Log Analysis (`log_analysis.py`) +| Tool | Description | +|------|-------------| +| `lisa_analyze_log` | Ingest a LISA run log, identify failures, and extract meaningful signal | +| `lisa_explain_failure` | Classify and explain a test failure — framework vs test vs infra | +| `lisa_summarize_run` | High-level pass/fail/skip summary with failure themes grouped by area | +| `lisa_start_log_investigation` | Bootstrap a full root-cause analysis — returns expert prompts, file listings, initial search hits, and next-step instructions. 
Accepts local paths or HTTPS URLs (SAS URLs supported) | +| `lisa_download_logs` | Download log files from a URL (SAS, bearer token, or public) to the server for investigation | +| `lisa_get_log_analysis_prompts` | Expert analysis strategies for host AI reasoning | +| `lisa_search_log_files` | Regex search across log files in a directory | +| `lisa_read_log_file` | Read a log file with line range | +| `lisa_list_log_files` | List files in a log directory | +| `lisa_diagnose_bug` | Given a test name and failure log, reason about root cause and suggest a fix with code | + +### Framework Knowledge (`knowledge.py`) +| Tool | Description | +|------|-------------| +| `lisa_explain_concept` | Answer framework questions: what is a Feature, how does environment matching work, etc. | +| `lisa_get_api_reference` | Look up a LISA class/function signature | +| `lisa_find_examples` | Search test suites for relevant examples | +| `lisa_list_tools` | List all LISA tools (command wrappers) | +| `lisa_list_features` | List all LISA features (platform capabilities) | +| `lisa_explain_error` | Look up LISA error types and resolution steps | + +### Execution (`execution.py`) +| Tool | Description | +|------|-------------| +| `lisa_run` | Run LISA tests locally via stdio transport (placeholder — requires local LISA install) | + +## Setup + +### Prerequisites +- Python 3.10+ + +### Install + +**From the git repository (no local clone needed):** + +```bash +pip install "lisa-mcp @ git+https://github.com/microsoft/lisa.git@main#subdirectory=mcp" +``` + +**From a local clone:** + +```bash +cd mcp +pip install -e . +``` + +### Run + +```bash +# Local mode — stdio transport (for Claude Desktop, VS Code, etc.) 
+lisa-mcp + +# Hosted mode — SSE/HTTP transport (for agent-to-agent pipelines, CI systems) +lisa-mcp --transport sse --port 8080 +``` + +## Configuration + +### Claude Desktop + +Add to your `claude_desktop_config.json` (macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`, Windows: `%APPDATA%\Claude\claude_desktop_config.json`): + +```json +{ + "mcpServers": { + "lisa": { + "command": "lisa-mcp" + } + } +} +``` + +### VS Code (GitHub Copilot) + +Add to your workspace `.vscode/mcp.json`: + +```json +{ + "servers": { + "lisa": { + "type": "stdio", + "command": "lisa-mcp" + } + } +} +``` + +Or use `uvx` for a no-install option: + +```json +{ + "servers": { + "lisa": { + "command": "uvx", + "args": ["--from", "./mcp", "lisa-mcp"] + } + } +} +``` + +### Environment Variables + +| Variable | Description | +|----------|-------------| +| `LISA_REPO_ROOT` | Override auto-detected LISA repo root path | + +### Remote Server (recommended for teams) + +Run the MCP server centrally so clients don't need the LISA repo locally. + +**With Docker:** + +```bash +# Build +cd mcp +docker build -t lisa-mcp . + +# Run +docker run -p 8080:8080 lisa-mcp + +# Build from a specific branch +docker build --build-arg LISA_BRANCH=<branch-name> -t lisa-mcp . +``` + +**Without Docker:** + +```bash +git clone https://github.com/microsoft/lisa.git +cd lisa/mcp +pip install -e . +lisa-mcp --transport sse --port 8080 +``` + +Then configure clients to connect via URL — no local install required: + +**VS Code (`mcp.json`):** + +```json +{ + "servers": { + "lisa": { + "type": "sse", + "url": "http://your-server:8080/sse" + } + } +} +``` + +**Claude Desktop (`claude_desktop_config.json`):** + +```json +{ + "mcpServers": { + "lisa": { + "url": "http://your-server:8080/sse" + } + } +} +``` + +### Local Testing with Docker (WSL) + +Test the MCP server locally before deploying to a remote host: + +```bash +# 1. 
Clone the repo +cd ~ +git clone --branch main https://github.com/microsoft/lisa.git +cd lisa/mcp + +# 2. Build the Docker image +docker build -t lisa-mcp . + +# 3. Run the container +docker run -p 8080:8080 lisa-mcp + +# 4. Verify (in a new terminal) +curl http://localhost:8080/sse +``` + +Then in your VS Code workspace, create `.vscode/mcp.json`: + +```json +{ + "servers": { + "lisa": { + "type": "sse", + "url": "http://localhost:8080/sse" + } + } +} +``` + +Reload VS Code and test via Copilot Chat — `localhost:8080` from Windows reaches the WSL container automatically. + +```bash +# Stop when done +docker stop $(docker ps -q --filter ancestor=lisa-mcp) +``` + +## Architecture + +``` +mcp/ +├── Dockerfile # Container image for remote SSE deployment +├── server.py # Convenience entrypoint (delegates to lisa_mcp) +├── pyproject.toml # Package config, entry point: lisa-mcp +├── lisa_mcp/ +│ ├── server.py # MCP server, tool registration, CLI (main entry point) +│ ├── docs_index.yaml # Manifest mapping tools → .rst/.md doc files +│ ├── context/ # Curated knowledge (supplements the .rst docs) +│ │ ├── concepts.md # Core concepts explained +│ │ ├── test_patterns.md # Canonical test writing patterns +│ │ ├── error_patterns.md # Known errors → root cause → fix +│ │ └── runbook_schema.md # Annotated runbook field reference +│ └── tools/ +│ ├── _repo.py # Repo root detection, doc/context loading +│ ├── test_writer.py # Test authoring tools (5 tools) +│ ├── runbook.py # Runbook generate/validate/fix (3 tools) +│ ├── log_analysis.py# Log parsing, failure analysis, diagnosis (10 tools) +│ ├── knowledge.py # Concept/API/example/error lookup (6 tools) +│ └── execution.py # Local test execution (1 tool) +└── tests/ # Self-tests for the MCP server +``` + +**Design principles:** +- **No LISA import required** — tools work against the repo file system and log text. Users don't need a LISA install to use the MCP server. 
+- **Context assembly, not AI calls** — tools provide structured LISA context to the host AI (Claude/Copilot). The MCP server itself doesn't call any LLM API. +- **Stateless** — each tool call is self-contained with no session state. +- **Test writer prompt integrated** — authoring tools follow the mandatory workflow from `.github/prompts/lisa_test_writer.prompt.md` (Gather → Research → Design Plan → Code). +- **Docs read at runtime** — .rst and .md files from the repo are loaded directly (no conversion needed). A single `docs_index.yaml` manifest maps each tool to its relevant doc files. + +## Documentation Integration + +The MCP server reads LISA's existing `.rst` documentation directly — no markdown conversion step required. LLMs read `.rst` perfectly well as plain text. + +### How it works + +A single manifest file, [docs_index.yaml](lisa_mcp/docs_index.yaml), maps each MCP tool to the relevant doc files: + +```yaml +tools: + lisa_write_test: + primary: .github/prompts/lisa_test_writer.prompt.md + supplementary: + - docs/write_test/write_case.rst + - docs/write_test/concepts.rst + - docs/write_test/guidelines.rst + + lisa_explain_concept: + primary: docs/write_test/concepts.rst + supplementary: + - docs/write_test/extension.rst + - docs/run_test/runbook.rst + +topics: + runbook: docs/run_test/runbook.rst + platform: docs/run_test/platform.rst + transformer: docs/run_test/transformers.rst +``` + +- **`tools` section** — maps MCP tool names to their primary + supplementary doc files. Loaded when the tool is called. +- **`topics` section** — maps topic keywords to doc files. Used by `lisa_explain_concept` for targeted doc lookup. + +### Adding new documentation + +When a new `.rst` doc is added to the repo, update `docs_index.yaml` to map it to the relevant tools. No Python code changes needed. + +## Test Authoring Workflow + +The `write_test` tool implements the mandatory three-stage workflow from the `lisa_test_writer` prompt: + +1. 
**Gather** — automatically searches `lisa/tools/`, `lisa/features/`, and existing test suites for relevant code +2. **Research** — extracts API signatures for discovered tools and features +3. **Design Plan** — produces an Arrange → Act → Assert plan with workspace references + +The user confirms the design plan before code is generated via `scaffold_test_suite` or `scaffold_test_case`. + +``` +User: "Write a test to verify SR-IOV VFs are created" + → lisa_write_test(description="SR-IOV VFs are created for each NIC", area="network", feature="Sriov") + → Returns: Design plan with found tools (Lspci), features (Sriov), similar suites + structured JSON metadata + → User confirms plan + → lisa_scaffold_test_suite(...) generates the code skeleton +``` + +## Contributing + +1. Add new tools in the appropriate `tools/` module +2. Register them in the `register_*_tools()` function +3. Update `context/` markdown files when LISA conventions change +4. Add tests in `tests/` + +## License + +MIT — same as LISA. diff --git a/mcp/lisa_mcp/__init__.py b/mcp/lisa_mcp/__init__.py new file mode 100644 index 0000000000..9a0454564d --- /dev/null +++ b/mcp/lisa_mcp/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. diff --git a/mcp/lisa_mcp/__main__.py b/mcp/lisa_mcp/__main__.py new file mode 100644 index 0000000000..d2cb5173be --- /dev/null +++ b/mcp/lisa_mcp/__main__.py @@ -0,0 +1,8 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Allow running the LISA MCP server as ``python -m lisa_mcp``.""" + +from lisa_mcp.server import main + +main() diff --git a/mcp/lisa_mcp/context/__init__.py b/mcp/lisa_mcp/context/__init__.py new file mode 100644 index 0000000000..9a0454564d --- /dev/null +++ b/mcp/lisa_mcp/context/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
diff --git a/mcp/lisa_mcp/context/concepts.md b/mcp/lisa_mcp/context/concepts.md new file mode 100644 index 0000000000..2e09f8d1b6 --- /dev/null +++ b/mcp/lisa_mcp/context/concepts.md @@ -0,0 +1,267 @@ +# LISA Concepts + +## Runbook + +A **runbook** is a YAML configuration file that drives LISA's entire test execution +pipeline. It defines what platform to use, which tests to run, how to configure +nodes, and where to report results. + +### Structure + +```yaml +name: my-test-run +concurrency: 2 # parallel environments + +extension: # paths to load test suites from + - "../../lisa/microsoft/testsuites" + +platform: # one or more platform configs + - type: azure + admin_username: "$(admin_username)" + admin_private_key_file: "$(admin_private_key_file)" + keep_environment: "no" # "no", "always", or "failed" + +environment: # optional pre-defined environments + environments: + - nodes: + - type: remote + address: "10.0.0.5" + port: 22 + +variable: # key-value pairs for parameterization + - name: subscription_id + value: "" + is_secret: true + +notifier: # output handlers + - type: console + - type: html + +testcase: # test selection criteria + - criteria: + area: provisioning + priority: [0, 1] +``` + +### Key Rules +- `platform` is a list — LISA can target multiple platforms in one run +- `testcase` is a list of filter criteria, combined with OR logic +- `extension` paths are relative to the runbook file location +- Variables from CLI (`lisa -v key:value`) override runbook values +- Runbooks can include other runbooks via `include:` + +--- + +## Environment + +An **environment** is a collection of nodes (VMs or machines) provisioned by a +platform and managed by LISA for test execution. + +### Lifecycle +1. LISA reads test requirements from `@TestCaseMetadata` +2. Requirements are matched against platform capabilities via search_space +3. Platform provisions matching nodes into an environment +4. Tests execute against the environment +5. 
Environment is cleaned up based on `keep_environment` setting + +### Key Settings +- `use_new_environment: True` — fresh environment per test case (costly) +- `keep_environment: "failed"` — preserve environment for debugging failed tests +- `environment_status: EnvironmentStatus.Deployed` — test expects a ready environment + +--- + +## Node + +A **Node** is a single machine (VM or physical) within an environment. + +### Types +- `Node` — base class +- `RemoteNode` — connected via SSH (most common in cloud testing) +- `LocalNode` — the machine running LISA itself + +### Key APIs +```python +# Execute a command +result = node.execute("uname -r", sudo=True) + +# Use a typed tool (preferred) +info = node.tools[Uname].get_linux_information() +node.tools[Echo].run("hello") + +# Access a feature +serial = node.features[SerialConsole] +serial.check_panic(saved_path=log_path) + +# OS information +node.os.name # "Ubuntu", "RHEL", etc. +node.os.information # detailed version info + +# State management +node.reboot() +node.mark_dirty() # flag for re-provisioning + +# Cross-OS path handling +path = node.get_pure_path("/etc/config") +``` + +--- + +## Feature + +A **Feature** represents a platform capability that a node may or may not have. +Features abstract hardware/platform differences behind a uniform API. 
+ +### Available Features +- `StartStop` — VM lifecycle (start, stop, restart) +- `Gpu` — GPU detection and management +- `Nvme` — NVMe storage +- `NetworkInterface` — NIC configuration and management +- `SerialConsole` — serial console output access +- `Resize` — VM size changes +- `Hibernation` — VM hibernation support +- `Disk` — disk type and configuration +- `AvailabilityZone` — zone placement +- `Virtualization` — nested virtualization + +### Usage Pattern +```python +# Declare requirement +@TestCaseMetadata( + requirement=simple_requirement(supported_features=[Gpu, Nvme]), +) + +# Use in test +gpu = node.features[Gpu] +nvme = node.features[Nvme] + +# Check support at runtime +if node.features.is_supported(SerialConsole): + serial = node.features[SerialConsole] +``` + +--- + +## Tool + +A **Tool** wraps a Linux/Windows command as a Python class with typed methods +and structured output parsing. + +### ~130 Tools Available +System: Echo, Cat, Grep, Find, Rm, Mv, Cp, Ls, Chmod, Chown +Storage: Mount, Umount, Mkfs, Lsblk, Blkid, Df, Fdisk, Parted +Network: Ip, Ethtool, Ping, Ssh, Curl, Wget, Iperf3 +Kernel: GrubConfig, KernelConfig, Dmesg, Sysctl, Reboot +Diagnostics: Lspci, Lscpu, Uname, Uptime, Free, Ps, Journalctl +Package Mgmt: Apt, Dpkg, Rpm, Yum, Make +Performance: Fio, Sar, PerfTool, StressNg + +### Usage +```python +# Get tool from node +echo = node.tools[Echo] +result = echo.run("hello world") + +# Shorthand +node.tools.echo("hello") + +# Tools handle cross-distro differences internally +node.tools[Mount].mount("/dev/sdb1", "/mnt/data") +``` + +--- + +## Platform + +A **Platform** provides the infrastructure abstraction for provisioning and +managing test environments. 
+ +### Supported Platforms +- `azure` — Azure VMs via ARM templates +- `hyperv` — Hyper-V VMs +- `libvirt` — KVM/QEMU via libvirt +- `baremetal` — physical machines via IPMI/Redfish +- `remote` — pre-existing machines (SSH only, no provisioning) +- `local` — local machine +- `aws` — AWS EC2 instances + +--- + +## Test Suite + +A **TestSuite** is a Python class that groups related test cases. + +### Rules +- One test class per file +- Class name in PascalCase, describes the feature area +- Inherits from `TestSuite` +- File location: `lisa/microsoft/testsuites/<area>/<suite_name>.py` +- Decorated with `@TestSuiteMetadata(area=..., category=..., description=...)` + +### Lifecycle Methods +- `before_case(log, **kwargs)` — runs before each test case +- `after_case(log, **kwargs)` — runs after each test case (guaranteed cleanup) + +--- + +## Test Case + +A **test case** is a method in a TestSuite decorated with `@TestCaseMetadata`. + +### Rules +- Method name starts with `verify_` or `test_` +- Must have `priority` (0=critical, 1=high, 2=normal, 3=stress) +- Must have `description` explaining what it validates +- Must have `requirement` via `simple_requirement()` +- Parameters: `self, node: Node, log: Logger` at minimum +- Use `assert_that()` from assertpy for assertions +- Follow AAA: Arrange → Act → Assert + +### Available Parameters +- `node: Node` — the test target +- `log: Logger` — test logger +- `environment: Environment` — full environment +- `log_path: Path` — path for saving artifacts +- `working_path: Path` — temp working directory +- `variables: Dict[str, Any]` — runbook variables + +--- + +## simple_requirement() + +Declares what environment capabilities a test needs. 
+ +```python +simple_requirement( + min_count=1, # minimum nodes + min_core_count=2, # min CPU cores per node + min_memory_mb=2048, # min RAM + min_nic_count=2, # min network interfaces + min_data_disk_count=1, # min data disks + min_gpu_count=1, # min GPUs + supported_os=[Posix], # required OS types + unsupported_os=[Windows], # excluded OS types + supported_features=[Gpu], # required features + unsupported_features=[], # excluded features + supported_platform_type=["azure"], + environment_status=EnvironmentStatus.Deployed, + disk=DiskPremiumSSDLRS(), # disk type + network_interface=Sriov(), # NIC type +) +``` + +--- + +## Priority / Tiers + +- **Priority 0 (T0)**: Critical smoke tests — basic boot, connectivity +- **Priority 1 (T1)**: High-priority functional — core features +- **Priority 2 (T2)**: Normal functional — standard validation +- **Priority 3 (T3)**: Stress, long-running, edge cases + +Filter in runbook: +```yaml +testcase: + - criteria: + priority: [0, 1] # run T0 and T1 only +``` diff --git a/mcp/lisa_mcp/context/error_patterns.md b/mcp/lisa_mcp/context/error_patterns.md new file mode 100644 index 0000000000..f085d86e1c --- /dev/null +++ b/mcp/lisa_mcp/context/error_patterns.md @@ -0,0 +1,168 @@ +# LISA Known Error Patterns + +Common errors encountered in LISA test runs, their root causes, and resolutions. + +--- + +### TcpConnectionException + +**Symptom:** `TcpConnectionException: failed to connect to <host>:<port>` + +**Root Cause:** The target node is not reachable via TCP. Common reasons: +- VM is still booting (provisioning agent hasn't started sshd yet) +- VM kernel panicked during boot +- Network Security Group (NSG) blocks port 22 +- VM was deallocated or failed provisioning +- Network configuration error (wrong subnet, missing public IP) + +**Resolution:** +1. Check VM status in the platform portal +2. Check serial console for boot errors or kernel panics +3. Verify NSG rules allow inbound SSH (port 22) +4. 
Increase `wait_resource_timeout` in runbook if VM is slow to boot +5. Check if the image requires cloud-init to complete before sshd starts + +--- + +### SkippedException + +**Symptom:** Test is marked SKIPPED instead of running + +**Root Cause:** Test preconditions not met. Not a failure — working as designed. +- Target OS doesn't match `supported_os` in `simple_requirement()` +- Required feature (GPU, NVMe, SR-IOV) not available on the VM size +- Required tool not installable on the target distro +- Kernel version too old for the tested functionality + +**Resolution:** +- Verify the test's `simple_requirement()` matches your target environment +- Choose a VM size that provides the required features +- Use an image/distro that supports the tested functionality + +--- + +### BadEnvironmentStateException + +**Symptom:** `BadEnvironmentStateException: environment is not in expected state` + +**Root Cause:** The environment lifecycle is inconsistent — usually because: +- A previous test modified the node and didn't call `node.mark_dirty()` +- The node was rebooted but didn't come back online +- Platform-level timeout during environment recovery + +**Resolution:** +1. Check if the previous test in the run modified kernel params, drivers, or network +2. Add `node.mark_dirty()` to tests that alter system state +3. Use `use_new_environment=True` for isolated test execution +4. Check platform logs for environment lifecycle errors + +--- + +### OverconstrainedAllocationRequest + +**Symptom:** Azure deployment fails with allocation constraint error + +**Root Cause:** No physical host in the target region matches all the requested +VM constraints (size, zone, disk type, accelerated networking, etc.) + +**Resolution:** +1. Try a different Azure region +2. Remove or relax constraints (availability zone, specific VM size) +3. Try a different VM size family with similar capabilities +4. 
Check Azure capacity status for the region + +--- + +### QuotaExceeded + +**Symptom:** Azure returns quota exceeded error during deployment + +**Root Cause:** Subscription hit resource limits — vCPU count, VM count, or +VM family-specific quotas. + +**Resolution:** +1. Clean up idle VMs and unused resources in the subscription +2. Request quota increase via Azure portal → Quotas +3. Use a different subscription +4. Reduce `concurrency` in the runbook to deploy fewer VMs simultaneously + +--- + +### Kernel Panic / BUG: soft lockup + +**Symptom:** VM becomes unresponsive; serial console shows panic or lockup + +**Root Cause:** Kernel-level crash. Common triggers: +- Driver incompatibility (especially with accelerated networking or GPU) +- Memory corruption +- Incompatible kernel parameters (e.g., swiotlb, iommu settings) +- Race condition in boot sequence + +**Resolution:** +1. Check serial console output for full panic trace +2. Identify the faulting module from the call trace +3. Check if the kernel version is known-good for this distro +4. If caused by custom kernel params, test without them first +5. Report to distro vendor with serial console output + +--- + +### AssertionError / assert_that failure + +**Symptom:** Test fails with assertion mismatch + +**Root Cause:** The system under test produced unexpected output. This is +usually the "real" test finding a real bug. + +**Resolution:** +1. Read the `.described_as()` message for business context +2. Compare expected vs actual values +3. SSH to the node manually and reproduce the command +4. Check if the behavior is distro-specific or version-specific +5. Verify test expectations match the documentation/spec + +--- + +### PassedException + +**Symptom:** Test shows as ATTEMPTED instead of PASSED + +**Root Cause:** The test encountered a non-critical error but still achieved +its primary objective. It's a "soft pass" with caveats. 
+ +**Resolution:** +- Review the warning message to understand what was unexpected +- Decide if the caveat is acceptable for your validation scenario +- Consider filing a bug if the warning indicates a real issue + +--- + +### SSH Authentication Failure + +**Symptom:** `paramiko.ssh_exception.AuthenticationException` + +**Root Cause:** SSH credentials don't work on the target node. +- Wrong username/password combination +- SSH key not accepted +- Password authentication disabled in sshd_config +- Cloud-init hasn't configured the user yet + +**Resolution:** +1. Verify `admin_username` and `admin_private_key_file` in runbook +2. Check if the image uses key-only authentication +3. Ensure `admin_private_key_file` points to a valid key +4. Wait longer for cloud-init to complete user setup + +--- + +### LisaException: tool not found + +**Symptom:** A LISA tool can't find its underlying command on the node + +**Root Cause:** The Linux command that the tool wraps isn't installed. + +**Resolution:** +- The tool should auto-install via the OS package manager +- If not, the distro may not have the package in its repos +- Check if the tool's `_install` method handles the target distro +- Consider raising `SkippedException` if the tool is optional for the test diff --git a/mcp/lisa_mcp/context/runbook_schema.md b/mcp/lisa_mcp/context/runbook_schema.md new file mode 100644 index 0000000000..9ab0494fa4 --- /dev/null +++ b/mcp/lisa_mcp/context/runbook_schema.md @@ -0,0 +1,246 @@ +# LISA Runbook Schema Reference + +Complete field reference for LISA runbook YAML files. 
+ +--- + +## Top-Level Fields + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `name` | string | `"not_named"` | Run name for identification | +| `concurrency` | int | `1` | Number of parallel test environments | +| `exit_with_failed_count` | bool | `true` | Exit code reflects failure count | +| `exit_on_first_failure` | bool | `false` | Stop on first test failure | +| `test_project` | string | `""` | Project identifier for reporting | +| `test_pass` | string | `""` | Test pass identifier | +| `tags` | list[string] | null | Global tags | +| `wait_resource_timeout` | float | `5` | Minutes to wait for resource allocation | + +--- + +## `include` + +Include other runbook files. Values from included files are merged. + +```yaml +include: + - path: "./base_config.yml" + strategy: overwrite # "overwrite" or "add" +``` + +--- + +## `extension` + +Paths to load test suites, custom platforms, notifiers, etc. + +```yaml +extension: + - "../../lisa/microsoft/testsuites" + - "./my_custom_tests" +``` + +Paths are relative to the runbook file location. + +--- + +## `platform` + +List of platform configurations. At least one is required. 
+ +```yaml +platform: + - type: azure # Platform type + admin_username: "$(admin_username)" # SSH username + admin_private_key_file: "$(admin_private_key_file)" # SSH private key path + keep_environment: "no" # "no", "always", "failed" + guest_enabled: false # Enable guest/nested VM testing + + # Azure-specific fields + azure: + subscription_id: "$(subscription_id)" + deploy_location: "westus2" + resource_group_name: "" # Custom RG name + + marketplace: # Image specification + publisher: "canonical" + offer: "0001-com-ubuntu-server-jammy" + sku: "22_04-lts-gen2" + version: "latest" + + requirement: + azure: + vm_size: "Standard_DS2_v2" +``` + +### Platform Types +- `azure` — Azure Resource Manager +- `hyperv` — Hyper-V on Windows +- `libvirt` — KVM/QEMU via libvirt +- `baremetal` — Physical machines via IPMI/Redfish +- `remote` — Pre-existing machines (no provisioning) +- `local` — Local machine +- `aws` — AWS EC2 +- `ready` — Pre-provisioned environment + +### `keep_environment` Values +- `"no"` — Always clean up after tests (default) +- `"always"` — Never clean up (for debugging, costs money) +- `"failed"` — Keep only if a test failed (good for CI) + +--- + +## `environment` + +Pre-defined environments with specific node configurations. + +```yaml +environment: + environments: + - name: "my-env" + nodes: + - type: local + + - name: "remote-env" + nodes: + - type: remote + address: "10.0.0.5" + port: 22 + username: "admin" + password: "$(password)" + + - nodes_requirement: + - node_count: 2 + core_count: + min: 4 + memory_mb: + min: 8192 +``` + +--- + +## `variable` + +Key-value pairs for parameterization. 
+ +```yaml +variable: + - name: admin_username + value: "azureuser" + + - name: admin_private_key_file + value: "" + + - name: custom_config + value: "" + is_case_visible: true # Available to test methods + + - name: vars_from_file + file: "./variables.yml" # Load from file +``` + +### Variable Substitution +Use `$(variable_name)` syntax in any string field: +```yaml +platform: + - type: azure + admin_username: "$(admin_username)" +``` + +### CLI Override +```bash +lisa -r runbook.yml -v "admin_username:myuser" -v "admin_private_key_file:~/.ssh/id_rsa" +``` + +--- + +## `testcase` + +List of test selection criteria. Tests matching ANY criteria block are included. + +```yaml +testcase: + - criteria: + area: provisioning # Match test area + priority: [0, 1] # Match priority range + tags: [smoke, basic] # Match any tag + + - criteria: + name: smoke_test # Match exact test name + + - criteria: + area: network + priority: 2 + times: 3 # Run matched tests 3 times + retry: 2 # Retry on failure + use_new_environment: true # Fresh env per case + ignore_failure: false # Count failures +``` + +### Criteria Fields +| Field | Type | Description | +|-------|------|-------------| +| `area` | string | TestSuiteMetadata area | +| `category` | string | TestSuiteMetadata category | +| `priority` | int or list | Priority level or range `[min, max]` | +| `tags` | list[string] | Match any tag | +| `name` | string | Test method name (exact or regex) | + +--- + +## `notifier` + +Output handlers for test results. + +```yaml +notifier: + - type: console # Real-time terminal output + - type: html # HTML report + - type: junit # JUnit XML for CI systems + path: "./results" +``` + +--- + +## `transformer` + +Pre-execution transformers for dynamic setup. + +```yaml +transformer: + - type: to_list + items: + - item1 + - item2 + name: my_list +``` + +--- + +## `combinator` + +Generate multiple variable combinations for parameterized runs. 
+ +```yaml +combinator: + type: grid + items: + - name: vm_size + values: ["Standard_DS2_v2", "Standard_DS3_v2"] + - name: image + values: ["ubuntu-22.04", "rhel-9"] +``` + +--- + +## `dev` + +Development settings (usually not in production runbooks). + +```yaml +dev: + enabled: true + mock_platform: true + log_level: DEBUG +``` diff --git a/mcp/lisa_mcp/context/test_patterns.md b/mcp/lisa_mcp/context/test_patterns.md new file mode 100644 index 0000000000..767464b8da --- /dev/null +++ b/mcp/lisa_mcp/context/test_patterns.md @@ -0,0 +1,201 @@ +# LISA Test Patterns + +Canonical patterns for writing LISA test suites and cases. Copy these as starting +points. + +## Basic Test Suite + +```python +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +from typing import Any + +from assertpy import assert_that + +from lisa import ( + Logger, + Node, + TestCaseMetadata, + TestSuite, + TestSuiteMetadata, + simple_requirement, +) +from lisa.operating_system import Posix + + +@TestSuiteMetadata( + area="", + category="functional", + description=""" + + """, + requirement=simple_requirement(supported_os=[Posix]), +) +class MyFeature(TestSuite): + @TestCaseMetadata( + description=""" + + """, + priority=2, + requirement=simple_requirement( + supported_os=[Posix], + ), + ) + def verify_my_feature(self, node: Node, log: Logger) -> None: + # Arrange + tool = node.tools[SomeTool] + + # Act + result = tool.some_method() + + # Assert + assert_that(result).described_as( + "Expected the tool to return a valid result" + ).is_not_none() + + def before_case(self, log: Logger, **kwargs: Any) -> None: + log.info("Setting up test case") + + def after_case(self, log: Logger, **kwargs: Any) -> None: + log.info("Cleaning up test case") +``` + +## Test with Feature Requirement + +```python +from lisa.features import Gpu, SerialConsole, StartStop + +@TestCaseMetadata( + description=""" + Verify GPU is detected and driver loads correctly. 
from lisa import SkippedException

@TestCaseMetadata(
    description="""
    Verify feature X on supported kernels only.
    """,
    priority=2,
    requirement=simple_requirement(supported_os=[Posix]),
)
def verify_feature_x(self, node: Node, log: Logger) -> None:
    """Skip-pattern example: bail out early when the kernel is too old."""
    uname_info = node.tools[Uname].get_linux_information()

    # Compare the parsed version, never the raw string. String comparison is
    # lexicographic, so "5.4" < "5.15" is False and a 5.4 kernel would slip
    # through the check.
    # NOTE(review): assumes `kernel_version` is the parsed (semver-style)
    # counterpart of `kernel_version_raw` and is comparable with a full
    # "major.minor.patch" string — confirm against lisa/tools/uname.py.
    if uname_info.kernel_version < "5.15.0":
        raise SkippedException(
            "Feature X requires kernel >= 5.15, "
            f"got {uname_info.kernel_version_raw}"
        )
    # ... test logic
#
#   primary        — the main doc for the tool, always loaded
#   supplementary  — additional docs loaded when extra context helps
#
# NOTE: each tool name must appear exactly once under `tools:`. YAML mapping
# keys must be unique; PyYAML does not report duplicates and silently keeps
# only the last definition, which hides the earlier one.

tools:

  # ── test_writer.py ─────────────────────────────────────────────
  lisa_write_test:
    primary: .github/prompts/lisa_test_writer.prompt.md
    supplementary:
      - docs/write_test/write_case.rst
      - docs/write_test/concepts.rst
      - docs/write_test/guidelines.rst

  lisa_get_test_writer_guidelines:
    primary: .github/prompts/lisa_test_writer.prompt.md
    supplementary:
      - docs/write_test/write_case.rst

  lisa_scaffold_test_suite:
    primary: docs/write_test/write_case.rst
    supplementary:
      - docs/write_test/concepts.rst
      - docs/write_test/guidelines.rst

  lisa_scaffold_test_case:
    primary: docs/write_test/write_case.rst
    supplementary:
      - docs/write_test/concepts.rst

  lisa_list_test_requirements:
    primary: docs/write_test/write_case.rst
    supplementary: []

  # ── runbook.py ─────────────────────────────────────────────────
  lisa_generate_runbook:
    primary: docs/run_test/runbook.rst
    supplementary:
      - docs/run_test/command_line.rst
      - docs/run_test/platform.rst

  lisa_validate_runbook:
    primary: docs/run_test/runbook.rst
    supplementary: []

  lisa_fix_runbook:
    primary: docs/run_test/runbook.rst
    supplementary:
      - docs/run_test/command_line.rst

  # ── log_analysis.py ────────────────────────────────────────────
  lisa_analyze_log:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary:
      - docs/run_test/microsoft_tests.rst

  lisa_explain_failure:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  lisa_summarize_run:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  lisa_get_log_analysis_prompts:
    primary: lisa/ai/README.md
    supplementary: []

  lisa_search_log_files:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  lisa_read_log_file:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  lisa_list_log_files:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  lisa_diagnose_bug:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary:
      - docs/write_test/write_case.rst

  # ── knowledge.py (framework knowledge) ─────────────────────────
  # These five entries were previously defined twice; the definitions kept
  # here are the later ones, i.e. the ones PyYAML was already loading.
  lisa_explain_concept:
    primary: docs/write_test/concepts.rst
    supplementary:
      - docs/write_test/write_case.rst
      - docs/write_test/extension.rst
      - docs/run_test/runbook.rst
      - docs/run_test/platform.rst

  lisa_get_api_reference:
    primary: docs/write_test/extension.rst
    supplementary:
      - docs/write_test/write_case.rst

  lisa_find_examples:
    primary: docs/write_test/write_case.rst
    supplementary: []

  lisa_list_tools:
    primary: docs/write_test/extension.rst
    supplementary: []

  lisa_list_features:
    primary: docs/write_test/extension.rst
    supplementary: []

  lisa_explain_error:
    primary: docs/run_test/troubleshoot_failures.rst
    supplementary: []

  # ── execution.py ───────────────────────────────────────────────
  lisa_run:
    primary: docs/run_test/run.rst
    supplementary:
      - docs/run_test/command_line.rst


# ── Topic index (used by explain_concept for targeted lookup) ────
#
# Maps topic keywords to the best doc file for that concept.
# explain_concept searches this when the curated concepts.md
# doesn't have a match.
+ +topics: + runbook: docs/run_test/runbook.rst + platform: docs/run_test/platform.rst + environment: docs/write_test/concepts.rst + node: docs/write_test/concepts.rst + feature: docs/write_test/extension.rst + tool: docs/write_test/extension.rst + extension: docs/write_test/extension.rst + test suite: docs/write_test/write_case.rst + test case: docs/write_test/write_case.rst + transformer: docs/run_test/transformers.rst + combinator: docs/write_test/concepts.rst + variable: docs/run_test/runbook.rst + notifier: docs/write_test/extension.rst + requirement: docs/write_test/concepts.rst + capability: docs/write_test/concepts.rst + search_space: docs/write_test/concepts.rst + priority: docs/run_test/microsoft_tests.rst + log agent: lisa/ai/README.md + log analysis: lisa/ai/README.md + ai analysis: lisa/ai/README.md + tier: docs/run_test/microsoft_tests.rst + install: docs/install.rst + authentication: docs/run_test/azure_auth.rst + azure: docs/run_test/quick_run.rst + troubleshoot: docs/run_test/troubleshoot_failures.rst + guidelines: docs/write_test/guidelines.rst + contributing: docs/contributing.rst diff --git a/mcp/lisa_mcp/server.py b/mcp/lisa_mcp/server.py new file mode 100644 index 0000000000..1427bc3173 --- /dev/null +++ b/mcp/lisa_mcp/server.py @@ -0,0 +1,142 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +"""LISA MCP Server — AI-native developer tools for the LISA test framework.""" + +import argparse +import logging + +from lisa_mcp.tools.execution import register_execution_tools +from lisa_mcp.tools.knowledge import register_knowledge_tools +from lisa_mcp.tools.log_analysis import register_log_analysis_tools +from lisa_mcp.tools.runbook import register_runbook_tools +from lisa_mcp.tools.test_writer import register_test_writer_tools +from mcp.server.fastmcp import FastMCP + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("lisa-mcp") + +mcp = FastMCP( + "lisa-mcp", + instructions=""" + You are the LISA MCP server — a developer productivity tool for the LISA + (Linux Integrated System Analyzer) test automation framework. + + LISA is a Python-based test framework for validating Linux distributions on + cloud platforms (Azure, Hyper-V, bare metal). It uses YAML runbooks for + configuration and Python test suites with metadata decorators. + + Available capabilities: + - **Test Authoring**: Write LISA tests following the lisa_test_writer prompt + workflow (Gather → Research → Design Plan → Code). Start with + lisa_write_test or lisa_get_test_writer_guidelines. + - **Log Analysis**: Parse and explain LISA run logs and failures. + Start with lisa_start_log_investigation to bootstrap a full + root-cause analysis — it returns expert prompts, file listings, + initial search hits, and next-step instructions. Then use + lisa_search_log_files / lisa_read_log_file / lisa_list_log_files + to dig deeper — you (the host AI) act as the reasoning engine. + - **Runbook**: Generate, validate, and fix LISA YAML runbooks. + - **Debugging**: Diagnose test failures with source correlation + - **Execution**: Run LISA tests locally (stdio mode only) + - **Framework Knowledge**: Explain LISA concepts, find examples, API reference + + All tools follow the lisa_{verb}_{noun} naming convention. 
def main() -> None:
    """Entry point for the LISA MCP server.

    Parses ``--transport``, ``--host`` and ``--port`` and starts the server in
    one of two transport modes:

    - stdio (default): for local use with Claude Desktop, VS Code Copilot
    - sse: for hosted deployment serving agent-to-agent pipelines over HTTP

    In SSE mode the ``Host`` header is validated against the comma-separated
    ``ALLOWED_HOSTS`` environment variable (default ``localhost,127.0.0.1``).
    """
    parser = argparse.ArgumentParser(
        prog="lisa-mcp",
        description="LISA MCP Server — AI-native tools for LISA test framework",
    )
    parser.add_argument(
        "--transport",
        choices=["stdio", "sse"],
        default="stdio",
        help="Transport mode: stdio (local, default) or sse (hosted HTTP)",
    )
    parser.add_argument(
        "--host",
        default="0.0.0.0",
        help="Host to bind for SSE transport (default: 0.0.0.0)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8080,
        help="Port for SSE/HTTP transport (default: 8080)",
    )
    args = parser.parse_args()

    log.info("Starting LISA MCP server (transport=%s)", args.transport)

    if args.transport != "sse":
        mcp.run()
        return

    # The HTTP stack is imported only on the SSE path.
    import os

    import uvicorn
    from starlette.applications import Starlette
    from starlette.middleware import Middleware
    from starlette.middleware.trustedhost import TrustedHostMiddleware
    from starlette.requests import Request
    from starlette.responses import Response
    from starlette.routing import Mount, Route

    from mcp.server.sse import SseServerTransport

    sse = SseServerTransport("/messages/")

    async def handle_sse(request: Request) -> Response:
        """Serve one SSE client for the lifetime of its connection."""
        async with sse.connect_sse(
            request.scope, request.receive, request._send
        ) as streams:
            await mcp._mcp_server.run(
                streams[0],
                streams[1],
                mcp._mcp_server.create_initialization_options(),
            )
        # A Starlette endpoint must return a response object. Returning None
        # makes Starlette fail with "'NoneType' object is not callable" once
        # the client disconnects.
        return Response()

    # Trusted hosts for Host header validation behind a reverse proxy.
    # Set ALLOWED_HOSTS="host1,host2" in your deployment environment.
    # Defaults to localhost only (for local development).
    default_hosts = "localhost,127.0.0.1"
    raw_hosts = os.environ.get("ALLOWED_HOSTS", default_hosts)
    # Tolerate "host1, host2" and stray commas: an entry such as " host2" or
    # "" never matches a real Host header.
    allowed_hosts = [host.strip() for host in raw_hosts.split(",") if host.strip()]
    if not allowed_hosts:
        # ALLOWED_HOSTS was set but empty; fall back to the safe default
        # rather than rejecting every request.
        allowed_hosts = default_hosts.split(",")

    app = Starlette(
        routes=[
            Route("/sse", endpoint=handle_sse),
            Mount("/messages/", app=sse.handle_post_message),
        ],
        middleware=[
            Middleware(
                TrustedHostMiddleware,
                allowed_hosts=allowed_hosts,
            ),
        ],
    )

    # NOTE(review): forwarded_allow_ips="*" trusts X-Forwarded-* headers from
    # any peer. That is only safe when the server is reachable exclusively
    # through a trusted reverse proxy — confirm for each deployment.
    uvicorn.run(
        app,
        host=args.host,
        port=args.port,
        log_level="info",
        forwarded_allow_ips="*",
        proxy_headers=True,
    )
def load_context_file(name: str) -> str:
    """Load a markdown file from the ``lisa_mcp/context/`` directory.

    Args:
        name: File name inside the context directory (e.g. ``"concepts.md"``).

    Returns:
        The file content, or a parenthesised "not found" placeholder message
        when the file does not exist.
    """
    path = _PACKAGE_DIR / "context" / name
    if path.exists():
        return path.read_text(encoding="utf-8")
    return f"(Context file '{name}' not found at {path})"


def load_test_writer_prompt() -> str:
    """Load ``lisa_test_writer.prompt.md`` from the repo's ``.github/prompts/``.

    Returns:
        The prompt text, or a parenthesised placeholder message when the repo
        root or the prompt file cannot be found.
    """
    repo_root = find_repo_root()
    if not repo_root:
        return "(Could not locate LISA repo root to load test writer prompt.)"

    prompt_path = repo_root / ".github" / "prompts" / "lisa_test_writer.prompt.md"
    if prompt_path.exists():
        return prompt_path.read_text(encoding="utf-8")
    return f"(Test writer prompt not found at {prompt_path})"


# ---------------------------------------------------------------------------
# Docs manifest helpers
# ---------------------------------------------------------------------------


def _load_manifest() -> dict[str, Any]:
    """Parse ``lisa_mcp/docs_index.yaml`` and cache the result.

    The manifest is read at most once per process; later calls return the
    cached mapping.

    Returns:
        The parsed manifest, or an empty dict when the file is missing, empty,
        or its top level is not a mapping.
    """
    global _manifest_cache
    if _manifest_cache is not None:
        return _manifest_cache

    manifest_path = _PACKAGE_DIR / "docs_index.yaml"
    if not manifest_path.exists():
        _manifest_cache = {}
        return _manifest_cache

    with open(manifest_path, encoding="utf-8") as f:
        loaded = yaml.safe_load(f)

    # Callers use `.get(...)` on the result, so anything other than a mapping
    # (for example a top-level list or scalar) is treated as "no manifest"
    # instead of crashing on the first lookup.
    _manifest_cache = loaded if isinstance(loaded, dict) else {}
    return _manifest_cache
def load_doc_for_topic(topic: str) -> str:
    """Return the doc mapped to *topic* in the ``topics`` section of the manifest.

    Lookup is case-insensitive and ignores surrounding whitespace. An exact
    key match wins; otherwise the first key that contains the topic, or is
    contained in it, is used.

    Args:
        topic: Topic keyword to look up (e.g. ``"runbook"``).

    Returns:
        The content of the mapped doc file, or ``""`` when the topic is blank
        or has no match, the repo root cannot be located, or the file is
        missing or unreadable.
    """
    topic_key = topic.lower().strip()
    if not topic_key:
        # An empty string is a substring of every key, so the fuzzy match
        # below would otherwise return the first topic's doc.
        return ""

    manifest = _load_manifest()
    topics = manifest.get("topics") or {}

    # Exact match first
    rel_path = topics.get(topic_key)

    # Fuzzy: check if topic is a substring of any key or vice-versa
    if not rel_path:
        rel_path = next(
            (
                path
                for key, path in topics.items()
                if topic_key in key or key in topic_key
            ),
            None,
        )

    if not rel_path:
        return ""

    repo_root = find_repo_root()
    if not repo_root:
        return ""

    full = repo_root / rel_path
    if not full.exists():
        return ""

    try:
        return full.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Best effort: an unreadable doc is reported as "no documentation".
        return ""
def register_execution_tools(mcp: FastMCP) -> None:
    """Register the execution tools on *mcp*.

    Currently this registers a single tool, ``lisa_run``, which is a
    placeholder and does not execute anything.
    """

    @mcp.tool()
    def lisa_run(
        runbook_path: str,
        variables: str = "",
    ) -> str:
        """Placeholder for running a LISA runbook locally — NOT yet implemented.

        This tool does not execute anything. It returns a notice that lists
        the prerequisites of the planned implementation, together with the
        equivalent ``lisa`` command line for the user to run manually.

        The planned implementation will shell out to the LISA container and
        will be available in stdio (local) mode only.

        Args:
            runbook_path: Path to the LISA runbook YAML file
            variables: Space-separated LISA variables in key:value format
                (e.g. "admin_username:azureuser subscription_id:xxx")

        Returns:
            A markdown message containing the manual ``lisa -r ...`` command.
        """
        # TODO: Implement local execution via LISA container.
        # This tool is intentionally a placeholder — lisa_run requires
        # LISA and Docker installed locally and Azure credentials configured.
        # See spec Section 6.3 for the full design.

        # Build the command from parts so that empty or whitespace-only
        # `variables` adds nothing, not even a trailing space.
        command_parts = [f"lisa -r {runbook_path}"]
        command_parts.extend(f"-v {item}" for item in variables.split())
        command = " ".join(command_parts)

        return (
            "**lisa_run is not yet implemented.**\n\n"
            "This tool will shell out to the LISA container on your local "
            "machine to execute the specified runbook. It requires:\n"
            "- LISA installed locally (or via Docker)\n"
            "- Azure credentials configured in `~/.lisa/mcp_config.yaml`\n"
            "- stdio transport mode (not available on remote SSE servers)\n\n"
            "For now, run LISA manually:\n"
            "```bash\n"
            f"{command}"
            "\n```"
        )
+ +"""Framework knowledge tools — concepts, API reference, examples, error lookup.""" + +from __future__ import annotations + +import re +from pathlib import Path + +from lisa_mcp.tools._repo import ( + find_repo_root, + load_context_file, + load_doc_for_topic, + load_docs_for_tool, +) +from mcp.server.fastmcp import FastMCP + + +def register_knowledge_tools(mcp: FastMCP) -> None: # noqa: C901 + @mcp.tool() + def lisa_explain_concept(concept: str) -> str: + """Explain a LISA framework concept in plain language with usage examples. + + Covers: runbook, environment, node, feature, tool, platform, tier/priority, + test suite, test case, simple_requirement, notifier, transformer, combinator, + extension, variable, search_space, and more. + + Uses the official LISA .rst documentation and curated knowledge base. + + Args: + concept: The LISA concept to explain (e.g. "runbook", "feature", + "simple_requirement", "environment matching") + """ + concept_lower = concept.lower().strip() + + # 1. Check curated context/concepts.md first (structured summaries) + concepts_md = load_context_file("concepts.md") + if "not found" not in concepts_md.lower(): + sections = concepts_md.split("\n## ") + for section in sections: + header = section.split("\n")[0].lower() + if concept_lower in header: + rst_docs = load_doc_for_topic(concept_lower) + result = f"## {section}" + if rst_docs: + result += ( + "\n\n---\n\n" + "**From official LISA documentation:**\n\n" + + rst_docs[:3000] + ) + return result + + # 2. Check built-in inline knowledge + builtin = _BUILTIN_CONCEPTS.get(concept_lower) + if not builtin: + for key, value in _BUILTIN_CONCEPTS.items(): + if concept_lower in key or key in concept_lower: + builtin = value + break + + # 3. 
Try the official .rst docs via topic index + rst_docs = load_doc_for_topic(concept_lower) + + if builtin and rst_docs: + return ( + builtin + "\n\n---\n\n" + "**From official LISA documentation:**\n\n" + rst_docs[:3000] + ) + if builtin: + return builtin + if rst_docs: + return ( + f"## {concept}\n\n" + "**From official LISA documentation:**\n\n" + rst_docs[:3000] + ) + + return ( + f"Concept `{concept}` not found in the knowledge base. " + "Try one of: runbook, environment, node, feature, tool, platform, " + "tier, test suite, test case, simple_requirement, notifier, " + "transformer, combinator, extension, variable, search_space." + ) + + @mcp.tool() + def lisa_get_api_reference(symbol: str) -> str: + """Look up a LISA class, decorator, function, or tool and return its + signature, docstring, and usage example. + + Args: + symbol: Python symbol name (e.g. "TestSuiteMetadata", + "simple_requirement", "Node", "RemoteNode", "Echo") + """ + repo_root = find_repo_root() + if not repo_root: + return "Could not locate LISA repository." 
+ + search_paths = [ + repo_root / "lisa" / "testsuite.py", + repo_root / "lisa" / "node.py", + repo_root / "lisa" / "schema.py", + repo_root / "lisa" / "feature.py", + repo_root / "lisa" / "environment.py", + repo_root / "lisa" / "platform_.py", + repo_root / "lisa" / "notifier.py", + repo_root / "lisa" / "runner.py", + repo_root / "lisa" / "messages.py", + repo_root / "lisa" / "__init__.py", + ] + + tools_dir = repo_root / "lisa" / "tools" + if tools_dir.exists(): + search_paths.extend(tools_dir.glob("*.py")) + + features_dir = repo_root / "lisa" / "features" + if features_dir.exists(): + search_paths.extend(features_dir.glob("*.py")) + + results = [] + for path in search_paths: + if not path.exists(): + continue + try: + content = path.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + + patterns = [ + rf"^class\s+{re.escape(symbol)}\b", + rf"^def\s+{re.escape(symbol)}\b", + rf"^\s+def\s+{re.escape(symbol)}\b", + ] + + for pat in patterns: + for m in re.finditer(pat, content, re.MULTILINE): + lines = content.split("\n") + match_line = content[: m.start()].count("\n") + + start = match_line + while start > 0 and ( + lines[start - 1].strip().startswith("@") + or lines[start - 1].strip().startswith("#") + or not lines[start - 1].strip() + ): + start -= 1 + + indent = len(lines[match_line]) - len(lines[match_line].lstrip()) + end = match_line + 1 + while end < len(lines): + line = lines[end] + if line.strip() and not line[0].isspace(): + break + if ( + line.strip() + and ( + line.startswith(" " * indent) + or line.startswith("\t" * (indent // 4 or 1)) + ) + and not line.startswith(" " * (indent + 1)) + and ( + line.strip().startswith("class ") + or line.strip().startswith("def ") + ) + and end > match_line + 1 + ): + break + end += 1 + if end - match_line > 60: + break + + rel_path = path.relative_to(repo_root) + snippet = "\n".join(lines[start:end]) + results.append( + f"**`{rel_path}` (line {start + 1})**\n\n" + 
f"```python\n{snippet}\n```" + ) + + if results: + break + + if results: + return f"## API Reference: `{symbol}`\n\n" + "\n\n---\n\n".join(results[:3]) + + return ( + f"Symbol `{symbol}` not found. Try the full class name (e.g. " + "`TestSuiteMetadata`, `RemoteNode`, `Echo`) or check spelling." + ) + + @mcp.tool() + def lisa_find_examples(query: str, max_results: int = 5) -> str: + """Search existing LISA test suites for examples matching a description. + Useful for finding patterns to follow when writing new tests. + + Args: + query: What you're looking for (e.g. "SRIOV test", "disk resize", + "network failover", "GPU validation") + max_results: Maximum number of matching files to return (1-10) + """ + repo_root = find_repo_root() + if not repo_root: + return "Could not locate LISA repository." + + max_results = min(max(1, max_results), 10) + + testsuites_dirs = [ + repo_root / "lisa" / "microsoft" / "testsuites", + repo_root / "lisa" / "examples" / "testsuites", + ] + + keywords = [ + w.lower() + for w in re.split(r"\W+", query) + if len(w) > 2 and w.lower() not in {"the", "and", "for", "test", "with"} + ] + + if not keywords: + return "Provide a more specific query with meaningful keywords." 
+ + scored_files: list[tuple[int, Path, str]] = [] + + for testsuites_dir in testsuites_dirs: + if not testsuites_dir.exists(): + continue + for py_file in testsuites_dir.rglob("*.py"): + if py_file.name == "__init__.py": + continue + try: + content = py_file.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + + content_lower = content.lower() + filename_lower = py_file.stem.lower() + + score = 0 + for kw in keywords: + if kw in filename_lower: + score += 10 + if kw in str(py_file.parent.name).lower(): + score += 8 + score += min(content_lower.count(kw), 5) + + if score > 0: + class_match = re.search(r"class\s+(\w+)\(TestSuite\)", content) + area_match = re.search(r'area\s*=\s*"([^"]+)"', content) + summary = "" + if class_match: + summary += f"Class: {class_match.group(1)}" + if area_match: + summary += f", Area: {area_match.group(1)}" + + methods = re.findall(r"def\s+((?:verify_|test_)\w+)\s*\(", content) + if methods: + summary += f"\nMethods: {', '.join(methods[:5])}" + + scored_files.append((score, py_file, summary)) + + scored_files.sort(key=lambda x: x[0], reverse=True) + top = scored_files[:max_results] + + if not top: + return ( + f"No test files matching `{query}` found. " + "Try broader keywords or check the test suite directory structure." + ) + + results = [f'## Examples matching: "{query}"\n'] + for _score, path, summary in top: + rel = path.relative_to(repo_root) + results.append(f"### `{rel}`\n{summary}\n") + + return "\n".join(results) + + @mcp.tool() + def lisa_list_tools() -> str: + """List all available LISA tools (command wrappers) that can be used + in test cases via `node.tools[ToolName]`. + + Returns the tool name and the underlying command it wraps. + """ + repo_root = find_repo_root() + if not repo_root: + return "Could not locate LISA repository." + + tools_dir = repo_root / "lisa" / "tools" + if not tools_dir.exists(): + return "LISA tools directory not found." 
@mcp.tool()
def lisa_list_features() -> str:
    """List all available LISA features (platform capabilities) that can
    be used in test cases via `node.features[FeatureName]`.

    Returns each feature class name and, when the class has a docstring,
    its purpose.
    """
    repo_root = find_repo_root()
    if not repo_root:
        return "Could not locate LISA repository."

    features_dir = repo_root / "lisa" / "features"
    if not features_dir.exists():
        return "LISA features directory not found."

    # A feature is any class whose base-class list mentions "Feature".
    class_pattern = re.compile(r"class\s+(\w+)\([^)]*Feature[^)]*\):")
    # Docstring immediately following the class header (no embedded quotes).
    doc_pattern = re.compile(r'\s*"""([^"]+)"""')

    features = []
    for py_file in sorted(features_dir.glob("*.py")):
        # Skips private modules; this also covers __init__.py.
        if py_file.name.startswith("_"):
            continue
        try:
            content = py_file.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue

        for class_match in class_pattern.finditer(content):
            name = class_match.group(1)
            doc_match = doc_pattern.match(content, class_match.end())
            # Collapse whitespace so a multi-line docstring stays on one
            # markdown bullet instead of breaking the list.
            doc = " ".join(doc_match.group(1).split()) if doc_match else ""
            features.append(f"- **{name}**" + (f" — {doc}" if doc else ""))

    if not features:
        return "No features found in lisa/features/."

    return (
        f"## LISA Features ({len(features)} available)\n\n"
        "Usage: `node.features[FeatureName]`\n"
        "Declare in test: "
        "`simple_requirement(supported_features=[FeatureName])`\n\n"
        + "\n".join(features)
    )
To run this " + "test, provision a node that matches its `simple_requirement()`." + ), + }, + "lisaexception": { + "what": "General LISA framework exception.", + "causes": [ + "Test logic error — an unexpected condition was encountered", + "Missing configuration or variable", + "Platform-specific operation failed", + ], + "fix": ( + "Read the exception message — LISA exceptions should include " + "what happened and how to investigate. If the message is " + "unhelpful, that's a bug in the error reporting." + ), + }, + "badenvironmentstateexception": { + "what": "The test environment is in an unexpected state.", + "causes": [ + "A previous test left the environment dirty", + "VM was rebooted but didn't come back online", + "Environment was already cleaned up before test ran", + ], + "fix": ( + "1. Check if the previous test called `node.mark_dirty()`\n" + "2. Try running the test with `use_new_environment: True`\n" + "3. Check platform logs for environment lifecycle issues" + ), + }, + "passedexception": { + "what": "Test passed with warnings — a soft pass.", + "causes": [ + "A non-critical error occurred but the test still achieved " + "its primary objective", + "A retry succeeded after initial failure", + ], + "fix": "Review the warning message. The test passed but something " + "unexpected happened that should be investigated.", + }, + "overconstrainedallocationrequest": { + "what": "Azure couldn't allocate a VM matching the requirements.", + "causes": [ + "Requested VM size not available in the target region", + "Region capacity exhaustion", + "Conflicting requirements (e.g., specific zone + specific size)", + ], + "fix": ( + "1. Try a different Azure region via `deploy_location`\n" + "2. Try a different VM size\n" + "3. Remove availability zone constraints\n" + "4. 
Check Azure capacity status for the region" + ), + }, + "quotaexceeded": { + "what": "Azure subscription quota exceeded.", + "causes": [ + "Too many VMs already deployed in the subscription", + "Regional core quota reached", + "VM family-specific quota limit", + ], + "fix": ( + "1. Clean up unused VMs and resources\n" + "2. Request a quota increase via Azure portal\n" + "3. Use a different subscription or region" + ), + }, + } + + for key, info in known_errors.items(): + if key in error_lower: + explanations.append( + f"### {key}\n\n" + f"**What:** {info['what']}\n\n" + f"**Common Causes:**\n" + + "\n".join(f"- {c}" for c in info["causes"]) + + f"\n\n**How to Fix:**\n{info['fix']}" + ) + + if error_patterns and "not found" not in error_patterns.lower(): + explanations.append( + f"### Additional Context from Error Pattern Database\n\n" + f"{_search_error_patterns(error_text, error_patterns)}" + ) + + troubleshoot_docs = load_docs_for_tool("explain_error") + if troubleshoot_docs: + explanations.append( + "### Official Troubleshooting Documentation\n\n" + + troubleshoot_docs[:2000] + ) + + if not explanations: + return ( + f"No specific documentation found for `{error_text}`. " + "Try providing the full exception class name or a longer " + "snippet of the error message." 
+ ) + + return "\n\n---\n\n".join(explanations) + + +# --------------------------------------------------------------------------- +# Built-in concept explanations +# --------------------------------------------------------------------------- + +_BUILTIN_CONCEPTS = { + "runbook": ( + "## Runbook\n\n" + "A **runbook** is a YAML configuration file that controls everything about " + "a LISA test execution — platform settings, test selection, variables, " + "notifiers, and environment definitions.\n\n" + "**Key fields:**\n" + "- `platform`: List of platform configs (azure, hyperv, local, remote)\n" + "- `testcase`: List of test selection criteria (area, priority, tags, name)\n" + "- `variable`: Variables passed to tests and platform config\n" + "- `extension`: Paths to load test suites and custom code from\n" + "- `notifier`: Output handlers (console, html, junit)\n" + "- `environment`: Pre-defined environments with specific nodes\n" + "- `concurrency`: Number of parallel test environments\n\n" + "**Usage:**\n```bash\nlisa -r runbook.yml -v key:value\n```\n\n" + "Runbooks can include other runbooks via `include:` for composition." + ), + "environment": ( + "## Environment\n\n" + "An **environment** is a set of nodes (VMs or physical machines) that LISA " + "provisions and manages for test execution.\n\n" + "- Each test case declares its requirements via `simple_requirement()`\n" + "- LISA matches test requirements against available environments\n" + "- Environments can be reused across tests or provisioned fresh per test\n" + "- `use_new_environment=True` forces a fresh environment\n" + "- `keep_environment` controls cleanup: `no`, `always`, or `failed`\n\n" + "**Environment matching** compares the test's `NodeSpace` requirements " + "(CPU, memory, features, OS) against what each platform can provide." 
# noqa: E501 + ), + "node": ( + "## Node\n\n" + "A **Node** represents a single machine (VM or physical) in a LISA environment.\n\n" # noqa: E501 + "**Types:**\n" + "- `Node` — base class, can be local or remote\n" + "- `RemoteNode` — connected via SSH, has connection_info\n" + "- `LocalNode` — the machine running LISA itself\n\n" + "**Key APIs:**\n" + "- `node.tools[ToolName]` — access a tool (e.g., `node.tools[Echo]`)\n" + "- `node.features[FeatureName]` — access a feature\n" + "- `node.execute()` — run a shell command\n" + "- `node.os` — operating system info (distro, version)\n" + "- `node.mark_dirty()` — flag node for re-provisioning\n" + "- `node.reboot()` — reboot the node\n" + "- `node.get_pure_path()` — cross-OS path handling" + ), + "feature": ( + "## Feature\n\n" + "A **Feature** represents a platform-specific capability that a node may " + "or may not support (GPU, NVMe, SR-IOV, serial console, etc.).\n\n" + "**Usage in tests:**\n" + "1. Declare requirement: `simple_requirement(supported_features=[Gpu])`\n" + "2. Access in test: `gpu = node.features[Gpu]`\n" + "3. Check support: `node.features.is_supported(Gpu)`\n\n" + "**Available features:** StartStop, Gpu, Nvme, NetworkInterface, " + "SerialConsole, Resize, Hibernation, Disk, AvailabilityZone, " + "Virtualization, and more in `lisa/features/`." + ), + "tool": ( + "## Tool\n\n" + "A **Tool** wraps a system command (echo, mount, grep, etc.) as a Python " + "class with typed methods.\n\n" + "**Usage:**\n```python\nresult = node.tools[Echo].run('hello')\n" + "info = node.tools[Uname].get_linux_information()\n" + "node.tools[Mount].mount('/dev/sdb1', '/mnt/data')\n```\n\n" + "~130 tools available in `lisa/tools/`. Prefer tools over raw " + "`node.execute()` for reliability and cross-distro compatibility." 
+ ), + "platform": ( + "## Platform\n\n" + "A **Platform** provides environment provisioning for a specific " + "infrastructure: Azure, Hyper-V, libvirt, bare metal, AWS, or local.\n\n" + "Configured in the runbook's `platform:` section with type-specific fields.\n" + "Each platform implements node creation, lifecycle, and capability reporting." + ), + "simple_requirement": ( + "## simple_requirement()\n\n" + "Defines what a test case needs from its environment.\n\n" + "```python\nsimple_requirement(\n" + " min_count=1, # min nodes\n" + " min_core_count=2, # min CPU cores per node\n" + " min_memory_mb=2048, # min RAM per node\n" + " min_nic_count=2, # min NICs\n" + " min_data_disk_count=1, # min data disks\n" + " min_gpu_count=1, # min GPUs\n" + " supported_os=[Posix], # required OS types\n" + " unsupported_os=[], # excluded OS types\n" + " supported_features=[Gpu], # required features\n" + " supported_platform_type=['azure'],\n" + " environment_status=EnvironmentStatus.Deployed,\n" + " disk=DiskPremiumSSDLRS(), # disk type requirement\n" + " network_interface=Sriov(),# NIC type requirement\n" + ")\n```\n\n" + "LISA's search_space module matches these against platform capabilities." 
+ ), + "tier": ( + "## Tiers / Priority Levels\n\n" + "LISA test cases have a `priority` field (0–3) that maps to test tiers:\n\n" + "- **Priority 0 (T0)**: Critical smoke tests — must pass for any image\n" + "- **Priority 1 (T1)**: High-priority functional tests\n" + "- **Priority 2 (T2)**: Normal functional tests (default)\n" + "- **Priority 3 (T3)**: Stress tests, long-running, niche scenarios\n\n" + "Filter in runbook: `testcase: [{criteria: {priority: [0, 1]}}]`" + ), + "priority": None, # alias — handled by tier + "test suite": ( + "## Test Suite\n\n" + "A **TestSuite** is a Python class decorated with `@TestSuiteMetadata` " + "containing one or more test case methods.\n\n" + "**Rules:**\n" + "- One test class per file\n" + "- Class inherits from `TestSuite`\n" + "- PascalCase class name describing the feature\n" + "- File at `lisa/microsoft/testsuites//.py`\n" + "- `before_case()` / `after_case()` for setup/cleanup\n" + "- `@TestSuiteMetadata` must have `area`, `category`, `description`" + ), + "test case": ( + "## Test Case\n\n" + "A **test case** is a method in a TestSuite class decorated with " + "`@TestCaseMetadata`.\n\n" + "**Rules:**\n" + "- Method name prefixed with `verify_` or `test_`\n" + "- `@TestCaseMetadata` with `description`, `priority`, `requirement`\n" + "- Parameters: `self, node: Node, log: Logger` (minimum)\n" + "- Also available: `environment`, `log_path`, `working_path`, `variables`\n" + "- Use `assert_that()` from assertpy, not bare `assert`\n" + "- Use `SkippedException` for unmet preconditions\n" + "- Follow AAA pattern: Arrange → Act → Assert" + ), + "notifier": ( + "## Notifier\n\n" + "A **Notifier** subscribes to LISA messages and processes results.\n\n" + "**Built-in notifiers:**\n" + "- `console` — real-time terminal output\n" + "- `html` — HTML report generation\n\n" + "Configured in runbook: `notifier: [{type: console}, {type: html}]`\n" + "Custom notifiers can subscribe to TestRunMessage, TestResultMessage, etc." 
+ ), + "transformer": ( + "## Transformer\n\n" + "A **Transformer** runs before test execution to modify variables, " + "download artifacts, or prepare the environment.\n\n" + "Used for dynamic setup that can't be expressed in static YAML." + ), + "combinator": ( + "## Combinator\n\n" + "A **Combinator** generates multiple variable sets from a matrix, " + "enabling parameterized test runs across different configurations.\n\n" + "Example: test across multiple VM sizes × multiple images." + ), + "extension": ( + "## Extension\n\n" + "The `extension:` runbook field lists paths where LISA should search " + "for test suites, custom platforms, notifiers, and transformers.\n\n" + '```yaml\nextension:\n - "../../lisa/microsoft/testsuites"\n - "./custom_tests"\n```' # noqa: E501 + ), + "variable": ( + "## Variable\n\n" + "**Variables** are key-value pairs passed to LISA via runbook `variable:` " + "section or CLI `-v key:value`.\n\n" + "- `is_secret: true` masks the value in logs\n" + "- `is_case_visible: true` makes it available to test methods via `variables` param\n" # noqa: E501 + "- Variables can reference files: `file: path/to/vars.yml`\n" + "- CLI variables override runbook values" + ), + "search_space": ( + "## Search Space\n\n" + "The **search_space** module handles requirement matching — comparing " + "what a test needs against what a platform can provide.\n\n" + "Supports ranges (`IntRange(min=2, max=8)`), sets, and complex " + "capability negotiation for CPU, memory, disk, network, and features." 
+ ), +} + +# Wire up alias +_BUILTIN_CONCEPTS["priority"] = _BUILTIN_CONCEPTS["tier"] + + +def _search_error_patterns(query: str, patterns_md: str) -> str: + """Search error patterns document for relevant entries.""" + query_lower = query.lower() + relevant = [] + + for section in patterns_md.split("\n### "): + if query_lower in section.lower(): + relevant.append(section.strip()[:300]) + + if relevant: + return "\n\n".join(relevant[:3]) + return "No matching patterns in error database." diff --git a/mcp/lisa_mcp/tools/log_analysis.py b/mcp/lisa_mcp/tools/log_analysis.py new file mode 100644 index 0000000000..5586eb1f13 --- /dev/null +++ b/mcp/lisa_mcp/tools/log_analysis.py @@ -0,0 +1,1423 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Log analysis tools — parse, explain, and summarize LISA run logs.""" + +from __future__ import annotations + +import os +import re +import shutil +import tarfile +import tempfile +import zipfile +from pathlib import Path +from typing import Optional +from urllib.parse import unquote, urlparse +from urllib.request import Request, urlopen + +from lisa_mcp.tools._repo import find_repo_root, load_context_file, load_docs_for_tool +from mcp.server.fastmcp import FastMCP + + +def _load_ai_prompts() -> str: + """Load the LISA AI log analyzer prompts from lisa/ai/prompts/default/. + + Returns the concatenated prompt text for log_search, code_search, + final_answer, and user workflow — the same strategies the multi-agent + log analyzer uses. 
@mcp.tool()
def lisa_analyze_log(
    log_content: Optional[str] = None,
    log_path: Optional[str] = None,
) -> str:
    """Parse a LISA run log to extract structured results — pass/fail/skip
    counts, failure details, and warnings.

    Provide either the log text directly or a file path.

    Args:
        log_content: Raw LISA log text (paste from terminal or file)
        log_path: Absolute path to a LISA log file on disk
    """
    text = _get_log_text(log_content, log_path)
    # A result starting with "Error:" is passed straight back to the caller.
    if text.startswith("Error:"):
        return text

    results = _extract_test_results(text)
    errors = _extract_errors(text)
    warnings = _extract_warnings(text)
    panics = _extract_kernel_panics(text)

    # Summary counts
    statuses = [r["status"] for r in results]
    passed = statuses.count("PASSED")
    failed = statuses.count("FAILED")
    skipped = statuses.count("SKIPPED")
    attempted = statuses.count("ATTEMPTED")
    total = len(results)

    sections = [
        f"**Run Summary:** {total} tests — "
        f"{passed} passed, {failed} failed, {skipped} skipped"
        + (f", {attempted} attempted" if attempted else "")
    ]

    # Failed tests
    if failed:
        fail_lines = [
            f"- **{r['name']}**: {r.get('message', '')[:200]}"
            for r in results
            if r["status"] == "FAILED"
        ]
        sections.append("**Failures:**\n" + "\n".join(fail_lines))

    # Kernel panics
    if panics:
        sections.append(
            "**Kernel Panics Detected:**\n"
            + "\n".join(f"- {p[:200]}" for p in panics[:5])
        )

    # Errors (non-test)
    if errors:
        sections.append(
            f"**Errors ({len(errors)}):**\n"
            + "\n".join(f"- {e[:200]}" for e in errors[:10])
        )

    # Warnings
    if warnings:
        sections.append(
            f"**Warnings ({len(warnings)}):**\n"
            + "\n".join(f"- {w[:200]}" for w in warnings[:5])
        )

    # The summary line is always present, so "sections is empty" can never
    # be true; key the hint on the absence of parsed test results instead.
    if not results:
        sections.append(
            "No structured test results found in the log. "
            "The log might not be a LISA run log, or the run "
            "may not have reached test execution."
        )

    return "\n\n".join(sections)
+ + Categories: + - **Framework error**: LISA infrastructure issue (SSH, provisioning) + - **Test logic error**: Assertion failure in test code + - **Infrastructure error**: VM/cloud platform issue + - **Kernel error**: Kernel panic, oops, or bug + + Args: + failure_text: The failure output — stack trace, error message, or + relevant log lines + """ + categories = [] + explanations = [] + + text_lower = failure_text.lower() + + # Kernel issues + if any( + k in text_lower + for k in [ + "kernel panic", + "kernel bug", + "call trace", + "rip:", + "bug: soft lockup", + "oops", + "general protection fault", + ] + ): + categories.append("Kernel Error") + explanations.append( + "The failure contains kernel-level errors. Check:\n" + "- Serial console output for the full panic/oops\n" + "- `dmesg` on the node if still accessible\n" + "- Whether the kernel version is known-good for this distro\n" + "- If custom kernel parameters were applied" + ) + + # SSH / connectivity + if any( + k in text_lower + for k in [ + "tcpconnectionexception", + "ssh", + "connection refused", + "connection timed out", + "tcp port", + "paramiko", + "no route to host", + "network is unreachable", + ] + ): + categories.append("Framework Error — Connectivity") + explanations.append( + "SSH or TCP connection failure. Check:\n" + "- Is the VM still running? (check platform portal)\n" + "- Network security group / firewall rules\n" + "- Whether the VM booted successfully (serial console)\n" + "- If this is a new image, verify sshd is enabled" + ) + + # Provisioning + if any( + k in text_lower + for k in [ + "provisioningerror", + "deployment failed", + "allocation failed", + "overconstrainedallocationrequest", + "operationnotallowed", + "resourcenotfound", + "quotaexceeded", + ] + ): + categories.append("Infrastructure Error — Provisioning") + explanations.append( + "VM provisioning failed on the cloud platform. 
Check:\n" + "- VM size availability in the target region\n" + "- Subscription quota limits\n" + "- Image availability in the marketplace\n" + "- Whether the requested features (GPU, NVMe) are " + "supported by the chosen VM size" + ) + + # Assertion failures + if any( + k in text_lower + for k in [ + "assertionerror", + "assert_that", + "expected", + "to be equal to", + "to contain", + "is not true", + "is not false", + ] + ): + categories.append("Test Logic Error — Assertion") + explanations.append( + "A test assertion failed. This typically means the system " + "under test produced unexpected output. Check:\n" + "- The expected vs actual values in the assertion\n" + "- Whether the test assumptions match the target OS/distro\n" + "- If the test has a `.described_as()` hint explaining the intent" + ) + + # Timeout + if any( + k in text_lower + for k in [ + "timeout", + "timed out", + "time out", + "deadline exceeded", + ] + ): + categories.append("Timeout") + explanations.append( + "An operation timed out. Check:\n" + "- Whether the VM was under heavy load\n" + "- If the timeout value is appropriate for the operation\n" + "- Network latency between LISA host and target\n" + "- If the operation completed but detection failed" + ) + + # SkippedException (not a real failure) + if "skippedexception" in text_lower: + categories.append("Skipped (Not a Failure)") + explanations.append( + "The test was skipped due to unmet preconditions. This is " + "expected behavior — the test prerequisites (OS, feature, " + "hardware) were not met by the target environment." + ) + + # LisaException generic + if "lisaexception" in text_lower and not categories: + categories.append("Framework Error") + explanations.append( + "A LISA framework exception occurred. Read the exception " + "message carefully — it should indicate what happened and " + "how to investigate." 
@mcp.tool()
def lisa_summarize_run(
    log_content: Optional[str] = None,
    log_path: Optional[str] = None,
) -> str:
    """Produce a concise, high-level summary of a LISA test run suitable
    for sharing in a report or PR comment.

    Args:
        log_content: Raw LISA log text
        log_path: Absolute path to a LISA log file
    """
    text = _get_log_text(log_content, log_path)
    # A result starting with "Error:" is passed straight back to the caller.
    if text.startswith("Error:"):
        return text

    results = _extract_test_results(text)
    panics = _extract_kernel_panics(text)

    passed = [r for r in results if r["status"] == "PASSED"]
    failed = [r for r in results if r["status"] == "FAILED"]
    skipped = [r for r in results if r["status"] == "SKIPPED"]

    lines = [
        "## LISA Run Summary",
        "",
        "| Status | Count |",
        "|--------|-------|",
        f"| Passed | {len(passed)} |",
        f"| Failed | {len(failed)} |",
        f"| Skipped | {len(skipped)} |",
        f"| **Total** | **{len(results)}** |",
    ]

    if failed:
        lines.append("")
        lines.append("### Failures")
        for r in failed:
            msg = r.get("message", "no message")
            lines.append(f"- **{r['name']}** — {msg[:150]}")

    if panics:
        lines.append("")
        lines.append(f"### Kernel Panics ({len(panics)})")
        for p in panics[:3]:
            lines.append(f"- {p[:150]}")

    if not results:
        lines.append("")
        lines.append(
            "*No test results extracted. The log may not contain "
            "structured LISA output.*"
        )

    # Extract run duration if available. Longer unit names come first and
    # the unit must end on a word boundary; otherwise the bare "s"
    # alternative truncates "12 seconds" to "12 s" and also matches
    # unrelated text such as "Total: 3 skipped".
    duration_match = re.search(
        r"(?:total|elapsed|duration)[:\s]+(\d+\.?\d*)\s*(?:seconds?|minutes?|s)\b",
        text,
        re.IGNORECASE,
    )
    if duration_match:
        lines.append("")
        lines.append(f"**Duration:** {duration_match.group(0)}")

    return "\n".join(lines)
+ + Args: + url: HTTPS URL, Azure Blob URL, or Azure Portal storage URL + auth_token: Optional bearer token for non-Azure URLs + """ + # Auto-convert Azure Portal URLs to blob prefix downloads + portal_info = _parse_portal_storage_url(url) + if portal_info: + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + try: + result_dir, count = _download_azure_blob_prefix( + portal_info["account"], + portal_info["container"], + portal_info["prefix"], + download_dir, + ) + return ( + f"**Downloaded** {count} file(s) → `{result_dir}`\n\n" + f"Use this path with:\n" + f'- `lisa_start_log_investigation(log_path="{result_dir}")`\n' + f'- `lisa_search_log_files(path="{result_dir}", ...)`\n' + f'- `lisa_list_log_files(folder_path="{result_dir}")`' + ) + except Exception as exc: + shutil.rmtree(download_dir, ignore_errors=True) + return f"**Error:** Download failed — {type(exc).__name__}: {exc}" + + parsed = urlparse(url) + if parsed.scheme not in ("https",): + return "**Error:** Only HTTPS URLs are supported." + if not parsed.hostname: + return "**Error:** Could not parse hostname from URL." 
+ + is_azure_blob = parsed.hostname and parsed.hostname.endswith( + ".blob.core.windows.net" + ) + + # Azure blob prefix (virtual directory) — list + download all + if is_azure_blob and not auth_token: + path_parts = [p for p in parsed.path.strip("/").split("/") if p] + if len(path_parts) >= 2: + container = path_parts[0] + prefix = "/".join(path_parts[1:]) + account = parsed.hostname.split(".")[0] + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + try: + result_dir, count = _download_azure_blob_prefix( + account, + container, + prefix, + download_dir, + ) + return ( + f"**Downloaded** {count} file(s) → `{result_dir}`\n\n" + f"Use this path with:\n" + f"- `lisa_start_log_investigation" + f'(log_path="{result_dir}")`\n' + f"- `lisa_search_log_files" + f'(path="{result_dir}", ...)`\n' + f"- `lisa_list_log_files" + f'(folder_path="{result_dir}")`' + ) + except Exception as exc: + shutil.rmtree(download_dir, ignore_errors=True) + return ( + f"**Error:** Download failed — " f"{type(exc).__name__}: {exc}" + ) + + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + filename = os.path.basename(parsed.path) or "logs" + # Sanitize filename + filename = re.sub(r"[^\w.\-]", "_", filename) + if not filename: + filename = "logs" + download_path = os.path.join(download_dir, filename) + + try: + headers = {} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + req = Request(url, headers=headers) + with urlopen(req, timeout=120) as resp: # noqa: S310 + with open(download_path, "wb") as f: + shutil.copyfileobj(resp, f) + + size_mb = os.path.getsize(download_path) / (1024 * 1024) + result_dir = _extract_archive(download_path, download_dir) + + file_count = sum(1 for _, _, files in os.walk(result_dir) for _ in files) + + return ( + f"**Downloaded** {size_mb:.1f} MB → `{result_dir}`\n" + f"**Files:** {file_count}\n\n" + f"Use this path with:\n" + f'- `lisa_start_log_investigation(log_path="{result_dir}")`\n' + f'- `lisa_search_log_files(path="{result_dir}", 
@mcp.tool()
def lisa_start_log_investigation(
    log_path: Optional[str] = None,
    log_url: Optional[str] = None,
    auth_token: Optional[str] = None,
    error_message: str = "",
    code_path: Optional[str] = None,
) -> str:
    """Bootstrap a root-cause investigation on LISA logs — returns the
    full analysis context so **you** (the caller LLM) can drive the
    same multi-step workflow the LISA AI log analyzer uses.

    This is the recommended entry-point for log analysis. It gathers
    everything you need in a single call:

    1. Lists all files in the log directory
    2. Searches for the error message across all log files
    3. Searches for common failure patterns (error, warn, fail, panic)
    4. Locates serial console logs (critical for boot/kernel issues)
    5. Loads the expert analysis prompts (workflow, search strategy,
       code review strategy, output format)

    **Supports two input modes:**
    - ``log_path`` — local directory path (for stdio/local mode)
    - ``log_url`` — HTTPS URL to a log file or archive; the server
      downloads and extracts it automatically (for remote SSE mode).
      SAS URLs work — the token is embedded in the URL.

    After receiving the response, continue the investigation by calling:
    - ``lisa_read_log_file`` to read context around each match
    - ``lisa_search_log_files`` for additional targeted searches
    - ``lisa_explain_failure`` to classify specific failure blocks
    - ``lisa_diagnose_bug`` if you identify a failing test name

    Produce your final answer as JSON::

        {
            "summary": "3-4 sentences with verbatim error tokens and evidence",
            "problem": "≤30 words root cause",
            "problem_keywords": ["keyword1", "keyword2"],
            "code_recommendation": ""
        }

    Args:
        log_path: Absolute path to the LISA log directory (local mode)
        log_url: HTTPS URL to a log file or archive (remote mode)
        auth_token: Optional bearer token for URL authentication
        error_message: The error or failure text to investigate
        code_path: Path to LISA source code (auto-detected if omitted)
    """
    # Resolve log directory — a local path wins over a URL when both are set.
    if log_path:
        resolved_path = log_path
    elif log_url:
        download_result = lisa_download_logs(url=log_url, auth_token=auth_token)
        if download_result.startswith("**Error:"):
            return download_result
        # The download report renders the directory as "→ `<path>`".
        # Anchor on the arrow rather than a leading "/" so that
        # Windows-style temp directories are recognised as well.
        path_match = re.search(r"→ `([^`]+)`", download_result)
        if not path_match:
            return "**Error:** Could not determine downloaded log path."
        resolved_path = path_match.group(1)
    else:
        return (
            "**Error:** Provide either `log_path` (local directory) "
            "or `log_url` (HTTPS URL to log file/archive)."
        )

    path_obj = Path(resolved_path)
    if not path_obj.is_dir():
        return f"**Error:** Directory not found: {resolved_path}"

    sections: list[str] = []
    sections.append("# LISA Log Investigation Context\n")

    # --- 1. Expert analysis prompts ---
    prompts = _load_ai_prompts()
    if prompts:
        sections.append("## Expert Analysis Methodology\n")
        sections.append(prompts)
    else:
        sections.append(
            "*Expert prompts not available — follow the standard "
            "workflow: search → read context → hypothesize → verify.*\n"
        )

    # --- 2. Log file listing ---
    extensions = [".log", ".txt", ".out", ".xml", ".json"]
    all_files: list[str] = []
    serial_console: list[str] = []

    for root, _, files in os.walk(path_obj):
        for fname in files:
            fpath = os.path.join(root, fname)
            _, ext = os.path.splitext(fpath.lower())
            if ext in extensions:
                abs_path = os.path.abspath(fpath)
                all_files.append(abs_path)
                if "serial_console" in fname.lower():
                    serial_console.append(abs_path)
            # Cap the listing at 200 files to keep the response bounded.
            if len(all_files) >= 200:
                break
        if len(all_files) >= 200:
            break

    sections.append(f"\n## Log Files ({len(all_files)} found)\n")
    for fp in all_files:
        sections.append(f"- `{fp}`")

    if serial_console:
        sections.append("\n### Serial Console Logs (prioritize these)\n")
        for fp in serial_console:
            sections.append(f"- `{fp}`")

    # --- 3. Error message search ---
    if error_message:
        sections.append(f"\n## Initial Error Search: `{error_message[:200]}`\n")
        error_matches = _search_in_files(
            error_message, path_obj, extensions, limit=50
        )
        if error_matches:
            for m in error_matches:
                sections.append(f"- `{m['file']}` L{m['line']}: {m['text']}")
        else:
            sections.append("*No exact matches. Try broader search terms.*")

    # --- 4. Common pattern search ---
    # NOTE(review): each search is called with limit=20, so a reported
    # count of 20 presumably means "20 or more" — confirm against
    # _search_in_files.
    patterns = ["error", "warn", "fail", "panic", "unable", "not found"]
    pattern_results: dict[str, int] = {}
    for pattern in patterns:
        matches = _search_in_files(pattern, path_obj, extensions, limit=20)
        pattern_results[pattern] = len(matches)

    sections.append("\n## Pattern Hit Counts (across all log files)\n")
    sections.append("| Pattern | Matches |")
    sections.append("|---------|---------|")
    for pattern, count in pattern_results.items():
        sections.append(f"| {pattern} | {count} |")

    # --- 5. Code path ---
    repo_root = find_repo_root()
    resolved_code = code_path or (str(repo_root) if repo_root else "")
    if resolved_code:
        sections.append(f"\n## Code Path\n`{resolved_code}`")
        sections.append(
            "Use ``lisa_diagnose_bug`` with a test name to inspect "
            "source code for defects."
        )

    # --- 6. Next steps ---
    sections.append("\n## Next Steps\n")
    sections.append(
        "1. **Read context** around error matches using "
        "``lisa_read_log_file``\n"
        "2. **Search** for specific patterns using "
        "``lisa_search_log_files``\n"
        "3. **Check serial console** if connectivity/boot issue\n"
        "4. **Classify failure** using ``lisa_explain_failure``\n"
        "5. **Inspect code** using ``lisa_diagnose_bug`` if a test name "
        "is identified\n"
        "6. **Produce final JSON** in the format shown above"
    )

    return "\n".join(sections)
Call ``lisa_list_log_files`` to discover the log directory structure + 3. Call ``lisa_search_log_files`` with error patterns across all logs + 4. Call ``lisa_read_log_file`` to examine context around each match + 5. Synthesize findings following the Final Answer format + + The prompts cover: + - **Overall Analysis Workflow** — 5-step root-cause analysis + - **Log Search Strategy** — how to search LISA logs, serial + console logs, and interpret the LISA log format + - **Code Search Strategy** — how to review source code for defects + - **Final Answer Synthesis** — structured JSON output format + + Returns: + The concatenated prompt text from ``lisa/ai/prompts/default/`` + """ + prompts = _load_ai_prompts() + if not prompts: + return ( + "**Error:** Could not load AI log analysis prompts.\n\n" + "Ensure the LISA repo root is accessible and " + "``lisa/ai/prompts/default/`` exists." + ) + + return ( + "# LISA AI Log Analyzer — Agent Prompts\n\n" + "These are the expert prompts used by the LISA AI multi-agent " + "log analyzer. Use these strategies with the ``lisa_search_log_files``" + ", ``lisa_read_log_file``, and ``lisa_list_log_files`` tools to perform " + "the same analysis yourself.\n\n" + prompts + ) + + # ------------------------------------------------------------------ + # File-investigation tools + # ------------------------------------------------------------------ + + @mcp.tool() + def lisa_search_log_files( + search_string: str, + path: str, + file_extensions: str = ".log,.txt,.out", + ) -> str: + """Search for a string across log files in a directory tree. + + This replicates the LogSearchAgent's ``search_files`` capability. + Use it to find error messages, patterns, or keywords in LISA log + output, serial console logs, and other text files. + + Results include file path, line number, and matched text for each + hit (up to 200 matches). 
+ + Args: + search_string: The text to search for (case-insensitive) + path: Absolute path to the log directory to search in + file_extensions: Comma-separated extensions to include + (default: ``.log,.txt,.out``) + """ + path_obj = Path(path) + if not path_obj.is_dir(): + return f"**Error:** Directory not found: {path}" + + search_lower = search_string.lower() + extensions = [ext.strip().lower() for ext in file_extensions.split(",")] + matches: list[dict[str, object]] = [] + + for root, _, files in os.walk(path_obj): + for fname in files: + fpath = os.path.join(root, fname) + if os.path.relpath(fpath, path).startswith("."): + continue + _, ext = os.path.splitext(fpath.lower()) + if ext not in extensions: + continue + try: + with open(fpath, "r", encoding="utf-8", errors="replace") as f: + for i, line in enumerate(f, start=1): + if search_lower in line.lower(): + matches.append( + { + "file": os.path.abspath(fpath), + "line": i, + "text": line.strip()[:500], + } + ) + if len(matches) >= _MAX_SEARCH_MATCHES: + break + except OSError: + continue + if len(matches) >= _MAX_SEARCH_MATCHES: + break + + if not matches: + return ( + f"No matches for **{search_string}** in `{path}` " + f"(extensions: {file_extensions})" + ) + + lines = [ + f"**Found {len(matches)} match(es)** for " + f"**{search_string}** in `{path}`:\n" + ] + for m in matches: + lines.append(f"- `{m['file']}` L{m['line']}: {m['text']}") + + if len(matches) >= _MAX_SEARCH_MATCHES: + lines.append( + f"\n*Results capped at {_MAX_SEARCH_MATCHES}. " + f"Narrow your search for more targeted results.*" + ) + + return "\n".join(lines) + + @mcp.tool() + def lisa_read_log_file( + file_path: str, + start_line: int = 1, + line_count: int = 200, + ) -> str: + """Read a range of lines from a log file. + + This replicates the LogSearchAgent's ``read_text_file`` capability. + Use it to examine context around matches found by + ``lisa_search_log_files`` — the surrounding lines often reveal the + root cause. 
+ + **Tip:** Read at least 100 lines around an error to capture the + full command execution sequence and timestamps. + + Args: + file_path: Absolute path to the file to read + start_line: Line number to start reading from (1-based, default 1) + line_count: Number of lines to read (default 200, max 300) + """ + p = Path(file_path) + if not p.is_file(): + return f"**Error:** File not found: {file_path}" + + bounded_count = min(line_count, _MAX_READ_LINES) + end_line = start_line + bounded_count - 1 + + result_lines: list[str] = [] + try: + with open(p, "r", encoding="utf-8", errors="replace") as f: + for i, line in enumerate(f, start=1): + if i < start_line: + continue + if i > end_line: + break + result_lines.append(f"({i}): {line.rstrip()}") + except OSError as exc: + return f"**Error:** Could not read file: {exc}" + + if not result_lines: + return ( + f"**Error:** No lines in range {start_line}-{end_line} " + f"for `{file_path}`" + ) + + text = "\n".join(result_lines) + if len(text) > _MAX_READ_CHARS: + text = ( + text[:_MAX_READ_CHARS] + + f"\n...[truncated {len(text) - _MAX_READ_CHARS} chars]" + ) + + return ( + f"**`{file_path}`** lines {start_line}–" + f"{start_line + len(result_lines) - 1}:\n```\n{text}\n```" + ) + + @mcp.tool() + def lisa_list_log_files( + folder_path: str, + file_extensions: str = ".log,.txt,.out,.xml,.json", + recursive: bool = True, + max_files: int = 200, + ) -> str: + """List files in a log directory, optionally filtered by extension. + + This replicates the LogSearchAgent's ``list_files`` capability. + Use it to discover the log directory structure before searching. + + **Tip:** Start here to understand what logs are available, then + use ``lisa_search_log_files`` and ``lisa_read_log_file`` to dig in. 
+ + Args: + folder_path: Absolute path to the log directory + file_extensions: Comma-separated extensions to include + (default: ``.log,.txt,.out,.xml,.json``) + recursive: Whether to search subdirectories (default True) + max_files: Maximum number of files to return (default 200) + """ + p = Path(folder_path) + if not p.is_dir(): + return f"**Error:** Directory not found: {folder_path}" + + extensions = [ext.strip().lower() for ext in file_extensions.split(",")] + found: list[str] = [] + + if recursive: + for root, _, files in os.walk(p): + for fname in files: + fpath = os.path.join(root, fname) + if os.path.relpath(fpath, folder_path).startswith("."): + continue + _, ext = os.path.splitext(fpath.lower()) + if ext in extensions: + found.append(os.path.abspath(fpath)) + if len(found) >= max_files: + break + if len(found) >= max_files: + break + else: + for item in sorted(p.iterdir()): + if item.is_file(): + _, ext = os.path.splitext(item.name.lower()) + if ext in extensions: + found.append(str(item.resolve())) + if len(found) >= max_files: + break + + if not found: + return f"No files matching `{file_extensions}` in `{folder_path}`" + + lines = [ + f"**{len(found)} file(s)** in `{folder_path}` " + f"(extensions: {file_extensions}):\n" + ] + for fp in found: + lines.append(f"- `{fp}`") + + if len(found) >= max_files: + lines.append(f"\n*Listing capped at {max_files} files.*") + + return "\n".join(lines) + + # ------------------------------------------------------------------ + # Debugging / diagnosis tools (spec Section 6.4 — Analysis) + # ------------------------------------------------------------------ + + @mcp.tool() + def lisa_diagnose_bug( + test_name: str, + failure_log: str, + ) -> str: + """Given a test name and its failure log, locate the test source code, + correlate with the failure, and suggest a root cause and fix. + + Uses the LISA troubleshooting documentation and curated error patterns + from the repo. 
def _search_in_files(
    search_string: str,
    root_path: Path,
    extensions: list[str],
    limit: int = 50,
) -> list[dict[str, object]]:
    """Return case-insensitive substring hits from files under *root_path*.

    Only files whose lower-cased extension appears in *extensions* are read.
    Each hit is a dict with the absolute ``file`` path, the 1-based ``line``
    number and the stripped line ``text`` (truncated to 500 characters).
    The search returns as soon as *limit* hits have been collected; files
    that cannot be opened or read are skipped.
    """
    needle = search_string.lower()
    hits: list[dict[str, object]] = []

    for directory, _, names in os.walk(root_path):
        for name in names:
            candidate = os.path.join(directory, name)
            if os.path.splitext(candidate.lower())[1] not in extensions:
                continue
            try:
                with open(
                    candidate, "r", encoding="utf-8", errors="replace"
                ) as handle:
                    for lineno, raw in enumerate(handle, start=1):
                        if needle not in raw.lower():
                            continue
                        hit = {
                            "file": os.path.abspath(candidate),
                            "line": lineno,
                            "text": raw.strip()[:500],
                        }
                        hits.append(hit)
                        if len(hits) >= limit:
                            return hits
            except OSError:
                # Best-effort search: move on to the next file.
                continue

    return hits
def _extract_kernel_panics(text: str) -> list[str]:
    """Collect kernel-panic indicator lines found in *text*.

    Each indicator is matched case-insensitively from its marker to the end
    of the line.  Results are grouped by indicator, in the order listed
    below, and within a group follow their order of appearance in *text*.
    """
    indicators = (
        r"Kernel panic.*",
        r"BUG: soft lockup.*",
        r"general protection fault.*",
        r"Call Trace:.*",
        r"RIP:.*",
    )
    return [
        found.group(0).strip()
        for indicator in indicators
        for found in re.finditer(indicator, text, re.IGNORECASE)
    ]
def _match_known_patterns(log: str, patterns_md: str) -> str:
    """Match a failure log against the curated error-patterns document.

    The document is scanned for ``### <pattern>`` headings.  A pattern
    matches when its heading text occurs in *log*, ignoring case.  The
    remedy reported with it is the last ``Fix:`` or ``Resolution:`` line
    seen under that heading.

    Args:
        log: Failure output to inspect.
        patterns_md: Markdown text of the error-patterns document.

    Returns:
        A Markdown bullet list of matches, ``"No known error patterns
        matched."`` when nothing matches, or ``""`` when *patterns_md* looks
        like a failed load rather than a real document.
    """
    doc_lines = patterns_md.split("\n")

    # Treat the text as a failed load only if it has no pattern headings at
    # all.  A real patterns document may itself mention "not found" (for
    # example a "command not found" pattern) and must still be searched.
    has_headings = any(line.startswith("### ") for line in doc_lines)
    if not has_headings and "not found" in patterns_md.lower():
        return ""

    log_lower = log.lower()
    matches = []
    current_pattern = ""
    current_fix = ""

    # The trailing sentinel heading flushes the final pattern inside the loop.
    for line in doc_lines + ["### "]:
        if line.startswith("### "):
            if current_pattern and current_pattern.lower() in log_lower:
                matches.append(f"- **{current_pattern}**: {current_fix}")
            current_pattern = line[4:].strip()
            current_fix = ""
        elif line.startswith(("Fix:", "Resolution:")):
            current_fix = line.split(":", 1)[1].strip()

    return "\n".join(matches) if matches else "No known error patterns matched."
**Check the full stack trace** — the bottom of the traceback " + "shows the actual error, frames above show how it got there." + ) + + if "assert" in log_lower: + steps.append( + "2. **Compare expected vs actual** — find the `assert_that()` call in " + "the test source and check what value was actually produced." + ) + steps.append( + "3. **Run the underlying command manually** — SSH into the node and " + "run the same command the test runs to see the raw output." + ) + + if "timeout" in log_lower: + steps.append( + "2. **Check node responsiveness** — is the VM still running? " + "Can you SSH to it manually?" + ) + steps.append( + "3. **Increase timeout** — if the operation is legitimate but slow, " + "increase the test's `timeout` parameter in `@TestCaseMetadata`." + ) + + if "ssh" in log_lower or "tcp" in log_lower: + steps.append( + "2. **Check serial console** — the VM may have panicked during boot." + ) + steps.append("3. **Check NSG rules** — port 22 must be open.") + + if not any(k in log_lower for k in ["assert", "timeout", "ssh", "tcp"]): + steps.append( + "2. **Reproduce locally** — run the test with `lisa -r runbook.yml " + '-v "testcase.name:"` to reproduce.' + ) + steps.append( + "3. **Enable debug logging** — add `--log-level DEBUG` to the LISA " + "command to get full command output." + ) + + return "\n".join(steps) + + +# --------------------------------------------------------------------------- +# Azure Blob + archive helpers +# --------------------------------------------------------------------------- + + +def _get_azure_imports() -> tuple: + """Import Azure SDK packages, raising a clear error if missing.""" + try: + from azure.identity import DefaultAzureCredential + from azure.storage.blob import BlobServiceClient + except ImportError as exc: + raise ImportError( + "Azure Blob download requires azure-identity and " + "azure-storage-blob packages. 
def _parse_portal_storage_url(url: str) -> Optional[dict[str, str]]:
    """Parse an Azure Portal storage URL into account/container/prefix.

    The portal encodes the target in the URL fragment, in the form
    ``.../storageAccountId/<url-encoded ARM resource id>/path/<url-encoded
    container and blob prefix>``.  The storage account name is the segment
    following ``storageAccounts`` in the resource id; the first segment of
    the path is the container and the remainder is the blob prefix.

    Args:
        url: Candidate URL.

    Returns:
        A dict with ``account``, ``container`` and ``prefix`` keys (``prefix``
        may be empty), or ``None`` if the URL is not a portal storage URL.
    """
    parsed = urlparse(url)
    host = parsed.hostname or ""
    # Require the portal host itself or a real subdomain of it.  A bare
    # suffix test would also accept hosts such as "evilportal.azure.com".
    if host != "portal.azure.com" and not host.endswith(".portal.azure.com"):
        return None
    if not parsed.fragment:
        return None

    decoded = unquote(parsed.fragment)
    if "/storageAccountId/" not in decoded or "/path/" not in decoded:
        return None

    storage_and_path = decoded.split("/storageAccountId/", 1)[1]
    # "/path/" must come after the resource id, not somewhere before it.
    if "/path/" not in storage_and_path:
        return None
    storage_id_part, path_part = storage_and_path.split("/path/", 1)

    # Extract storage account name from the ARM resource ID
    segments = [s for s in storage_id_part.split("/") if s]
    try:
        sa_idx = segments.index("storageAccounts")
    except ValueError:
        return None
    if len(segments) <= sa_idx + 1:
        return None
    account = segments[sa_idx + 1]

    # Extract container and blob prefix from the path
    path_parts = [p for p in path_part.strip("/").split("/") if p]
    if not path_parts:
        return None
    container = path_parts[0]
    prefix = "/".join(path_parts[1:])

    return {"account": account, "container": container, "prefix": prefix}
+ """ + DefaultAzureCredential, BlobServiceClient = _get_azure_imports() # noqa: N806 + + account_url = f"https://{account}.blob.core.windows.net" + + # Use pre-fetched token (run-local.sh injects this for Docker) + # or fall back to DefaultAzureCredential (managed identity, az login) + storage_token = os.environ.get("AZURE_STORAGE_TOKEN") + if storage_token: + from azure.core.credentials import AccessToken, TokenCredential + + class _StaticTokenCredential(TokenCredential): + """Wraps a pre-fetched token for the Azure SDK.""" + + def get_token(self, *scopes, **kwargs): # type: ignore[override] + return AccessToken(storage_token, 0) + + credential = _StaticTokenCredential() + else: + credential = DefaultAzureCredential() + + service_client = BlobServiceClient(account_url=account_url, credential=credential) + container_client = service_client.get_container_client(container) + + starts_with = prefix + if starts_with and not starts_with.endswith("/"): + starts_with = f"{starts_with}/" + + blobs = list(container_client.list_blobs(name_starts_with=starts_with)) + + # If no blobs with trailing slash, try the exact prefix (single blob) + if not blobs and prefix: + blobs = list(container_client.list_blobs(name_starts_with=prefix)) + + if not blobs: + raise FileNotFoundError(f"No blobs found under '{container}/{prefix}'.") + + # Use the leaf folder name as the local root + normalized = prefix.strip("/") + prefix_with_sep = f"{normalized}/" if normalized else "" + leaf_name = normalized.rsplit("/", maxsplit=1)[-1] if normalized else container + result_dir = os.path.join(download_dir, leaf_name) + + downloaded = 0 + for blob in blobs: + blob_name = blob.name + relative = blob_name + if prefix_with_sep and blob_name.startswith(prefix_with_sep): + relative = blob_name[len(prefix_with_sep) :] + if not relative: + continue + + # Path traversal protection + safe_parts = [p for p in relative.split("/") if p and p != "." 
def _extract_archive(download_path: str, download_dir: str) -> str:
    """Extract a tar or zip archive and return the directory holding the logs.

    Archives are unpacked into ``<download_dir>/extracted`` and the archive
    file is deleted afterwards.  If *download_path* is neither a tar nor a
    zip archive, nothing is changed and *download_dir* is returned.

    Members are only extracted when their resolved destination lies inside
    the extraction directory.  For tar archives only regular files and
    directories are extracted; links and special files are skipped so an
    archive cannot plant a link that points outside the directory.

    Args:
        download_path: Path to the downloaded file.
        download_dir: Directory that contains the download.

    Returns:
        The extraction directory, or *download_dir* for non-archives.
    """
    extract_dir = os.path.join(download_dir, "extracted")
    extract_root = os.path.realpath(extract_dir)

    def _lands_inside(member_name: str) -> bool:
        """Return True when *member_name* resolves under the extract dir."""
        target = os.path.realpath(os.path.join(extract_root, member_name))
        return target == extract_root or target.startswith(extract_root + os.sep)

    if tarfile.is_tarfile(download_path):
        os.makedirs(extract_dir, exist_ok=True)
        with tarfile.open(download_path) as tf:
            safe_members = [
                m
                for m in tf.getmembers()
                if (m.isfile() or m.isdir()) and _lands_inside(m.name)
            ]
            # Use the stdlib "data" extraction filter where the running
            # Python provides it, as a second line of defence.
            extra = {"filter": "data"} if hasattr(tarfile, "data_filter") else {}
            tf.extractall(extract_dir, members=safe_members, **extra)
        os.remove(download_path)
        return extract_dir

    if zipfile.is_zipfile(download_path):
        os.makedirs(extract_dir, exist_ok=True)
        with zipfile.ZipFile(download_path) as zf:
            for name in zf.namelist():
                if _lands_inside(name):
                    zf.extract(name, extract_dir)
        os.remove(download_path)
        return extract_dir

    return download_dir
+ +"""Runbook tools — generate, validate, and fix LISA YAML runbooks.""" + +from __future__ import annotations + +from typing import Optional + +from mcp.server.fastmcp import FastMCP + + +def register_runbook_tools(mcp: FastMCP) -> None: # noqa: C901 + @mcp.tool() + def lisa_generate_runbook( + platform: str = "azure", + area: Optional[str] = None, + priority: Optional[int] = None, + tags: Optional[str] = None, + vm_size: Optional[str] = None, + location: Optional[str] = None, + image: Optional[str] = None, + concurrency: int = 1, + keep_environment: str = "no", + test_names: Optional[str] = None, + ) -> str: + """Generate a valid LISA YAML runbook from parameters. + + Args: + platform: Target platform — "azure", "hyperv", "local", "remote" + area: Test area filter (e.g. "provisioning", "network") + priority: Max priority level (0-3) to include + tags: Comma-separated test tags to filter on + vm_size: Azure VM size (e.g. "Standard_DS2_v2") + location: Azure region (e.g. "westus2") + image: Marketplace image string (e.g. 
"canonical 0001-com-ubuntu-server-jammy 22_04-lts-gen2 latest") # noqa: E501 + concurrency: Number of parallel test environments + keep_environment: "no", "always", or "failed" + test_names: Comma-separated test method names to run + """ + sections = [] + + # Header + sections.append("name: generated-runbook") + sections.append(f"concurrency: {concurrency}") + + # Extension — point to test suites + sections.append("") + sections.append("extension:") + sections.append(' - "../../lisa/microsoft/testsuites"') + + # Platform + sections.append("") + sections.append("platform:") + sections.append(f" - type: {platform}") + sections.append(' admin_username: "$(admin_username)"') + sections.append(' admin_private_key_file: "$(admin_private_key_file)"') + sections.append(f" keep_environment: {keep_environment}") + + if platform == "azure": + sections.append(" azure:") + sections.append(' subscription_id: "$(subscription_id)"') + if location: + sections.append(f' deploy_location: "{location}"') + if vm_size: + sections.append(" requirement:") + sections.append(" azure:") + sections.append(f' vm_size: "{vm_size}"') + if image: + parts = image.split() + if len(parts) == 4: + sections.append(" marketplace:") + sections.append(f' publisher: "{parts[0]}"') + sections.append(f' offer: "{parts[1]}"') + sections.append(f' sku: "{parts[2]}"') + sections.append(f' version: "{parts[3]}"') + + elif platform == "remote": + sections.append(" # Configure remote node connection") + sections.append(" # nodes:") + sections.append(" # - type: remote") + sections.append(' # address: "$(remote_address)"') + sections.append(" # port: 22") + + # Notifier + sections.append("") + sections.append("notifier:") + sections.append(" - type: console") + sections.append(" - type: html") + + # Variable section + sections.append("") + sections.append("variable:") + sections.append(" - name: admin_username") + sections.append(' value: ""') + sections.append(" - name: admin_private_key_file") + 
sections.append(' value: ""') + if platform == "azure": + sections.append(" - name: subscription_id") + sections.append(' value: ""') + sections.append(" is_secret: true") + + # Test cases + sections.append("") + sections.append("testcase:") + sections.append(" - criteria:") + if area: + sections.append(f" area: {area}") + if priority is not None: + sections.append(f" priority: [0, {priority}]") + if tags: + tag_list = [t.strip() for t in tags.split(",")] + sections.append(f" tags: [{', '.join(tag_list)}]") + + if test_names: + names = [n.strip() for n in test_names.split(",")] + for name in names: + sections.append(" - criteria:") + sections.append(f" name: {name}") + + runbook_yaml = "\n".join(sections) + "\n" + + return ( + "Generated LISA runbook:\n\n" + f"```yaml\n{runbook_yaml}```\n\n" + "**Usage:**\n" + "```bash\n" + "lisa -r .yml " + '-v "admin_username:" ' + '-v "admin_private_key_file:~/.ssh/id_rsa"\n' + "```" + ) + + @mcp.tool() + def lisa_validate_runbook(runbook_content: str) -> str: + """Validate a LISA runbook YAML for structural correctness. + Checks required fields, known platform types, and common mistakes. + + Args: + runbook_content: The YAML content of the runbook to validate + """ + import yaml + + errors = [] + warnings = [] + + try: + doc = yaml.safe_load(runbook_content) + except yaml.YAMLError as e: + return f"**YAML parse error:** {e}" + + if not isinstance(doc, dict): + return ( + "**Error:** Runbook must be a YAML mapping (dictionary) at top level." + ) + + # Check platform + if "platform" not in doc: + errors.append( + "Missing `platform` section — " + "LISA needs at least one platform configured." 
+ ) + elif isinstance(doc["platform"], list): + for i, p in enumerate(doc["platform"]): + if not isinstance(p, dict): + errors.append(f"platform[{i}] must be a mapping.") + continue + if "type" not in p: + errors.append(f"platform[{i}] missing `type` field.") + else: + known = { + "azure", + "hyperv", + "local", + "remote", + "mock", + "libvirt", + "baremetal", + "aws", + "ready", + } + if p["type"] not in known: + warnings.append( + f"platform[{i}].type = '{p['type']}' — " + f"not a known built-in type ({', '.join(sorted(known))}). " + "This is fine if you have a custom platform extension." + ) + + # Check testcase + if "testcase" not in doc and "testcase_raw" not in doc: + errors.append( + "Missing `testcase` section — no tests will be selected. " + "Add at least one testcase criteria block." + ) + + # Check extension + if "extension" not in doc: + warnings.append( + "No `extension` section — LISA won't load test suites unless " + "they're on the Python path. Usually you need:\n" + " extension:\n" + ' - "path/to/testsuites"' + ) + + # Check notifier + if "notifier" not in doc: + warnings.append( + "No `notifier` section — consider adding console and html notifiers " + "for visibility." + ) + + # Check variables with secrets + if "variable" in doc and isinstance(doc["variable"], list): + for v in doc["variable"]: + if isinstance(v, dict): + if v.get("is_secret") and v.get("value"): + val = str(v.get("value", "")) + if val and val not in ("", '""', "''"): + errors.append( + f"Variable `{v.get('name', '?')}` is marked is_secret " + "but has a hardcoded value. Use CLI `-v` overrides or " + "environment variables for secrets." + ) + + # Build result + result_parts = [] + if errors: + result_parts.append("**Errors:**\n" + "\n".join(f"- {e}" for e in errors)) + if warnings: + result_parts.append( + "**Warnings:**\n" + "\n".join(f"- {w}" for w in warnings) + ) + if not errors and not warnings: + result_parts.append("Runbook structure looks valid. 
No issues found.") + + return "\n\n".join(result_parts) + + @mcp.tool() + def lisa_fix_runbook(runbook_content: str) -> str: + """Validate a LISA runbook YAML, fix common issues, and return the + corrected version with explanations of what was changed. + + Args: + runbook_content: The YAML content of the runbook to fix + """ + import yaml + + fixes = [] + + try: + doc = yaml.safe_load(runbook_content) + except yaml.YAMLError as e: + return ( + f"**YAML syntax error — cannot auto-fix:**\n\n```\n{e}\n```\n\n" + "Fix the YAML syntax first, then re-run this tool." + ) + + if not isinstance(doc, dict): + return "Runbook must be a YAML mapping at the top level." + + modified = dict(doc) + + # Fix: missing platform + if "platform" not in modified: + modified["platform"] = [{"type": "azure"}] + fixes.append("Added default `platform` section with `type: azure`.") + + # Fix: platform as dict instead of list + if isinstance(modified.get("platform"), dict): + modified["platform"] = [modified["platform"]] + fixes.append( + "Wrapped `platform` in a list (LISA expects a list of platforms)." + ) + + # Fix: missing notifier + if "notifier" not in modified: + modified["notifier"] = [{"type": "console"}, {"type": "html"}] + fixes.append("Added `notifier` section with console and html output.") + + # Fix: missing testcase + if "testcase" not in modified and "testcase_raw" not in modified: + modified["testcase"] = [{"criteria": {}}] + fixes.append( + "Added empty `testcase` criteria block. " + "Specify `area`, `priority`, or `name` to filter tests." 
+ ) + + # Fix: testcase as dict instead of list + if isinstance(modified.get("testcase"), dict): + modified["testcase"] = [modified["testcase"]] + fixes.append("Wrapped `testcase` in a list.") + + # Fix: keep_environment as bool True (should be string) + if isinstance(modified.get("platform"), list): + for i, p in enumerate(modified["platform"]): + if isinstance(p, dict): + ke = p.get("keep_environment") + if ke is True: + p["keep_environment"] = "always" + fixes.append( + f"platform[{i}]: Changed `keep_environment: true` to " + '`keep_environment: "always"`.' + ) + elif ke is False: + p["keep_environment"] = "no" + fixes.append( + f"platform[{i}]: Changed `keep_environment: false` to " + '`keep_environment: "no"`.' + ) + + # Dump corrected YAML + corrected_yaml = yaml.dump(modified, default_flow_style=False, sort_keys=False) + + if fixes: + fix_list = "\n".join(f"- {f}" for f in fixes) + return ( + f"**Fixes applied ({len(fixes)}):**\n{fix_list}\n\n" + f"**Corrected runbook:**\n\n```yaml\n{corrected_yaml}```" + ) + else: + return ( + "No structural issues found. The runbook looks correct.\n\n" + f"```yaml\n{corrected_yaml}```" + ) diff --git a/mcp/lisa_mcp/tools/test_writer.py b/mcp/lisa_mcp/tools/test_writer.py new file mode 100644 index 0000000000..05fea8ff20 --- /dev/null +++ b/mcp/lisa_mcp/tools/test_writer.py @@ -0,0 +1,823 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Test authoring tools — scaffold suites, cases, and write tests.""" + +from __future__ import annotations + +import json +import os +import re +from pathlib import Path +from typing import Optional + +from lisa_mcp.tools._repo import find_repo_root, load_test_writer_prompt +from mcp.server.fastmcp import FastMCP + + +def register_test_writer_tools(mcp: FastMCP) -> None: # noqa: C901 + @mcp.tool() + def lisa_get_test_writer_guidelines() -> str: + """Return the full LISA test writer guidelines prompt. 
This is the + authoritative reference for writing LISA test suites and test cases. + + The prompt enforces: + - Validation-first thinking (no code before design plan) + - Pattern matching before generation (search existing tools/features) + - Mandatory Arrange → Act → Assert structure + - Logging, assertion, and cleanup standards + - Cost awareness and node hygiene + + Call this tool FIRST before writing any LISA test code. The guidelines + describe a mandatory three-stage workflow: + 1. Gather — search existing Tools, Features, and similar TestSuites + 2. Research — verify API signatures, never hallucinate + 3. Design Plan — present Arrange → Act → Assert summary for user approval + """ + prompt = load_test_writer_prompt() + if prompt.startswith("("): + return prompt + return ( + "# LISA Test Writer Guidelines\n\n" + "**IMPORTANT**: Follow these guidelines for ALL test authoring. " + "Do NOT write code until the design plan is confirmed.\n\n" + prompt + ) + + @mcp.tool() + def lisa_scaffold_test_suite( + area: str, + class_name: str, + description: str, + category: str = "functional", + ) -> str: + """Generate a complete LISA test suite skeleton with correct decorators, + imports, and structure following the lisa_test_writer guidelines. + + IMPORTANT: Before calling this tool, call get_test_writer_guidelines + first and follow the mandatory workflow (Gather → Research → Design Plan). + + Args: + area: Test area (e.g. "networking", "storage", "provisioning") + class_name: PascalCase class name (e.g. "MyNewFeature") + description: Human-readable description of what this suite tests + category: Test category — "functional", "stress", or "performance" + """ + snake_name = _to_snake_case(class_name) + + code = f'''\ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +from typing import Any + +from assertpy import assert_that + +from lisa import ( + Logger, + Node, + TestCaseMetadata, + TestSuite, + TestSuiteMetadata, + simple_requirement, +) +from lisa.operating_system import Posix + + +@TestSuiteMetadata( + area="{area}", + category="{category}", + description=""" + {description} + """, + requirement=simple_requirement(supported_os=[Posix]), +) +class {class_name}(TestSuite): + @TestCaseMetadata( + description=""" + TODO: Describe what this test case verifies. + Include the observable signal that proves the test passed. + """, + priority=2, + requirement=simple_requirement( + supported_os=[Posix], + ), + ) + def verify_{snake_name}(self, node: Node, log: Logger) -> None: + # --- Arrange --- + # Acquire tools/features. Use node.tools[ToolName] and node.features[Feature]. + # Verify environment meets preconditions. + + # --- Act --- + # Perform minimal actions to trigger the behavior under test. + + # --- Assert --- + # Explicitly verify expected outcomes using assert_that(). + # Each assertion should map to a requirement in metadata. + pass + + def before_case(self, log: Logger, **kwargs: Any) -> None: + pass + + def after_case(self, log: Logger, **kwargs: Any) -> None: + # Guaranteed cleanup. Call node.mark_dirty() if you modified + # kernel params, drivers, network config, or need a reboot. + pass +''' + file_path = f"lisa/microsoft/testsuites/{area}/{snake_name}.py" + return ( + f"Generated test suite skeleton for `{class_name}` in area `{area}`.\n" + f"Suggested file path: {file_path}\n\n" + "**Before filling in test logic**, follow the lisa_test_writer workflow:\n" + "1. **Gather**: Search `lisa/tools/` and `lisa/features/` for existing " + "tools and features you need.\n" + "2. **Research**: Verify API signatures \u2014 " + "never invent APIs that don't exist.\n" + "3. 
**Design Plan**: Present the Arrange → Act → Assert plan and get " + "user confirmation before writing the implementation.\n\n" + "Call `lisa_get_test_writer_guidelines` " + "for the full authoring protocol.\n\n" + f"```python\n{code}```" + ) + + @mcp.tool() + def lisa_scaffold_test_case( + area: str, + method_name: str, + description: str, + priority: int = 2, + supported_os: str = "Posix", + supported_features: Optional[str] = None, + min_core_count: Optional[int] = None, + min_nic_count: Optional[int] = None, + min_data_disk_count: Optional[int] = None, + ) -> str: + """Generate a single LISA test case method with correct decorators + and requirement specification, following the lisa_test_writer guidelines. + + IMPORTANT: Before calling this tool, call get_test_writer_guidelines + first and follow the mandatory workflow (Gather → Research → Design Plan). + + Args: + area: Test area this case belongs to + method_name: snake_case method name (e.g. "verify_sriov_failover") + description: What this test case verifies + priority: 0=critical, 1=high, 2=normal, 3=stress/long-running + supported_os: Comma-separated OS types — "Posix", "Windows", or both + supported_features: Comma-separated feature classes (e.g. 
"Gpu,Nvme,Sriov") + min_core_count: Minimum CPU cores required + min_nic_count: Minimum network interfaces required + min_data_disk_count: Minimum data disks required + """ + if not method_name.startswith(("verify_", "test_")): + method_name = f"verify_{method_name}" + + # Build requirement kwargs + req_parts = [] + os_list = [o.strip() for o in supported_os.split(",")] + req_parts.append(f"supported_os=[{', '.join(os_list)}]") + + if supported_features: + features = [f.strip() for f in supported_features.split(",")] + req_parts.append(f"supported_features=[{', '.join(features)}]") + if min_core_count: + req_parts.append(f"min_core_count={min_core_count}") + if min_nic_count: + req_parts.append(f"min_nic_count={min_nic_count}") + if min_data_disk_count: + req_parts.append(f"min_data_disk_count={min_data_disk_count}") + + req_str = ",\n ".join(req_parts) + + # Build feature imports + feature_imports = "" + if supported_features: + features = [f.strip() for f in supported_features.split(",")] + feature_imports = ( + f"\n# Add to imports:\n" + f"# from lisa.features import {', '.join(features)}\n" + ) + + code = f'''\ +{feature_imports} + @TestCaseMetadata( + description=""" + {description} + """, + priority={priority}, + requirement=simple_requirement( + {req_str}, + ), + ) + def {method_name}(self, node: Node, log: Logger) -> None: + # --- Arrange --- + # Acquire tools/features. Use node.tools[ToolName] and node.features[Feature]. + # Verify environment meets preconditions. + + # --- Act --- + # Perform minimal actions to trigger the behavior under test. + + # --- Assert --- + # Explicitly verify expected outcomes using assert_that(). + # Each assertion should map to a requirement in metadata. + pass +''' + return ( + f"Generated test case `{method_name}` for area `{area}`.\n" + f"Add this method to your TestSuite class.\n\n" + "**Before filling in test logic**, follow the lisa_test_writer workflow:\n" + "1. 
Search `lisa/tools/` for existing tools you need " + "(use `lisa_list_tools`).\n" + "2. Search `lisa/features/` for required features " + "(use `lisa_list_features`).\n" + "3. Verify API signatures \u2014 never invent APIs that don't exist.\n" + "4. Present the Design Plan (Arrange \u2192 Act \u2192 Assert) " + "for user confirmation.\n\n" + f"```python\n{code}```" + ) + + @mcp.tool() + def lisa_list_test_requirements(test_name: str) -> str: + """Search the LISA repo for a test method and return its requirement + specification, explaining what platform/node capabilities it needs. + + Args: + test_name: Exact test method name (e.g. "smoke_test", + "verify_sriov_failover") + """ + repo_root = find_repo_root() + if not repo_root: + return "Could not locate LISA repository root." + + testsuites_dir = repo_root / "lisa" / "microsoft" / "testsuites" + if not testsuites_dir.exists(): + testsuites_dir = repo_root / "lisa" / "examples" / "testsuites" + + results = [] + for py_file in testsuites_dir.rglob("*.py"): + try: + content = py_file.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + + # Look for the method definition + pattern = rf"def\s+{re.escape(test_name)}\s*\(" + match = re.search(pattern, content) + if not match: + continue + + # Extract the @TestCaseMetadata block above it + lines = content[: match.start()].split("\n") + decorator_start = len(lines) - 1 + paren_depth = 0 + found_decorator = False + for i in range(len(lines) - 1, -1, -1): + line = lines[i] + paren_depth += line.count(")") - line.count("(") + if "@TestCaseMetadata" in line: + decorator_start = i + found_decorator = True + break + + if found_decorator: + method_end = content.find("\n def ", match.end()) + if method_end == -1: + method_end = min(match.end() + 500, len(content)) + + decorator_block = "\n".join(lines[decorator_start:]) + method_body = content[match.start() : method_end] + + rel_path = py_file.relative_to(repo_root) + results.append( + f"**Found in 
`{rel_path}`:**\n\n" + f"```python\n{decorator_block}\n{method_body}\n```" + ) + + if not results: + return ( + f"Test method `{test_name}` not found in the LISA test suites.\n" + f"Searched in: {testsuites_dir}" + ) + + return f"**Requirements for `{test_name}`:**\n\n" + "\n\n---\n\n".join(results) + + @mcp.tool() + def lisa_write_test( + description: str, + area: str, + feature: Optional[str] = None, + tier: Optional[int] = None, + platform: Optional[str] = None, + distro_notes: Optional[str] = None, + class_name: Optional[str] = None, + method_name: Optional[str] = None, + ) -> str: + """Generate a complete, production-quality LISA test case from a + description of what to validate. + + This is the primary tool for writing LISA tests. It performs the + three-pillar validation from the lisa_test_writer prompt: + 1. GATHER — searches the repo for relevant tools, features, and + existing test suites that match the request + 2. RESEARCH — collects API signatures for the discovered tools/features + 3. DESIGN PLAN — produces an Arrange → Act → Assert plan with + workspace references, ready for user confirmation + + Returns structured metadata alongside the design plan — file path, + class name, test method name, required features and tools — so + calling agents can construct a PR without parsing free text. + + Args: + description: What to validate — the Linux capability or behavior + being tested (e.g. "Verify VF count stable after VM hot-resize") + area: The LISA test area (e.g. "network", "storage", "kernel", + "provisioning", "core") + feature: LISA feature class name (e.g. "Sriov", "Nvme", "StartStop", + "Gpu", "Resize", "Hibernate"). Optional. + tier: Test priority tier 0–4 (0=critical, 1=high, 2=normal, + 3=stress, 4=long-running). Optional. + platform: Target platform — "azure", "hyperv", "ready", or None + for platform-agnostic. Optional. + distro_notes: Any distro-specific failure context or requirements + (e.g. 
"Ubuntu 24.04 only", "Fails on RHEL 9 with kernel 6.x"). + Optional. + class_name: Optional PascalCase suite class name (auto-generated + if omitted) + method_name: Optional snake_case test method name (auto-generated + if omitted) + """ + # Backward-compatible aliases + what_to_validate = description + feature_area = area + + repo_root = find_repo_root() + + # --- Stage 1: GATHER --- + gather_results = [] + + search_query = what_to_validate + " " + feature_area + if feature: + search_query += " " + feature + + found_tools = _search_repo_symbols(repo_root, "lisa/tools", search_query) + found_features = _search_repo_symbols(repo_root, "lisa/features", search_query) + found_suites = _search_repo_symbols( + repo_root, + "lisa/microsoft/testsuites", + search_query, + ) + + if found_tools: + gather_results.append( + "**Relevant Tools found in `lisa/tools/`:**\n" + + "\n".join(f"- `{t}`" for t in found_tools[:10]) + ) + if found_features: + gather_results.append( + "**Relevant Features found in `lisa/features/`:**\n" + + "\n".join(f"- `{f}`" for f in found_features[:10]) + ) + if found_suites: + gather_results.append( + "**Similar TestSuites found in `lisa/microsoft/testsuites/`:**\n" + + "\n".join(f"- `{s}`" for s in found_suites[:10]) + ) + + # Extract existing test methods from matched suites + if repo_root: + existing = _extract_existing_tests(repo_root, found_suites) + query_kws = set( + w.lower() for w in re.split(r"\W+", search_query) if len(w) > 2 + ) + # Only flag methods matching specific (non-common) keywords + specific_kws = query_kws - { + "verify", + "test", + "check", + "module", + "kernel", + "functional", + "basic", + "config", + "storage", + "network", + "core", + "should", + "whether", + } + flag_kws = specific_kws if specific_kws else query_kws + for suite_info in existing: + methods = suite_info["methods"] + rel_path = suite_info["rel_path"] + cls = suite_info["class_name"] + method_lines = [] + matching_methods = [] + for m in methods: + desc_part = 
f" — {m['description']}" if m["description"] else "" + method_lower = str(m["name"]).lower() + has_match = any(kw in method_lower for kw in flag_kws) + prefix = " - **→**" if has_match else " -" + method_lines.append( + f"{prefix} `{m['name']}` (L{m['line']}){desc_part}" + ) + if has_match: + matching_methods.append(str(m["name"])) + + section = ( + f"\n**Existing tests in `{rel_path}` " + f"(class `{cls}`):**\n" + "\n".join(method_lines) + ) + if matching_methods: + section += ( + f"\n\n> **RELATED TEST ALREADY EXISTS: " + f"`{'`, `'.join(matching_methods)}` in " + f"`{rel_path}`.** You MUST add your new method " + f"to class `{cls}` in this file. Do NOT create " + "a new file." + ) + else: + section += ( + "\n\n> **IMPORTANT: Add your new test method to " + f"this existing class `{cls}` in `{rel_path}` " + "instead of creating a new file.** LISA convention " + "is one test class per file. Only create a new file " + "if the scope is clearly different from this suite." + ) + gather_results.append(section) + + if not gather_results: + gather_results.append( + "*No directly matching tools/features/suites found. " + "You may need to create new Tool or Feature classes.*" + ) + + # --- Stage 2: RESEARCH --- + api_refs = [] + if repo_root: + for tool_file in found_tools[:3]: + snippet = _extract_class_signature( + repo_root / "lisa" / "tools" / tool_file + ) + if snippet: + api_refs.append(f"**`{tool_file}`:**\n```python\n{snippet}\n```") + + for feat_file in found_features[:3]: + snippet = _extract_class_signature( + repo_root / "lisa" / "features" / feat_file + ) + if snippet: + api_refs.append(f"**`{feat_file}`:**\n```python\n{snippet}\n```") + + # --- Stage 3: DESIGN PLAN --- + # If existing suites were found, recommend adding to the best match + existing_suite_info = None + if repo_root and found_suites: + existing = _extract_existing_tests(repo_root, found_suites) + if existing: + # Prefer suite with method names matching query keywords. 
+ # Weight rare/specific keywords higher than common ones + # (e.g. "cifs" is more distinctive than "module"). + query_kws = [ + w.lower() for w in re.split(r"\W+", search_query) if len(w) > 2 + ] + common_words = { + "verify", + "test", + "check", + "module", + "kernel", + "functional", + "basic", + "config", + "storage", + "network", + "core", + "should", + "whether", + } + best_score = -1 + for suite in existing: + score = 0 + has_specific_match = False + for m in suite["methods"]: + method_lower = str(m["name"]).lower() + for kw in query_kws: + if kw in method_lower: + if kw in common_words: + score += 5 + else: + score += 20 + has_specific_match = True + # Bonus for having a specific (non-common) keyword match + if has_specific_match: + score += 100 + if score > best_score: + best_score = score + existing_suite_info = suite + + if existing_suite_info: + # Use the existing suite's file and class + file_path = str(existing_suite_info["rel_path"]) + if not class_name: + class_name = str(existing_suite_info["class_name"]) + else: + if not class_name: + words = re.split(r"[\s_-]+", feature_area) + class_name = "".join(w.capitalize() for w in words) + "Validation" + snake_name = _to_snake_case(class_name) + file_path = f"lisa/microsoft/testsuites/{feature_area}/{snake_name}.py" + + if not method_name: + clean = re.sub(r"[^a-zA-Z0-9\s]", "", what_to_validate) + words = clean.lower().split()[:5] + # Avoid double prefix (e.g. 
"verify_verify_...") + if words and words[0] in ("verify", "test"): + words = words[1:] + method_name = "verify_" + "_".join(words) + + dirty_keywords = [ + "kernel", + "grub", + "driver", + "reboot", + "module", + "modprobe", + "sysctl", + "network config", + "insmod", + "rmmod", + ] + needs_mark_dirty = any(kw in what_to_validate.lower() for kw in dirty_keywords) + + sections = [] + sections.append("# LISA Test Design Plan\n") + + meta_lines = [ + f"**Validation Target:** {what_to_validate}", + f"**Area:** {feature_area}", + ] + if feature: + meta_lines.append(f"**Feature:** {feature}") + if tier is not None: + meta_lines.append(f"**Tier:** {tier}") + if platform: + meta_lines.append(f"**Platform:** {platform}") + if distro_notes: + meta_lines.append(f"**Distro Notes:** {distro_notes}") + if existing_suite_info: + meta_lines.append( + f"**Target file (EXISTING):** `{file_path}` — " + f"add method `{method_name}` to class `{class_name}`" + ) + else: + meta_lines.append(f"**Suggested file (NEW):** `{file_path}`") + meta_lines.append(f"**Class:** `{class_name}` | **Method:** `{method_name}`") + meta_lines.append( + f"**Required Features:** " + f"{', '.join(found_features[:5]) if found_features else 'none detected'}" + ) + meta_lines.append( + f"**Required Tools:** " + f"{', '.join(found_tools[:5]) if found_tools else 'none detected'}" + ) + sections.append("\n".join(meta_lines) + "\n") + + sections.append("## Stage 1: Gathered Context\n") + sections.extend(gather_results) + + if api_refs: + sections.append("\n## Stage 2: API References\n") + sections.extend(api_refs) + + sections.append("\n## Stage 3: Design Plan (Arrange → Act → Assert)\n") + sections.append( + f"1. **Arrange**: Acquire required tools/features from the node. " + f"Verify preconditions.\n" + f"2. **Act**: {what_to_validate}\n" + f"3. 
**Assert**: Verify the observable signal confirms success.\n" + ) + + if needs_mark_dirty: + sections.append( + "**Node Hygiene:** `node.mark_dirty()` IS required — this test " + "modifies system state (kernel params, drivers, or network config).\n" + ) + else: + sections.append( + "**Node Hygiene:** `node.mark_dirty()` is likely NOT required — " + "this test appears to be read-only.\n" + ) + + sections.append( + "---\n\n" + "**Confirm this design plan before proceeding to implementation.**\n" + "Once confirmed, use `lisa_scaffold_test_suite` or " + "`lisa_scaffold_test_case` " + "to generate the code skeleton, then fill in the logic using the " + "gathered tools and features above." + ) + + sections.append( + "\n---\n\n" + "*This plan follows the `lisa_test_writer.prompt.md` workflow. " + "Call `lisa_get_test_writer_guidelines` for the full authoring protocol.*" + ) + + # --- Structured response metadata (for agent-to-agent consumption) --- + existing_tests_meta = [] + if repo_root and found_suites: + for suite_info in _extract_existing_tests(repo_root, found_suites): + existing_tests_meta.append( + { + "file": str(suite_info["rel_path"]), + "class": suite_info["class_name"], + "methods": [m["name"] for m in suite_info["methods"]], + } + ) + + metadata = { + "file_path": file_path, + "class_name": class_name, + "method_name": method_name, + "area": feature_area, + "feature": feature, + "tier": tier, + "platform": platform, + "required_features": found_features[:5], + "required_tools": found_tools[:5], + "existing_suites": existing_tests_meta, + } + sections.append( + "\n---\n\n" + "## Structured Metadata\n\n" + f"```json\n{json.dumps(metadata, indent=2)}\n```" + ) + + return "\n\n".join(sections) + + +def _to_snake_case(name: str) -> str: + """Convert PascalCase to snake_case.""" + s1 = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name) + return re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", s1).lower() + + +def _extract_existing_tests( + repo_root: Path, + suite_files: 
list[str], + subdir: str = "lisa/microsoft/testsuites", +) -> list[dict[str, object]]: + """Extract test method names, descriptions, and locations from suite files. + + Returns a list of dicts with keys: file, rel_path, class_name, methods. + Each method entry has: name, line, description. + """ + results: list[dict[str, object]] = [] + search_dir = repo_root / subdir.replace("/", os.sep) + + for suite_name in suite_files[:5]: + # Find the actual file path (could be in any subdirectory) + matches = list(search_dir.rglob(suite_name)) + if not matches: + continue + py_file = matches[0] + + try: + content = py_file.read_text(encoding="utf-8", errors="replace") + except OSError: + continue + + lines = content.split("\n") + rel_path = str(py_file.relative_to(repo_root)) + current_class = "" + methods: list[dict[str, object]] = [] + + for i, line in enumerate(lines): + stripped = line.strip() + # Track current class + class_match = re.match(r"class\s+(\w+)\s*\(", stripped) + if class_match: + current_class = class_match.group(1) + + # Find test methods (verify_* or test_*) + method_match = re.match(r"def\s+((?:verify|test)_\w+)\s*\(", stripped) + if method_match: + method_name = method_match.group(1) + # Look for description in preceding @TestCaseMetadata + desc = "" + for j in range(max(0, i - 10), i): + desc_match = re.search( + r'description\s*=\s*["\'](.+?)["\']', lines[j] + ) + if desc_match: + desc = desc_match.group(1) + break + methods.append( + { + "name": method_name, + "line": i + 1, + "description": desc, + } + ) + + if methods: + results.append( + { + "file": suite_name, + "rel_path": rel_path, + "class_name": current_class, + "methods": methods, + } + ) + + return results + + +def _search_repo_symbols( + repo_root: Optional[Path], + subdir: str, + query: str, +) -> list[str]: + """Search a repo subdirectory for Python files matching query keywords.""" + if not repo_root: + return [] + + search_dir = repo_root / subdir.replace("/", os.sep) + if not 
search_dir.exists(): + return [] + + keywords = [ + w.lower() + for w in re.split(r"\W+", query) + if len(w) > 2 + and w.lower() + not in { + "the", + "and", + "for", + "test", + "with", + "that", + "are", + "this", + "from", + "have", + "has", + "was", + "were", + } + ] + + if not keywords: + return [] + + scored: list[tuple[int, str]] = [] + for py_file in search_dir.rglob("*.py"): + if py_file.name.startswith("_"): + continue + filename_lower = py_file.stem.lower() + score = 0 + for kw in keywords: + if kw in filename_lower: + score += 10 + try: + content = py_file.read_text(encoding="utf-8", errors="replace") + content_lower = content.lower() + for kw in keywords: + score += min(content_lower.count(kw), 3) + # Boost score for files with test methods matching keywords + for match in re.finditer(r"def\s+((?:verify|test)_\w+)\s*\(", content): + method_lower = match.group(1).lower() + for kw in keywords: + if kw in method_lower: + score += 20 # strong signal: method name matches + except OSError: + pass + if score > 0: + scored.append((score, py_file.name)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [name for _, name in scored] + + +def _extract_class_signature(py_file: Path) -> str: + """Extract class name and public method signatures from a Python file.""" + if not py_file.exists(): + return "" + try: + content = py_file.read_text(encoding="utf-8", errors="replace") + except OSError: + return "" + + lines = content.split("\n") + result_lines = [] + for line in lines: + stripped = line.strip() + if stripped.startswith("class ") and "(" in stripped: + result_lines.append(stripped) + elif stripped.startswith("def ") and not stripped.startswith("def _"): + result_lines.append(f" {stripped}") + + if len(result_lines) > 15: + break + + return "\n".join(result_lines) if result_lines else "" diff --git a/mcp/pyproject.toml b/mcp/pyproject.toml new file mode 100644 index 0000000000..69d3054c2b --- /dev/null +++ b/mcp/pyproject.toml @@ -0,0 +1,32 @@ 
+[build-system] +requires = ["setuptools>=70"] +build-backend = "setuptools.build_meta" + +[project] +name = "lisa-mcp" +version = "0.1.0" +description = "MCP server for the LISA test automation framework" +readme = "README.md" +license = "MIT" +requires-python = ">=3.10" +dependencies = [ + "mcp[cli]>=1.0.0", + "pyyaml>=6.0", + "uvicorn>=0.24.0", + "starlette>=0.27.0", +] + +[project.optional-dependencies] +azure = [ + "azure-identity>=1.15.0", + "azure-storage-blob>=12.19.0", +] + +[project.scripts] +lisa-mcp = "lisa_mcp.server:main" + +[tool.setuptools] +packages = ["lisa_mcp", "lisa_mcp.tools"] + +[tool.setuptools.package-data] +lisa_mcp = ["context/*.md", "docs_index.yaml"] diff --git a/mcp/run_tests.py b/mcp/run_tests.py new file mode 100644 index 0000000000..269ad821ce --- /dev/null +++ b/mcp/run_tests.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""LISA MCP Server — test runner. + +Usage (from the mcp/ directory): + python run_tests.py # run all tests + python run_tests.py --unit # unit + functional tests only (fast) + python run_tests.py --integration # MCP protocol integration tests only + python run_tests.py --smoke # quick smoke test (tool registration only) + python run_tests.py --xml # output JUnit XML to test-results.xml + +Exit codes: + 0 — all tests passed + 1 — one or more tests failed +""" + +import argparse +import os +import sys +import unittest +from pathlib import Path + +# Ensure mcp/ is on sys.path +MCP_DIR = Path(__file__).resolve().parent +os.chdir(MCP_DIR) +if str(MCP_DIR) not in sys.path: + sys.path.insert(0, str(MCP_DIR)) + + +def _build_suite(mode: str) -> unittest.TestSuite: + """Build a test suite based on the selected mode.""" + loader = unittest.TestLoader() + suite = unittest.TestSuite() + + if mode in ("all", "unit"): + # Original unit tests + suite.addTests(loader.loadTestsFromName("tests.test_authoring")) + 
suite.addTests(loader.loadTestsFromName("tests.test_log_analysis")) + # Comprehensive functional tests + suite.addTests(loader.loadTestsFromName("tests.test_all_tools")) + + if mode in ("all", "integration"): + suite.addTests(loader.loadTestsFromName("tests.test_mcp_integration")) + + if mode == "smoke": + suite.addTests( + loader.loadTestsFromName("tests.test_all_tools.TestToolRegistration") + ) + + return suite + + +def main() -> int: + parser = argparse.ArgumentParser( + description="LISA MCP Server test runner", + ) + group = parser.add_mutually_exclusive_group() + group.add_argument( + "--unit", + action="store_true", + help="Run unit and functional tests only (fast, no subprocess)", + ) + group.add_argument( + "--integration", + action="store_true", + help="Run MCP protocol integration tests only (starts server subprocess)", + ) + group.add_argument( + "--smoke", + action="store_true", + help="Quick smoke test — verify all 24 tools are registered", + ) + parser.add_argument( + "--xml", + action="store_true", + help="Output JUnit XML report to test-results.xml", + ) + parser.add_argument( + "-v", + "--verbose", + action="store_true", + default=True, + help="Verbose output (default: True)", + ) + args = parser.parse_args() + + if args.unit: + mode = "unit" + elif args.integration: + mode = "integration" + elif args.smoke: + mode = "smoke" + else: + mode = "all" + + suite = _build_suite(mode) + + if args.xml: + try: + import xmlrunner # type: ignore[import-untyped] + + runner = xmlrunner.XMLTestRunner( + output="test-results", + verbosity=2 if args.verbose else 1, + ) + except ImportError: + print( + "WARNING: xmlrunner not installed. 
" + "Install with: pip install unittest-xml-reporting\n" + "Falling back to text output.\n", + file=sys.stderr, + ) + runner = unittest.TextTestRunner(verbosity=2 if args.verbose else 1) + else: + runner = unittest.TextTestRunner(verbosity=2 if args.verbose else 1) + + print(f"{'=' * 60}") + print(f"LISA MCP Server Tests — mode: {mode}") + print(f"{'=' * 60}\n") + + result = runner.run(suite) + + print(f"\n{'=' * 60}") + total = result.testsRun + failed = len(result.failures) + len(result.errors) + skipped = len(result.skipped) + passed = total - failed - skipped + print( + f"Results: {passed} passed, {failed} failed, {skipped} skipped / {total} total" + ) + print(f"{'=' * 60}") + + return 0 if result.wasSuccessful() else 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/mcp/server.py b/mcp/server.py new file mode 100644 index 0000000000..52b15bc29b --- /dev/null +++ b/mcp/server.py @@ -0,0 +1,13 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Convenience entrypoint — delegates to lisa_mcp.server. + +Run directly: python server.py [--transport stdio|sse] [--port 8080] +Installed: lisa-mcp [--transport stdio|sse] [--port 8080] +""" + +from lisa_mcp.server import main, mcp # noqa: F401 — mcp re-exported for tests + +if __name__ == "__main__": + main() diff --git a/mcp/tests/__init__.py b/mcp/tests/__init__.py new file mode 100644 index 0000000000..9a0454564d --- /dev/null +++ b/mcp/tests/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
diff --git a/mcp/tests/fixtures/sample_failing_run.log b/mcp/tests/fixtures/sample_failing_run.log new file mode 100644 index 0000000000..a0e3a144f9 --- /dev/null +++ b/mcp/tests/fixtures/sample_failing_run.log @@ -0,0 +1,38 @@ +2024-06-15 14:00:01 INFO lisa_runner: Starting LISA test run +2024-06-15 14:00:02 INFO lisa_runner: Platform: azure +2024-06-15 14:00:03 INFO lisa_runner: Runbook: sriov_tests.yml +2024-06-15 14:00:10 INFO environment[0]: Provisioning environment +2024-06-15 14:00:50 INFO environment[0]: Environment deployed successfully +2024-06-15 14:00:51 INFO node[0]: Connected to 10.0.0.10:22 +2024-06-15 14:00:52 INFO node[0]: OS: RHEL 9.5 +2024-06-15 14:01:00 INFO test_runner: Running test case: smoke_test +2024-06-15 14:01:01 INFO smoke_test: SSH port 22 is opened, connecting and rebooting 'node-0' +2024-06-15 14:01:35 INFO smoke_test: Reboot completed in 34.2s +smoke_test | PASSED | completed in 35s +2024-06-15 14:01:40 INFO test_runner: Running test case: verify_sriov_basic +2024-06-15 14:01:41 INFO verify_sriov_basic: Checking SRIOV device presence +2024-06-15 14:01:42 DEBUG verify_sriov_basic: lspci output: 0002:00:02.0 Ethernet controller: Mellanox MT27710 +2024-06-15 14:01:43 INFO verify_sriov_basic: Found 1 SRIOV VF device(s) +2024-06-15 14:01:44 ERROR verify_sriov_basic: AssertionError: Expected 2 SRIOV VF devices but found 1 +Traceback (most recent call last): + File "lisa/microsoft/testsuites/network/sriov.py", line 142, in verify_sriov_basic + assert_that(vf_count).described_as( + File "assertpy/assertpy.py", line 234, in is_equal_to + raise AssertionError(self.description) +AssertionError: Expected 2 SRIOV VF devices to match NIC count, but found 1. Check if accelerated networking is enabled on all NICs. 
+verify_sriov_basic | FAILED | Expected 2 SRIOV VF devices but found 1 +2024-06-15 14:02:00 INFO test_runner: Running test case: verify_gpu_driver +2024-06-15 14:02:01 INFO verify_gpu_driver: Checking GPU presence +2024-06-15 14:02:02 INFO verify_gpu_driver: No GPU devices found on this VM size +verify_gpu_driver | SKIPPED | No GPU found — VM size does not support GPU +2024-06-15 14:02:10 INFO test_runner: Running test case: verify_reboot_in_platform +2024-06-15 14:02:11 INFO verify_reboot_in_platform: Initiating platform reboot +2024-06-15 14:02:12 WARNING verify_reboot_in_platform: Reboot taking longer than expected +2024-06-15 14:07:12 ERROR verify_reboot_in_platform: TcpConnectionException: failed to connect to 10.0.0.10:22 after 300s +2024-06-15 14:07:13 INFO verify_reboot_in_platform: Checking serial console for kernel panic +2024-06-15 14:07:14 ERROR verify_reboot_in_platform: Serial console shows: Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(0,0) +2024-06-15 14:07:15 ERROR verify_reboot_in_platform: Call Trace: [] mount_block_root+0x1a3/0x2b0 +verify_reboot_in_platform | FAILED | Kernel panic during reboot - unable to mount root filesystem +2024-06-15 14:07:20 INFO test_runner: Test run completed +2024-06-15 14:07:20 INFO lisa_runner: Results: 1 passed, 2 failed, 1 skipped +2024-06-15 14:07:21 INFO lisa_runner: Total elapsed: 440 seconds diff --git a/mcp/tests/fixtures/sample_passing_run.log b/mcp/tests/fixtures/sample_passing_run.log new file mode 100644 index 0000000000..3d6f711a7d --- /dev/null +++ b/mcp/tests/fixtures/sample_passing_run.log @@ -0,0 +1,24 @@ +2024-06-15 10:00:01 INFO lisa_runner: Starting LISA test run +2024-06-15 10:00:02 INFO lisa_runner: Platform: azure +2024-06-15 10:00:03 INFO lisa_runner: Runbook: test_runbook.yml +2024-06-15 10:00:10 INFO environment[0]: Provisioning environment +2024-06-15 10:00:45 INFO environment[0]: Environment deployed successfully +2024-06-15 10:00:46 INFO node[0]: Connected to 
10.0.0.5:22 +2024-06-15 10:00:47 INFO node[0]: OS: Ubuntu 22.04 LTS +2024-06-15 10:01:00 INFO test_runner: Running test case: smoke_test +2024-06-15 10:01:01 INFO smoke_test: SSH port 22 is opened, connecting and rebooting 'node-0' +2024-06-15 10:01:30 INFO smoke_test: Node rebooted successfully +2024-06-15 10:01:31 INFO smoke_test: Reboot completed in 29.5s +smoke_test | PASSED | completed in 31s +2024-06-15 10:01:35 INFO test_runner: Running test case: verify_deployment_provision_synthetic_nic +2024-06-15 10:01:36 INFO verify_deployment_provision_synthetic_nic: SSH port 22 is opened +2024-06-15 10:02:00 INFO verify_deployment_provision_synthetic_nic: Reboot completed +verify_deployment_provision_synthetic_nic | PASSED | completed in 25s +2024-06-15 10:02:05 INFO test_runner: Running test case: verify_stop_start_in_platform +2024-06-15 10:02:06 INFO verify_stop_start_in_platform: Stopping VM via platform +2024-06-15 10:02:30 INFO verify_stop_start_in_platform: VM stopped, starting +2024-06-15 10:03:00 INFO verify_stop_start_in_platform: VM started, verifying connectivity +verify_stop_start_in_platform | PASSED | completed in 55s +2024-06-15 10:03:05 INFO test_runner: Test run completed +2024-06-15 10:03:05 INFO lisa_runner: Results: 3 passed, 0 failed, 0 skipped +2024-06-15 10:03:06 INFO lisa_runner: Total elapsed: 185 seconds diff --git a/mcp/tests/fixtures/sample_runbook.yml b/mcp/tests/fixtures/sample_runbook.yml new file mode 100644 index 0000000000..84065e81a4 --- /dev/null +++ b/mcp/tests/fixtures/sample_runbook.yml @@ -0,0 +1,37 @@ +name: sample-test-run +concurrency: 1 + +extension: + - "../../lisa/microsoft/testsuites" + +platform: + - type: azure + admin_username: "$(admin_username)" + admin_private_key_file: "$(admin_private_key_file)" + keep_environment: no + azure: + subscription_id: "$(subscription_id)" + deploy_location: "westus2" + marketplace: + publisher: canonical + offer: 0001-com-ubuntu-server-jammy + sku: 22_04-lts-gen2 + version: latest + 
def _call(tool_name: str, **kwargs: object) -> str:
    """Run a registered MCP tool directly and return whatever it returns.

    The tool is looked up by name among ``mcp._tool_manager.list_tools()``
    and its underlying function is called with ``kwargs``; the MCP protocol
    layer is not involved. An unknown name fails an assertion whose message
    lists the available tool names in sorted order.
    """
    registry = {}
    for tool in mcp._tool_manager.list_tools():
        registry[tool.name] = tool

    assert (
        tool_name in registry
    ), f"Tool '{tool_name}' not found. Available: {sorted(registry)}"

    # ``fn`` is the plain Python callable behind the registered tool.
    target = registry[tool_name].fn
    return target(**kwargs)
self.assertIn("priority=2", result) + + def test_with_features_and_requirements(self) -> None: + result = _call( + "lisa_scaffold_test_case", + area="network", + method_name="verify_sriov_failover", + description="Test SR-IOV failover", + priority=1, + supported_features="Sriov,NetworkInterface", + min_nic_count=2, + ) + self.assertIn("priority=1", result) + self.assertIn("Sriov", result) + self.assertIn("NetworkInterface", result) + self.assertIn("min_nic_count=2", result) + + def test_auto_prefix(self) -> None: + result = _call( + "lisa_scaffold_test_case", + area="kernel", + method_name="boot_time", + description="Measure boot time", + ) + self.assertIn("def verify_boot_time", result) + + +class TestGenerateRunbook(unittest.TestCase): + def test_azure_runbook(self) -> None: + result = _call( + "lisa_generate_runbook", + platform="azure", + area="provisioning", + priority=1, + location="westus2", + ) + self.assertIn("type: azure", result) + self.assertIn("subscription_id", result) + self.assertIn("provisioning", result) + self.assertIn("westus2", result) + self.assertIn("[0, 1]", result) + + def test_local_runbook(self) -> None: + result = _call("lisa_generate_runbook", platform="local") + self.assertIn("type: local", result) + self.assertNotIn("subscription_id", result) + + def test_with_image(self) -> None: + result = _call( + "lisa_generate_runbook", + platform="azure", + image="canonical 0001-com-ubuntu-server-jammy 22_04-lts-gen2 latest", + ) + self.assertIn("canonical", result) + self.assertIn("22_04-lts-gen2", result) + + def test_with_test_names(self) -> None: + result = _call( + "lisa_generate_runbook", + test_names="smoke_test,verify_reboot", + ) + self.assertIn("smoke_test", result) + self.assertIn("verify_reboot", result) + + +class TestValidateRunbook(unittest.TestCase): + def test_valid_runbook(self) -> None: + result = _call( + "lisa_validate_runbook", + runbook_content=SAMPLE_RUNBOOK.read_text(), + ) + self.assertIn("valid", result.lower()) + + 
def test_invalid_yaml(self) -> None: + result = _call("lisa_validate_runbook", runbook_content=": {bad yaml: [") + self.assertIn("error", result.lower()) + + def test_missing_platform(self) -> None: + result = _call( + "lisa_validate_runbook", + runbook_content="testcase:\n - criteria:\n area: demo\n", + ) + self.assertIn("platform", result.lower()) + + def test_missing_testcase(self) -> None: + result = _call( + "lisa_validate_runbook", + runbook_content="platform:\n - type: azure\n", + ) + self.assertIn("testcase", result.lower()) + + def test_unknown_platform_type(self) -> None: + result = _call( + "lisa_validate_runbook", + runbook_content=( + "platform:\n - type: unknown_platform\n" + "testcase:\n - criteria:\n area: test\n" + ), + ) + self.assertIn("unknown_platform", result) + + +class TestListTestRequirements(unittest.TestCase): + def test_nonexistent_test(self) -> None: + result = _call( + "lisa_list_test_requirements", test_name="nonexistent_test_xyz_123" + ) + self.assertIn("not found", result.lower()) + + +class TestWriteTest(unittest.TestCase): + def test_generates_design_plan(self) -> None: + result = _call( + "lisa_write_test", + description="SR-IOV VFs are created for each NIC", + area="network", + feature="Sriov", + ) + self.assertIn("Design Plan", result) + self.assertIn("Arrange", result) + self.assertIn("Act", result) + self.assertIn("Assert", result) + self.assertIn("Structured Metadata", result) + + def test_custom_names(self) -> None: + result = _call( + "lisa_write_test", + description="disk hotplug works", + area="storage", + class_name="DiskHotplug", + method_name="verify_disk_hotplug", + ) + self.assertIn("DiskHotplug", result) + self.assertIn("verify_disk_hotplug", result) + + def test_mark_dirty_detection(self) -> None: + result = _call( + "lisa_write_test", + description="kernel module loads after reboot", + area="kernel", + ) + self.assertIn("mark_dirty", result) + + def test_structured_metadata(self) -> None: + result = _call( + 
"lisa_write_test", + description="VF count stable after VM hot-resize", + area="network", + feature="Sriov", + tier=1, + platform="azure", + distro_notes="Ubuntu 24.04 only", + ) + self.assertIn("Structured Metadata", result) + self.assertIn('"area": "network"', result) + self.assertIn('"feature": "Sriov"', result) + self.assertIn('"tier": 1', result) + self.assertIn('"platform": "azure"', result) + + def test_existing_suite_detection(self) -> None: + result = _call( + "lisa_write_test", + description="verify CIFS module is functional", + area="core", + feature="storage", + ) + # Should find the existing storage.py suite and list its methods + self.assertIn("Existing tests in", result) + self.assertIn("existing_suites", result) + + +# ====================================================================== +# Log Analysis tools (8) +# ====================================================================== + + +class TestAnalyzeLog(unittest.TestCase): + def test_passing_log_from_file(self) -> None: + result = _call("lisa_analyze_log", log_path=str(PASSING_LOG)) + self.assertIn("passed", result.lower()) + self.assertIn("0 failed", result.lower()) + + def test_failing_log_from_file(self) -> None: + result = _call("lisa_analyze_log", log_path=str(FAILING_LOG)) + self.assertIn("failed", result.lower()) + self.assertIn("Kernel", result) + + def test_log_from_content(self) -> None: + result = _call( + "lisa_analyze_log", + log_content="smoke_test | PASSED | ok\nverify_x | FAILED | boom\n", + ) + self.assertIn("1", result) # 1 passed + self.assertIn("1", result) # 1 failed + + def test_empty_log(self) -> None: + result = _call("lisa_analyze_log", log_content="nothing relevant here\n") + self.assertIn("0 passed", result.lower()) + + def test_no_input_returns_error(self) -> None: + result = _call("lisa_analyze_log") + self.assertIn("error", result.lower()) + + def test_nonexistent_file(self) -> None: + result = _call("lisa_analyze_log", log_path="/nonexistent/path/log.txt") + 
self.assertIn("error", result.lower()) + + +class TestExplainFailure(unittest.TestCase): + def test_kernel_panic(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="Kernel panic - not syncing: VFS: Unable to mount root fs", + ) + self.assertIn("Kernel", result) + + def test_ssh_failure(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="TcpConnectionException: failed to connect to 10.0.0.5:22", + ) + self.assertIn("Connectivity", result) + + def test_assertion_failure(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="AssertionError: Expected 2 but got 1", + ) + self.assertIn("Assertion", result) + + def test_timeout(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="Operation timed out after 300 seconds", + ) + self.assertIn("Timeout", result) + + def test_provisioning_error(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="OverconstrainedAllocationRequest: cannot allocate", + ) + self.assertIn("Provisioning", result) + + def test_skipped(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="SkippedException: GPU not available", + ) + self.assertIn("Skipped", result) + + def test_unknown_failure(self) -> None: + result = _call( + "lisa_explain_failure", + failure_text="Something completely unexpected happened", + ) + # Should return a classification of some kind + self.assertIn("Failure Classification", result) + + +class TestSummarizeRun(unittest.TestCase): + def test_passing_run(self) -> None: + result = _call("lisa_summarize_run", log_path=str(PASSING_LOG)) + self.assertIn("Passed", result) + self.assertIn("0", result) # 0 failed + + def test_failing_run(self) -> None: + result = _call("lisa_summarize_run", log_path=str(FAILING_LOG)) + self.assertIn("Failed", result) + self.assertIn("Kernel", result) + + def test_from_content(self) -> None: + result = _call( + "lisa_summarize_run", + log_content="test_a | PASSED | 
class TestDownloadLogs(unittest.TestCase):
    """Input validation of the ``lisa_download_logs`` tool."""

    def test_rejects_http(self) -> None:
        """A plain-HTTP URL is refused with an error that mentions HTTPS."""
        result = _call(
            "lisa_download_logs",
            url="http://example.com/logs.tar.gz",
        )
        self.assertIn("Error", result)
        self.assertIn("HTTPS", result)

    def test_rejects_bad_url(self) -> None:
        """A string that is not a URL at all is refused with an error."""
        result = _call(
            "lisa_download_logs",
            url="not-a-url",
        )
        self.assertIn("Error", result)


class TestStartLogInvestigation(unittest.TestCase):
    """Behavior of the ``lisa_start_log_investigation`` tool."""

    def test_no_args_investigation(self) -> None:
        """Calling the tool without arguments reports the missing log_path.

        This test exercises ``lisa_start_log_investigation``, so it lives in
        this class rather than in ``TestDownloadLogs``.
        """
        result = _call("lisa_start_log_investigation")
        self.assertIn("Error", result)
        self.assertIn("log_path", result)

    def test_returns_investigation_context(self) -> None:
        """A valid folder plus an error message yields all report sections."""
        result = _call(
            "lisa_start_log_investigation",
            log_path=str(FIXTURES_DIR),
            error_message="TcpConnectionException",
        )
        self.assertIn("Log Files", result)
        self.assertIn("Pattern Hit Counts", result)
        self.assertIn("Initial Error Search", result)
        self.assertIn("Next Steps", result)

    def test_bad_path(self) -> None:
        """A path that does not exist is reported as an error."""
        result = _call(
            "lisa_start_log_investigation",
            log_path="/nonexistent/path/12345",
        )
        self.assertIn("Error", result)
search_string="ZZZ_UNIQUE_STRING_NOT_IN_LOGS_ZZZ", + path=str(FIXTURES_DIR), + ) + self.assertIn("no match", result.lower()) + + def test_bad_directory(self) -> None: + result = _call( + "lisa_search_log_files", + search_string="test", + path="/nonexistent/dir/xyz", + ) + self.assertIn("error", result.lower()) + + def test_extension_filter(self) -> None: + result = _call( + "lisa_search_log_files", + search_string="platform", + path=str(FIXTURES_DIR), + file_extensions=".log", + ) + # Should find matches only in .log files + if "match" in result.lower(): + self.assertIn(".log", result) + + +class TestReadLogFile(unittest.TestCase): + def test_read_passing_log(self) -> None: + result = _call("lisa_read_log_file", file_path=str(PASSING_LOG)) + self.assertIn("lisa_runner", result) + self.assertIn("Starting LISA", result) + + def test_read_range(self) -> None: + result = _call( + "lisa_read_log_file", + file_path=str(FAILING_LOG), + start_line=10, + line_count=5, + ) + # Should contain line numbers + self.assertIn("(10):", result) + self.assertIn("(14):", result) + + def test_bad_file(self) -> None: + result = _call("lisa_read_log_file", file_path="/nonexistent/file.log") + self.assertIn("error", result.lower()) + + +class TestListLogFiles(unittest.TestCase): + def test_list_fixtures(self) -> None: + result = _call("lisa_list_log_files", folder_path=str(FIXTURES_DIR)) + self.assertIn("sample_passing_run.log", result) + self.assertIn("sample_failing_run.log", result) + + def test_extension_filter(self) -> None: + result = _call( + "lisa_list_log_files", + folder_path=str(FIXTURES_DIR), + file_extensions=".yml", + ) + self.assertIn("sample_runbook.yml", result) + self.assertNotIn(".log", result) + + def test_bad_directory(self) -> None: + result = _call("lisa_list_log_files", folder_path="/nonexistent/dir/xyz") + self.assertIn("error", result.lower()) + + +# ====================================================================== +# Bug Fixing / Debugging tools (3) +# 
====================================================================== + + +class TestDiagnoseBug(unittest.TestCase): + def test_diagnose_with_assertion(self) -> None: + result = _call( + "lisa_diagnose_bug", + test_name="verify_sriov_basic", + failure_log=( + "AssertionError: Expected 2 SRIOV VF devices but found 1\n" + "assert_that(vf_count).is_equal_to(2)" + ), + ) + self.assertIn("Assertion", result) + self.assertIn("verify_sriov_basic", result) + + def test_diagnose_with_connectivity(self) -> None: + result = _call( + "lisa_diagnose_bug", + test_name="verify_reboot", + failure_log="TcpConnectionException: failed to connect to 10.0.0.5:22", + ) + self.assertIn("Connectivity", result) + + def test_unknown_test(self) -> None: + result = _call( + "lisa_diagnose_bug", + test_name="totally_fake_test_xyz", + failure_log="some error", + ) + # Should still provide classification even without source + self.assertIn("totally_fake_test_xyz", result) + + +class TestFixRunbook(unittest.TestCase): + def test_fix_missing_platform(self) -> None: + result = _call( + "lisa_fix_runbook", + runbook_content="testcase:\n - criteria:\n area: demo\n", + ) + self.assertIn("platform", result.lower()) + self.assertIn("fix", result.lower()) + + def test_fix_boolean_keep_environment(self) -> None: + result = _call( + "lisa_fix_runbook", + runbook_content=textwrap.dedent( + """\ + platform: + - type: azure + keep_environment: true + testcase: + - criteria: + area: demo + """ + ), + ) + self.assertIn("always", result) + + def test_fix_platform_as_dict(self) -> None: + result = _call( + "lisa_fix_runbook", + runbook_content=textwrap.dedent( + """\ + platform: + type: azure + testcase: + - criteria: + area: demo + """ + ), + ) + self.assertIn("list", result.lower()) + + def test_valid_runbook_no_fixes(self) -> None: + result = _call("lisa_fix_runbook", runbook_content=SAMPLE_RUNBOOK.read_text()) + # The sample runbook uses YAML `no` for keep_environment which is + # parsed as boolean false — 
the tool fixes it to the string "no". + # Either "no structural issues" or a fix report is acceptable. + self.assertIsInstance(result, str) + self.assertGreater(len(result), 10) + + def test_invalid_yaml(self) -> None: + result = _call("lisa_fix_runbook", runbook_content="{{bad: yaml") + self.assertIn("error", result.lower()) + + +class TestExplainError(unittest.TestCase): + def test_tcp_connection(self) -> None: + result = _call("lisa_explain_error", error_text="TcpConnectionException") + self.assertIn("TCP", result) + self.assertIn("SSH", result.upper()) + + def test_skipped_exception(self) -> None: + result = _call("lisa_explain_error", error_text="SkippedException") + self.assertIn("prerequisite", result.lower()) + + def test_quota_exceeded(self) -> None: + result = _call("lisa_explain_error", error_text="QuotaExceeded") + self.assertIn("quota", result.lower()) + + def test_overconstrained(self) -> None: + result = _call( + "lisa_explain_error", + error_text="OverconstrainedAllocationRequest", + ) + self.assertIn("allocat", result.lower()) + + def test_unknown_error(self) -> None: + result = _call( + "lisa_explain_error", + error_text="CompletelyMadeUpExceptionXyz123", + ) + # Should still provide some output (from error_patterns.md search) + self.assertIsInstance(result, str) + self.assertGreater(len(result), 10) + + +# ====================================================================== +# Knowledge tools (5) +# ====================================================================== + + +class TestExplainConcept(unittest.TestCase): + def test_runbook(self) -> None: + result = _call("lisa_explain_concept", concept="runbook") + self.assertIn("runbook", result.lower()) + self.assertIn("YAML", result) + + def test_node(self) -> None: + result = _call("lisa_explain_concept", concept="node") + self.assertIn("node", result.lower()) + + def test_feature(self) -> None: + result = _call("lisa_explain_concept", concept="feature") + self.assertIn("feature", 
result.lower()) + + def test_tool(self) -> None: + result = _call("lisa_explain_concept", concept="tool") + self.assertIn("tool", result.lower()) + + def test_simple_requirement(self) -> None: + result = _call("lisa_explain_concept", concept="simple_requirement") + self.assertIn("requirement", result.lower()) + + def test_priority(self) -> None: + result = _call("lisa_explain_concept", concept="priority") + self.assertIn("0", result) # T0 + + def test_environment(self) -> None: + result = _call("lisa_explain_concept", concept="environment") + self.assertIn("environment", result.lower()) + + def test_unknown_concept(self) -> None: + result = _call("lisa_explain_concept", concept="xyzzy_nonexistent_thing") + self.assertIn("not found", result.lower()) + + +class TestGetApiReference(unittest.TestCase): + def test_find_testsuite(self) -> None: + result = _call("lisa_get_api_reference", symbol="TestSuiteMetadata") + # Should find the decorator + self.assertTrue( + "TestSuiteMetadata" in result, + f"Expected TestSuiteMetadata in result, got: {result[:200]}", + ) + + def test_unknown_symbol(self) -> None: + result = _call("lisa_get_api_reference", symbol="CompletelyFakeSymbolXyz") + self.assertIn("not found", result.lower()) + + +class TestFindExamples(unittest.TestCase): + def test_search_network(self) -> None: + result = _call("lisa_find_examples", query="network") + # Should return some results or "no test files" + self.assertIsInstance(result, str) + self.assertGreater(len(result), 10) + + def test_search_empty_query(self) -> None: + result = _call("lisa_find_examples", query="a") + # Too short keyword + self.assertIsInstance(result, str) + + +class TestListTools(unittest.TestCase): + def test_returns_tools(self) -> None: + result = _call("lisa_list_tools") + # Should list at least some known tools + has_content = len(result) > 50 + self.assertTrue( + has_content, + f"Expected tool listing, got: {result[:200]}", + ) + + +class TestListFeatures(unittest.TestCase): + def 
class TestToolRegistration(unittest.TestCase):
    """Cross-cutting checks that the server registers exactly the expected tools."""

    # Single source of truth for the tool inventory; ``test_tool_count``
    # derives its expected number from this set so the two cannot drift apart.
    EXPECTED_TOOLS = {
        # test_writer
        "lisa_get_test_writer_guidelines",
        "lisa_scaffold_test_suite",
        "lisa_scaffold_test_case",
        "lisa_list_test_requirements",
        "lisa_write_test",
        # runbook
        "lisa_generate_runbook",
        "lisa_validate_runbook",
        "lisa_fix_runbook",
        # log_analysis
        "lisa_analyze_log",
        "lisa_explain_failure",
        "lisa_summarize_run",
        "lisa_download_logs",
        "lisa_start_log_investigation",
        "lisa_get_log_analysis_prompts",
        "lisa_search_log_files",
        "lisa_read_log_file",
        "lisa_list_log_files",
        "lisa_diagnose_bug",
        # execution
        "lisa_run",
        # knowledge
        "lisa_explain_concept",
        "lisa_explain_error",
        "lisa_get_api_reference",
        "lisa_find_examples",
        "lisa_list_tools",
        "lisa_list_features",
    }

    def test_all_tools_registered(self) -> None:
        """Every name in EXPECTED_TOOLS is present on the server."""
        registered = {t.name for t in mcp._tool_manager.list_tools()}
        missing = self.EXPECTED_TOOLS - registered
        self.assertEqual(
            missing,
            set(),
            f"Missing tools: {missing}",
        )

    def test_tool_count(self) -> None:
        """The server registers no more and no fewer tools than expected."""
        tools = mcp._tool_manager.list_tools()
        count = len(tools)
        expected = len(self.EXPECTED_TOOLS)
        # Name the extras so a newly added tool is easy to spot in the failure.
        unexpected = sorted({t.name for t in tools} - self.EXPECTED_TOOLS)
        self.assertEqual(
            count,
            expected,
            f"Expected {expected} tools, got {count}; unexpected: {unexpected}",
        )

    def test_all_tools_callable(self) -> None:
        """Every registered tool should have a callable function."""
        for tool in mcp._tool_manager.list_tools():
            self.assertTrue(
                callable(tool.fn),
                f"Tool '{tool.name}' is not callable",
            )
class TestScaffoldTestSuite(unittest.TestCase):
    """Validate the snake_case name conversion used by the test-writer tools."""

    def test_generates_valid_class(self) -> None:
        """``_to_snake_case`` converts CamelCase names, including acronym runs."""
        from lisa_mcp.tools.test_writer import _to_snake_case

        cases = {
            "MyNewFeature": "my_new_feature",
            "GPUValidation": "gpu_validation",
            "SRIOVTest": "sriov_test",
            "Simple": "simple",
        }
        # assertEqual instead of bare ``assert``: it is not stripped under
        # ``python -O`` and reports both values on failure. subTest keeps the
        # remaining cases running after one mismatch.
        for class_name, expected in cases.items():
            with self.subTest(class_name=class_name):
                self.assertEqual(_to_snake_case(class_name), expected)
def _validate(runbook_content: str) -> str:
    """Check a runbook's top-level sections and describe what is missing.

    The YAML text is parsed with ``yaml.safe_load``. A document that is not a
    mapping yields ``"Error: not a mapping"``. A missing ``platform`` or
    ``testcase``/``testcase_raw`` key is an error; a missing ``notifier`` or
    ``extension`` key is a warning. With nothing to report, a "looks valid"
    message is returned; otherwise the errors and warnings are joined into a
    single line.
    """
    parsed = yaml.safe_load(runbook_content)
    if not isinstance(parsed, dict):
        return "Error: not a mapping"

    has_testcase = "testcase" in parsed or "testcase_raw" in parsed
    error_checks = (
        ("platform" in parsed, "Missing `platform` section."),
        (has_testcase, "Missing `testcase` section."),
    )
    warning_checks = (
        ("notifier" in parsed, "No notifier section."),
        ("extension" in parsed, "No extension section."),
    )

    problems = [message for present, message in error_checks if not present]
    notes = [message for present, message in warning_checks if not present]

    if not (problems or notes):
        return "Runbook structure looks valid. No issues found."

    report = []
    if problems:
        report.append("Errors: " + "; ".join(problems))
    if notes:
        report.append("Warnings: " + "; ".join(notes))
    return " ".join(report)
+ """Validate failure classification.""" + + def test_classifies_kernel_panic(self) -> None: + text = "Kernel panic - not syncing: VFS: Unable to mount root fs" + result = _classify(text) + self.assertIn("kernel", result.lower()) + + def test_classifies_connectivity(self) -> None: + text = "TcpConnectionException: failed to connect to 10.0.0.5:22" + result = _classify(text) + self.assertIn("connect", result.lower()) + + def test_classifies_assertion(self) -> None: + text = "AssertionError: assert_that(0).is_equal_to(1)" + result = _classify(text) + self.assertIn("assert", result.lower()) + + def test_classifies_timeout(self) -> None: + text = "Operation timed out after 300 seconds" + result = _classify(text) + self.assertIn("timeout", result.lower()) + + +def _classify(text: str) -> str: + """Simple classification for testing.""" + text_lower = text.lower() + categories = [] + if "kernel panic" in text_lower or "oops" in text_lower: + categories.append("Kernel Error") + if "tcpconnection" in text_lower or "connection" in text_lower: + categories.append("Connectivity Error") + if "assertionerror" in text_lower or "assert_that" in text_lower: + categories.append("Assertion Failure") + if "timeout" in text_lower or "timed out" in text_lower: + categories.append("Timeout") + return ", ".join(categories) if categories else "Unknown" + + +if __name__ == "__main__": + unittest.main() diff --git a/mcp/tests/test_mcp_integration.py b/mcp/tests/test_mcp_integration.py new file mode 100644 index 0000000000..24f0ae4370 --- /dev/null +++ b/mcp/tests/test_mcp_integration.py @@ -0,0 +1,189 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""MCP protocol integration tests. + +These tests start the LISA MCP server as a subprocess, connect over stdio +using the MCP client SDK, and invoke tools through the protocol — exactly +the way Claude Desktop or VS Code would. 
+ +Run: + python -m pytest tests/test_mcp_integration.py -v + python -m unittest tests.test_mcp_integration -v +""" + +import asyncio +import sys +import unittest +from pathlib import Path + +_MCP_DIR = Path(__file__).resolve().parent.parent +if str(_MCP_DIR) not in sys.path: + sys.path.insert(0, str(_MCP_DIR)) + +FIXTURES_DIR = Path(__file__).parent / "fixtures" + +# Import MCP client SDK at module level — tests skip if unavailable. +try: + from mcp.client.session import ClientSession + from mcp.client.stdio import StdioServerParameters, stdio_client + + _MCP_CLIENT_AVAILABLE = True +except ImportError: + _MCP_CLIENT_AVAILABLE = False + + +def _run(coro): + """Run an async coroutine synchronously.""" + return asyncio.run(coro) + + +def _make_server_params(): + """Create StdioServerParameters pointing at our server.py.""" + return StdioServerParameters( + command=sys.executable, + args=[str(_MCP_DIR / "server.py")], + cwd=str(_MCP_DIR), + ) + + +class TestMCPProtocol(unittest.TestCase): + """Test the MCP server over the real stdio protocol.""" + + def setUp(self) -> None: + if not _MCP_CLIENT_AVAILABLE: + self.skipTest("MCP client SDK not available") + + async def _connect_and_call(self, tool_name: str, arguments: dict) -> str: + """Start the MCP server, connect, call a tool, return the result text.""" + async with stdio_client(_make_server_params()) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + + result = await session.call_tool(tool_name, arguments) + texts = [] + for item in result.content: + if hasattr(item, "text"): + texts.append(item.text) + return "\n".join(texts) + + async def _list_tools(self) -> list: + """Start the server and list all available tools.""" + async with stdio_client(_make_server_params()) as (read, write): + async with ClientSession(read, write) as session: + await session.initialize() + result = await session.list_tools() + return result.tools + + # -- Protocol-level tests -- + + def 
test_server_starts_and_lists_tools(self) -> None: + tools = _run(self._list_tools()) + names = {t.name for t in tools} + self.assertEqual(len(names), 24, f"Expected 24 tools, got {len(names)}") + self.assertIn("lisa_analyze_log", names) + self.assertIn("lisa_write_test", names) + self.assertIn("lisa_explain_concept", names) + + def test_call_explain_concept(self) -> None: + result = _run( + self._connect_and_call("lisa_explain_concept", {"concept": "runbook"}) + ) + self.assertIn("runbook", result.lower()) + self.assertIn("YAML", result) + + def test_call_analyze_log(self) -> None: + log_text = "smoke_test | PASSED | ok\n" "verify_x | FAILED | assertion error\n" + result = _run( + self._connect_and_call("lisa_analyze_log", {"log_content": log_text}) + ) + self.assertIn("passed", result.lower()) + self.assertIn("failed", result.lower()) + + def test_call_explain_failure(self) -> None: + result = _run( + self._connect_and_call( + "lisa_explain_failure", + {"failure_text": "TcpConnectionException: failed to connect"}, + ) + ) + self.assertIn("Connectivity", result) + + def test_call_validate_runbook(self) -> None: + result = _run( + self._connect_and_call( + "lisa_validate_runbook", + { + "runbook_content": FIXTURES_DIR.joinpath( + "sample_runbook.yml" + ).read_text() + }, + ) + ) + self.assertIn("valid", result.lower()) + + def test_call_scaffold_test_suite(self) -> None: + result = _run( + self._connect_and_call( + "lisa_scaffold_test_suite", + { + "area": "network", + "class_name": "SriovTest", + "description": "Test SR-IOV", + }, + ) + ) + self.assertIn("class SriovTest", result) + + def test_call_generate_runbook(self) -> None: + result = _run( + self._connect_and_call( + "lisa_generate_runbook", + {"platform": "azure", "area": "provisioning"}, + ) + ) + self.assertIn("type: azure", result) + + def test_call_explain_error(self) -> None: + result = _run( + self._connect_and_call( + "lisa_explain_error", + {"error_text": "TcpConnectionException"}, + ) + ) + 
self.assertIn("TCP", result) + + def test_call_list_log_files(self) -> None: + result = _run( + self._connect_and_call( + "lisa_list_log_files", + {"folder_path": str(FIXTURES_DIR)}, + ) + ) + self.assertIn("sample_passing_run.log", result) + + def test_call_search_log_files(self) -> None: + result = _run( + self._connect_and_call( + "lisa_search_log_files", + {"search_string": "Kernel panic", "path": str(FIXTURES_DIR)}, + ) + ) + self.assertIn("match", result.lower()) + + def test_call_read_log_file(self) -> None: + result = _run( + self._connect_and_call( + "lisa_read_log_file", + {"file_path": str(FIXTURES_DIR / "sample_passing_run.log")}, + ) + ) + self.assertIn("lisa_runner", result) + + def test_call_get_log_analysis_prompts(self) -> None: + result = _run(self._connect_and_call("lisa_get_log_analysis_prompts", {})) + self.assertIn("Log Search", result) + + +if __name__ == "__main__": + unittest.main() From 300225d05b9a41809941656d828c98ed2c42da9d Mon Sep 17 00:00:00 2001 From: Johnson George Date: Mon, 1 Jun 2026 13:38:05 -0700 Subject: [PATCH 2/2] mcp: apply PR #4508 review feedback --- mcp/.dockerignore | 22 ++ mcp/Dockerfile | 6 + mcp/lisa_mcp/context/concepts.md | 2 +- mcp/lisa_mcp/docs_index.yaml | 41 +--- mcp/lisa_mcp/server.py | 26 ++- mcp/lisa_mcp/tools/knowledge.py | 3 +- mcp/lisa_mcp/tools/log_analysis.py | 316 ++++++++++++++++++----------- mcp/lisa_mcp/tools/runbook.py | 2 +- mcp/lisa_mcp/tools/test_writer.py | 3 +- mcp/run_tests.py | 4 +- mcp/tests/test_all_tools.py | 8 +- mcp/tests/test_mcp_integration.py | 2 +- pyproject.toml | 6 + 13 files changed, 265 insertions(+), 176 deletions(-) create mode 100644 mcp/.dockerignore diff --git a/mcp/.dockerignore b/mcp/.dockerignore new file mode 100644 index 0000000000..ce87d0de0c --- /dev/null +++ b/mcp/.dockerignore @@ -0,0 +1,22 @@ +# Build / packaging artifacts +**/__pycache__ +**/*.pyc +**/*.pyo +**/*.egg-info +build/ +dist/ + +# Virtualenvs and editor state +.venv/ +venv/ +.env +.env.* +.vscode/ 
+.idea/ + +# Internal / local-only scripts and scratch files (gitignored) +tmp/ + +# Tests are still copied (used by smoke tests in CI), but exclude fixtures +# generated locally: +tests/__pycache__/ diff --git a/mcp/Dockerfile b/mcp/Dockerfile index d8180863cc..53a0230681 100644 --- a/mcp/Dockerfile +++ b/mcp/Dockerfile @@ -20,6 +20,12 @@ RUN pip install --no-cache-dir "/app/lisa/mcp[azure]" # Point the MCP server at the cloned repo ENV LISA_REPO_ROOT=/app/lisa +# Drop privileges: run as a non-root user. /app must remain readable but +# writable areas (logs, downloads) are scoped to /home/mcp at runtime. +RUN useradd --create-home --shell /usr/sbin/nologin --uid 10001 mcp \ + && chown -R mcp:mcp /app +USER mcp + EXPOSE 8080 ENTRYPOINT ["lisa-mcp"] diff --git a/mcp/lisa_mcp/context/concepts.md b/mcp/lisa_mcp/context/concepts.md index 2e09f8d1b6..2660f3f9ac 100644 --- a/mcp/lisa_mcp/context/concepts.md +++ b/mcp/lisa_mcp/context/concepts.md @@ -19,7 +19,7 @@ platform: # one or more platform configs - type: azure admin_username: "$(admin_username)" admin_private_key_file: "$(admin_private_key_file)" - keep_environment: no # "no", "always", or "failed" + keep_environment: "no" # "no", "always", or "failed" environment: # optional pre-defined environments environments: diff --git a/mcp/lisa_mcp/docs_index.yaml b/mcp/lisa_mcp/docs_index.yaml index 61cb371fbf..0ec4a86591 100644 --- a/mcp/lisa_mcp/docs_index.yaml +++ b/mcp/lisa_mcp/docs_index.yaml @@ -90,37 +90,6 @@ tools: - docs/write_test/write_case.rst # ── knowledge.py ─────────────────────────────────────────────── - lisa_explain_concept: - primary: docs/write_test/concepts.rst - supplementary: [] - - lisa_get_api_reference: - primary: docs/write_test/write_case.rst - supplementary: [] - - lisa_find_examples: - primary: docs/write_test/write_case.rst - supplementary: [] - - lisa_list_tools: - primary: docs/write_test/write_case.rst - supplementary: [] - - lisa_list_features: - primary: docs/write_test/write_case.rst 
- supplementary: [] - - lisa_explain_error: - primary: docs/run_test/troubleshoot_failures.rst - supplementary: [] - - # ── execution.py ─────────────────────────────────────────────── - lisa_run: - primary: docs/run_test/run.rst - supplementary: - - docs/run_test/command_line.rst - - # ── Framework Knowledge ──────────────────────────────────────── lisa_explain_concept: primary: docs/write_test/concepts.rst supplementary: @@ -146,6 +115,16 @@ tools: primary: docs/write_test/extension.rst supplementary: [] + lisa_explain_error: + primary: docs/run_test/troubleshoot_failures.rst + supplementary: [] + + # ── execution.py ─────────────────────────────────────────────── + lisa_run: + primary: docs/run_test/run.rst + supplementary: + - docs/run_test/command_line.rst + # ── Topic index (used by explain_concept for targeted lookup) ──── # diff --git a/mcp/lisa_mcp/server.py b/mcp/lisa_mcp/server.py index 1427bc3173..6f6e37b96b 100644 --- a/mcp/lisa_mcp/server.py +++ b/mcp/lisa_mcp/server.py @@ -6,12 +6,13 @@ import argparse import logging +from mcp.server.fastmcp import FastMCP + from lisa_mcp.tools.execution import register_execution_tools from lisa_mcp.tools.knowledge import register_knowledge_tools from lisa_mcp.tools.log_analysis import register_log_analysis_tools from lisa_mcp.tools.runbook import register_runbook_tools from lisa_mcp.tools.test_writer import register_test_writer_tools -from mcp.server.fastmcp import FastMCP logging.basicConfig(level=logging.INFO) log = logging.getLogger("lisa-mcp") @@ -88,19 +89,18 @@ def main() -> None: import os import uvicorn + from mcp.server.sse import SseServerTransport from starlette.applications import Starlette from starlette.middleware import Middleware from starlette.middleware.trustedhost import TrustedHostMiddleware - from starlette.routing import Mount, Route - - from mcp.server.sse import SseServerTransport + from starlette.routing import Mount sse = SseServerTransport("/messages/") - async def handle_sse(request): - 
async with sse.connect_sse( - request.scope, request.receive, request._send - ) as streams: + # Raw ASGI endpoint — avoids Starlette Request._send (private API + # that has changed between versions and silently breaks SSE). + async def handle_sse(scope, receive, send): + async with sse.connect_sse(scope, receive, send) as streams: await mcp._mcp_server.run( streams[0], streams[1], @@ -115,7 +115,7 @@ async def handle_sse(request): app = Starlette( routes=[ - Route("/sse", endpoint=handle_sse), + Mount("/sse", app=handle_sse), Mount("/messages/", app=sse.handle_post_message), ], middleware=[ @@ -126,12 +126,18 @@ async def handle_sse(request): ], ) + # Restrict which proxy IPs may set X-Forwarded-* headers. Set + # FORWARDED_ALLOW_IPS to your reverse proxy's IP(s) in deployment. + # Defaults to loopback to prevent client-side spoofing of the + # forwarded client IP. + forwarded_allow_ips = os.environ.get("FORWARDED_ALLOW_IPS", "127.0.0.1") + uvicorn.run( app, host=args.host, port=args.port, log_level="info", - forwarded_allow_ips="*", + forwarded_allow_ips=forwarded_allow_ips, proxy_headers=True, ) else: diff --git a/mcp/lisa_mcp/tools/knowledge.py b/mcp/lisa_mcp/tools/knowledge.py index 310614f01d..e9a2f3f110 100644 --- a/mcp/lisa_mcp/tools/knowledge.py +++ b/mcp/lisa_mcp/tools/knowledge.py @@ -8,13 +8,14 @@ import re from pathlib import Path +from mcp.server.fastmcp import FastMCP + from lisa_mcp.tools._repo import ( find_repo_root, load_context_file, load_doc_for_topic, load_docs_for_tool, ) -from mcp.server.fastmcp import FastMCP def register_knowledge_tools(mcp: FastMCP) -> None: # noqa: C901 diff --git a/mcp/lisa_mcp/tools/log_analysis.py b/mcp/lisa_mcp/tools/log_analysis.py index 5586eb1f13..d40bdce314 100644 --- a/mcp/lisa_mcp/tools/log_analysis.py +++ b/mcp/lisa_mcp/tools/log_analysis.py @@ -10,15 +10,17 @@ import shutil import tarfile import tempfile +import time import zipfile from pathlib import Path from typing import Optional from urllib.parse import 
unquote, urlparse from urllib.request import Request, urlopen -from lisa_mcp.tools._repo import find_repo_root, load_context_file, load_docs_for_tool from mcp.server.fastmcp import FastMCP +from lisa_mcp.tools._repo import find_repo_root, load_context_file, load_docs_for_tool + def _load_ai_prompts() -> str: """Load the LISA AI log analyzer prompts from lisa/ai/prompts/default/. @@ -394,102 +396,24 @@ def lisa_download_logs( url: HTTPS URL, Azure Blob URL, or Azure Portal storage URL auth_token: Optional bearer token for non-Azure URLs """ - # Auto-convert Azure Portal URLs to blob prefix downloads - portal_info = _parse_portal_storage_url(url) - if portal_info: - download_dir = tempfile.mkdtemp(prefix="lisa_logs_") - try: - result_dir, count = _download_azure_blob_prefix( - portal_info["account"], - portal_info["container"], - portal_info["prefix"], - download_dir, - ) - return ( - f"**Downloaded** {count} file(s) → `{result_dir}`\n\n" - f"Use this path with:\n" - f'- `lisa_start_log_investigation(log_path="{result_dir}")`\n' - f'- `lisa_search_log_files(path="{result_dir}", ...)`\n' - f'- `lisa_list_log_files(folder_path="{result_dir}")`' - ) - except Exception as exc: - shutil.rmtree(download_dir, ignore_errors=True) - return f"**Error:** Download failed — {type(exc).__name__}: {exc}" - - parsed = urlparse(url) - if parsed.scheme not in ("https",): - return "**Error:** Only HTTPS URLs are supported." - if not parsed.hostname: - return "**Error:** Could not parse hostname from URL." 
- - is_azure_blob = parsed.hostname and parsed.hostname.endswith( - ".blob.core.windows.net" - ) - - # Azure blob prefix (virtual directory) — list + download all - if is_azure_blob and not auth_token: - path_parts = [p for p in parsed.path.strip("/").split("/") if p] - if len(path_parts) >= 2: - container = path_parts[0] - prefix = "/".join(path_parts[1:]) - account = parsed.hostname.split(".")[0] - download_dir = tempfile.mkdtemp(prefix="lisa_logs_") - try: - result_dir, count = _download_azure_blob_prefix( - account, - container, - prefix, - download_dir, - ) - return ( - f"**Downloaded** {count} file(s) → `{result_dir}`\n\n" - f"Use this path with:\n" - f"- `lisa_start_log_investigation" - f'(log_path="{result_dir}")`\n' - f"- `lisa_search_log_files" - f'(path="{result_dir}", ...)`\n' - f"- `lisa_list_log_files" - f'(folder_path="{result_dir}")`' - ) - except Exception as exc: - shutil.rmtree(download_dir, ignore_errors=True) - return ( - f"**Error:** Download failed — " f"{type(exc).__name__}: {exc}" - ) - - download_dir = tempfile.mkdtemp(prefix="lisa_logs_") - filename = os.path.basename(parsed.path) or "logs" - # Sanitize filename - filename = re.sub(r"[^\w.\-]", "_", filename) - if not filename: - filename = "logs" - download_path = os.path.join(download_dir, filename) - try: - headers = {} - if auth_token: - headers["Authorization"] = f"Bearer {auth_token}" - req = Request(url, headers=headers) - with urlopen(req, timeout=120) as resp: # noqa: S310 - with open(download_path, "wb") as f: - shutil.copyfileobj(resp, f) - - size_mb = os.path.getsize(download_path) / (1024 * 1024) - result_dir = _extract_archive(download_path, download_dir) - - file_count = sum(1 for _, _, files in os.walk(result_dir) for _ in files) + result_dir, count, size_mb = _download_url_to_dir(url, auth_token) + except Exception as exc: + return f"**Error:** Download failed — {type(exc).__name__}: {exc}" - return ( + if size_mb is not None: + header = ( f"**Downloaded** {size_mb:.1f} 
MB → `{result_dir}`\n" - f"**Files:** {file_count}\n\n" - f"Use this path with:\n" - f'- `lisa_start_log_investigation(log_path="{result_dir}")`\n' - f'- `lisa_search_log_files(path="{result_dir}", ...)`\n' - f'- `lisa_list_log_files(folder_path="{result_dir}")`' + f"**Files:** {count}\n\n" ) - except Exception as exc: - shutil.rmtree(download_dir, ignore_errors=True) - return f"**Error:** Download failed — {type(exc).__name__}: {exc}" + else: + header = f"**Downloaded** {count} file(s) → `{result_dir}`\n\n" + return ( + header + "Use this path with:\n" + f'- `lisa_start_log_investigation(log_path="{result_dir}")`\n' + f'- `lisa_search_log_files(path="{result_dir}", ...)`\n' + f'- `lisa_list_log_files(folder_path="{result_dir}")`' + ) @mcp.tool() def lisa_start_log_investigation( @@ -543,14 +467,12 @@ def lisa_start_log_investigation( """ # Resolve log directory — either from local path or downloaded URL if log_url and not log_path: - download_result = lisa_download_logs(url=log_url, auth_token=auth_token) - if download_result.startswith("**Error:"): - return download_result - # Extract the path from the download result - path_match = re.search(r"`(/[^`]+)`", download_result) - if not path_match: - return "**Error:** Could not determine downloaded log path." 
- resolved_path = path_match.group(1) + try: + resolved_path, _count, _size_mb = _download_url_to_dir( + log_url, auth_token + ) + except Exception as exc: + return f"**Error:** Download failed — {type(exc).__name__}: {exc}" elif log_path: resolved_path = log_path else: @@ -724,7 +646,10 @@ def lisa_search_log_files( file_extensions: Comma-separated extensions to include (default: ``.log,.txt,.out``) """ - path_obj = Path(path) + resolved, err = _resolve_under_log_root(path) + if err: + return err + path_obj = resolved if not path_obj.is_dir(): return f"**Error:** Directory not found: {path}" @@ -800,7 +725,10 @@ def lisa_read_log_file( start_line: Line number to start reading from (1-based, default 1) line_count: Number of lines to read (default 200, max 300) """ - p = Path(file_path) + resolved, err = _resolve_under_log_root(file_path) + if err: + return err + p = resolved if not p.is_file(): return f"**Error:** File not found: {file_path}" @@ -859,7 +787,10 @@ def lisa_list_log_files( recursive: Whether to search subdirectories (default True) max_files: Maximum number of files to return (default 200) """ - p = Path(folder_path) + resolved, err = _resolve_under_log_root(folder_path) + if err: + return err + p = resolved if not p.is_dir(): return f"**Error:** Directory not found: {folder_path}" @@ -970,6 +901,37 @@ def lisa_diagnose_bug( _MAX_SEARCH_MATCHES = 200 _MAX_READ_LINES = 300 _MAX_READ_CHARS = 30000 +# Cap remote downloads to defend against disk exhaustion in hosted SSE mode. +_MAX_DOWNLOAD_BYTES = 2 * 1024 * 1024 * 1024 # 2 GB + + +def _resolve_under_log_root(path: str) -> tuple[Optional[Path], Optional[str]]: + """Resolve *path* and confirm it lives under ``LISA_LOG_ROOT`` (if set). + + When the env var is unset (typical for local stdio usage), the path is + returned unchanged. When set, any path that resolves outside the root — + or that doesn't exist as a child of it — is rejected. 
This blocks remote + file disclosure when the server is exposed over SSE. + """ + log_root = os.environ.get("LISA_LOG_ROOT") + try: + resolved = Path(path).resolve() + except (OSError, RuntimeError) as exc: + return None, f"**Error:** Could not resolve path `{path}`: {exc}" + if not log_root: + return resolved, None + try: + root = Path(log_root).resolve() + except (OSError, RuntimeError) as exc: + return None, f"**Error:** LISA_LOG_ROOT misconfigured: {exc}" + try: + resolved.relative_to(root) + except ValueError: + return None, ( + f"**Error:** Path `{path}` is outside the configured " + f"LISA_LOG_ROOT (`{root}`)." + ) + return resolved, None def _search_in_files( @@ -1028,18 +990,23 @@ def _get_log_text( def _extract_test_results(text: str) -> list[dict[str, str]]: """Extract test result entries from LISA log output.""" results = [] + # Patterns are anchored at line boundaries with explicit word edges to + # avoid spurious matches (e.g., the substring "test" inside an + # identifier like "smoke_test" used to drag pattern 3 into matching the + # adjacent pipe character as a "test name"). patterns = [ re.compile( - r"(\w+)\s*\|\s*(PASSED|FAILED|SKIPPED|ATTEMPTED)\s*(?:\|\s*(.*))?", - re.IGNORECASE, + r"^\s*(\w+)\s*\|\s*(PASSED|FAILED|SKIPPED|ATTEMPTED)\b" + r"\s*(?:\|\s*(.*))?$", + re.IGNORECASE | re.MULTILINE, ), re.compile( - r"\[?(PASSED|FAILED|SKIPPED|ATTEMPTED)\]?\s+(?:test\s+)?(\w+)" - r"(?:\s*[:\-]\s*(.*))?", - re.IGNORECASE, + r"^\s*\[?(PASSED|FAILED|SKIPPED|ATTEMPTED)\]?\s+(?:test\s+)?" + r"(\w+)(?:\s*[:\-]\s*(.*))?$", + re.IGNORECASE | re.MULTILINE, ), re.compile( - r"(?:test|case)\s+(\S+)\s+.*?(PASSED|FAILED|SKIPPED|ATTEMPTED)" + r"\b(?:test|case)\s+(\w+)\b.*?\b(PASSED|FAILED|SKIPPED|ATTEMPTED)\b" r"(?:\s*[:\-]\s*(.*))?", re.IGNORECASE, ), @@ -1272,7 +1239,8 @@ def _parse_portal_storage_url(url: str) -> Optional[dict[str, str]]: Returns ``None`` if the URL is not a portal storage URL. 
""" parsed = urlparse(url) - if not parsed.hostname or not parsed.hostname.endswith("portal.azure.com"): + host = (parsed.hostname or "").rstrip(".").lower() + if host != "portal.azure.com": return None if not parsed.fragment: return None @@ -1306,6 +1274,96 @@ def _parse_portal_storage_url(url: str) -> Optional[dict[str, str]]: return {"account": account, "container": container, "prefix": prefix} +def _download_url_to_dir( # noqa: C901 + url: str, + auth_token: Optional[str], +) -> tuple[str, int, Optional[float]]: + """Download *url* to a fresh temp directory and return ``(dir, count, size_mb)``. + + ``size_mb`` is ``None`` for blob-prefix downloads (where individual blob + sizes aren't summarised). Raises on failure; the caller is responsible + for surfacing the error message. The temp directory is cleaned up only + on failure — on success the caller owns it. + """ + portal_info = _parse_portal_storage_url(url) + if portal_info: + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + try: + result_dir, count = _download_azure_blob_prefix( + portal_info["account"], + portal_info["container"], + portal_info["prefix"], + download_dir, + ) + return result_dir, count, None + except Exception: + shutil.rmtree(download_dir, ignore_errors=True) + raise + + parsed = urlparse(url) + if parsed.scheme != "https": + raise ValueError("Only HTTPS URLs are supported") + if not parsed.hostname: + raise ValueError("Could not parse hostname from URL") + + is_azure_blob = parsed.hostname.endswith(".blob.core.windows.net") + has_sas = "sig=" in (parsed.query or "") + + if is_azure_blob and not auth_token and not has_sas: + path_parts = [p for p in parsed.path.strip("/").split("/") if p] + if len(path_parts) >= 2: + container = path_parts[0] + prefix = "/".join(path_parts[1:]) + account = parsed.hostname.split(".")[0] + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + try: + result_dir, count = _download_azure_blob_prefix( + account, container, prefix, download_dir + ) + 
return result_dir, count, None + except Exception: + shutil.rmtree(download_dir, ignore_errors=True) + raise + + download_dir = tempfile.mkdtemp(prefix="lisa_logs_") + filename = os.path.basename(parsed.path) or "logs" + filename = re.sub(r"[^\w.\-]", "_", filename) or "logs" + download_path = os.path.join(download_dir, filename) + + try: + headers = {} + if auth_token: + headers["Authorization"] = f"Bearer {auth_token}" + req = Request(url, headers=headers) + with urlopen(req, timeout=120) as resp: # noqa: S310 + content_length = resp.headers.get("Content-Length") + if content_length and int(content_length) > _MAX_DOWNLOAD_BYTES: + raise ValueError( + f"Content-Length {int(content_length):,} exceeds the " + f"{_MAX_DOWNLOAD_BYTES:,}-byte download limit" + ) + downloaded_bytes = 0 + with open(download_path, "wb") as f: + while True: + chunk = resp.read(1024 * 1024) + if not chunk: + break + downloaded_bytes += len(chunk) + if downloaded_bytes > _MAX_DOWNLOAD_BYTES: + raise ValueError( + f"Download exceeded " + f"{_MAX_DOWNLOAD_BYTES:,}-byte limit; aborted" + ) + f.write(chunk) + size_mb = os.path.getsize(download_path) / (1024 * 1024) + result_dir = _extract_archive(download_path, download_dir) + file_count = sum(1 for _, _, files in os.walk(result_dir) for _ in files) + return result_dir, file_count, size_mb + except Exception: + shutil.rmtree(download_dir, ignore_errors=True) + raise + + def _download_azure_blob_prefix( account: str, container: str, @@ -1333,7 +1391,10 @@ class _StaticTokenCredential(TokenCredential): """Wraps a pre-fetched token for the Azure SDK.""" def get_token(self, *scopes, **kwargs): # type: ignore[override] - return AccessToken(storage_token, 0) + # Treat the injected token as valid for one hour. Setting + # expiry to 0 caused some SDK versions to reject it as + # already-expired. 
+ return AccessToken(storage_token, int(time.time()) + 3600) credential = _StaticTokenCredential() else: @@ -1397,25 +1458,32 @@ def _extract_archive(download_path: str, download_dir: str) -> str: if tarfile.is_tarfile(download_path): os.makedirs(extract_dir, exist_ok=True) + abs_extract = os.path.abspath(extract_dir) with tarfile.open(download_path) as tf: - safe_members = [ - m - for m in tf.getmembers() - if not m.name.startswith(("/", "..")) and ".." not in m.name - ] - tf.extractall(extract_dir, members=safe_members) + safe_members = [] + for m in tf.getmembers(): + target = os.path.abspath(os.path.join(abs_extract, m.name)) + if os.path.commonpath([abs_extract, target]) != abs_extract: + continue + safe_members.append(m) + # filter="data" (PEP 706) blocks unsafe members (links, abs paths, + # device files) on Python 3.12+; older versions ignore the kwarg + # via the try/except. + try: + tf.extractall(extract_dir, members=safe_members, filter="data") + except TypeError: + tf.extractall(extract_dir, members=safe_members) os.remove(download_path) return extract_dir if zipfile.is_zipfile(download_path): os.makedirs(extract_dir, exist_ok=True) + abs_extract = os.path.abspath(extract_dir) with zipfile.ZipFile(download_path) as zf: - safe_names = [ - n - for n in zf.namelist() - if not n.startswith(("/", "..")) and ".." 
not in n - ] - for name in safe_names: + for name in zf.namelist(): + target = os.path.abspath(os.path.join(abs_extract, name)) + if os.path.commonpath([abs_extract, target]) != abs_extract: + continue zf.extract(name, extract_dir) os.remove(download_path) return extract_dir diff --git a/mcp/lisa_mcp/tools/runbook.py b/mcp/lisa_mcp/tools/runbook.py index 23f8fed091..24d9b189d0 100644 --- a/mcp/lisa_mcp/tools/runbook.py +++ b/mcp/lisa_mcp/tools/runbook.py @@ -55,7 +55,7 @@ def lisa_generate_runbook( sections.append(f" - type: {platform}") sections.append(' admin_username: "$(admin_username)"') sections.append(' admin_private_key_file: "$(admin_private_key_file)"') - sections.append(f" keep_environment: {keep_environment}") + sections.append(f' keep_environment: "{keep_environment}"') if platform == "azure": sections.append(" azure:") diff --git a/mcp/lisa_mcp/tools/test_writer.py b/mcp/lisa_mcp/tools/test_writer.py index 05fea8ff20..43668427b5 100644 --- a/mcp/lisa_mcp/tools/test_writer.py +++ b/mcp/lisa_mcp/tools/test_writer.py @@ -11,9 +11,10 @@ from pathlib import Path from typing import Optional -from lisa_mcp.tools._repo import find_repo_root, load_test_writer_prompt from mcp.server.fastmcp import FastMCP +from lisa_mcp.tools._repo import find_repo_root, load_test_writer_prompt + def register_test_writer_tools(mcp: FastMCP) -> None: # noqa: C901 @mcp.tool() diff --git a/mcp/run_tests.py b/mcp/run_tests.py index 269ad821ce..bb04b7d8da 100644 --- a/mcp/run_tests.py +++ b/mcp/run_tests.py @@ -70,12 +70,12 @@ def main() -> int: group.add_argument( "--smoke", action="store_true", - help="Quick smoke test — verify all 24 tools are registered", + help="Quick smoke test — verify all 25 tools are registered", ) parser.add_argument( "--xml", action="store_true", - help="Output JUnit XML report to test-results.xml", + help="Output JUnit XML reports into the test-results/ directory", ) parser.add_argument( "-v", diff --git a/mcp/tests/test_all_tools.py 
b/mcp/tests/test_all_tools.py index 4fa5ab1837..9ed29e8052 100644 --- a/mcp/tests/test_all_tools.py +++ b/mcp/tests/test_all_tools.py @@ -1,7 +1,7 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -"""Comprehensive functional tests for all 24 MCP tools. +"""Comprehensive functional tests for all 25 MCP tools. Run from the mcp/ directory: python -m pytest tests/test_all_tools.py -v @@ -295,8 +295,8 @@ def test_log_from_content(self) -> None: "lisa_analyze_log", log_content="smoke_test | PASSED | ok\nverify_x | FAILED | boom\n", ) - self.assertIn("1", result) # 1 passed - self.assertIn("1", result) # 1 failed + self.assertIn("passed", result.lower()) + self.assertIn("1 failed", result.lower()) def test_empty_log(self) -> None: result = _call("lisa_analyze_log", log_content="nothing relevant here\n") @@ -726,7 +726,7 @@ def test_returns_features(self) -> None: # ====================================================================== -# Cross-cutting: verify all 24 tools are registered +# Cross-cutting: verify all 25 tools are registered # ====================================================================== diff --git a/mcp/tests/test_mcp_integration.py b/mcp/tests/test_mcp_integration.py index 24f0ae4370..c49252ddcf 100644 --- a/mcp/tests/test_mcp_integration.py +++ b/mcp/tests/test_mcp_integration.py @@ -80,7 +80,7 @@ async def _list_tools(self) -> list: def test_server_starts_and_lists_tools(self) -> None: tools = _run(self._list_tools()) names = {t.name for t in tools} - self.assertEqual(len(names), 24, f"Expected 24 tools, got {len(names)}") + self.assertEqual(len(names), 25, f"Expected 25 tools, got {len(names)}") self.assertIn("lisa_analyze_log", names) self.assertIn("lisa_write_test", names) self.assertIn("lisa_explain_concept", names) diff --git a/pyproject.toml b/pyproject.toml index 47e1cc03ce..197d4a3c44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -193,6 +193,12 @@ use_parentheses = true ensure_newline_before_comments = 
true line_length = 88 extend_skip_glob = [".nox/*", ".venv*/*"] +# Pin classification so local (editable-installed `lisa_mcp`) and CI +# (no install) agree on import grouping. Without this, isort treats +# `lisa_mcp` as first-party locally but third-party in CI, which +# flips I003/I004 between environments. +known_first_party = ["lisa", "lisa_mcp"] +known_third_party = ["mcp"] [tool.mypy]