Merge pull request #195 from bentito/add-netedge-evals

openshift-merge-bot[bot] · web-flow · commit 1596731c86c9 · 2026-04-22T06:07:37.000Z
NO-JIRA: feat: add remaining netedge evaluations (follow-up to #138)
diff --git a/docs/openshift/NETEDGE-validation-how-to.md b/docs/openshift/NETEDGE-validation-how-to.md
@@ -7,20 +7,19 @@ This document outlines the procedure for validating the NetEdge (NIDS) toolset o
 - **Go**: v1.22+ (Ensure `GOROOT` is set correctly)
 - **OpenShift Cluster**: Running and accessible (OCP, CRC, or similar). **Note: Evaluations WILL FAIL on Kind.**
 - **oc** or **kubectl**: Installed and configured to talk to your cluster.
-- **gevals**: Built and in the root directory (see Setup below).
+- **mcpchecker**: Built and in the root directory (see Setup below).
 - **Gemini CLI**: Installed (`npm install -g @google/gemini-cli`)
-- **RH_GEMINI_API_KEY**: Required for the Agent.
-- **OPENAI_API_KEY**: Required for the Judge (until Gemini compatibility is fully verified).
+- **RH_GEMINI_API_KEY**: Required for the Agent and Judge.
 
 ## Setup
 
-1.  **Build/Install gevals**:
+1.  **Build/Install mcpchecker**:
     if not already present:
     ```bash
     git clone https://github.com/mcpchecker/mcpchecker.git ../mcpchecker
     cd ../mcpchecker
-    go build -o gevals ./cmd/mcpchecker
-    mv gevals ../openshift-mcp-server/
+    go build -o mcpchecker ./cmd/mcpchecker
+    mv mcpchecker ../openshift-mcp-server/
     ```
 
 2.  **Connect to OpenShift Cluster**:
@@ -45,24 +44,44 @@ This document outlines the procedure for validating the NetEdge (NIDS) toolset o
     ```
 
 3.  **Run the Evaluations**:
-    Run `mcpchecker` (gevals) checks using the Gemini Agent.
+    Run `mcpchecker` checks using the Gemini Agent.
     
     **Test 1: Get CoreDNS Configuration**
     ```bash
-    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && export JUDGE_API_KEY=$OPENAI_API_KEY && export JUDGE_BASE_URL="https://api.openai.com/v1" && export JUDGE_MODEL_NAME="gpt-4o" && ./gevals check evals/gemini-agent/eval.yaml --run "get-coredns-config" -v
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "get-coredns-config" -v
     ```
 
     **Test 2: Query Prometheus Diagnostics**
     ```bash
-    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && export JUDGE_API_KEY=$OPENAI_API_KEY && export JUDGE_BASE_URL="https://api.openai.com/v1" && export JUDGE_MODEL_NAME="gpt-4o" && ./gevals check evals/gemini-agent/eval.yaml --run "query-prometheus-ingress" -v
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "query-prometheus-ingress" -v
+    ```
+
+    **Test 3: Inspect Route**
+    ```bash
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "inspect-route" -v
+    ```
+
+    **Test 4: Get Service Endpoints**
+    ```bash
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "get-service-endpoints" -v
+    ```
+
+    **Test 5: Probe DNS Local**
+    ```bash
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "probe-dns-local" -v
+    ```
+
+    **Test 6: Probe HTTP**
+    ```bash
+    export RH_GEMINI_API_KEY=$RH_GEMINI_API_KEY && ./mcpchecker check evals/gemini-agent/eval.yaml --run "probe-http" -v
     ```
 
     **Tip**: To see the full agent conversation and debug details:
     ```bash
-    ./gevals view mcpchecker-gemini-agent-netedge-eval-out.json
+    ./mcpchecker view mcpchecker-gemini-agent-netedge-eval-out.json
     ```
 
 ## Observing Results
 
-- **Console Output**: `gevals` will show the Gemini agent's progress.
+- **Console Output**: `mcpchecker` will show the Gemini agent's progress.
 - **Server Logs**: Watch `server.log` to see the `netedge` toolset servicing requests from Gemini.
diff --git a/evals/gemini-agent/agent.yaml b/evals/gemini-agent/agent.yaml
@@ -1,84 +1,6 @@
 kind: Agent
 metadata:
   name: "gemini-command-agent"
-commands:
-  useVirtualHome: false
-  argTemplateMcpServer: "{{ .File }}"
-  argTemplateAllowedTools: "mcp__{{ .ServerName }}__{{ .ToolName }}"
-  allowedToolsJoinSeparator: ","
-  runPrompt: |-
-    set -euo pipefail
-
-    if ! command -v jq >/dev/null 2>&1; then
-      echo "jq is required to extract MCP server details" >&2
-      exit 1
-    fi
-    
-    # Check for gemini CLI - naive check first, then fallback to known path if needed
-    if ! command -v gemini >/dev/null 2>&1; then
-       # Try to find it in common npm location if not in PATH
-       NPM_PREFIX="$(npm prefix -g 2>/dev/null || echo "")"
-       if [[ -n "${NPM_PREFIX}" && -x "${NPM_PREFIX}/bin/gemini" ]]; then
-          export PATH="${NPM_PREFIX}/bin:${PATH}"
-       else
-          echo "gemini CLI not found. Please install it: npm install -g @google/gemini-cli" >&2
-          exit 1
-       fi
-    fi
-
-    MCP_SERVER_FILE="{{ .McpServerFileArgs }}"
-    if [[ ! -f "${MCP_SERVER_FILE}" ]]; then
-      echo "MCP server file not found: ${MCP_SERVER_FILE}" >&2
-      exit 1
-    fi
-
-    # Extract the MCP server URL for 'netedge' from the config file
-    NETEDGE_URL="$(jq -r '.mcpServers.kubernetes.url' "${MCP_SERVER_FILE}")"
-    if [[ -z "${NETEDGE_URL}" || "${NETEDGE_URL}" == "null" ]]; then
-      echo "Unable to parse netedge MCP URL from ${MCP_SERVER_FILE}" >&2
-      exit 1
-    fi
-
-    PROMPT_FILE="$(mktemp)"
-    printf '%b' {{ printf "%q" .Prompt }} > "${PROMPT_FILE}"
-
-    TMP_HOME="$(mktemp -d)"
-    mkdir -p "${TMP_HOME}"
-    
-    # Copy kubeconfig if available to allow oc/kubectl commands to work if the agent uses them (optional but good practice)
-    if [[ -n "${KUBECONFIG:-}" ]]; then
-       export KUBECONFIG="${KUBECONFIG}"
-    fi
-
-    export HOME="${TMP_HOME}"
-    cd "${TMP_HOME}"
-
-    # Configure the Gemini CLI to use the MCP server
-    # We use 'netedge' as the server name in the gemini config
-    gemini mcp add netedge "${NETEDGE_URL}" --transport http >/dev/null
-
-    PROMPT_CONTENT="$(cat "${PROMPT_FILE}")"
-    
-    # Construct arguments for gemini
-    # usage: gemini [prompt] [flags]
-    # We use YOLO mode to avoid interactive prompts during CI/demo
-    GEMINI_ARGS=("--approval-mode" "yolo" "--output-format" "stream-json")
-    
-    # Use the API key provided in the environment
-    if [[ -z "${RH_GEMINI_API_KEY:-}" ]]; then
-        echo "Error: RH_GEMINI_API_KEY is not set." >&2
-        exit 1
-    fi
-    # The gemini CLI uses GOOGLE_API_KEY by default
-    export GOOGLE_API_KEY="${RH_GEMINI_API_KEY}"
-
-    if [[ -n "${GEMINI_MODEL:-}" ]]; then
-        GEMINI_ARGS+=(--model "${GEMINI_MODEL}")
-    fi
-
-    # Execute Gemini
-    gemini "${PROMPT_CONTENT}" "${GEMINI_ARGS[@]}"
-
-    # Cleanup
-    rm -rf "${TMP_HOME}"
-    rm -f "${PROMPT_FILE}"
+acp:
+  cmd: "gemini"
+  args: ["--acp"]
diff --git a/evals/gemini-agent/eval.yaml b/evals/gemini-agent/eval.yaml
@@ -7,10 +7,9 @@ config:
     path: "agent.yaml"
   mcpConfigFile: "../mcp-config.yaml"
   llmJudge:
-    env:
-      baseUrlKey: JUDGE_BASE_URL
-      apiKeyKey: JUDGE_API_KEY
-      modelNameKey: JUDGE_MODEL_NAME
+    ref:
+      type: "builtin.llm-agent"
+      model: "openai:gemini-2.5-pro"
   taskSets:
     - glob: ../tasks/netedge/*/*.yaml
       assertions:
diff --git a/evals/tasks/netedge/get_service_endpoints/task.yaml b/evals/tasks/netedge/get_service_endpoints/task.yaml
@@ -0,0 +1,15 @@
+kind: Task
+metadata:
+  name: get-service-endpoints
+steps:
+  verify:
+    contains: "Addresses"
+  prompt:
+    inline: Use the get_service_endpoints tool to list the endpoint slices for the router-default service in the openshift-ingress namespace. Report the backend pod addresses and node names from the result.
+  assertions:
+    toolsUsed:
+      - server: kubernetes
+        toolPattern: "get_service_endpoints"
+        args:
+          namespace: "openshift-ingress"
+          service: "router-default"
diff --git a/evals/tasks/netedge/inspect_route/task.yaml b/evals/tasks/netedge/inspect_route/task.yaml
@@ -0,0 +1,15 @@
+kind: Task
+metadata:
+  name: inspect-route
+steps:
+  verify:
+    contains: "console"
+  prompt:
+    inline: Inspect the default OpenShift console route in the openshift-console namespace and report key fields like host, TLS termination type, and backend service.
+  assertions:
+    toolsUsed:
+      - server: kubernetes
+        toolPattern: "inspect_route"
+        args:
+          namespace: "openshift-console"
+          route: "console"
diff --git a/evals/tasks/netedge/probe_dns_local/task.yaml b/evals/tasks/netedge/probe_dns_local/task.yaml
@@ -0,0 +1,16 @@
+kind: Task
+metadata:
+  name: probe-dns-local
+steps:
+  verify:
+    contains: "NOERROR"
+  prompt:
+    inline: You have a specialized tool called probe_dns_local. Use it to run a DNS query from the MCP server for kubernetes.default.svc.cluster.local against the cluster DNS server at 172.30.0.10, record type A. Report the resolved addresses and response code (Rcode).
+  assertions:
+    toolsUsed:
+      - server: kubernetes
+        toolPattern: "probe_dns_local"
+        args:
+          server: "172.30.0.10"
+          name: "kubernetes.default.svc.cluster.local"
+          type: "A"
diff --git a/evals/tasks/netedge/probe_http/task.yaml b/evals/tasks/netedge/probe_http/task.yaml
@@ -0,0 +1,15 @@
+kind: Task
+metadata:
+  name: probe-http
+steps:
+  verify:
+    contains: "status_code"
+  prompt:
+    inline: You have a specialized tool called probe_http. Use it to send an HTTP GET request to https://kubernetes.default.svc:443 from the MCP server host to verify reachability. Report the status code and response headers.
+  assertions:
+    toolsUsed:
+      - server: kubernetes
+        toolPattern: "probe_http"
+        args:
+          url: "https://kubernetes.default.svc:443"
+          method: "GET"