diff --git a/tools/monitoring/Dockerfile.exporter b/tools/monitoring/Dockerfile.exporter new file mode 100644 index 000000000..d362ffe2d --- /dev/null +++ b/tools/monitoring/Dockerfile.exporter @@ -0,0 +1,6 @@ +FROM python:3.11-slim +WORKDIR /app +RUN pip install requests prometheus_client +COPY prometheus_exporter.py . +EXPOSE 8000 +CMD ["python", "prometheus_exporter.py", "--listen-port", "8000"] diff --git a/tools/monitoring/README.md b/tools/monitoring/README.md new file mode 100644 index 000000000..628774eca --- /dev/null +++ b/tools/monitoring/README.md @@ -0,0 +1,184 @@ +# RustChain Prometheus Monitoring Stack + +Comprehensive monitoring solution for RustChain nodes using Prometheus metrics collection and Grafana visualization. + +## Features + +- **Prometheus Exporter**: Python-based exporter collecting RustChain node metrics +- **Pre-built Grafana Dashboard**: Ready-to-import dashboard with 10 panels +- **Docker Compose Setup**: One-command deployment for the entire stack +- **Systemd Service**: Persistent background service for bare-metal deployments + +## Quick Start (Docker Compose) + +```bash +# Navigate to the RustChain root directory +cd /path/to/Rustchain + +# Launch the full monitoring stack +docker-compose -f tools/monitoring/docker-compose.monitoring.yml up -d + +# View logs +docker-compose -f tools/monitoring/docker-compose.monitoring.yml logs -f +``` + +Access services: +- **Grafana**: http://localhost:3000 (admin/rustchain123) +- **Prometheus**: http://localhost:9090 + +## Standalone Python Setup + +### Prerequisites + +```bash +pip install requests prometheus_client +``` + +### Run the Exporter + +```bash +python prometheus_exporter.py \ + --node-url http://50.28.86.131 \ + --listen-port 8000 \ + --scrape-interval 30 +``` + +### Environment Variables + +| Variable | Default | Description | +|---|---|---| +| `RUSTCHAIN_NODE` | `https://50.28.86.131` | RustChain node API URL | +| `EXPORTER_PORT` | `8000` | Port to listen on | +| 
`SCRAPE_INTERVAL` | `30` | Scrape interval in seconds | +| `TLS_VERIFY` | `false` | Verify TLS certificates | + +### Configuration File (JSON) + +Create `config.json`: + +```json +{ + "node_url": "http://50.28.86.131", + "listen_port": 8000, + "scrape_interval": 30, + "request_timeout": 10 +} +``` + +Run with config: + +```bash +python prometheus_exporter.py --config config.json +``` + +## Systemd Service Setup + +### Installation + +```bash +# Copy the service file +sudo cp rustchain-exporter.service /etc/systemd/system/ + +# Edit the service file to set your paths +sudo vim /etc/systemd/system/rustchain-exporter.service + +# Reload systemd +sudo systemctl daemon-reload + +# Enable and start +sudo systemctl enable rustchain-exporter +sudo systemctl start rustchain-exporter + +# Check status +sudo systemctl status rustchain-exporter +``` + +### Service File Configuration + +Edit `/etc/systemd/system/rustchain-exporter.service` and set: +- `WorkingDirectory` to the monitoring directory path +- `Environment` variables for your node URL and port + +## Grafana Dashboard Import + +1. Open Grafana at http://localhost:3000 +2. Login with admin credentials +3. Click **+** → **Import** +4. Upload `grafana_dashboard.json` or paste its contents +5. Select Prometheus datasource (or create one pointing to `http://prometheus:9090`) +6. 
Click **Import** + +### Dashboard Panels + +| Panel | Metric | Description | +|---|---|---| +| Node Health | `rustchain_node_up` | Up/Down status indicator | +| Current Epoch | `rustchain_epoch_current` | Current epoch number | +| Current Slot | `rustchain_epoch_slot` | Current slot in epoch | +| Active Miners | `rustchain_active_miners` | Count of active miners | +| RTC Supply | `rustchain_total_rtc_supply` | Total RTC token supply | +| Epoch Pot | `rustchain_epoch_pot` | Current epoch reward pot | +| API Response Time | `rustchain_api_response_time_seconds` | Per-endpoint response times | +| API Requests Total | `rustchain_api_requests_total` | Request count by endpoint/status | +| Scrape Errors | `rustchain_scrape_errors_total` | Error breakdown by type | +| Scrape Duration | `rustchain_scrape_duration_seconds` | Time per scrape cycle | + +## Prometheus Configuration + +The `prometheus.yml` file configures Prometheus to scrape the exporter: + +```yaml +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + - job_name: 'rustchain-exporter' + static_configs: + - targets: ['rustchain-exporter:8000'] +``` + +## Metrics Reference + +| Metric | Type | Labels | Description | +|---|---|---|---| +| `rustchain_node_up` | Gauge | node_url | Node availability (1=up, 0=down) | +| `rustchain_node_version` | Info | node_url, version | Node software version | +| `rustchain_node_uptime_seconds` | Gauge | node_url | Node uptime | +| `rustchain_epoch_current` | Gauge | node_url | Current epoch number | +| `rustchain_epoch_slot` | Gauge | node_url | Current slot | +| `rustchain_epoch_pot` | Gauge | node_url | Epoch reward pot | +| `rustchain_block_height` | Gauge | node_url | Current block height | +| `rustchain_total_miners` | Gauge | node_url | Total registered miners | +| `rustchain_active_miners` | Gauge | node_url | Active miners count | +| `rustchain_total_rtc_supply` | Gauge | node_url | Total RTC supply | +| `rustchain_api_response_time_seconds` | 
Gauge | node_url, endpoint | API response time | +| `rustchain_scrape_errors_total` | Counter | node_url, error_type | Scrape error count | +| `rustchain_api_requests_total` | Counter | node_url, endpoint, status | Total API requests | +| `rustchain_scrape_duration_seconds` | Gauge | node_url | Scrape cycle duration | +| `rustchain_epoch_block_time_avg` | Gauge | node_url | Average block time in epoch | +| `rustchain_miner_antiquity_distribution` | Histogram | node_url | Miner antiquity score distribution | +| `rustchain_tx_pool_size` | Gauge | node_url | Pending transaction pool size | + +## Endpoints Scraped + +The exporter queries these RustChain API endpoints: + +- `GET /health` - Node health and version +- `GET /epoch` - Epoch info (number, slot, pot, supply) +- `GET /api/miners` - Miner list (active/total counts) +- `GET /tx/pool` - Transaction pool size + +## Troubleshooting + +### Exporter not responding + +Check logs: `docker logs rustchain-exporter` or `journalctl -u rustchain-exporter` + +### Prometheus not scraping + +Verify target in Prometheus UI: Status → Targets + +### Grafana shows no data + +Check datasource URL is `http://prometheus:9090` and that Prometheus is successfully scraping the exporter. 
diff --git a/tools/monitoring/grafana_dashboard.json b/tools/monitoring/grafana_dashboard.json new file mode 100644 index 000000000..7e4e879e9 --- /dev/null +++ b/tools/monitoring/grafana_dashboard.json @@ -0,0 +1,543 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { "options": { "0": { "color": "red", "index": 1, "text": "DOWN" } }, "type": "value" }, + { "options": { "1": { "color": "green", "index": 0, "text": "UP" } }, "type": "value" } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { "color": "red", "value": null }, + { "color": "green", "value": 1 } + ] + }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "rustchain_node_up{node_url=~\"$node_url\"}", + "legendFormat": "Node Status", + "refId": "A" + } + ], + "title": "Node Health", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": 
"rustchain_epoch_current{node_url=~\"$node_url\"}", + "legendFormat": "Epoch {{node_url}}", + "refId": "A" + } + ], + "title": "Current Epoch", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 }, + "id": 3, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "rustchain_epoch_slot{node_url=~\"$node_url\"}", + "legendFormat": "Slot {{node_url}}", + "refId": "A" + } + ], + "title": "Current Slot", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "none" + } + }, + "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 }, + "id": 4, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": ["lastNotNull"], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "targets": [ + { + "expr": "rustchain_active_miners{node_url=~\"$node_url\"}", + "legendFormat": "Active Miners {{node_url}}", + "refId": "A" + } + ], + "title": "Active Miners", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "none" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, + "id": 5, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "rustchain_total_rtc_supply{node_url=~\"$node_url\"}", + "legendFormat": "RTC Supply {{node_url}}", + "refId": "A" + } + ], + "title": "RTC Supply", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "none" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, + "id": 6, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": 
"single", "sort": "none" } + }, + "targets": [ + { + "expr": "rustchain_epoch_pot{node_url=~\"$node_url\"}", + "legendFormat": "Epoch Pot {{node_url}}", + "refId": "A" + } + ], + "title": "Epoch Pot", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, + "id": 7, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "rustchain_api_response_time_seconds{node_url=~\"$node_url\"}", + "legendFormat": "{{endpoint}} {{node_url}}", + "refId": "A" + } + ], + "title": "API Response Time (by endpoint)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, + "id": 8, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "increase(rustchain_api_requests_total{node_url=~\"$node_url\"}[5m])", + "legendFormat": "{{endpoint}} ({{status}}) {{node_url}}", + "refId": "A" + } + ], + "title": "API Requests Total (by endpoint)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 80, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "normal" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "short" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 }, + "id": 9, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "increase(rustchain_scrape_errors_total{node_url=~\"$node_url\"}[5m])", + "legendFormat": 
"{{error_type}} {{node_url}}", + "refId": "A" + } + ], + "title": "Scrape Errors (error type breakdown)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { "mode": "palette-classic" }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { "legend": false, "tooltip": false, "viz": false }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { "group": "A", "mode": "none" }, + "thresholdsStyle": { "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [{ "color": "green", "value": null }] + }, + "unit": "s" + } + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 }, + "id": 10, + "options": { + "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, + "tooltip": { "mode": "single", "sort": "none" } + }, + "targets": [ + { + "expr": "rustchain_scrape_duration_seconds{node_url=~\"$node_url\"}", + "legendFormat": "Scrape Duration {{node_url}}", + "refId": "A" + } + ], + "title": "Scrape Duration", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 38, + "style": "dark", + "tags": ["rustchain", "monitoring"], + "templating": { + "list": [ + { + "current": {}, + "hide": 0, + "includeAll": false, + "label": "Prometheus", + "multi": false, + "name": "DS_PROMETHEUS", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", + "current": {}, + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "definition": "label_values(rustchain_node_up, node_url)", + "hide": 0, + "includeAll": 
true, + "label": "Node URL", + "multi": true, + "name": "node_url", + "options": [], + "query": { + "query": "label_values(rustchain_node_up, node_url)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "RustChain Node Monitor", + "uid": "rustchain-node-monitor", + "version": 1, + "weekStart": "" +} diff --git a/tools/monitoring/prometheus.yml b/tools/monitoring/prometheus.yml new file mode 100644 index 000000000..f98f61290 --- /dev/null +++ b/tools/monitoring/prometheus.yml @@ -0,0 +1,11 @@ +global: + scrape_interval: 30s + evaluation_interval: 30s + +scrape_configs: + - job_name: 'rustchain-exporter' + static_configs: + - targets: ['rustchain-exporter:8000'] + relabel_configs: + - source_labels: [__address__] + target_label: instance diff --git a/tools/monitoring/prometheus_exporter.py b/tools/monitoring/prometheus_exporter.py index 72d7c3d17..b59415b98 100644 --- a/tools/monitoring/prometheus_exporter.py +++ b/tools/monitoring/prometheus_exporter.py @@ -3,7 +3,7 @@ """ RustChain Prometheus Exporter (tools edition) -Cherry-picked from LaphoqueRC PR #1711, with infrastructure refs fixed. +Cherry-picked from LaplaceRC PR #1711, with infrastructure refs fixed. For the simpler standalone exporter, see monitoring/rustchain-exporter.py. 
This version adds: @@ -11,6 +11,8 @@ - CLI arguments (--node-url, --listen-port, --scrape-interval) - Per-endpoint response-time gauges - JSON config file support + - Additional v2 metrics: api_requests_total, scrape_duration_seconds, + epoch_block_time_avg, miner_antiquity_distribution, tx_pool_size """ import time @@ -22,7 +24,7 @@ from typing import Dict, Optional, Any import requests -from prometheus_client import start_http_server, Gauge, Counter, Info +from prometheus_client import start_http_server, Gauge, Counter, Info, Histogram logging.basicConfig( level=logging.INFO, @@ -30,17 +32,17 @@ ) logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # Configuration defaults — fixed to real RustChain infrastructure -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- DEFAULT_NODE_URL = "https://50.28.86.131" DEFAULT_LISTEN_PORT = 8000 DEFAULT_SCRAPE_INTERVAL = 30 DEFAULT_REQUEST_TIMEOUT = 10 -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # Prometheus metrics -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- rustchain_up = Gauge( 'rustchain_node_up', 'Whether the RustChain node is responding', @@ -102,6 +104,34 @@ ['node_url', 'endpoint'], ) +# --- v2 metrics --- +rustchain_api_requests_total = Counter( + 'rustchain_api_requests_total', + 'Total number of API requests by endpoint and status', + ['node_url', 'endpoint', 'status'], +) +rustchain_scrape_duration_seconds = Gauge( + 'rustchain_scrape_duration_seconds', + 'Time taken for each scrape cycle', + ['node_url'], +) 
+rustchain_epoch_block_time_avg = Gauge( + 'rustchain_epoch_block_time_avg', + 'Average block time in current epoch', + ['node_url'], +) +rustchain_miner_antiquity_distribution = Histogram( + 'rustchain_miner_antiquity_distribution', + 'Distribution of miner antiquity scores', + ['node_url'], + buckets=[0.1, 0.25, 0.5, 0.75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 30.0], +) +rustchain_tx_pool_size = Gauge( + 'rustchain_tx_pool_size', + 'Pending transaction pool size', + ['node_url'], +) + class RustChainPrometheusExporter: """Scrapes the RustChain node API and updates Prometheus gauges.""" @@ -119,15 +149,20 @@ def __init__( self.session.headers.update({ 'User-Agent': 'RustChain-Prometheus-Exporter/1.0', }) - # Self-signed cert on 50.28.86.131 - self.session.verify = False + # Use pinned cert if available, else system CA bundle + try: + from node.tls_config import get_tls_verify + self.session.verify = get_tls_verify() + except ImportError: + cert = os.path.expanduser("~/.rustchain/node_cert.pem") + self.session.verify = cert if os.path.exists(cert) else True self.running = False logger.info("Initialized exporter for node: %s", self.node_url) - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- # HTTP helpers - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- def _make_request(self, endpoint: str) -> Optional[Dict[str, Any]]: """GET *endpoint* with timing and error handling.""" @@ -142,9 +177,16 @@ def _make_request(self, endpoint: str) -> Optional[Dict[str, Any]]: ).set(elapsed) if response.status_code == 200: + rustchain_api_requests_total.labels( + node_url=self.node_url, endpoint=endpoint, status='200', + ).inc() return response.json() logger.warning("API returned %d for %s", response.status_code, endpoint) + rustchain_api_requests_total.labels( + node_url=self.node_url, 
endpoint=endpoint, + status=str(response.status_code), + ).inc() rustchain_scrape_errors.labels( node_url=self.node_url, error_type='http_error', ).inc() @@ -172,9 +214,9 @@ def _make_request(self, endpoint: str) -> Optional[Dict[str, Any]]: ).inc() return None - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- # Metric scrapers — aligned to real node endpoints on port 8099 - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- def _scrape_health(self): """GET /health -> node up/version/uptime.""" @@ -194,7 +236,7 @@ def _scrape_health(self): rustchain_up.labels(node_url=self.node_url).set(0) def _scrape_epoch(self): - """GET /epoch -> epoch number, slot, pot, enrolled miners, supply.""" + """GET /epoch -> epoch number, slot, pot, supply.""" data = self._make_request('/epoch') if data: rustchain_epoch_current.labels(node_url=self.node_url).set( @@ -209,6 +251,11 @@ def _scrape_epoch(self): rustchain_total_rtc_supply.labels(node_url=self.node_url).set( data.get('total_supply_rtc', 0), ) + # v2: average block time in epoch + block_time_avg = data.get('epoch_block_time_avg', 0) + rustchain_epoch_block_time_avg.labels( + node_url=self.node_url, + ).set(block_time_avg) def _scrape_miners(self): """GET /api/miners -> active miner count.""" @@ -217,23 +264,48 @@ def _scrape_miners(self): rustchain_active_miners.labels(node_url=self.node_url).set(len(data)) rustchain_total_miners.labels(node_url=self.node_url).set(len(data)) + # v2: miner antiquity score distribution + for miner in data: + antiquity = miner.get('antiquity_score', 0) + rustchain_miner_antiquity_distribution.labels( + node_url=self.node_url, + ).observe(antiquity) + + def _scrape_transactions(self): + """GET /tx/pool -> pending transaction pool size.""" + data = self._make_request('/tx/pool') + if data: + # /tx/pool may 
return { "pool_size": N } or just a number + if isinstance(data, dict): + pool_size = data.get('pool_size', 0) + else: + pool_size = int(data) if data else 0 + rustchain_tx_pool_size.labels(node_url=self.node_url).set(pool_size) + def _scrape_all(self): """One complete scrape cycle.""" logger.debug("Starting metrics scrape") + scrape_start = time.time() + self._scrape_health() # Only continue if node is alive if rustchain_up.labels(node_url=self.node_url)._value.get() == 1: self._scrape_epoch() self._scrape_miners() + self._scrape_transactions() - logger.debug("Metrics scrape completed") + elapsed = time.time() - scrape_start + rustchain_scrape_duration_seconds.labels( + node_url=self.node_url, + ).set(elapsed) + logger.debug("Metrics scrape completed in %.2fs", elapsed) - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- # Main loop - # ------------------------------------------------------------------ + # ------------------------------------------------------------------------- - def start_scraping(self): + def start_scraping(self): self.running = True logger.info("Scrape loop started (%ds interval)", self.scrape_interval) while self.running: @@ -251,9 +323,9 @@ def stop(self): logger.info("Scraping stopped") -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- # CLI -# --------------------------------------------------------------------------- +# ----------------------------------------------------------------------------- def load_config_file(path: str) -> Dict[str, Any]: try: @@ -333,7 +405,7 @@ def main(): start_http_server(listen_port) logger.info("Metrics server started on http://0.0.0.0:%d", listen_port) - scrape_thread = Thread(target=exporter.start_scraping, daemon=True) + scrape_thread = Thread(target=exporter.start_scraping, daemon=True) 
scrape_thread.start() try: