diff --git a/nixos/mirai/services/grafana.nix b/nixos/mirai/services/grafana.nix index e44063c1..d92bef24 100644 --- a/nixos/mirai/services/grafana.nix +++ b/nixos/mirai/services/grafana.nix @@ -188,5 +188,11 @@ environment.etc = { "grafana/dashboards/system-dashboard.json".source = ./grafana/system-dashboard.json; "grafana/dashboards/processes-dashboard.json".source = ./grafana/processes-dashboard.json; + "grafana/dashboards/multi-device-system-dashboard.json".source = + ./grafana/multi-device-system-dashboard.json; + "grafana/dashboards/multi-device-processes-dashboard.json".source = + ./grafana/multi-device-processes-dashboard.json; + "grafana/dashboards/device-specific-system-dashboard.json".source = + ./grafana/device-specific-system-dashboard.json; }; } diff --git a/nixos/mirai/services/grafana/README.md b/nixos/mirai/services/grafana/README.md new file mode 100644 index 00000000..f57e168c --- /dev/null +++ b/nixos/mirai/services/grafana/README.md @@ -0,0 +1,117 @@ +# Multi-Device Grafana Dashboard Setup + +This directory contains Grafana dashboards configured to monitor multiple NixOS devices: `mirai`, `tsuba`, and `ryu`. + +## Dashboard Overview + +### 1. Multi-Device System Metrics (`multi-device-system-dashboard.json`) +- **Purpose**: Compare system metrics across all devices on a single dashboard +- **Features**: + - Device filtering via template variable (can select one, multiple, or all devices) + - CPU usage comparison by device + - Memory usage comparison by device + - Disk usage comparison by device + - Network I/O comparison by device + - Service status overview + - System uptime tracking + - Load average trends + +### 2. 
Device-Specific System Dashboard (`device-specific-system-dashboard.json`) +- **Purpose**: Detailed system monitoring for a single selected device +- **Features**: + - Device selection via template variable + - Detailed CPU, memory, and disk metrics + - Network and disk I/O operations + - System status indicators + - Load average trends (1m, 5m, 15m) + - Enhanced visualizations with thresholds and color coding + +### 3. Multi-Device Process Monitoring (`multi-device-processes-dashboard.json`) +- **Purpose**: Monitor processes across all devices +- **Features**: + - Device filtering for process metrics + - Process resource usage table with device column + - CPU usage trends by process and device + - Memory usage (resident and virtual) by process + - Process I/O throughput monitoring + - Process count tracking per device + +### 4. Legacy Dashboards +- `system-dashboard.json`: Original single-device system dashboard +- `processes-dashboard.json`: Original single-device process dashboard + +## Device Labels + +Each device is configured with specific labels for better organization: + +- **mirai**: `device=mirai, type=server, arch=x86_64` +- **tsuba**: `device=tsuba, type=server, arch=aarch64` +- **ryu**: `device=ryu, type=desktop, arch=x86_64` + +## Metrics Collection + +### Node Exporter (Port 9100) +Collects system-level metrics: +- CPU usage and load average +- Memory utilization +- Disk usage and I/O +- Network interface statistics +- SystemD service status +- System uptime + +### Process Exporter (Port 9256) +Collects process-level metrics: +- CPU usage per process +- Memory usage (resident and virtual) +- Process I/O operations +- Process count and lifecycle + +## Template Variables + +### Device Selection +All multi-device dashboards include a `$device` template variable that allows: +- **Single device selection**: Monitor one specific device +- **Multiple device selection**: Compare selected devices +- **All devices**: Monitor all available devices 
simultaneously + +The variable automatically populates with available devices based on Prometheus metrics. + +## Usage Tips + +1. **Quick Overview**: Start with the multi-device system dashboard to get an overview of all devices +2. **Detailed Analysis**: Use device-specific dashboards for in-depth analysis of a particular device +3. **Process Monitoring**: Use the multi-device process dashboard to track resource-intensive processes across devices +4. **Filtering**: Use the device template variable to focus on specific devices of interest +5. **Time Ranges**: Adjust time ranges based on your monitoring needs (default: last 1 hour) + +## Customization + +### Adding New Devices +To add monitoring for new devices: +1. Configure Prometheus exporters on the new device +2. Add scrape targets to the Prometheus configuration in `grafana.nix` +3. Include appropriate device labels +4. The dashboards will automatically detect the new device + +### Modifying Queries +Queries in the multi-device dashboards use the regex matcher `device=~"$device"` so that one, several, or all devices can be selected; the device-specific dashboard uses the exact matcher `device="${device}"` because its variable is single-select. When customizing: +- Ensure queries include the device filter +- Use `{{device}}` in legend formats to distinguish between devices +- Consider device-specific thresholds if needed + +## Troubleshooting + +### Device Not Appearing +1. Check if Prometheus can reach the device's exporters +2. Verify the device label is correctly configured +3. Check Prometheus targets at `prometheus.darksailor.dev/targets` + +### Missing Metrics +1. Ensure the required exporters are running on the target device +2. Check firewall rules allow access to exporter ports (9100, 9256) +3. 
Verify network connectivity between devices + +### Performance +- Limit the number of devices selected simultaneously for better performance +- Adjust refresh rates if dashboards become slow +- Consider shorter time ranges for detailed analysis \ No newline at end of file diff --git a/nixos/mirai/services/grafana/device-specific-system-dashboard.json b/nixos/mirai/services/grafana/device-specific-system-dashboard.json new file mode 100644 index 00000000..9d0e1244 --- /dev/null +++ b/nixos/mirai/services/grafana/device-specific-system-dashboard.json @@ -0,0 +1,1114 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 200, + "panels": [], + "title": "${device} - System Metrics", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": 
"red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "last", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - (avg(irate(node_cpu_seconds_total{mode=\"idle\",device=\"${device}\"}[5m])) * 100)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "CPU Usage", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Used Memory" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": 
"Total Memory" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "light-blue", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{device=\"${device}\"} - node_memory_MemFree_bytes{device=\"${device}\"} - node_memory_Buffers_bytes{device=\"${device}\"} - node_memory_Cached_bytes{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Used Memory", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Total Memory", + "refId": "B" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + 
"color": "red", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\",device=\"${device}\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\",device=\"${device}\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Root Disk Usage", + "refId": "A" + } + ], + "title": "Disk Usage", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 201, + "panels": [], + "title": "Network & I/O", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 10 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + 
"displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_receive_bytes_total{device_name!=\"lo\",device=\"${device}\"}[5m]) * 8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device_name}} RX", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_transmit_bytes_total{device_name!=\"lo\",device=\"${device}\"}[5m]) * 8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device_name}} TX", + "refId": "B" + } + ], + "title": "Network I/O", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": 
"prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_reads_completed_total{device=\"${device}\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device_name}} Reads", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_disk_writes_completed_total{device=\"${device}\"}[5m])", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device_name}} Writes", + "refId": "B" + } + ], + "title": "Disk I/O Operations", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 202, + "panels": [], + "title": "System Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "DOWN" + }, + "1": { + "color": "green", + "index": 0, + "text": "UP" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 19 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "up{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{job}}", + "refId": "A" + } + ], + "title": "Service Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 86400 + }, + { + "color": "red", + "value": 604800 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 19 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "time() - node_boot_time_seconds{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Uptime", + "refId": "A" + } + ], + "title": "System Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 4 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 19 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load1{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Load (1m)", + "refId": "A" + } + ], + "title": "Load 
Average (1m)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 19 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{state=\"active\",device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Active Services", + "refId": "A" + } + ], + "title": "Active SystemD Services", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 203, + "panels": [], + "title": "Load Average Trends", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "last", + "mean", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load1{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Load 1m", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load5{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Load 5m", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load15{device=\"${device}\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Load 15m", + "refId": "C" + } + ], + "title": "Load Average", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "system", + "monitoring", + "device-specific" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "mirai", + "value": "mirai" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(up, device)", + "description": "Select device to monitor", + "error": null, + "hide": 0, + "includeAll": false, + "label": "Device", + "multi": false, + "name": "device", + "options": [], + "query": { + "query": "label_values(up, device)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": 
"now" + }, + "timepicker": {}, + "timezone": "", + "title": "Device-Specific System Metrics", + "uid": "device-specific-system", + "version": 1 +} diff --git a/nixos/mirai/services/grafana/multi-device-processes-dashboard.json b/nixos/mirai/services/grafana/multi-device-processes-dashboard.json new file mode 100644 index 00000000..2afa5da6 --- /dev/null +++ b/nixos/mirai/services/grafana/multi-device-processes-dashboard.json @@ -0,0 +1,782 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "Process Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU %" + }, + "properties": [ + { + "id": "unit", + "value": "percent" + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + } + ] + }, + "gridPos": { + "h": 12, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": ["sum"], + "show": 
false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "CPU %" + } + ] + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "namedprocess_namegroup_memory_bytes{memtype=\"resident\",device=~\"$device\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(namedprocess_namegroup_cpu_seconds_total{device=~\"$device\"}[5m]) * 100", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "namedprocess_namegroup_num_procs{device=~\"$device\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "C" + } + ], + "title": "Process Resource Usage by Device", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "instance": true, + "job": true, + "memtype": true + }, + "indexByName": {}, + "renameByName": { + "Value #A": "Memory", + "Value #B": "CPU %", + "Value #C": "Process Count", + "groupname": "Process Name", + "device": "Device" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU % per Process", + "binary": { + "left": "CPU %", + "operator": "/", + "reducer": "sum", + "right": "Process Count" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + }, + "replaceFields": false + } + } + ], + "type": "table" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 101, + "panels": [], + "title": "CPU Usage Trends", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(namedprocess_namegroup_cpu_seconds_total{device=~\"$device\"}[5m]) * 100", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}}", + "refId": "A" + } + ], + "title": "CPU Usage by Process and Device", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 102, + "panels": [], + "title": "Memory Usage Trends", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "namedprocess_namegroup_memory_bytes{memtype=\"resident\",device=~\"$device\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}} RSS", + "refId": "A" + } + ], + "title": "Resident Memory Usage by Process", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 4, + 
"options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "namedprocess_namegroup_memory_bytes{memtype=\"virtual\",device=~\"$device\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}} Virtual", + "refId": "A" + } + ], + "title": "Virtual Memory Usage by Process", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 103, + "panels": [], + "title": "Process I/O", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 5, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": 
"rate(namedprocess_namegroup_read_bytes_total{device=~\"$device\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}} Read", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "rate(namedprocess_namegroup_write_bytes_total{device=~\"$device\"}[5m])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}} Write", + "refId": "B" + } + ], + "title": "Process I/O Throughput", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "namedprocess_namegroup_num_procs{device=~\"$device\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} - {{groupname}} Count", + "refId": "A" + } + ], + "title": "Process Count by 
Device", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "processes", + "monitoring", + "multi-device" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(namedprocess_namegroup_num_procs, device)", + "description": "Select devices to monitor", + "error": null, + "hide": 0, + "includeAll": true, + "label": "Device", + "multi": true, + "name": "device", + "options": [], + "query": { + "query": "label_values(namedprocess_namegroup_num_procs, device)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Multi-Device Process Monitoring", + "uid": "multi-device-processes", + "version": 1 +} diff --git a/nixos/mirai/services/grafana/multi-device-system-dashboard.json b/nixos/mirai/services/grafana/multi-device-system-dashboard.json new file mode 100644 index 00000000..1f1ecc7a --- /dev/null +++ b/nixos/mirai/services/grafana/multi-device-system-dashboard.json @@ -0,0 +1,949 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "panels": [], + "title": "System Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 70 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - (avg by (device) (irate(node_cpu_seconds_total{mode=\"idle\",device=~\"$device\"}[5m])) * 100)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} CPU Usage", + "refId": "A" + } + ], + "title": "CPU Usage by Device", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 0.8 + }, + { + "color": "red", + "value": 0.9 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{device=~\"$device\"} - node_memory_MemFree_bytes{device=~\"$device\"} - node_memory_Buffers_bytes{device=~\"$device\"} - node_memory_Cached_bytes{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Used Memory", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_memory_MemTotal_bytes{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Total Memory", + "refId": "B" + } + ], + "title": "Memory Usage by Device", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 80 + }, + { + "color": "red", + "value": 95 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "100 - ((node_filesystem_avail_bytes{mountpoint=\"/\",fstype!=\"tmpfs\",device=~\"$device\"} * 100) / node_filesystem_size_bytes{mountpoint=\"/\",fstype!=\"tmpfs\",device=~\"$device\"})", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Root Disk Usage", + "refId": "A" + } + ], + "title": "Disk Usage by Device", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 
12, + "x": 12, + "y": 9 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_receive_bytes_total{device_name!=\"lo\",device=~\"$device\"}[5m]) * 8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} {{device_name}} RX", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "irate(node_network_transmit_bytes_total{device_name!=\"lo\",device=~\"$device\"}[5m]) * 8", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} {{device_name}} TX", + "refId": "B" + } + ], + "title": "Network I/O by Device", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 101, + "panels": [], + "title": "Device Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "DOWN" + }, + "1": { + "color": "green", + "index": 0, + "text": "UP" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 0, + "y": 18 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "up{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} {{job}}", + "refId": "A" + } + ], + "title": "Service Status", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "yellow", + "value": 86400 + }, + { + "color": "green", + "value": 604800 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 6, + "y": 18 + }, + "id": 6, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "time() - node_boot_time_seconds{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Uptime", + "refId": "A" + } + ], + "title": "System Uptime", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 4 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 12, + "y": 18 + }, + "id": 7, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" +
], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load1{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Load (1m)", + "refId": "A" + } + ], + "title": "Load Average (1m)", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 100 + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 18, + "y": 18 + }, + "id": 8, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "pluginVersion": "8.0.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_systemd_units{state=\"active\",device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Active Services", + "refId": "A" + } + ], + "title": "Active SystemD Services", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 102, + "panels": [], + "title": "Load Average Trends", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "vis": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 9, + "options": { + "legend": { + "calcs": [ + "last", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load1{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Load 1m", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load5{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Load 5m", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "expr": "node_load15{device=~\"$device\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{device}} Load 15m", + "refId": "C" + } + ], + "title": "Load Average by Device", + "type": "timeseries" + } + ], + "refresh": "30s", + "schemaVersion": 30, + "style": "dark", + "tags": [ + "system", + "monitoring", + "multi-device" + ], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "label_values(up, 
device)", + "description": "Select devices to monitor", + "error": null, + "hide": 0, + "includeAll": true, + "label": "Device", + "multi": true, + "name": "device", + "options": [], + "query": { + "query": "label_values(up, device)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Multi-Device System Metrics", + "uid": "multi-device-system", + "version": 1 +}