# NixOS module: Grafana (behind Caddy + Authelia) and a central Prometheus
# server with local exporters, plus scrape jobs for the hosts tako, tsuba and ryu.
{
  config,
  pkgs,
  lib,
  ...
}: let
  # Port configurations. Every local listener, scrape target and firewall rule
  # below is derived from this set so the numbers are defined in one place.
  ports = {
    grafana = 3001; # Changed from 3000 to avoid clash with Gitea
    prometheus = 9090;

    # System exporters
    node = 9100;
    systemd = 9558;
    process = 9256;

    # Infrastructure exporters
    postgres = 9187;
    redis = 9121;
    cadvisor = 8080;

    # Application exporters
    caddy = 2019;
  };
in {
  # Secret used for Grafana's `security.secret_key`; readable by the grafana user.
  sops.secrets."grafana/secret_key" = {
    owner = "grafana";
  };

  # Grafana configuration with Authelia integration
  services.grafana = {
    enable = true;

    settings = {
      server = {
        # Loopback only: Grafana is reached through the Caddy virtual host below.
        http_addr = "127.0.0.1";
        http_port = ports.grafana;
        domain = "grafana.darksailor.dev";
        root_url = "https://grafana.darksailor.dev";
      };

      # Disable Grafana's own auth since we use Authelia
      auth.disable_login_form = true;
      "auth.basic".enabled = false;
      "auth.anonymous".enabled = false;
      "auth.proxy" = {
        enabled = true;
        # The user name is taken from this request header, so Grafana must only
        # be reachable through the authenticating reverse proxy.
        header_name = "REMOTE-USER";
        header_property = "username";
        auto_sign_up = true;
      };

      users = {
        allow_sign_up = false;
        auto_assign_org = true;
        # NOTE(review): every proxy-authenticated user that is auto-signed-up
        # becomes an Admin - confirm this is intended.
        auto_assign_org_role = "Admin";
      };

      security = {
        disable_gravatar = true;
        cookie_secure = true;
        # `$__file{...}` makes Grafana read the key from the sops secret file
        # at runtime instead of embedding it in the generated config.
        secret_key = ''$__file{${config.sops.secrets."grafana/secret_key".path}}'';
      };

      analytics = {
        reporting_enabled = false;
        check_for_updates = false;
      };
    };

    provision = {
      enable = true;

      datasources.settings.datasources = [
        {
          # The dashboards linked via systemd.tmpfiles.rules are rewritten to
          # reference this datasource by the name "Prometheus".
          name = "Prometheus";
          type = "prometheus";
          access = "proxy";
          url = "http://localhost:${toString ports.prometheus}";
          isDefault = true;
          jsonData = {
            # Matches globalConfig.scrape_interval of the Prometheus server.
            timeInterval = "30s";
          };
        }
      ];

      # Provision popular community dashboards
      dashboards = {
        settings = {
          apiVersion = 1;
          providers = [
            {
              name = "default";
              orgId = 1;
              folder = "";
              type = "file";
              disableDeletion = false;
              updateIntervalSeconds = 10;
              allowUiUpdates = true;
              # Populated by the tmpfiles rules at the bottom of this module.
              options.path = "/var/lib/grafana/dashboards";
            }
          ];
        };
      };
    };
  };

  # Caddy virtual host for Grafana with Authelia
  # (`auth` is a Caddy snippet that is not defined in this file.)
  services.caddy.virtualHosts."grafana.darksailor.dev".extraConfig = ''
    import auth
    reverse_proxy localhost:${toString ports.grafana}
  '';

  # Central Prometheus server
  services.prometheus = {
    enable = true;
    port = ports.prometheus;

    # Retention settings (90 days)
    retentionTime = "90d";

    # Global scrape config
    globalConfig = {
      scrape_interval = "30s";
      evaluation_interval = "30s";
    };

    # System exporters for tako
    exporters = {
      node = {
        enable = true;
        port = ports.node;
        enabledCollectors = [
          "systemd"
          "textfile"
          "filesystem"
          "loadavg"
          "meminfo"
          "netdev"
          "netstat"
          "stat"
          "time"
          "uname"
          "vmstat"
          "diskstats"
          "cpu"
        ];
      };

      systemd = {
        enable = true;
        port = ports.systemd;
      };

      process = {
        enable = true;
        # Set explicitly so the listener follows `ports.process`, like the
        # scrape target and firewall rule that reference the same value.
        port = ports.process;
        # One process group per command name, matching every command line.
        settings.process_names = [
          {
            name = "{{.Comm}}";
            cmdline = [".*"];
          }
        ];
      };

      postgres = {
        enable = true;
        port = ports.postgres;
        runAsLocalSuperUser = true;
      };

      redis = {
        enable = true;
        port = ports.redis;
      };
    };

    # Scrape configurations for all targets
    #
    # NOTE(review): each job assigns one shared `instance` label to all of its
    # targets, so per-target series such as `up` end up with identical label
    # sets inside a job - confirm this is intended.
    scrapeConfigs = [
      # System metrics - tako (local)
      {
        job_name = "tako-system";
        static_configs = [
          {
            targets = [
              "localhost:${toString ports.node}"
              "localhost:${toString ports.systemd}"
              "localhost:${toString ports.process}"
            ];
            labels = {
              instance = "tako";
              machine = "tako";
              role = "server";
            };
          }
        ];
      }

      # Infrastructure - tako
      {
        job_name = "tako-infrastructure";
        static_configs = [
          {
            targets = [
              "localhost:${toString ports.postgres}"
              "localhost:${toString ports.redis}"
              # NOTE(review): the cAdvisor container definition further down is
              # commented out; verify something is listening on this port.
              "localhost:${toString ports.cadvisor}"
            ];
            labels = {
              instance = "tako";
              machine = "tako";
            };
          }
        ];
      }

      # Caddy metrics - tako
      {
        job_name = "tako-caddy";
        static_configs = [
          {
            targets = ["localhost:${toString ports.caddy}"];
            labels = {
              instance = "tako";
              machine = "tako";
              service = "caddy";
            };
          }
        ];
      }

      # Application metrics - tako
      {
        job_name = "tako-applications";
        static_configs = [
          {
            targets = [
              "localhost:3000" # gitea
              "localhost:5555" # authelia (if metrics enabled)
            ];
            labels = {
              instance = "tako";
              machine = "tako";
            };
          }
        ];
      }

      # System metrics - tsuba (remote via Tailscale)
      {
        job_name = "tsuba-system";
        static_configs = [
          {
            targets = [
              "tsuba:9100" # node
              "tsuba:9558" # systemd
              "tsuba:9256" # process
            ];
            labels = {
              instance = "tsuba";
              machine = "tsuba";
              role = "server";
            };
          }
        ];
      }

      # Infrastructure - tsuba
      {
        job_name = "tsuba-infrastructure";
        static_configs = [
          {
            targets = [
              "tsuba:8080" # cadvisor
              "tsuba:2019" # caddy
            ];
            labels = {
              instance = "tsuba";
              machine = "tsuba";
            };
          }
        ];
      }

      # Media services - tsuba
      {
        job_name = "tsuba-media";
        static_configs = [
          {
            targets = [
              "tsuba:8096" # jellyfin (built-in /metrics endpoint)
              # "tsuba:8123" # homeassistant (configure prometheus integration)
              "tsuba:9617" # pihole-exporter
            ];
            labels = {
              instance = "tsuba";
              machine = "tsuba";
            };
          }
        ];
        metrics_path = "/metrics";
        # Per-target metrics path overrides, keyed on the target address.
        relabel_configs = [
          {
            source_labels = ["__address__"];
            regex = "tsuba:8096";
            target_label = "__metrics_path__";
            replacement = "/metrics";
          }
          # {
          #   source_labels = ["__address__"];
          #   regex = "tsuba:8123";
          #   target_label = "__metrics_path__";
          #   replacement = "/api/prometheus";
          # }
        ];
      }

      # Servarr stack - tsuba (exportarr)
      {
        job_name = "tsuba-servarr";
        static_configs = [
          {
            targets = [
              "tsuba:9707" # sonarr
              "tsuba:9708" # radarr
              "tsuba:9709" # lidarr
              "tsuba:9710" # bazarr
            ];
            labels = {
              instance = "tsuba";
              machine = "tsuba";
              stack = "servarr";
            };
          }
        ];
      }

      # Deluge - tsuba
      {
        job_name = "tsuba-deluge";
        static_configs = [
          {
            targets = ["tsuba:9354"];
            labels = {
              instance = "tsuba";
              machine = "tsuba";
              service = "deluge";
            };
          }
        ];
      }

      # System metrics - ryu (remote via Tailscale)
      {
        job_name = "ryu-system";
        static_configs = [
          {
            targets = [
              "ryu:9100"
              "ryu:9558"
              "ryu:9256"
              "ryu:9835" # nvidia-gpu
            ];
            labels = {
              instance = "ryu";
              machine = "ryu";
              role = "desktop";
            };
          }
        ];
      }

      # Infrastructure - ryu
      {
        job_name = "ryu-infrastructure";
        static_configs = [
          {
            targets = [
              "ryu:8080" # cadvisor
              "ryu:2019" # caddy
            ];
            labels = {
              instance = "ryu";
              machine = "ryu";
            };
          }
        ];
      }
    ];
  };

  # Docker cAdvisor for container metrics
  # virtualisation.oci-containers.containers.cadvisor = {
  #   image = "gcr.io/cadvisor/cadvisor:v0.49.1";
  #   ports = ["127.0.0.1:${toString ports.cadvisor}:8080"];
  #   volumes = [
  #     "/:/rootfs:ro"
  #     "/var/run:/var/run:ro"
  #     "/sys:/sys:ro"
  #     "/var/lib/docker/:/var/lib/docker:ro"
  #     "/dev/disk/:/dev/disk:ro"
  #   ];
  #   extraOptions = [
  #     "--privileged"
  #     "--device=/dev/kmsg"
  #   ];
  # };

  # Link dashboard files from Nix store to Grafana's expected location
  systemd.tmpfiles.rules = let
    # Builds a copy of `dashboard` named `name` in which every literal
    # `${DS_PROMETHEUS}` placeholder is replaced by "Prometheus", the name of
    # the datasource provisioned above.
    withPrometheusDatasource = name: dashboard:
      pkgs.runCommand name {} ''
        ${pkgs.gnused}/bin/sed 's/\''${DS_PROMETHEUS}/Prometheus/g' ${dashboard} > $out
      '';

    # Define dashboard files with proper hashes
    nodeExporterFull = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/1860/revisions/37/download";
      sha256 = "0qza4j8lywrj08bqbww52dgh2p2b9rkhq5p313g72i57lrlkacfl";
    };

    nvidiaDashboardRaw = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/14574/revisions/9/download";
      sha256 = "170ijap5i99sapkxlf3k0lnvwmb6g9jkk7q66nwjwswkj2a7rqbr";
    };
    # Fix NVIDIA dashboard to use our Prometheus datasource
    nvidiaDashboard = withPrometheusDatasource "nvidia-gpu-fixed.json" nvidiaDashboardRaw;

    postgresqlDashboardRaw = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/9628/revisions/7/download";
      sha256 = "0xmk68kqb9b8aspjj2f8wxv2mxiqk9k3xs0yal4szmzbv65c6k66";
    };
    # Fix PostgreSQL dashboard to use our Prometheus datasource
    postgresqlDashboard = withPrometheusDatasource "postgresql-fixed.json" postgresqlDashboardRaw;

    # Linked as downloaded, without the datasource rewrite.
    redisDashboard = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/11835/revisions/1/download";
      sha256 = "15lbn4i8j5hiypl4dsg0d72jgrgjwpagkf5kcwx66gyps17jcrxx";
    };

    dockerDashboardRaw = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/193/revisions/1/download";
      sha256 = "1lxbbl91fh0yfh8x53205b7nw5ivghlpfb0m308z2p6fzvz2iq2m";
    };
    # Fix Docker dashboard to use our Prometheus datasource
    dockerDashboard = withPrometheusDatasource "docker-cadvisor-fixed.json" dockerDashboardRaw;

    caddyDashboardRaw = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/14280/revisions/1/download";
      sha256 = "0j3q68cq1nj8gcxkqz5h1kn1ds5kgq4jlkw73xp6yc88mbm5nyh4";
    };
    # Fix Caddy dashboard to use our Prometheus datasource
    caddyDashboard = withPrometheusDatasource "caddy-fixed.json" caddyDashboardRaw;

    piholeDashboardRaw = pkgs.fetchurl {
      url = "https://grafana.com/api/dashboards/10176/revisions/3/download";
      sha256 = "18f8w3l5k178agipfbimg29lkf2i32xynin1g1v5abiac3ahj7ih";
    };
    # Fix Pi-hole dashboard to use our Prometheus datasource
    piholeDashboard = withPrometheusDatasource "pihole-fixed.json" piholeDashboardRaw;
  in [
    # Create the provider directory, then (re)create one symlink per dashboard
    # pointing at its store path.
    "d /var/lib/grafana/dashboards 0755 grafana grafana -"
    "L+ /var/lib/grafana/dashboards/node-exporter-full.json - - - - ${nodeExporterFull}"
    "L+ /var/lib/grafana/dashboards/nvidia-gpu.json - - - - ${nvidiaDashboard}"
    "L+ /var/lib/grafana/dashboards/postgresql.json - - - - ${postgresqlDashboard}"
    "L+ /var/lib/grafana/dashboards/redis.json - - - - ${redisDashboard}"
    "L+ /var/lib/grafana/dashboards/docker-cadvisor.json - - - - ${dockerDashboard}"
    "L+ /var/lib/grafana/dashboards/caddy.json - - - - ${caddyDashboard}"
    "L+ /var/lib/grafana/dashboards/pihole.json - - - - ${piholeDashboard}"
  ];

  # Open firewall ports for Prometheus to scrape exporters
  networking.firewall = {
    # allowedTCPPorts = [
    #   ports.node
    #   ports.systemd
    #   ports.process
    # ];

    # Allow Prometheus and exporter access from the Tailscale network.
    # The Grafana port is deliberately not opened: Grafana listens on
    # 127.0.0.1 only and trusts the REMOTE-USER header, so it must be reached
    # through the Caddy virtual host rather than directly.
    interfaces."tailscale0".allowedTCPPorts = [
      ports.prometheus
      ports.node
      ports.systemd
      ports.process
      ports.postgres
      ports.redis
      ports.cadvisor
    ];
  };
}