diff options
6 files changed, 177 insertions, 0 deletions
Adds the Prometheus smartctl exporter: a NixOS exporter module with a VM test
entry, the package expression (with one local patch and two fetched ones), and
the all-packages.nix attribute.
The `devices` option example uses whitespace-separated Nix list syntax; the
`index` blob ids below are the ones recorded in the original commit.

diff --git a/nixos/modules/services/monitoring/prometheus/exporters.nix b/nixos/modules/services/monitoring/prometheus/exporters.nix
index 62e90232e11..d29d50706ef 100644
--- a/nixos/modules/services/monitoring/prometheus/exporters.nix
+++ b/nixos/modules/services/monitoring/prometheus/exporters.nix
@@ -61,6 +61,7 @@ let
     "rtl_433"
     "script"
     "snmp"
+    "smartctl"
     "smokeping"
     "sql"
     "surfboard"
diff --git a/nixos/modules/services/monitoring/prometheus/exporters/smartctl.nix b/nixos/modules/services/monitoring/prometheus/exporters/smartctl.nix
new file mode 100644
index 00000000000..b6416b93e69
--- /dev/null
+++ b/nixos/modules/services/monitoring/prometheus/exporters/smartctl.nix
@@ -0,0 +1,64 @@
+{ config, lib, pkgs, options }:
+
+with lib;
+
+let
+  cfg = config.services.prometheus.exporters.smartctl;
+  format = pkgs.formats.yaml {};
+  configFile = format.generate "smartctl-exporter.yml" {
+    smartctl_exporter = {
+      bind_to = "${cfg.listenAddress}:${toString cfg.port}";
+      url_path = "/metrics";
+      smartctl_location = "${pkgs.smartmontools}/bin/smartctl";
+      collect_not_more_than_period = cfg.maxInterval;
+      devices = cfg.devices;
+    };
+  };
+in {
+  port = 9633;
+
+  extraOpts = {
+    devices = mkOption {
+      type = types.listOf types.str;
+      default = [];
+      example = literalExpression ''
+        [ "/dev/sda" "/dev/nvme0n1" ]
+      '';
+      description = ''
+        Paths to disks that will be monitored.
+      '';
+    };
+    maxInterval = mkOption {
+      type = types.str;
+      default = "60s";
+      example = "2m";
+      description = ''
+        Interval that limits how often a disk can be queried.
+      '';
+    };
+  };
+
+  serviceOpts = {
+    serviceConfig = {
+      AmbientCapabilities = [
+        "CAP_SYS_ADMIN"
+      ];
+      CapabilityBoundingSet = [
+        "CAP_SYS_ADMIN"
+      ];
+      DevicePolicy = "closed";
+      DeviceAllow = lib.mkForce cfg.devices;
+      ExecStart = ''
+        ${pkgs.prometheus-smartctl-exporter}/bin/smartctl_exporter -config ${configFile}
+      '';
+      PrivateDevices = lib.mkForce false;
+      ProtectProc = "invisible";
+      ProcSubset = "pid";
+      SupplementaryGroups = [ "disk" ];
+      SystemCallFilter = [
+        "@system-service"
+        "~@privileged @resources"
+      ];
+    };
+  };
+}
diff --git a/nixos/tests/prometheus-exporters.nix b/nixos/tests/prometheus-exporters.nix
index 80052a40c3f..62deb386495 100644
--- a/nixos/tests/prometheus-exporters.nix
+++ b/nixos/tests/prometheus-exporters.nix
@@ -1018,6 +1018,25 @@ let
       '';
     };
 
+    smartctl = {
+      exporterConfig = {
+        enable = true;
+        devices = [
+          "/dev/vda"
+        ];
+      };
+      exporterTest = ''
+        wait_for_unit("prometheus-smartctl-exporter.service")
+        wait_for_open_port("9633")
+        wait_until_succeeds(
+          "curl -sSf 'localhost:9633/metrics'"
+        )
+        wait_until_succeeds(
+            'journalctl -eu prometheus-smartctl-exporter.service -o cat | grep "/dev/vda: Unable to detect device type"'
+        )
+      '';
+    };
+
     smokeping = {
       exporterConfig = {
         enable = true;
diff --git a/pkgs/servers/monitoring/prometheus/smartctl-exporter/0001-Return-the-cached-value-if-it-s-not-time-to-scan-aga.patch b/pkgs/servers/monitoring/prometheus/smartctl-exporter/0001-Return-the-cached-value-if-it-s-not-time-to-scan-aga.patch
new file mode 100644
index 00000000000..28616251f37
--- /dev/null
+++ b/pkgs/servers/monitoring/prometheus/smartctl-exporter/0001-Return-the-cached-value-if-it-s-not-time-to-scan-aga.patch
@@ -0,0 +1,51 @@
+From e81b06df67b1d42ef915615fafa0b56ef956673b Mon Sep 17 00:00:00 2001
+From: Andreas Fuchs <asf@boinkor.net>
+Date: Thu, 11 Feb 2021 17:30:44 -0500
+Subject: [PATCH] Return the cached value if it's not time to scan again yet
+
+This should ensure that if we have a valid value cached (which ought
+to be every time after the first scan), we return it as metrics.
+
+This fixes the crashes that would happen if queries happened earlier
+than the re-scan interval allowed.
+
+Address review feedback: Shorten the time-to-scan logic
+
+We can express this in a single if statement, so it takes fewer lines
+to do the "should we check again" check.
+---
+ readjson.go | 11 ++---------
+ 1 file changed, 2 insertions(+), 9 deletions(-)
+
+diff --git a/readjson.go b/readjson.go
+index da35448..c9996fd 100644
+--- a/readjson.go
++++ b/readjson.go
+@@ -78,14 +78,7 @@ func readData(device string) (gjson.Result, error) {
+ 
+ 	if _, err := os.Stat(device); err == nil {
+ 		cacheValue, cacheOk := jsonCache[device]
+-		timeToScan := false
+-		if cacheOk {
+-			timeToScan = time.Now().After(cacheValue.LastCollect.Add(options.SMARTctl.CollectPeriodDuration))
+-		} else {
+-			timeToScan = true
+-		}
+-
+-		if timeToScan {
++		if !cacheOk || time.Now().After(cacheValue.LastCollect.Add(options.SMARTctl.CollectPeriodDuration)) {
+ 			json, ok := readSMARTctl(device)
+ 			if ok {
+ 				jsonCache[device] = JSONCache{JSON: json, LastCollect: time.Now()}
+@@ -93,7 +86,7 @@ func readData(device string) (gjson.Result, error) {
+ 			}
+ 			return gjson.Parse(DEFAULT_EMPTY_JSON), fmt.Errorf("smartctl returned bad data for device %s", device)
+ 		}
+-		return gjson.Parse(DEFAULT_EMPTY_JSON), fmt.Errorf("Too early collect called for device %s", device)
++		return cacheValue.JSON, nil
+ 	}
+ 	return gjson.Parse(DEFAULT_EMPTY_JSON), fmt.Errorf("Device %s unavialable", device)
+ }
+-- 
+2.33.1
+
diff --git a/pkgs/servers/monitoring/prometheus/smartctl-exporter/default.nix b/pkgs/servers/monitoring/prometheus/smartctl-exporter/default.nix
new file mode 100644
index 00000000000..45315a0f3d7
--- /dev/null
+++ b/pkgs/servers/monitoring/prometheus/smartctl-exporter/default.nix
@@ -0,0 +1,41 @@
+{ lib
+, fetchFromGitHub
+, fetchpatch
+, buildGoModule
+}:
+
+buildGoModule rec {
+  pname = "smartctl_exporter";
+  version = "unstable-2020-11-14";
+
+  src = fetchFromGitHub {
+    owner = "prometheus-community";
+    repo = pname;
+    rev = "e27581d56ad80340fb076d3ce22cef337ed76679";
+    sha256 = "sha256-iWaFDjVLBIAA9zGe0utbuvmEdA3R5lge0iCh3j2JfE8=";
+  };
+
+  patches = [
+    # Fixes out of range panic (https://github.com/prometheus-community/smartctl_exporter/issues/19)
+    (fetchpatch {
+      url = "https://github.com/prometheus-community/smartctl_exporter/commit/15575301a8e2fe5802a8c066c6fa9765d50b8cfa.patch";
+      sha256 = "sha256-HLUrGXNz3uKpuQBUgQBSw6EGbGl23hQnimTGl64M5bQ=";
+    })
+    # Fix validation on empty smartctl response (https://github.com/prometheus-community/smartctl_exporter/pull/31)
+    (fetchpatch {
+      url = "https://github.com/prometheus-community/smartctl_exporter/commit/744b4e5f6a46e029d31d5aa46642e85f429c2cfa.patch";
+      sha256 = "sha256-MgLtYR1SpM6XrZQQ3AgQRmNF3OnaBCqXMJRV9BOzKPc=";
+    })
+    # Fixes missing metrics if outside of query interval (https://github.com/prometheus-community/smartctl_exporter/pull/18)
+    ./0001-Return-the-cached-value-if-it-s-not-time-to-scan-aga.patch
+  ];
+
+  vendorSha256 = "1xhrzkfm2p20k7prgdfax4408g4qpa4wbxigmcmfz7kjg2zi88ld";
+
+  meta = with lib; {
+    description = "Export smartctl statistics for Prometheus";
+    homepage = "https://github.com/prometheus-community/smartctl_exporter";
+    license = licenses.lgpl3;
+    maintainers = with maintainers; [ hexa ];
+  };
+}
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index 28d3ca67806..8b54479e1cf 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -21405,6 +21405,7 @@ with pkgs;
   prometheus-rabbitmq-exporter = callPackage ../servers/monitoring/prometheus/rabbitmq-exporter.nix { };
   prometheus-rtl_433-exporter = callPackage ../servers/monitoring/prometheus/rtl_433-exporter.nix { };
   prometheus-script-exporter = callPackage ../servers/monitoring/prometheus/script-exporter.nix { };
+  prometheus-smartctl-exporter = callPackage ../servers/monitoring/prometheus/smartctl-exporter { };
   prometheus-smokeping-prober = callPackage ../servers/monitoring/prometheus/smokeping-prober.nix { };
   prometheus-snmp-exporter = callPackage ../servers/monitoring/prometheus/snmp-exporter.nix { };
   prometheus-statsd-exporter = callPackage ../servers/monitoring/prometheus/statsd-exporter.nix { };