summary refs log blame commit diff
path: root/nixos/modules/services/monitoring/prometheus/default.nix
blob: d7e06484b69289e5829e4e82d1b842516222ca5a (plain) (tree)
1
2
3
4
5
6
7
8
9





                                   
 
                                          
 

                                                           
                           
                              




                                                           
 

                               
                                       


                                                              


                                                                       
                
                                                    
                                                                              
                                                                           
       
                                                             


                                  

    
                     
                                        
                                                    
                                  
                                                         
 
                                   

                                             
                                                                             

                                                                                                      
      
                                                                                   
 
                                                                                          













                                                                  
                                                                           
 


                                                              
 




                                         
 


















                                                               










                                                              

































                                                                       










                                                                                                     














                                                                
            










                                                             











                                                                      



                                                     







                                                                      















                                                                            


                                                                        





                                                                                


      
                                             








                                                    
                     






                                                                  







                                             


































                                                                                     











                                                                                                                                   
 






                                      


      







                                                       



                                                              




                                                


                                                     
 









                                                         



                                                     



                                                           








                                                                 


                                                                      













                                                                                          




                                              
                        




                                                                          

           



                                                   




                                              


















                                                                           



                                                               








                                                                           


      

                                          



















                                                                      


      
    




                                                                                    
                                 
 





                                                
      
 







                                                   
 
                     
                        





                          

                              
                          



                                                                       
 







                                                                                              
 






                                                            
 








                                                                            
 







                                                                      
 






                                                               
 






                                                                    
 






                                                  
 


















                                                                                                                                                                                    
 






                                                                          
 






                                                    
 








                                                                            













                                                                    
    
 
                            














                                                                              




                                                             
      










                                                               
        

      
 
{ config, pkgs, lib, ... }:

with lib;

let
  # Shorthand for the option values declared by this module.
  cfg = config.services.prometheus;

  # Absolute location of the state directory; cfg.stateDir is a name
  # relative to /var/lib.
  workingDir = "/var/lib/${cfg.stateDir}";

  # Wrap `file` in a derivation that only builds if `promtool ${what}`
  # accepts it.
  #
  #   what: promtool sub-command, e.g. "check config" or "check rules"
  #   name: prefix used for the name of the resulting store path
  #   file: the file to validate
  #
  # When cfg.checkConfig is disabled, `file` is returned unchanged.
  promtoolCheck = what: name: file:
    let
      # Store path names may not contain spaces, so squeeze them out of the
      # sub-command ("check config" -> "checkconfig").
      compactWhat = replaceStrings [" "] [""] what;

      # The output is a symlink to the input, so consumers still end up
      # reading the original file once the check has passed.
      checkedFile = pkgs.runCommandNoCCLocal
        "${name}-${compactWhat}-checked"
        { buildInputs = [ cfg.package ]; } ''
        ln -s ${file} $out
        promtool ${what} $out
      '';
    in
      if !cfg.checkConfig then file else checkedFile;

  # Pretty-print the Nix value `x` as JSON into a store file named `name`.
  #
  # The serialized JSON is handed to the builder through `passAsFile`
  # (exposed to the script as "$jsonPath") instead of being spliced into the
  # shell script inside single quotes: with the inline form, any `'`
  # character in the configuration (a label value, a relabel regex, ...)
  # terminated the quoting and broke the build.
  writePrettyJSON = name: x:
    pkgs.runCommandNoCCLocal name {
      json = builtins.toJSON x;
      passAsFile = [ "json" ];
    } ''
      ${pkgs.jq}/bin/jq . "$jsonPath" > $out
    '';

  # prometheus.yml rendered from the structured options below. The output is
  # JSON, which Prometheus reads as YAML.
  generatedPrometheusYml = writePrettyJSON "prometheus.yml" promConfig;

  # Structured form of the main Prometheus configuration file.
  promConfig =
    let
      # Rules given inline through cfg.rules, gathered into a single file.
      inlineRules =
        pkgs.writeText "prometheus.rules" (concatStringsSep "\n" cfg.rules);

      # Each rule file is validated on its own with `promtool check rules`.
      checkRuleFile = promtoolCheck "check rules" "rules";
    in {
      alerting.alertmanagers = cfg.alertmanagers;
      global = filterValidPrometheus cfg.globalConfig;
      rule_files = map checkRuleFile (cfg.ruleFiles ++ [ inlineRules ]);
      scrape_configs = filterValidPrometheus cfg.scrapeConfigs;
    };

  # The configuration file handed to Prometheus: the verbatim cfg.configText
  # when it is set, otherwise the file generated from the structured
  # options. Either way it goes through `promtool check config` first.
  prometheusYml =
    let
      unchecked =
        if cfg.configText == null
        then generatedPrometheusYml
        else pkgs.writeText "prometheus.yml" cfg.configText;
    in
      promtoolCheck "check config" "prometheus.yml" unchecked;

  # Command line for the prometheus binary: user-supplied extra flags first,
  # then the flags derived from the module options.
  cmdlineArgs =
    let
      moduleFlags = [
        "--storage.tsdb.path=${workingDir}/data/"
        "--config.file=${prometheusYml}"
        "--web.listen-address=${cfg.listenAddress}:${toString cfg.port}"
        "--alertmanager.notification-queue-capacity=${toString cfg.alertmanagerNotificationQueueCapacity}"
        "--alertmanager.timeout=${toString cfg.alertmanagerTimeout}s"
      ];

      # Only passed when the user configured an external URL.
      externalUrlFlag =
        if cfg.webExternalUrl != null
        then [ "--web.external-url=${cfg.webExternalUrl}" ]
        else [ ];
    in
      cfg.extraFlags ++ moduleFlags ++ externalUrlFlag;

  # Strip everything that must not reach prometheus.yml: the module system's
  # internal `_module` attribute and every option left at null.
  filterValidPrometheus =
    filterAttrsListRecursive (attrName: attrValue: attrName != "_module" && attrValue != null);

  # Recursively drop attributes for which `pred name value` is false.
  # Descends into both attribute sets and lists; any other value is
  # returned untouched.
  filterAttrsListRecursive = pred: x:
    let
      recurse = filterAttrsListRecursive pred;
    in
      if isList x then
        map recurse x
      else if isAttrs x then
        # Filter this level first, then recurse into the surviving values.
        mapAttrs (_: recurse) (filterAttrs pred x)
      else
        x;

  # Declare an optional setting: nullable, with null as its default.
  mkOpt = type: description:
    mkOption {
      inherit description;
      default = null;
      type = types.nullOr type;
    };

  # Like mkOpt, but appends a note to the description stating which value
  # Prometheus itself falls back to when the option is left at null.
  mkDefOpt = type: defaultStr: description:
    let
      fallbackNote = ''

        Defaults to <literal>${defaultStr}</literal> in prometheus
        when set to <literal>null</literal>.
      '';
    in
      mkOpt type (description + fallbackNote);

  # Submodule type behind services.prometheus.globalConfig; it ends up as the
  # `global` section of the generated configuration.
  promTypes.globalConfig = let
    inherit (types) attrsOf str;
  in types.submodule {
    options = {
      evaluation_interval = mkDefOpt str "1m" ''
        How frequently to evaluate rules by default.
      '';

      external_labels = mkOpt (attrsOf str) ''
        The labels to add to any time series or alerts when
        communicating with external systems (federation, remote
        storage, Alertmanager).
      '';

      scrape_interval = mkDefOpt str "1m" ''
        How frequently to scrape targets by default.
      '';

      scrape_timeout = mkDefOpt str "10s" ''
        How long until a scrape request times out.
      '';
    };
  };

  # Submodule type for one entry of services.prometheus.scrapeConfigs.
  # Apart from job_name, every option is declared through mkOpt/mkDefOpt and
  # is therefore null unless set by the user.
  promTypes.scrape_config = types.submodule {
    options = {
      # The only mandatory setting: it has no default.
      job_name = mkOption {
        type = types.str;
        description = ''
          The job name assigned to scraped metrics by default.
        '';
      };
      scrape_interval = mkOpt types.str ''
        How frequently to scrape targets from this job. Defaults to the
        globally configured default.
      '';

      scrape_timeout = mkOpt types.str ''
        Per-target timeout when scraping this job. Defaults to the
        globally configured default.
      '';

      metrics_path = mkDefOpt types.str "/metrics" ''
        The HTTP resource path on which to fetch metrics from targets.
      '';

      # The federation remark belongs to the "true" case: that is the mode
      # that keeps the labels coming from the target.
      honor_labels = mkDefOpt types.bool "false" ''
        Controls how Prometheus handles conflicts between labels
        that are already present in scraped data and labels that
        Prometheus would attach server-side ("job" and "instance"
        labels, manually configured target labels, and labels
        generated by service discovery implementations).

        If honor_labels is set to "true", label conflicts are
        resolved by keeping label values from the scraped data and
        ignoring the conflicting server-side labels. This is useful
        for use cases such as federation, where all labels specified
        in the target should be preserved.

        If honor_labels is set to "false", label conflicts are
        resolved by renaming conflicting labels in the scraped data
        to "exported_&lt;original-label&gt;" (for example
        "exported_instance", "exported_job") and then attaching
        server-side labels.
      '';

      honor_timestamps = mkDefOpt types.bool "true" ''
        honor_timestamps controls whether Prometheus respects the timestamps present
        in scraped data.

        If honor_timestamps is set to <literal>true</literal>, the timestamps of the metrics exposed
        by the target will be used.

        If honor_timestamps is set to <literal>false</literal>, the timestamps of the metrics exposed
        by the target will be ignored.
      '';

      scheme = mkDefOpt (types.enum ["http" "https"]) "http" ''
        The URL scheme with which to fetch metrics from targets.
      '';

      params = mkOpt (types.attrsOf (types.listOf types.str)) ''
        Optional HTTP URL parameters.
      '';

      # Inside the submodule both fields are required; the submodule as a
      # whole is optional.
      basic_auth = mkOpt (types.submodule {
        options = {
          username = mkOption {
            type = types.str;
            description = ''
              HTTP username
            '';
          };
          password = mkOption {
            type = types.str;
            description = ''
              HTTP password
            '';
          };
        };
      }) ''
        Optional http login credentials for metrics scraping.
      '';

      bearer_token = mkOpt types.str ''
        Sets the `Authorization` header on every scrape request with
        the configured bearer token. It is mutually exclusive with
        <option>bearer_token_file</option>.
      '';

      bearer_token_file = mkOpt types.str ''
        Sets the `Authorization` header on every scrape request with
        the bearer token read from the configured file. It is mutually
        exclusive with <option>bearer_token</option>.
      '';

      tls_config = mkOpt promTypes.tls_config ''
        Configures the scrape request's TLS settings.
      '';

      proxy_url = mkOpt types.str ''
        Optional proxy URL.
      '';

      # Target discovery mechanisms; each takes a list of the matching
      # promTypes.* submodule.
      ec2_sd_configs = mkOpt (types.listOf promTypes.ec2_sd_config) ''
        List of EC2 service discovery configurations.
      '';

      dns_sd_configs = mkOpt (types.listOf promTypes.dns_sd_config) ''
        List of DNS service discovery configurations.
      '';

      consul_sd_configs = mkOpt (types.listOf promTypes.consul_sd_config) ''
        List of Consul service discovery configurations.
      '';

      file_sd_configs = mkOpt (types.listOf promTypes.file_sd_config) ''
        List of file service discovery configurations.
      '';

      static_configs = mkOpt (types.listOf promTypes.static_config) ''
        List of labeled target groups for this job.
      '';

      relabel_configs = mkOpt (types.listOf promTypes.relabel_config) ''
        List of relabel configurations.
      '';

      sample_limit = mkDefOpt types.int "0" ''
        Per-scrape limit on number of scraped samples that will be accepted.
        If more than this number of samples are present after metric relabelling
        the entire scrape will be treated as failed. 0 means no limit.
      '';
    };
  };

  # Submodule type for one statically configured target group.
  promTypes.static_config = let
    inherit (types) attrsOf listOf str;
  in types.submodule {
    options = {
      # Unlike most options in this file, labels defaults to an empty set
      # rather than null.
      labels = mkOption {
        default = {};
        type = attrsOf str;
        description = ''
          Labels assigned to all metrics scraped from the targets.
        '';
      };
      targets = mkOption {
        type = listOf str;
        description = ''
          The targets specified by the target group.
        '';
      };
    };
  };

  # Submodule type for one EC2 service discovery configuration.
  promTypes.ec2_sd_config = let
    inherit (types) int listOf str;
  in types.submodule {
    options = {
      # Mandatory: the only option here without a default.
      region = mkOption {
        description = ''
          The AWS Region.
        '';
        type = str;
      };

      access_key = mkOpt str ''
        The AWS API key id. If blank, the environment variable
        <literal>AWS_ACCESS_KEY_ID</literal> is used.
      '';

      endpoint = mkOpt str ''
        Custom endpoint to be used.
      '';

      filters = mkOpt (listOf promTypes.filter) ''
        Filters can be used optionally to filter the instance list by other criteria.
      '';

      port = mkDefOpt int "80" ''
        The port to scrape metrics from. If using the public IP
        address, this must instead be specified in the relabeling
        rule.
      '';

      profile = mkOpt str ''
        Named AWS profile used to connect to the API.
      '';

      refresh_interval = mkDefOpt str "60s" ''
        Refresh interval to re-read the instance list.
      '';

      role_arn = mkOpt str ''
        AWS Role ARN, an alternative to using AWS API keys.
      '';

      secret_key = mkOpt str ''
        The AWS API key secret. If blank, the environment variable
         <literal>AWS_SECRET_ACCESS_KEY</literal> is used.
      '';
    };
  };

  # Submodule type for one EC2 instance filter (used by ec2_sd_config).
  promTypes.filter = let
    inherit (types) listOf str;
  in types.submodule {
    options = {
      name = mkOption {
        description = ''
          See <link xlink:href="https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_DescribeInstances.html">this list</link>
          for the available filters.
        '';
        type = str;
      };

      # NOTE(review): the Prometheus ec2_sd_config documentation appears to
      # name this key `values`; confirm whether `value` is accepted by
      # Prometheus before relying on it.
      value = mkOption {
        default = [];
        type = listOf str;
        description = ''
          Value of the filter.
        '';
      };
    };
  };

  # Submodule type for one DNS service discovery configuration.
  promTypes.dns_sd_config = let
    inherit (types) listOf str;
  in types.submodule {
    options = {
      refresh_interval = mkDefOpt str "30s" ''
        The time after which the provided names are refreshed.
      '';

      # Mandatory: declared without a default.
      names = mkOption {
        description = ''
          A list of DNS SRV record names to be queried.
        '';
        type = listOf str;
      };
    };
  };

  # Submodule type for one Consul service discovery configuration. Every
  # option is optional (null by default).
  promTypes.consul_sd_config = let
    inherit (types) attrsOf bool listOf str;
  in types.submodule {
    options = {
      # Connection and credentials.
      server = mkDefOpt str "localhost:8500" ''
        Consul server to query.
      '';

      scheme = mkDefOpt str "http" "Consul scheme";

      datacenter = mkOpt str "Consul datacenter";

      token = mkOpt str "Consul token";

      username = mkOpt str "Consul username";

      password = mkOpt str "Consul password";

      tls_config = mkOpt promTypes.tls_config ''
        Configures the Consul request's TLS settings.
      '';

      # Selection of services and nodes.
      services = mkOpt (listOf str) ''
        A list of services for which targets are retrieved.
      '';

      tags = mkOpt (listOf str) ''
        An optional list of tags used to filter nodes for a given
        service. Services must contain all tags in the list.
      '';

      node_meta = mkOpt (attrsOf str) ''
        Node metadata used to filter nodes for a given service.
      '';

      tag_separator = mkDefOpt str "," ''
        The string by which Consul tags are joined into the tag label.
      '';

      # Query behaviour.
      allow_stale = mkOpt bool ''
        Allow stale Consul results
        (see <link xlink:href="https://www.consul.io/api/index.html#consistency-modes"/>).

        Will reduce load on Consul.
      '';

      refresh_interval = mkDefOpt str "30s" ''
        The time after which the provided names are refreshed.

        On large setup it might be a good idea to increase this value
        because the catalog will change all the time.
      '';
    };
  };

  # Submodule type for one file-based service discovery configuration.
  promTypes.file_sd_config = let
    inherit (types) listOf str;
  in types.submodule {
    options = {
      refresh_interval = mkDefOpt str "5m" ''
        Refresh interval to re-read the files.
      '';

      # Mandatory: declared without a default.
      files = mkOption {
        description = ''
          Patterns for files from which target groups are extracted. Refer
          to the Prometheus documentation for permitted filename patterns
          and formats.
        '';
        type = listOf str;
      };
    };
  };

  # Submodule type for one relabeling rule. Every option is optional (null
  # by default).
  promTypes.relabel_config = types.submodule {
    options = {
      source_labels = mkOpt (types.listOf types.str) ''
        The source labels select values from existing labels. Their content
        is concatenated using the configured separator and matched against
        the configured regular expression.
      '';

      separator = mkDefOpt types.str ";" ''
        Separator placed between concatenated source label values.
      '';

      target_label = mkOpt types.str ''
        Label to which the resulting value is written in a replace action.
        It is mandatory for replace actions.
      '';

      regex = mkDefOpt types.str "(.*)" ''
        Regular expression against which the extracted value is matched.
      '';

      modulus = mkOpt types.int ''
        Modulus to take of the hash of the source label values.
      '';

      replacement = mkDefOpt types.str "$1" ''
        Replacement value against which a regex replace is performed if the
        regular expression matches.
      '';

      # The enum covers the remaining relabel actions documented by
      # Prometheus in addition to replace/keep/drop; in particular `hashmod`,
      # without which the `modulus` option above could never take effect.
      action =
        mkDefOpt (types.enum ["replace" "keep" "drop" "hashmod" "labelmap" "labeldrop" "labelkeep"]) "replace" ''
        Action to perform based on regex matching.
      '';

    };
  };

  # Submodule type for TLS client settings; referenced by both scrape_config
  # and consul_sd_config. Every option is optional (null by default).
  promTypes.tls_config = let
    inherit (types) bool str;
  in types.submodule {
    options = {
      # Verification of the server.
      ca_file = mkOpt str ''
        CA certificate to validate API server certificate with.
      '';

      server_name = mkOpt str ''
        ServerName extension to indicate the name of the server.
        http://tools.ietf.org/html/rfc4366#section-3.1
      '';

      insecure_skip_verify = mkOpt bool ''
        Disable validation of the server certificate.
      '';

      # Client certificate authentication.
      cert_file = mkOpt str ''
        Certificate file for client cert authentication to the server.
      '';

      key_file = mkOpt str ''
        Key file for client cert authentication to the server.
      '';
    };
  };

in {

  imports = [
    (mkRenamedOptionModule [ "services" "prometheus2" ] [ "services" "prometheus" ])
  ];

  # Public options of the module, all under services.prometheus.
  options.services.prometheus = let
    inherit (types) attrs bool int lines listOf nullOr package path port str;
  in {

    # --- Daemon ---------------------------------------------------------

    enable = mkOption {
      default = false;
      type = bool;
      description = ''
        Enable the Prometheus monitoring daemon.
      '';
    };

    package = mkOption {
      default = pkgs.prometheus;
      defaultText = "pkgs.prometheus";
      type = package;
      description = ''
        The prometheus package that should be used.
      '';
    };

    port = mkOption {
      default = 9090;
      type = port;
      description = ''
        Port to listen on.
      '';
    };

    listenAddress = mkOption {
      default = "0.0.0.0";
      type = str;
      description = ''
        Address to listen on for the web interface, API, and telemetry.
      '';
    };

    stateDir = mkOption {
      default = "prometheus2";
      type = str;
      description = ''
        Directory below <literal>/var/lib</literal> to store Prometheus metrics data.
        This directory will be created automatically using systemd's StateDirectory mechanism.
      '';
    };

    extraFlags = mkOption {
      default = [];
      type = listOf str;
      description = ''
        Extra commandline options when launching Prometheus.
      '';
    };

    # --- Configuration file ----------------------------------------------

    configText = mkOption {
      default = null;
      type = nullOr lines;
      description = ''
        If non-null, this option defines the text that is written to
        prometheus.yml. If null, the contents of prometheus.yml is generated
        from the structured config options.
      '';
    };

    globalConfig = mkOption {
      default = {};
      type = promTypes.globalConfig;
      description = ''
        Parameters that are valid in all  configuration contexts. They
        also serve as defaults for other configuration sections
      '';
    };

    rules = mkOption {
      default = [];
      type = listOf str;
      description = ''
        Alerting and/or Recording rules to evaluate at runtime.
      '';
    };

    ruleFiles = mkOption {
      default = [];
      type = listOf path;
      description = ''
        Any additional rules files to include in this configuration.
      '';
    };

    scrapeConfigs = mkOption {
      default = [];
      type = listOf promTypes.scrape_config;
      description = ''
        A list of scrape configurations.
      '';
    };

    # --- Alertmanager ----------------------------------------------------

    # Free-form attribute sets: the entries are not checked against a
    # submodule.
    alertmanagers = mkOption {
      default = [];
      type = listOf attrs;
      example = literalExample ''
        [ {
          scheme = "https";
          path_prefix = "/alertmanager";
          static_configs = [ {
            targets = [
              "prometheus.domain.tld"
            ];
          } ];
        } ]
      '';
      description = ''
        A list of alertmanagers to send alerts to.
        See <link xlink:href="https://prometheus.io/docs/prometheus/latest/configuration/configuration/#alertmanager_config">the official documentation</link> for more information.
      '';
    };

    alertmanagerNotificationQueueCapacity = mkOption {
      default = 10000;
      type = int;
      description = ''
        The capacity of the queue for pending alert manager notifications.
      '';
    };

    alertmanagerTimeout = mkOption {
      default = 10;
      type = int;
      description = ''
        Alert manager HTTP API timeout (in seconds).
      '';
    };

    # --- Miscellaneous ---------------------------------------------------

    webExternalUrl = mkOption {
      default = null;
      type = nullOr str;
      example = "https://example.com/";
      description = ''
        The URL under which Prometheus is externally reachable (for example,
        if Prometheus is served via a reverse proxy).
      '';
    };

    checkConfig = mkOption {
      default = true;
      type = bool;
      description = ''
        Check configuration with <literal>promtool
        check</literal>. The call to <literal>promtool</literal> is
        subject to sandboxing by Nix. When credentials are stored in
        external files (<literal>password_file</literal>,
        <literal>bearer_token_file</literal>, etc), they will not be
        visible to <literal>promtool</literal> and it will report
        errors, despite a correct configuration.
      '';
    };
  };

  # Everything below only takes effect when services.prometheus.enable is set.
  config = mkIf cfg.enable {
    assertions = [
      ( let
          # Detect the legacy "address:port" form of listenAddress.
          #
          # Only values that look like a host followed by a numeric port are
          # flagged: something containing a dot (IPv4 address or dotted host
          # name) or ending in a closing square bracket (bracketed IPv6
          # address), followed by ":<digits>". A plain "(.*):(.*)" would also
          # reject port-less IPv6 addresses such as "[::1]", which contain
          # colons themselves.
          legacy = builtins.match "(.*\\..*|.*]):([[:digit:]]+)" cfg.listenAddress;
        in {
          assertion = legacy == null;
          # `legacy` is only indexed here; the message is only needed when
          # the assertion failed, i.e. when the match succeeded.
          message = ''
            Do not specify the port for Prometheus to listen on in the
            listenAddress option; use the port option instead:
              services.prometheus.listenAddress = ${builtins.elemAt legacy 0};
              services.prometheus.port = ${builtins.elemAt legacy 1};
          '';
        }
      )
    ];

    # Dedicated user and group with the statically allocated ids from
    # config.ids.
    users.groups.prometheus.gid = config.ids.gids.prometheus;
    users.users.prometheus = {
      description = "Prometheus daemon user";
      uid = config.ids.uids.prometheus;
      group = "prometheus";
    };
    systemd.services.prometheus = {
      wantedBy = [ "multi-user.target" ];
      after    = [ "network.target" ];
      serviceConfig = {
        # One flag per line, joined with backslash-newline continuations.
        ExecStart = "${cfg.package}/bin/prometheus" +
          optionalString (length cmdlineArgs != 0) (" \\\n  " +
            concatStringsSep " \\\n  " cmdlineArgs);
        User = "prometheus";
        Restart  = "always";
        WorkingDirectory = workingDir;
        # Has systemd create /var/lib/<stateDir>, the same path as workingDir.
        StateDirectory = cfg.stateDir;
      };
    };
  };
}