summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--nixos/modules/module-list.nix1
-rw-r--r--nixos/modules/services/databases/foundationdb.nix360
-rw-r--r--nixos/modules/services/databases/foundationdb.xml279
3 files changed, 640 insertions, 0 deletions
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 5a56554dc98..1261fe95092 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -195,6 +195,7 @@
   ./services/databases/clickhouse.nix
   ./services/databases/couchdb.nix
   ./services/databases/firebird.nix
+  ./services/databases/foundationdb.nix
   ./services/databases/hbase.nix
   ./services/databases/influxdb.nix
   ./services/databases/memcached.nix
diff --git a/nixos/modules/services/databases/foundationdb.nix b/nixos/modules/services/databases/foundationdb.nix
new file mode 100644
index 00000000000..ba921a9c152
--- /dev/null
+++ b/nixos/modules/services/databases/foundationdb.nix
@@ -0,0 +1,360 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.foundationdb;
+
+  # Address used for the initial cluster configuration: the configured public
+  # address, or loopback while the public address is still "auto".
+  initialIpAddr =
+    if cfg.publicAddress == "auto"
+    then "127.0.0.1"
+    else cfg.publicAddress;
+
+  # Builds n "[fdbserver.<port>]" section headers, one per line, with ports
+  # counting upwards from cfg.listenPortStart.
+  fdbServers = n:
+    let
+      sectionHeader = offset: "[fdbserver.${toString (offset + cfg.listenPortStart)}]";
+    in
+      concatMapStringsSep "\n" sectionHeader (range 0 (n - 1));
+
+  # Builds n "[backup_agent.<index>]" section headers, one per line, with
+  # indices running from 1 to n.
+  backupAgents = n:
+    let
+      sectionHeader = index: "[backup_agent.${toString index}]";
+    in
+      concatMapStringsSep "\n" sectionHeader (range 1 n);
+
+  # Configuration file handed to fdbmonitor via --conffile, rendered from the
+  # module options. `$ID` is not a Nix interpolation and is written to the file
+  # verbatim. Optional settings that are null render as empty lines.
+  configFile = pkgs.writeText "foundationdb.conf" ''
+    [general]
+    cluster_file  = /etc/foundationdb/fdb.cluster
+
+    [fdbmonitor]
+    restart_delay = ${toString cfg.restartDelay}
+    user          = ${cfg.user}
+    group         = ${cfg.group}
+
+    [fdbserver]
+    command        = ${pkgs.foundationdb}/bin/fdbserver
+    public_address = ${cfg.publicAddress}:$ID
+    listen_address = ${cfg.listenAddress}
+    datadir        = ${cfg.dataDir}/$ID
+    logdir         = ${cfg.logDir}
+    logsize        = ${cfg.logSize}
+    maxlogssize    = ${cfg.maxLogSize}
+    ${optionalString (cfg.class != null) "class = ${cfg.class}"}
+    memory         = ${cfg.memory}
+    storage_memory = ${cfg.storageMemory}
+
+    ${optionalString (cfg.locality.machineId    != null) "locality_machineid=${cfg.locality.machineId}"}
+    ${optionalString (cfg.locality.zoneId       != null) "locality_zoneid=${cfg.locality.zoneId}"}
+    ${optionalString (cfg.locality.datacenterId != null) "locality_dcid=${cfg.locality.datacenterId}"}
+    ${optionalString (cfg.locality.dataHall     != null) "locality_data_hall=${cfg.locality.dataHall}"}
+
+    ${fdbServers cfg.serverProcesses}
+
+    [backup_agent]
+    command = ${pkgs.foundationdb}/libexec/backup_agent
+    ${backupAgents cfg.backupProcesses}
+  '';
+in
+{
+  options.services.foundationdb = {
+
+    # Master switch: the module's `config` section is guarded by this option.
+    enable = mkEnableOption "FoundationDB Server";
+
+    # Host part of `public_address` in the generated configuration file.
+    publicAddress = mkOption {
+      default = "auto";
+      type = types.str;
+      description = "Publicly visible IP address of the process. Port is determined by process ID";
+    };
+
+    # Rendered as `listen_address` in the [fdbserver] section of the generated
+    # configuration file. The previous description was a copy of the one for
+    # publicAddress and did not describe this setting.
+    listenAddress = mkOption {
+      type        = types.str;
+      default     = "public";
+      description = ''
+        Address the FoundationDB server sockets bind to. The default value
+        <literal>public</literal> makes each process listen on its public address.
+      '';
+    };
+
+    # First port of the range used by the server processes; also used for the
+    # section names in the generated configuration file and for the firewall
+    # range opened by openFirewall.
+    listenPortStart = mkOption {
+      type          = types.int;
+      default       = 4500;
+      description   = ''
+        Starting port number for database listening sockets. Every FDB process binds to a
+        subsequent port, so this number reflects the start of the overall range. e.g. having
+        8 server processes will use all ports between 4500 and 4507.
+      '';
+    };
+
+    # Gates the networking.firewall.allowedTCPPortRanges entry further down.
+    openFirewall = mkOption {
+      default = false;
+      type = types.bool;
+      description = ''
+        Open the firewall ports corresponding to FoundationDB processes and coordinators
+        using <option>config.networking.firewall.*</option>.
+      '';
+    };
+
+    # Parent of the per-process data directories (`datadir` in the config file).
+    dataDir = mkOption {
+      default = "/var/lib/foundationdb";
+      type = types.path;
+      description = "Data directory. All cluster data will be put under here.";
+    };
+
+    # Rendered as `logdir` in the config file.
+    logDir = mkOption {
+      default = "/var/log/foundationdb";
+      type = types.path;
+      description = "Log directory.";
+    };
+
+    # Account used both by the systemd unit and by fdbmonitor.
+    user = mkOption {
+      default = "foundationdb";
+      type = types.str;
+      description = "User account under which FoundationDB runs.";
+    };
+
+    # Group used both by the systemd unit and by fdbmonitor.
+    group = mkOption {
+      default = "foundationdb";
+      type = types.str;
+      description = "Group account under which FoundationDB runs.";
+    };
+
+    # null leaves the `class` line out of the config file entirely.
+    class = mkOption {
+      default = null;
+      type = types.nullOr (types.enum [ "storage" "transaction" "stateless" ]);
+      description = "Process class";
+    };
+
+    # Rendered as `restart_delay` in the [fdbmonitor] section.
+    restartDelay = mkOption {
+      default = 10;
+      type = types.int;
+      description = "Number of seconds to wait before restarting servers.";
+    };
+
+    # Rendered as `logsize` in the generated configuration file.
+    logSize = mkOption {
+      # types.str instead of types.string: a size such as "10MiB" has to come
+      # from exactly one definition and must never be built by concatenation.
+      type        = types.str;
+      default     = "10MiB";
+      description = ''
+        Roll over to a new log file after the current log file
+        reaches the specified size.
+      '';
+    };
+
+    # Rendered as `maxlogssize` in the generated configuration file.
+    maxLogSize = mkOption {
+      # types.str instead of types.string: a size such as "100MiB" has to come
+      # from exactly one definition and must never be built by concatenation.
+      type        = types.str;
+      default     = "100MiB";
+      description = ''
+        Delete the oldest log file when the total size of all log
+        files exceeds the specified size. If set to 0, old log files
+        will not be deleted.
+      '';
+    };
+
+    # Number of "[fdbserver.<port>]" sections written to the config file.
+    serverProcesses = mkOption {
+      default = 1;
+      type = types.int;
+      description = "Number of fdbserver processes to run.";
+    };
+
+    # Number of "[backup_agent.<index>]" sections written to the config file.
+    backupProcesses = mkOption {
+      default = 1;
+      type = types.int;
+      description = "Number of backup_agent processes to run for snapshots.";
+    };
+
+    # Rendered as `memory` in the [fdbserver] section of the configuration file.
+    memory = mkOption {
+      # types.str instead of types.string: a size such as "8GiB" has to come
+      # from exactly one definition and must never be built by concatenation.
+      type        = types.str;
+      default     = "8GiB";
+      description = ''
+        Maximum memory used by the process. The default value is
+        <literal>8GiB</literal>. When specified without a unit,
+        <literal>MiB</literal> is assumed. This parameter does not
+        change the memory allocation of the program. Rather, it sets
+        a hard limit beyond which the process will kill itself and
+        be restarted. The default value of <literal>8GiB</literal>
+        is double the intended memory usage in the default
+        configuration (providing an emergency buffer to deal with
+        memory leaks or similar problems). It is not recommended to
+        decrease the value of this parameter below its default
+        value. It may be increased if you wish to allocate a very
+        large amount of storage engine memory or cache. In
+        particular, when the <literal>storageMemory</literal>
+        parameter is increased, the <literal>memory</literal>
+        parameter should be increased by an equal amount.
+      '';
+    };
+
+    # Rendered as `storage_memory` in the [fdbserver] section of the
+    # configuration file.
+    storageMemory = mkOption {
+      # types.str instead of types.string: a size such as "1GiB" has to come
+      # from exactly one definition and must never be built by concatenation.
+      type        = types.str;
+      default     = "1GiB";
+      description = ''
+        Maximum memory used for data storage. The default value is
+        <literal>1GiB</literal>. When specified without a unit,
+        <literal>MB</literal> is assumed. Clusters using the memory
+        storage engine will be restricted to using this amount of
+        memory per process for purposes of data storage. Memory
+        overhead associated with storing the data is counted against
+        this total. If you increase the
+        <literal>storageMemory</literal>, you should also increase
+        the <literal>memory</literal> parameter by the same amount.
+      '';
+    };
+
+    # Locality keys; each non-null value becomes a `locality_*` line in the
+    # [fdbserver] section of the generated configuration file.
+    # The description bodies are indented with spaces only: a leading tab is
+    # not counted as indentation by Nix indented strings and would prevent the
+    # common indentation from being stripped.
+    locality = mkOption {
+      default = {
+        machineId    = null;
+        zoneId       = null;
+        datacenterId = null;
+        dataHall     = null;
+      };
+
+      description = ''
+        FoundationDB locality settings.
+      '';
+
+      type = types.submodule ({
+        options = {
+          machineId = mkOption {
+            default = null;
+            type = types.nullOr types.str;
+            description = ''
+              Machine identifier key. All processes on a machine should share a
+              unique id. By default, processes on a machine determine a unique id to share.
+              This does not generally need to be set.
+            '';
+          };
+
+          zoneId = mkOption {
+            default = null;
+            type = types.nullOr types.str;
+            description = ''
+              Zone identifier key. Processes that share a zone id are
+              considered non-unique for the purposes of data replication.
+              If unset, defaults to machine id.
+            '';
+          };
+
+          datacenterId = mkOption {
+            default = null;
+            type = types.nullOr types.str;
+            description = ''
+              Data center identifier key. All processes physically located in a
+              data center should share the id. If you are depending on data
+              center based replication this must be set on all processes.
+            '';
+          };
+
+          dataHall = mkOption {
+            default = null;
+            type = types.nullOr types.str;
+            description = ''
+              Data hall identifier key. All processes physically located in a
+              data hall should share the id. If you are depending on data
+              hall based replication this must be set on all processes.
+            '';
+          };
+        };
+      });
+    };
+
+    # Appended to the ReadWritePaths list of the systemd unit, after the data
+    # directory, log directory, pidfile and /etc/foundationdb.
+    extraReadWritePaths = mkOption {
+      default = [ ];
+      type = types.listOf types.path;
+      description = ''
+        An extra set of filesystem paths that FoundationDB can read from
+        and write to. By default, FoundationDB runs under a heavily
+        namespaced systemd environment without write access to most of
+        the filesystem outside of its data and log directories. By
+        adding paths to this list, the set of writeable paths will be
+        expanded. This is useful for allowing e.g. backups to local files,
+        which must be performed on behalf of the foundationdb service.
+      '';
+    };
+
+    # Passed to fdbmonitor as --lockfile and to systemd as PIDFile.
+    pidfile = mkOption {
+      default = "/run/foundationdb.pid";
+      type = types.path;
+      description = "Path to pidfile for fdbmonitor.";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    # Manual chapter and maintainer list for this module.
+    # NOTE(review): these meta.* definitions sit inside `mkIf cfg.enable`, so
+    # they only apply when the service is enabled — confirm that is intended.
+    meta.doc         = ./foundationdb.xml;
+    meta.maintainers = with lib.maintainers; [ thoughtpolice ];
+
+    # Adds pkgs.foundationdb to the system-wide package list.
+    environment.systemPackages = [ pkgs.foundationdb ];
+
+    # Declare the service account only while the default user name is in use;
+    # nothing is created for a custom user name.
+    # Attribute-set form: optionalAttrs yields an attribute set in both
+    # branches, and the account name is taken from the attribute name.
+    users.extraUsers = optionalAttrs (cfg.user == "foundationdb") {
+      foundationdb = {
+        description = "FoundationDB User";
+        uid         = config.ids.uids.foundationdb;
+        group       = cfg.group;
+      };
+    };
+
+    # Declare the service group only while the default group name is in use;
+    # nothing is created for a custom group name.
+    # Attribute-set form: optionalAttrs yields an attribute set in both
+    # branches, and the group name is taken from the attribute name.
+    users.extraGroups = optionalAttrs (cfg.group == "foundationdb") {
+      foundationdb = {
+        gid = config.ids.gids.foundationdb;
+      };
+    };
+
+    # With openFirewall set, allow TCP on one port per server process, counting
+    # up from listenPortStart.
+    networking.firewall.allowedTCPPortRanges = mkIf cfg.openFirewall [
+      {
+        from = cfg.listenPortStart;
+        to   = cfg.listenPortStart + cfg.serverProcesses - 1;
+      }
+    ];
+
+    # Unit that runs fdbmonitor with the generated configuration file.
+    # preStart prepares the pidfile, the directories and (once) the cluster
+    # file; postStart configures the database after the very first start.
+    systemd.services.foundationdb = {
+      description             = "FoundationDB Service";
+
+      after                   = [ "network.target" ];
+      wantedBy                = [ "multi-user.target" ];
+      unitConfig =
+        { RequiresMountsFor = "${cfg.dataDir} ${cfg.logDir}";
+        };
+
+      serviceConfig =
+        # Everything the service may write to: built-in locations first, then
+        # the user-supplied extraReadWritePaths.
+        let rwpaths = [ cfg.dataDir cfg.logDir cfg.pidfile "/etc/foundationdb" ]
+                   ++ cfg.extraReadWritePaths;
+        in
+        { Type       = "simple";
+          Restart    = "always";
+          RestartSec = 5;
+          User       = cfg.user;
+          Group      = cfg.group;
+          PIDFile    = "${cfg.pidfile}";
+
+          PermissionsStartOnly = true;  # setup needs root perms
+          TimeoutSec           = 120;   # give reasonable time to shut down
+
+          # Security options
+          NoNewPrivileges       = true;
+          ProtectHome           = true;
+          ProtectSystem         = "strict";
+          ProtectKernelTunables = true;
+          ProtectControlGroups  = true;
+          PrivateTmp            = true;
+          PrivateDevices        = true;
+          # Each path gets a "-" prefix and the results are joined with spaces.
+          ReadWritePaths        = lib.concatStringsSep " " (map (x: "-" + x) rwpaths);
+        };
+
+      path = [ pkgs.foundationdb pkgs.coreutils ];
+
+      # Recreates the pidfile, creates any missing log/data/config directory,
+      # and, when no cluster file exists yet, writes one with a random
+      # description and id and drops a .first_startup marker in the data dir.
+      preStart = ''
+        rm -f ${cfg.pidfile}   && \
+          touch ${cfg.pidfile} && \
+          chown -R ${cfg.user}:${cfg.group} ${cfg.pidfile}
+
+        for x in "${cfg.logDir}" "${cfg.dataDir}" /etc/foundationdb; do
+          [ ! -d "$x" ] && mkdir -m 0700 -vp "$x" && chown -R ${cfg.user}:${cfg.group} "$x";
+        done
+
+        if [ ! -f /etc/foundationdb/fdb.cluster ]; then
+            cf=/etc/foundationdb/fdb.cluster
+            desc=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8)
+            rand=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8)
+            echo ''${desc}:''${rand}@${initialIpAddr}:${builtins.toString cfg.listenPortStart} > $cf
+            chmod 0660 $cf && chown -R ${cfg.user}:${cfg.group} $cf
+            touch "${cfg.dataDir}/.first_startup"
+        fi
+      '';
+
+      # Replaces the shell with fdbmonitor, using the pidfile as its lockfile.
+      script = ''
+        exec fdbmonitor --lockfile ${cfg.pidfile} --conffile ${configFile};
+      '';
+
+      # When the .first_startup marker left by preStart is present, runs
+      # `configure new single ssd` through fdbcli and removes the marker.
+      postStart = ''
+        if [ -e "${cfg.dataDir}/.first_startup" ]; then
+          fdbcli --exec "configure new single ssd"
+          rm -f "${cfg.dataDir}/.first_startup";
+        fi
+      '';
+    };
+  };
+}
diff --git a/nixos/modules/services/databases/foundationdb.xml b/nixos/modules/services/databases/foundationdb.xml
new file mode 100644
index 00000000000..d10a5cfe836
--- /dev/null
+++ b/nixos/modules/services/databases/foundationdb.xml
@@ -0,0 +1,279 @@
+<chapter xmlns="http://docbook.org/ns/docbook"
+         xmlns:xlink="http://www.w3.org/1999/xlink"
+         xmlns:xi="http://www.w3.org/2001/XInclude"
+         version="5.0"
+         xml:id="module-foundationdb">
+
+<title>FoundationDB</title>
+
+<para><emphasis>Source:</emphasis> <filename>modules/services/databases/foundationdb.nix</filename></para>
+
+<para><emphasis>Upstream documentation:</emphasis> <link xlink:href="https://apple.github.io/foundationdb/"/></para>
+
+<para><emphasis>Maintainer:</emphasis> Austin Seipp</para>
+
+<para><emphasis>Default version:</emphasis> 5.1.x</para>
+
+<para>FoundationDB (or "FDB") is a distributed, open source, high performance,
+transactional key-value store. It can store petabytes of data and deliver
+exceptional performance while maintaining consistency and ACID semantics over a
+large cluster.</para>
+
+<section><title>Configuring and basic setup</title>
+
+<para>To enable FoundationDB, add the following to your
+<filename>configuration.nix</filename>:
+
+<programlisting>
+services.foundationdb.enable = true;
+</programlisting>
+</para>
+
+<para>After running <command>nixos-rebuild</command>, you can verify whether
+FoundationDB is running by executing <command>fdbcli</command> (which is added
+to <option>environment.systemPackages</option>):
+
+<programlisting>
+$ sudo -u foundationdb fdbcli
+Using cluster file `/etc/foundationdb/fdb.cluster'.
+
+The database is available.
+
+Welcome to the fdbcli. For help, type `help'.
+fdb> status
+
+Using cluster file `/etc/foundationdb/fdb.cluster'.
+
+Configuration:
+  Redundancy mode        - single
+  Storage engine         - memory
+  Coordinators           - 1
+
+Cluster:
+  FoundationDB processes - 1
+  Machines               - 1
+  Memory availability    - 5.4 GB per process on machine with least available
+  Fault Tolerance        - 0 machines
+  Server time            - 04/20/18 15:21:14
+
+...
+
+fdb>
+</programlisting>
+</para>
+
+<para>FoundationDB is run under the <command>foundationdb</command> user and
+group by default, but this may be changed in the NixOS configuration. The
+systemd unit <command>foundationdb.service</command> controls the
+<command>fdbmonitor</command> process.</para>
+
+<para>By default, the NixOS module for FoundationDB creates a single
+SSD-storage based database for development and basic usage. This storage engine
+is designed for SSDs and will perform poorly on HDDs; however it can handle far
+more data than the alternative "memory" engine and is a better default choice
+for most deployments. (Note that you can change the storage backend on-the-fly
+for a given FoundationDB cluster using <command>fdbcli</command>.)</para>
+
+<para>Furthermore, only 1 server process and 1 backup agent are started in the
+default configuration. See below for more on scaling to increase this.</para>
+
+<para>FoundationDB stores all data for all server processes under
+<filename>/var/lib/foundationdb</filename>. You can override this using
+<option>services.foundationdb.dataDir</option>, e.g.
+
+<programlisting>
+services.foundationdb.dataDir = "/data/fdb";
+</programlisting>
+
+</para>
+
+<para>Similarly, logs are stored under
+<filename>/var/log/foundationdb</filename> by default, and there is a
+corresponding <option>services.foundationdb.logDir</option> as well.</para>
+
+</section>
+
+<section><title>Scaling processes and backup agents</title>
+
+<para>Scaling the number of server processes is quite easy; simply specify
+<option>services.foundationdb.serverProcesses</option> to be the number of
+FoundationDB worker processes that should be started on the machine.</para>
+
+<para>FoundationDB worker processes typically require 4GB of RAM per-process at
+minimum for good performance, so this option is set to 1 by default since the
+maximum amount of RAM is unknown. You're advised to abide by this restriction,
+so pick a number of processes so that each has 4GB or more.</para>
+
+<para>A similar option exists in order to scale backup agent processes,
+<option>services.foundationdb.backupProcesses</option>. Backup agents are not
+as performance/RAM sensitive, so feel free to experiment with the number of
+available backup processes.</para>
+
+</section>
+
+<section><title>Clustering</title>
+
+<para>FoundationDB on NixOS works similarly to other Linux systems, so this
+section will be brief. Please refer to the full FoundationDB documentation for
+more on clustering.</para>
+
+<para>FoundationDB organizes clusters using a set of
+<emphasis>coordinators</emphasis>, which are just specially-designated worker
+processes. By default, every installation of FoundationDB on NixOS will start
+as its own individual cluster, with a single coordinator: the first worker
+process on <command>localhost</command>.</para>
+
+<para>Coordinators are specified globally using the
+<command>/etc/foundationdb/fdb.cluster</command> file, which all servers and
+client applications will use to find and join coordinators. Note that this file
+<emphasis>can not</emphasis> be managed by NixOS so easily: FoundationDB is
+designed so that it will rewrite the file at runtime for all clients and nodes
+when cluster coordinators change, with clients transparently handling this
+without intervention.</para>
+
+<para>When dealing with a cluster, there are two main things you want to
+do:</para>
+
+<itemizedlist>
+  <listitem><para>Add a node to the cluster for storage/compute.</para></listitem>
+  <listitem><para>Promote an ordinary worker to a coordinator.</para></listitem>
+</itemizedlist>
+
+<para>A node must already be a member of the cluster in order to properly be
+promoted to a coordinator, so you must always add it first if you wish to
+promote it.</para>
+
+<para>To add a machine to a FoundationDB cluster:</para>
+
+<itemizedlist>
+  <listitem><para>Choose one of the servers to start as the initial coordinator.
+      </para></listitem>
+  <listitem><para>Copy the <command>/etc/foundationdb/fdb.cluster</command> file
+      from this server to all the other servers. Restart FoundationDB on all of
+      these other servers, so they join the cluster.</para></listitem>
+  <listitem><para>All of these servers are now connected and working together
+      in the cluster, under the chosen coordinator.</para></listitem>
+</itemizedlist>
+
+<para>At this point, you can add as many nodes as you want by just repeating
+the above steps. By default there will still be a single coordinator: you can
+use <command>fdbcli</command> to change this and add new coordinators.</para>
+
+<para>As a convenience, FoundationDB can automatically assign coordinators
+based on the redundancy mode you wish to achieve for the cluster. Once all the
+nodes have been joined, simply set the replication policy, and then issue the
+<command>coordinators auto</command> command.</para>
+
+<para>For example, assuming we have 3 nodes available, we can enable double
+redundancy mode, then auto-select coordinators. For double redundancy, 3
+coordinators is ideal: therefore FoundationDB will make
+<emphasis>every</emphasis> node a coordinator automatically:</para>
+
+<programlisting>
+fdbcli> configure double ssd
+fdbcli> coordinators auto
+</programlisting>
+
+<para>This will transparently update all the servers within seconds, and
+appropriately rewrite the <command>fdb.cluster</command> file, as well as
+informing all client processes to do the same.</para>
+
+</section>
+
+<section><title>Client connectivity</title>
+
+<para>By default, all clients must use the current
+<command>fdb.cluster</command> file to access a given FoundationDB cluster.
+This file is located by default in
+<command>/etc/foundationdb/fdb.cluster</command> on all machines with the
+FoundationDB service enabled, so you may copy the active one from your cluster
+to a new node in order to connect, if it is not part of the cluster.</para>
+
+</section>
+
+<section><title>Backups and Disaster Recovery</title>
+
+<para>The usual rules for doing FoundationDB backups apply on NixOS as written
+in the FoundationDB manual. However, one important difference is the security
+profile for NixOS: by default, the <command>foundationdb</command> systemd unit
+uses <emphasis>Linux namespaces</emphasis> to restrict write access to the
+system, except for the log directory, data directory, and the
+<command>/etc/foundationdb/</command> directory. This is enforced by default
+and cannot be disabled.</para>
+
+<para>However, a side effect of this is that the <command>fdbbackup</command>
+command doesn't work properly for local filesystem backups: FoundationDB uses a
+server process alongside the database processes to perform backups and copy the
+backups to the filesystem. As a result, this process is put under the
+restricted namespaces above: the backup process can only write to a limited
+number of paths.</para>
+
+<para>In order to allow flexible backup locations on local disks, the
+FoundationDB NixOS module supports a
+<option>services.foundationdb.extraReadWritePaths</option> option. This option
+takes a list of paths, and adds them to the systemd unit, allowing the
+processes inside the service to write (and read) the specified
+directories.</para>
+
+<para>For example, to create backups in <command>/opt/fdb-backups</command>,
+first set up the paths in the module options:</para>
+
+<programlisting>
+services.foundationdb.extraReadWritePaths = [ "/opt/fdb-backups" ];
+</programlisting>
+
+<para>Restart the FoundationDB service, and it will now be able to write to
+this directory. Note: this path <emphasis>must</emphasis> already exist
+before restarting the unit. Otherwise, systemd
+will not include it in the private FoundationDB namespace (and it will not add
+it dynamically at runtime).</para>
+
+<para>You can now perform a backup:</para>
+
+<programlisting>
+$ sudo -u foundationdb fdbbackup start  -t default -d file:///opt/fdb-backups
+$ sudo -u foundationdb fdbbackup status -t default
+</programlisting>
+
+</section>
+
+<section><title>Known limitations</title>
+
+<para>The FoundationDB setup for NixOS should currently be considered beta.
+FoundationDB is not new software, but the NixOS compilation and integration has
+only undergone fairly basic testing of all the available functionality.</para>
+
+<itemizedlist>
+  <listitem><para>TLS plugin support is compiled in, but it's currently not
+      possible to specify the set of TLS certificate options in
+      <command>services.foundationdb</command>.</para></listitem>
+  <listitem><para>There is no way to specify individual parameters for
+      individual <command>fdbserver</command> processes. Currently, all server
+      processes inherit all the global <command>fdbmonitor</command> settings.
+      </para></listitem>
+  <listitem><para>Python bindings are not currently installed.</para></listitem>
+  <listitem><para>Ruby bindings are not currently installed.</para></listitem>
+  <listitem><para>Java bindings are not currently installed.</para></listitem>
+  <listitem><para>Go bindings are not currently installed.</para></listitem>
+</itemizedlist>
+
+</section>
+
+<section><title>Options</title>
+
+<para>NixOS's FoundationDB module allows you to configure all of the most
+relevant configuration options for <command>fdbmonitor</command>, matching it
+quite closely. For a complete list of all options, check <command>man
+configuration.nix</command>.</para>
+
+</section>
+
+<section><title>Full documentation</title>
+
+<para>FoundationDB is a complex piece of software, and requires careful
+administration to properly use. Full documentation for administration can be
+found here: <link xlink:href="https://apple.github.io/foundationdb/"/>.</para>
+
+</section>
+
+</chapter>