diff options
-rw-r--r-- | nixos/modules/module-list.nix | 1 | ||||
-rw-r--r-- | nixos/modules/services/databases/foundationdb.nix | 360 | ||||
-rw-r--r-- | nixos/modules/services/databases/foundationdb.xml | 279 |
3 files changed, 640 insertions, 0 deletions
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 5a56554dc98..1261fe95092 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -195,6 +195,7 @@ ./services/databases/clickhouse.nix ./services/databases/couchdb.nix ./services/databases/firebird.nix + ./services/databases/foundationdb.nix ./services/databases/hbase.nix ./services/databases/influxdb.nix ./services/databases/memcached.nix diff --git a/nixos/modules/services/databases/foundationdb.nix b/nixos/modules/services/databases/foundationdb.nix new file mode 100644 index 00000000000..ba921a9c152 --- /dev/null +++ b/nixos/modules/services/databases/foundationdb.nix @@ -0,0 +1,360 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + cfg = config.services.foundationdb; + + # used for initial cluster configuration + initialIpAddr = if (cfg.publicAddress != "auto") then cfg.publicAddress else "127.0.0.1"; + + fdbServers = n: + concatStringsSep "\n" (map (x: "[fdbserver.${toString (x+cfg.listenPortStart)}]") (range 0 (n - 1))); + + backupAgents = n: + concatStringsSep "\n" (map (x: "[backup_agent.${toString x}]") (range 1 n)); + + configFile = pkgs.writeText "foundationdb.conf" '' + [general] + cluster_file = /etc/foundationdb/fdb.cluster + + [fdbmonitor] + restart_delay = ${toString cfg.restartDelay} + user = ${cfg.user} + group = ${cfg.group} + + [fdbserver] + command = ${pkgs.foundationdb}/bin/fdbserver + public_address = ${cfg.publicAddress}:$ID + listen_address = ${cfg.listenAddress} + datadir = ${cfg.dataDir}/$ID + logdir = ${cfg.logDir} + logsize = ${cfg.logSize} + maxlogssize = ${cfg.maxLogSize} + ${optionalString (cfg.class != null) "class = ${cfg.class}"} + memory = ${cfg.memory} + storage_memory = ${cfg.storageMemory} + + ${optionalString (cfg.locality.machineId != null) "locality_machineid=${cfg.locality.machineId}"} + ${optionalString (cfg.locality.zoneId != null) "locality_zoneid=${cfg.locality.zoneId}"} + ${optionalString 
(cfg.locality.datacenterId != null) "locality_dcid=${cfg.locality.datacenterId}"} + ${optionalString (cfg.locality.dataHall != null) "locality_data_hall=${cfg.locality.dataHall}"} + + ${fdbServers cfg.serverProcesses} + + [backup_agent] + command = ${pkgs.foundationdb}/libexec/backup_agent + ${backupAgents cfg.backupProcesses} + ''; +in +{ + options.services.foundationdb = { + + enable = mkEnableOption "FoundationDB Server"; + + publicAddress = mkOption { + type = types.str; + default = "auto"; + description = "Publicly visible IP address of the process. Port is determined by process ID"; + }; + + listenAddress = mkOption { + type = types.str; + default = "public"; + description = "Publicly visible IP address of the process. Port is determined by process ID"; + }; + + listenPortStart = mkOption { + type = types.int; + default = 4500; + description = '' + Starting port number for database listening sockets. Every FDB process binds to a + subsequent port, to this number reflects the start of the overall range. e.g. having + 8 server processes will use all ports between 4500 and 4507. + ''; + }; + + openFirewall = mkOption { + type = types.bool; + default = false; + description = '' + Open the firewall ports corresponding to FoundationDB processes and coordinators + using <option>config.networking.firewall.*</option>. + ''; + }; + + dataDir = mkOption { + type = types.path; + default = "/var/lib/foundationdb"; + description = "Data directory. 
All cluster data will be put under here."; + }; + + logDir = mkOption { + type = types.path; + default = "/var/log/foundationdb"; + description = "Log directory."; + }; + + user = mkOption { + type = types.str; + default = "foundationdb"; + description = "User account under which FoundationDB runs."; + }; + + group = mkOption { + type = types.str; + default = "foundationdb"; + description = "Group account under which FoundationDB runs."; + }; + + class = mkOption { + type = types.nullOr (types.enum [ "storage" "transaction" "stateless" ]); + default = null; + description = "Process class"; + }; + + restartDelay = mkOption { + type = types.int; + default = 10; + description = "Number of seconds to wait before restarting servers."; + }; + + logSize = mkOption { + type = types.string; + default = "10MiB"; + description = '' + Roll over to a new log file after the current log file + reaches the specified size. + ''; + }; + + maxLogSize = mkOption { + type = types.string; + default = "100MiB"; + description = '' + Delete the oldest log file when the total size of all log + files exceeds the specified size. If set to 0, old log files + will not be deleted. + ''; + }; + + serverProcesses = mkOption { + type = types.int; + default = 1; + description = "Number of fdbserver processes to run."; + }; + + backupProcesses = mkOption { + type = types.int; + default = 1; + description = "Number of backup_agent processes to run for snapshots."; + }; + + memory = mkOption { + type = types.string; + default = "8GiB"; + description = '' + Maximum memory used by the process. The default value is + <literal>8GiB</literal>. When specified without a unit, + <literal>MiB</literal> is assumed. This parameter does not + change the memory allocation of the program. Rather, it sets + a hard limit beyond which the process will kill itself and + be restarted. 
The default value of <literal>8GiB</literal> + is double the intended memory usage in the default + configuration (providing an emergency buffer to deal with + memory leaks or similar problems). It is not recommended to + decrease the value of this parameter below its default + value. It may be increased if you wish to allocate a very + large amount of storage engine memory or cache. In + particular, when the <literal>storageMemory</literal> + parameter is increased, the <literal>memory</literal> + parameter should be increased by an equal amount. + ''; + }; + + storageMemory = mkOption { + type = types.string; + default = "1GiB"; + description = '' + Maximum memory used for data storage. The default value is + <literal>1GiB</literal>. When specified without a unit, + <literal>MB</literal> is assumed. Clusters using the memory + storage engine will be restricted to using this amount of + memory per process for purposes of data storage. Memory + overhead associated with storing the data is counted against + this total. If you increase the + <literal>storageMemory</literal>, you should also increase + the <literal>memory</literal> parameter by the same amount. + ''; + }; + + locality = mkOption { + default = { + machineId = null; + zoneId = null; + datacenterId = null; + dataHall = null; + }; + + description = '' + FoundationDB locality settings. + ''; + + type = types.submodule ({ + options = { + machineId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Machine identifier key. All processes on a machine should share a + unique id. By default, processes on a machine determine a unique id to share. + This does not generally need to be set. + ''; + }; + + zoneId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Zone identifier key. Processes that share a zone id are + considered non-unique for the purposes of data replication. + If unset, defaults to machine id. 
+ ''; + }; + + datacenterId = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Data center identifier key. All processes physically located in a + data center should share the id. If you are depending on data + center based replication this must be set on all processes. + ''; + }; + + dataHall = mkOption { + default = null; + type = types.nullOr types.str; + description = '' + Data hall identifier key. All processes physically located in a + data hall should share the id. If you are depending on data + hall based replication this must be set on all processes. + ''; + }; + }; + }); + }; + + extraReadWritePaths = mkOption { + default = [ ]; + type = types.listOf types.path; + description = '' + An extra set of filesystem paths that FoundationDB can read to + and write from. By default, FoundationDB runs under a heavily + namespaced systemd environment without write access to most of + the filesystem outside of its data and log directories. By + adding paths to this list, the set of writeable paths will be + expanded. This is useful for allowing e.g. backups to local files, + which must be performed on behalf of the foundationdb service. 
+ ''; + }; + + pidfile = mkOption { + type = types.path; + default = "/run/foundationdb.pid"; + description = "Path to pidfile for fdbmonitor."; + }; + }; + + config = mkIf cfg.enable { + meta.doc = ./foundationdb.xml; + meta.maintainers = with lib.maintainers; [ thoughtpolice ]; + + environment.systemPackages = [ pkgs.foundationdb ]; + + users.extraUsers = optionalAttrs (cfg.user == "foundationdb") (singleton + { name = "foundationdb"; + description = "FoundationDB User"; + uid = config.ids.uids.foundationdb; + group = cfg.group; + }); + + users.extraGroups = optionalAttrs (cfg.group == "foundationdb") (singleton + { name = "foundationdb"; + gid = config.ids.gids.foundationdb; + }); + + networking.firewall.allowedTCPPortRanges = mkIf cfg.openFirewall + [ { from = cfg.listenPortStart; + to = (cfg.listenPortStart + cfg.serverProcesses) - 1; + } + ]; + + systemd.services.foundationdb = { + description = "FoundationDB Service"; + + after = [ "network.target" ]; + wantedBy = [ "multi-user.target" ]; + unitConfig = + { RequiresMountsFor = "${cfg.dataDir} ${cfg.logDir}"; + }; + + serviceConfig = + let rwpaths = [ cfg.dataDir cfg.logDir cfg.pidfile "/etc/foundationdb" ] + ++ cfg.extraReadWritePaths; + in + { Type = "simple"; + Restart = "always"; + RestartSec = 5; + User = cfg.user; + Group = cfg.group; + PIDFile = "${cfg.pidfile}"; + + PermissionsStartOnly = true; # setup needs root perms + TimeoutSec = 120; # give reasonable time to shut down + + # Security options + NoNewPrivileges = true; + ProtectHome = true; + ProtectSystem = "strict"; + ProtectKernelTunables = true; + ProtectControlGroups = true; + PrivateTmp = true; + PrivateDevices = true; + ReadWritePaths = lib.concatStringsSep " " (map (x: "-" + x) rwpaths); + }; + + path = [ pkgs.foundationdb pkgs.coreutils ]; + + preStart = '' + rm -f ${cfg.pidfile} && \ + touch ${cfg.pidfile} && \ + chown -R ${cfg.user}:${cfg.group} ${cfg.pidfile} + + for x in "${cfg.logDir}" "${cfg.dataDir}" /etc/foundationdb; do + [ ! 
-d "$x" ] && mkdir -m 0700 -vp "$x" && chown -R ${cfg.user}:${cfg.group} "$x"; + done + + if [ ! -f /etc/foundationdb/fdb.cluster ]; then + cf=/etc/foundationdb/fdb.cluster + desc=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8) + rand=$(tr -dc A-Za-z0-9 </dev/urandom 2>/dev/null | head -c8) + echo ''${desc}:''${rand}@${initialIpAddr}:${builtins.toString cfg.listenPortStart} > $cf + chmod 0660 $cf && chown -R ${cfg.user}:${cfg.group} $cf + touch "${cfg.dataDir}/.first_startup" + fi + ''; + + script = '' + exec fdbmonitor --lockfile ${cfg.pidfile} --conffile ${configFile}; + ''; + + postStart = '' + if [ -e "${cfg.dataDir}/.first_startup" ]; then + fdbcli --exec "configure new single ssd" + rm -f "${cfg.dataDir}/.first_startup"; + fi + ''; + }; + }; +} diff --git a/nixos/modules/services/databases/foundationdb.xml b/nixos/modules/services/databases/foundationdb.xml new file mode 100644 index 00000000000..d10a5cfe836 --- /dev/null +++ b/nixos/modules/services/databases/foundationdb.xml @@ -0,0 +1,279 @@ +<chapter xmlns="http://docbook.org/ns/docbook" + xmlns:xlink="http://www.w3.org/1999/xlink" + xmlns:xi="http://www.w3.org/2001/XInclude" + version="5.0" + xml:id="module-foundationdb"> + +<title>FoundationDB</title> + +<para><emphasis>Source:</emphasis> <filename>modules/services/databases/foundationdb.nix</filename></para> + +<para><emphasis>Upstream documentation:</emphasis> <link xlink:href="https://apple.github.io/foundationdb/"/></para> + +<para><emphasis>Maintainer:</emphasis> Austin Seipp</para> + +<para><emphasis>Default version:</emphasis> 5.1.x</para> + +<para>FoundationDB (or "FDB") is a distributed, open source, high performance, +transactional key-value store. 
It can store petabytes of data and deliver +exceptional performance while maintaining consistency and ACID semantics over a +large cluster.</para> + +<section><title>Configuring and basic setup</title> + +<para>To enable FoundationDB, add the following to your +<filename>configuration.nix</filename>: + +<programlisting> +services.foundationdb.enable = true; +</programlisting> +</para> + +<para>After running <command>nixos-rebuild</command>, you can verify whether +FoundationDB is running by executing <command>fdbcli</command> (which is added +to <option>environment.systemPackages</option>): + +<programlisting> +$ sudo -u foundationdb fdbcli +Using cluster file `/etc/foundationdb/fdb.cluster'. + +The database is available. + +Welcome to the fdbcli. For help, type `help'. +fdb> status + +Using cluster file `/etc/foundationdb/fdb.cluster'. + +Configuration: + Redundancy mode - single + Storage engine - memory + Coordinators - 1 + +Cluster: + FoundationDB processes - 1 + Machines - 1 + Memory availability - 5.4 GB per process on machine with least available + Fault Tolerance - 0 machines + Server time - 04/20/18 15:21:14 + +... + +fdb> +</programlisting> +</para> + +<para>FoundationDB is run under the <command>foundationdb</command> user and +group by default, but this may be changed in the NixOS configuration. The +systemd unit <command>foundationdb.service</command> controls the +<command>fdbmonitor</command> process.</para> + +<para>By default, the NixOS module for FoundationDB creates a single +SSD-storage based database for development and basic usage. This storage engine +is designed for SSDs and will perform poorly on HDDs; however it can handle far +more data than the alternative "memory" engine and is a better default choice +for most deployments. 
(Note that you can change the storage backend on-the-fly +for a given FoundationDB cluster using <command>fdbcli</command>.)</para> + +<para>Furthermore, only 1 server process and 1 backup agent are started in the +default configuration. See below for more on scaling to increase this.</para> + +<para>FoundationDB stores all data for all server processes under +<filename>/var/lib/foundationdb</filename>. You can override this using +<option>services.foundationdb.dataDir</option>, e.g. + +<programlisting> +services.foundationdb.dataDir = "/data/fdb"; +</programlisting> + +</para> + +<para>Similarly, logs are stored under +<filename>/var/log/foundationdb</filename> by default, and there is a +corresponding <option>services.foundationdb.logDir</option> as well.</para> + +</section> + +<section><title>Scaling processes and backup agents</title> + +<para>Scaling the number of server processes is quite easy; simply specify +<option>services.foundationdb.serverProcesses</option> to be the number of +FoundationDB worker processes that should be started on the machine.</para> + +<para>FoundationDB worker processes typically require 4GB of RAM per-process at +minimum for good performance, so this option is set to 1 by default since the +maximum amount of RAM is unknown. You're advised to abide by this restriction, +so pick a number of processes so that each has 4GB or more.</para> + +<para>A similar option exists in order to scale backup agent processes, +<option>services.foundationdb.backupProcesses</option>. Backup agents are not +as performance/RAM sensitive, so feel free to experiment with the number of +available backup processes.</para> + +</section> + +<section><title>Clustering</title> + +<para>FoundationDB on NixOS works similarly to other Linux systems, so this +section will be brief.
Please refer to the full FoundationDB documentation for +more on clustering.</para> + +<para>FoundationDB organizes clusters using a set of +<emphasis>coordinators</emphasis>, which are just specially-designated worker +processes. By default, every installation of FoundationDB on NixOS will start +as its own individual cluster, with a single coordinator: the first worker +process on <command>localhost</command>.</para> + +<para>Coordinators are specified globally using the +<command>/etc/foundationdb/fdb.cluster</command> file, which all servers and +client applications will use to find and join coordinators. Note that this file +<emphasis>can not</emphasis> be managed by NixOS so easily: FoundationDB is +designed so that it will rewrite the file at runtime for all clients and nodes +when cluster coordinators change, with clients transparently handling this +without intervention.</para> + +<para>When dealing with a cluster, there are two main things you want to +do:</para> + +<itemizedlist> + <listitem><para>Add a node to the cluster for storage/compute.</para></listitem> + <listitem><para>Promote an ordinary worker to a coordinator.</para></listitem> +</itemizedlist> + +<para>A node must already be a member of the cluster in order to properly be +promoted to a coordinator, so you must always add it first if you wish to +promote it.</para> + +<para>To add a machine to a FoundationDB cluster:</para> + +<itemizedlist> + <listitem><para>Choose one of the servers to start as the initial coordinator. + </para></listitem> + <listitem><para>Copy the <command>/etc/foundationdb/fdb.cluster</command> file + from this server to all the other servers. 
Restart FoundationDB on all of + these other servers, so they join the cluster.</para></listitem> + <listitem><para>All of these servers are now connected and working together + in the cluster, under the chosen coordinator.</para></listitem> +</itemizedlist> + +<para>At this point, you can add as many nodes as you want by just repeating +the above steps. By default there will still be a single coordinator: you can +use <command>fdbcli</command> to change this and add new coordinators.</para> + +<para>As a convenience, FoundationDB can automatically assign coordinators +based on the redundancy mode you wish to achieve for the cluster. Once all the +nodes have been joined, simply set the replication policy, and then issue the +<command>coordinators auto</command> command.</para> + +<para>For example, assuming we have 3 nodes available, we can enable double +redundancy mode, then auto-select coordinators. For double redundancy, 3 +coordinators is ideal: therefore FoundationDB will make +<emphasis>every</emphasis> node a coordinator automatically:</para> + +<programlisting> +fdbcli> configure double ssd +fdbcli> coordinators auto +</programlisting> + +<para>This will transparently update all the servers within seconds, and +appropriately rewrite the <command>fdb.cluster</command> file, as well as +informing all client processes to do the same.</para> + +</section> + +<section><title>Client connectivity</title> + +<para>By default, all clients must use the current +<command>fdb.cluster</command> file to access a given FoundationDB cluster.
+This file is located by default in +<command>/etc/foundationdb/fdb.cluster</command> on all machines with the +FoundationDB service enabled, so you may copy the active one from your cluster +to a new node in order to connect, if it is not part of the cluster.</para> + +</section> + +<section><title>Backups and Disaster Recovery</title> + +<para>The usual rules for doing FoundationDB backups apply on NixOS as written +in the FoundationDB manual. However, one important difference is the security +profile for NixOS: by default, the <command>foundationdb</command> systemd unit +uses <emphasis>Linux namespaces</emphasis> to restrict write access to the +system, except for the log directory, data directory, and the +<command>/etc/foundationdb/</command> directory. This is enforced by default +and cannot be disabled.</para> + +<para>However, a side effect of this is that the <command>fdbbackup</command> +command doesn't work properly for local filesystem backups: FoundationDB uses a +server process alongside the database processes to perform backups and copy the +backups to the filesystem. As a result, this process is put under the +restricted namespaces above: the backup process can only write to a limited +number of paths.</para> + +<para>In order to allow flexible backup locations on local disks, the +FoundationDB NixOS module supports a +<option>services.foundationdb.extraReadWritePaths</option> option. This option +takes a list of paths, and adds them to the systemd unit, allowing the +processes inside the service to write (and read) the specified +directories.</para> + +<para>For example, to create backups in <command>/opt/fdb-backups</command>, +first set up the paths in the module options:</para> + +<programlisting> +services.foundationdb.extraReadWritePaths = [ "/opt/fdb-backups" ]; +</programlisting> + +<para>Restart the FoundationDB service, and it will now be able to write to +this directory.
Note: this path +<emphasis>must</emphasis> exist before restarting the unit. Otherwise, systemd +will not include it in the private FoundationDB namespace (and it will not add +it dynamically at runtime).</para> + +<para>You can now perform a backup:</para> + +<programlisting> +$ sudo -u foundationdb fdbbackup start -t default -d file:///opt/fdb-backups +$ sudo -u foundationdb fdbbackup status -t default +</programlisting> + +</section> + +<section><title>Known limitations</title> + +<para>The FoundationDB setup for NixOS should currently be considered beta. +FoundationDB is not new software, but the NixOS compilation and integration has +only undergone fairly basic testing of all the available functionality.</para> + +<itemizedlist> + <listitem><para>TLS plugin support is compiled in, but it's currently not + possible to specify the set of TLS certificate options in + <command>services.foundationdb</command></para></listitem> + <listitem><para>There is no way to specify individual parameters for + individual <command>fdbserver</command> processes. Currently, all server + processes inherit all the global <command>fdbmonitor</command> settings. + </para></listitem> + <listitem><para>Python bindings are not currently installed.</para></listitem> + <listitem><para>Ruby bindings are not currently installed.</para></listitem> + <listitem><para>Java bindings are not currently installed.</para></listitem> + <listitem><para>Go bindings are not currently installed.</para></listitem> +</itemizedlist> + +</section> + +<section><title>Options</title> + +<para>NixOS's FoundationDB module allows you to configure all of the most +relevant configuration options for <command>fdbmonitor</command>, matching it +quite closely. For a complete list of all options, check <command>man +configuration.nix</command>.</para> + +</section> + +<section><title>Full documentation</title> + +<para>FoundationDB is a complex piece of software, and requires careful +administration to properly use. 
Full documentation for administration can be +found here: <link xlink:href="https://apple.github.io/foundationdb/"/>.</para> + +</section> + +</chapter> |