summary refs log tree commit diff
path: root/nixos/modules/services/misc/bees.nix
blob: fa00d7e4f55d0b2719c5af6c03f25ead15594391 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
{ config, lib, pkgs, ... }:

with lib;

let

  cfg = config.services.beesd;

  logLevels = { emerg = 0; alert = 1; crit = 2; err = 3; warning = 4; notice = 5; info = 6; debug = 7; };

  fsOptions = with types; {
    options.spec = mkOption {
      type = str;
      description = ''
        Description of how to identify the filesystem to be duplicated by this
        instance of bees. Note that deduplication crosses subvolumes; one must
        not configure multiple instances for subvolumes of the same filesystem
        (or block devices which are part of the same filesystem), but only for
        completely independent btrfs filesystems.
        </para>
        <para>
        This must be in a format usable by findmnt; that could be a key=value
        pair, or a bare path to a mount point.
        Using bare paths will allow systemd to start the beesd service only
        after mounting the associated path.
      '';
      example = "LABEL=MyBulkDataDrive";
    };
    options.hashTableSizeMB = mkOption {
      type = types.addCheck types.int (n: mod n 16 == 0);
      default = 1024; # 1GB; default from upstream beesd script
      description = ''
        Hash table size in MB; must be a multiple of 16.
        </para>
        <para>
        A larger ratio of index size to storage size means smaller blocks of
        duplicate content are recognized.
        </para>
        <para>
        If you have 1TB of data, a 4GB hash table (which is to say, a value of
        4096) will permit 4KB extents (the smallest possible size) to be
        recognized, whereas a value of 1024 -- creating a 1GB hash table --
        will recognize only aligned duplicate blocks of 16KB.
      '';
    };
    options.verbosity = mkOption {
      type = types.enum (attrNames logLevels ++ attrValues logLevels);
      apply = v: if isString v then logLevels.${v} else v;
      default = "info";
      description = "Log verbosity (syslog keyword/level).";
    };
    options.workDir = mkOption {
      type = str;
      default = ".beeshome";
      description = ''
        Name (relative to the root of the filesystem) of the subvolume where
        the hash table will be stored.
      '';
    };
    options.extraOptions = mkOption {
      type = listOf str;
      default = [ ];
      description = ''
        Extra command-line options passed to the daemon. See upstream bees documentation.
      '';
      example = literalExpression ''
        [ "--thread-count" "4" ]
      '';
    };
  };

in
{

  options.services.beesd = {
    filesystems = mkOption {
      type = with types; attrsOf (submodule fsOptions);
      description = "BTRFS filesystems to run block-level deduplication on.";
      default = { };
      example = literalExpression ''
        {
          root = {
            spec = "LABEL=root";
            hashTableSizeMB = 2048;
            verbosity = "crit";
            extraOptions = [ "--loadavg-target" "5.0" ];
          };
        }
      '';
    };
  };
  config = {
    systemd.services = mapAttrs'
      (name: fs: nameValuePair "beesd@${name}" {
        description = "Block-level BTRFS deduplication for %i";
        after = [ "sysinit.target" ];

        serviceConfig =
          let
            configOpts = [
              fs.spec
              "verbosity=${toString fs.verbosity}"
              "idxSizeMB=${toString fs.hashTableSizeMB}"
              "workDir=${fs.workDir}"
            ];
            configOptsStr = escapeShellArgs configOpts;
          in
          {
            # Values from https://github.com/Zygo/bees/blob/v0.6.5/scripts/beesd@.service.in
            ExecStart = "${pkgs.bees}/bin/bees-service-wrapper run ${configOptsStr} -- --no-timestamps ${escapeShellArgs fs.extraOptions}";
            ExecStopPost = "${pkgs.bees}/bin/bees-service-wrapper cleanup ${configOptsStr}";
            CPUAccounting = true;
            CPUSchedulingPolicy = "batch";
            CPUWeight = 12;
            IOSchedulingClass = "idle";
            IOSchedulingPriority = 7;
            IOWeight = 10;
            KillMode = "control-group";
            KillSignal = "SIGTERM";
            MemoryAccounting = true;
            Nice = 19;
            Restart = "on-abnormal";
            StartupCPUWeight = 25;
            StartupIOWeight = 25;
            SyslogIdentifier = "beesd"; # would otherwise be "bees-service-wrapper"
          };
        unitConfig.RequiresMountsFor = lib.mkIf (lib.hasPrefix "/" fs.spec) fs.spec;
        wantedBy = [ "multi-user.target" ];
      })
      cfg.filesystems;
  };
}