summary refs log tree commit diff
path: root/nixos/modules/services/network-filesystems/ceph.nix
blob: 7a1444decafaf7b475945e0543ac8100d43b605e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
{ config, lib, pkgs, ... }:

with lib;

let
  cfg  = config.services.ceph;

  # Translates "camelCaseOptions" to "camel case options": every upper-case
  # character is replaced by a space followed by the entry at the same position
  # in lowerChars. Credits to tilpner in #nixos@freenode.
  expandCamelCase = name:
    let
      spacedLowerChars = map (c: " " + c) lowerChars;
    in
      replaceStrings upperChars spacedLowerChars name;
  # Rebuilds an attribute set with every attribute name run through
  # expandCamelCase; the values are passed along unchanged.
  expandCamelCaseAttrs = attrs:
    mapAttrs' (key: val: nameValuePair (expandCamelCase key) val) attrs;

  # Builds one service definition per daemon id, each keyed
  # "ceph-<daemonType>-<daemonId>", and combines them with mkMerge.
  makeServices = daemonType: daemonIds:
    let
      # Single-attribute set holding the service for one daemon id.
      serviceFor = daemonId: {
        "ceph-${daemonType}-${daemonId}" =
          makeService daemonType daemonId cfg.global.clusterName pkgs.ceph;
      };
    in
      mkMerge (map serviceFor daemonIds);

  # Produces the systemd service definition for a single Ceph daemon.
  #   daemonType:  daemon kind; "osd", "mon", "mgr", "mds" and "rgw" get special handling below
  #   daemonId:    handed to the daemon through --id
  #   clusterName: handed to the daemon through --cluster and exported as CLUSTER
  #   ceph:        package whose `out` and `lib` outputs provide the executables
  makeService = daemonType: daemonId: clusterName: ceph:
    let
      isOsd = daemonType == "osd";
      isRgw = daemonType == "rgw";

      # The rados gateway uses the name "radosgw" for both its binary and its
      # state directory; every other daemon is named after its type.
      daemonBinary = if isRgw then "radosgw" else "ceph-${daemonType}";
      stateDirectory = "ceph/${if isRgw then "radosgw" else daemonType}/${clusterName}-${daemonId}";

      startupTargets = [ "network-online.target" "time-sync.target" ];
      typeTarget = [ "ceph-${daemonType}.target" ];

      # Start limit bursts that differ from the fallback of 5.
      burstLimits = { osd = 30; mgr = 3; mds = 3; };

      # Settings shared by every daemon type.
      commonServiceConfig = {
        User = "ceph";
        Group = if isOsd then "disk" else "ceph";
        Environment = "CLUSTER=${clusterName}";
        StateDirectory = stateDirectory;
        ExecStart = ''${ceph.out}/bin/${daemonBinary} \
                    -f --cluster ${clusterName} --id ${daemonId}'';
        ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        Restart = "on-failure";
        LimitNOFILE = 1048576;
        LimitNPROC = 1048576;
        PrivateDevices = "yes";
        PrivateTmp = "true";
        ProtectHome = "true";
        ProtectSystem = "full";
      };

      # Additions and overrides applied only to OSD services.
      osdServiceConfig = {
        ExecStartPre = "${ceph.lib}/libexec/ceph/ceph-osd-prestart.sh --id ${daemonId} --cluster ${clusterName}";
        RestartSec = "20s";
        PrivateDevices = "no"; # osd needs disk access
      };

      # Additions applied only to monitor services.
      monServiceConfig = {
        RestartSec = "10";
      };
    in {
      enable = true;
      description = "Ceph ${toUpper daemonType} daemon ${daemonId}";

      wants = startupTargets;
      after = startupTargets ++ optional isOsd "ceph-mon.target";
      partOf = typeTarget;
      wantedBy = typeTarget;

      path = [ pkgs.getopt ];

      # Don't start services that are not yet initialized
      unitConfig.ConditionPathExists = "/var/lib/${stateDirectory}/keyring";
      startLimitBurst = burstLimits.${daemonType} or 5;
      startLimitIntervalSec = 60 * 30;  # 30 mins

      serviceConfig = commonServiceConfig
        // optionalAttrs isOsd osdServiceConfig
        // optionalAttrs (daemonType == "mon") monServiceConfig;
    };

  # Builds the "ceph-<daemonType>" target that groups all services of one
  # daemon type and is itself attached to ceph.target.
  makeTarget = daemonType:
    let
      cephTarget = [ "ceph.target" ];
    in {
      "ceph-${daemonType}" = {
        description = "Ceph target allowing to start/stop all ceph-${daemonType} services at once";
        unitConfig.StopWhenUnneeded = true;
        before = cephTarget;
        partOf = cephTarget;
        wantedBy = cephTarget;
      };
    };
in
{
  options.services.ceph = {
    # Ceph has a monolithic configuration file but different sections for
    # each daemon, a separate client section and a global section; the options
    # below are grouped along the same lines.
    # Master switch: the whole `config` part of this module is guarded by it.
    enable = mkEnableOption "Ceph global configuration";

    # Settings that are rendered (with camelCase names expanded) into the
    # global section of ceph.conf.
    global =
      let
        # All three auth* options accept the same two modes.
        authMode = types.enum [ "cephx" "none" ];
        # Optional comma-separated list, unset (null) by default.
        optionalCommas = types.nullOr types.commas;
      in {
        fsid = mkOption {
          description = ''
            Filesystem ID, a generated uuid, its must be generated and set before
            attempting to start a cluster
          '';
          example = "433a2193-4f8a-47a0-95d2-209d7ca2cca5\n";
          type = types.str;
        };

        clusterName = mkOption {
          description = ''
            Name of cluster
          '';
          default = "ceph";
          type = types.str;
        };

        mgrModulePath = mkOption {
          description = ''
            Path at which to find ceph-mgr modules.
          '';
          default = "${pkgs.ceph.lib}/lib/ceph/mgr";
          defaultText = literalExpression ''"''${pkgs.ceph.lib}/lib/ceph/mgr"'';
          type = types.path;
        };

        monInitialMembers = mkOption {
          description = ''
            List of hosts that will be used as monitors at startup.
          '';
          example = "node0, node1, node2\n";
          default = null;
          type = optionalCommas;
        };

        monHost = mkOption {
          description = ''
            List of hostname shortnames/IP addresses of the initial monitors.
          '';
          example = "10.10.0.1, 10.10.0.2, 10.10.0.3\n";
          default = null;
          type = optionalCommas;
        };

        maxOpenFiles = mkOption {
          description = ''
            Max open files for each OSD daemon.
          '';
          default = 131072;
          type = types.int;
        };

        authClusterRequired = mkOption {
          description = ''
            Enables requiring daemons to authenticate with eachother in the cluster.
          '';
          default = "cephx";
          type = authMode;
        };

        authServiceRequired = mkOption {
          description = ''
            Enables requiring clients to authenticate with the cluster to access services in the cluster (e.g. radosgw, mds or osd).
          '';
          default = "cephx";
          type = authMode;
        };

        authClientRequired = mkOption {
          description = ''
            Enables requiring the cluster to authenticate itself to the client.
          '';
          default = "cephx";
          type = authMode;
        };

        publicNetwork = mkOption {
          description = ''
            A comma-separated list of subnets that will be used as public networks in the cluster.
          '';
          example = "10.20.0.0/24, 192.168.1.0/24\n";
          default = null;
          type = optionalCommas;
        };

        clusterNetwork = mkOption {
          description = ''
            A comma-separated list of subnets that will be used as cluster networks in the cluster.
          '';
          example = "10.10.0.0/24, 192.168.0.0/24\n";
          default = null;
          type = optionalCommas;
        };

        rgwMimeTypesFile = mkOption {
          description = ''
            Path to mime types used by radosgw.
          '';
          default = "${pkgs.mailcap}/etc/mime.types";
          defaultText = literalExpression ''"''${pkgs.mailcap}/etc/mime.types"'';
          type = types.nullOr types.path;
        };
      };

    # Free-form key/value pairs that are merged into the global section.
    extraConfig = mkOption {
      description = ''
        Extra configuration to add to the global section. Use for setting values that are common for all daemons in the cluster.
      '';
      example."ms bind ipv6" = "true";
      default = { };
      type = types.attrsOf types.str;
    };

    # Manager daemons: service ids plus extra settings.
    mgr = {
      enable = mkEnableOption "Ceph MGR daemon";

      daemons = mkOption {
        description = ''
          A list of names for manager daemons that should have a service created. The names correspond
          to the id part in ceph i.e. [ "name1" ] would result in mgr.name1
        '';
        example = [ "name1" "name2" ];
        default = [ ];
        type = types.listOf types.str;
      };

      extraConfig = mkOption {
        description = ''
          Extra configuration to add to the global section for manager daemons.
        '';
        default = { };
        type = types.attrsOf types.str;
      };
    };

    # Monitor daemons: service ids plus extra settings.
    mon = {
      enable = mkEnableOption "Ceph MON daemon";

      daemons = mkOption {
        description = ''
          A list of monitor daemons that should have a service created. The names correspond
          to the id part in ceph i.e. [ "name1" ] would result in mon.name1
        '';
        example = [ "name1" "name2" ];
        default = [ ];
        type = types.listOf types.str;
      };

      extraConfig = mkOption {
        description = ''
          Extra configuration to add to the monitor section.
        '';
        default = { };
        type = types.attrsOf types.str;
      };
    };

    # OSD daemons: service ids plus extra settings (which ship with defaults).
    osd = {
      enable = mkEnableOption "Ceph OSD daemon";

      daemons = mkOption {
        description = ''
          A list of OSD daemons that should have a service created. The names correspond
          to the id part in ceph i.e. [ "name1" ] would result in osd.name1
        '';
        example = [ "name1" "name2" ];
        default = [ ];
        type = types.listOf types.str;
      };

      extraConfig = mkOption {
        description = ''
          Extra configuration to add to the OSD section.
        '';
        default = {
          "osd journal size"          = "10000";
          "osd pool default size"     = "3";
          "osd pool default min size" = "2";
          "osd pool default pg num"   = "200";
          "osd pool default pgp num"  = "200";
          "osd crush chooseleaf type" = "1";
        };
        type = types.attrsOf types.str;
      };
    };

    # Metadata service daemons: service ids plus extra settings.
    mds = {
      enable = mkEnableOption "Ceph MDS daemon";

      daemons = mkOption {
        description = ''
          A list of metadata service daemons that should have a service created. The names correspond
          to the id part in ceph i.e. [ "name1" ] would result in mds.name1
        '';
        example = [ "name1" "name2" ];
        default = [ ];
        type = types.listOf types.str;
      };

      extraConfig = mkOption {
        description = ''
          Extra configuration to add to the MDS section.
        '';
        default = { };
        type = types.attrsOf types.str;
      };
    };

    # Rados gateway daemons: only service ids are declared here.
    rgw = {
      enable = mkEnableOption "Ceph RadosGW daemon";

      daemons = mkOption {
        description = ''
          A list of rados gateway daemons that should have a service created. The names correspond
          to the id part in ceph i.e. [ "name1" ] would result in client.name1, radosgw daemons
          aren't daemons to cluster in the sense that OSD, MGR or MON daemons are. They are simply
          daemons, from ceph, that uses the cluster as a backend.
        '';
        example = [ "name1" "name2" ];
        default = [ ];
        type = types.listOf types.str;
      };
    };

    # Client configuration: each attribute of extraConfig is a section name
    # mapping to that section's key/value pairs.
    client = {
      enable = mkEnableOption "Ceph client configuration";

      extraConfig = mkOption {
        description = ''
          Extra configuration to add to the client section. Configuration for rados gateways
          would be added here, with their own sections, see example.
        '';
        example = literalExpression ''
          {
            # This would create a section for a radosgw daemon named node0 and related
            # configuration for it
            "client.radosgw.node0" = { "some config option" = "true"; };
          };
        '';
        default = { };
        type = types.attrsOf (types.attrsOf types.str);
      };
    };
  };

  config = mkIf config.services.ceph.enable {
    # Evaluation-time sanity checks: an fsid must be present, and every
    # enabled cluster daemon type needs at least one daemon id.
    assertions =
      let
        # Assertion that holds unless `daemon` is enabled with an empty id list.
        requireDaemons = daemon: message: {
          assertion = daemon.enable -> daemon.daemons != [];
          inherit message;
        };
      in [
        {
          assertion = cfg.global.fsid != "";
          message = "fsid has to be set to a valid uuid for the cluster to function";
        }
        (requireDaemons cfg.mon "have to set id of atleast one MON if you're going to enable Monitor")
        (requireDaemons cfg.mds "have to set id of atleast one MDS if you're going to enable Metadata Service")
        (requireDaemons cfg.osd "have to set id of atleast one OSD if you're going to enable OSD")
        (requireDaemons cfg.mgr "have to set id of atleast one MGR if you're going to enable MGR")
      ];

    # Warn when monInitialMembers is left at null.
    warnings =
      let
        initialMembersUnset = cfg.global.monInitialMembers == null;
      in
        if initialMembersUnset
        then [ "Not setting up a list of members in monInitialMembers requires that you set the host variable for each mon daemon or else the cluster won't function" ]
        else [ ];

    # Renders /etc/ceph/ceph.conf as INI: a global section plus optional
    # mon/mds/osd sections and whatever sections the client config supplies.
    environment.etc."ceph/ceph.conf".text =
      let
        # mgr daemons don't have their own section, so their extraConfig is
        # merged into the global one while mgr is enabled.
        mgrExtraConfig = optionalAttrs cfg.mgr.enable cfg.mgr.extraConfig;

        # Global settings with camelCase names expanded; entries whose value
        # is null are dropped so they do not show up in the file.
        globalSection = filterAttrs (_: value: value != null)
          (expandCamelCaseAttrs (cfg.global // cfg.extraConfig // mgrExtraConfig));

        # A single-section set for an enabled daemon with non-empty extraConfig,
        # otherwise the empty set.
        daemonSection = sectionName: daemon:
          optionalAttrs (daemon.enable && daemon.extraConfig != {}) {
            ${sectionName} = daemon.extraConfig;
          };

        # Client extraConfig already maps section names to their settings.
        clientSections =
          optionalAttrs (cfg.client.enable && cfg.client.extraConfig != {}) cfg.client.extraConfig;
      in
        generators.toINI {} (
          { global = globalSection; }
          // daemonSection "mon" cfg.mon
          // daemonSection "mds" cfg.mds
          // daemonSection "osd" cfg.osd
          // clientSections
        );

    # Account and group the daemons run as; ids come from the central id tables.
    users = {
      users.ceph = {
        description = "Ceph daemon user";
        uid = config.ids.uids.ceph;
        group = "ceph";
        extraGroups = [ "disk" ];
      };

      groups.ceph.gid = config.ids.gids.ceph;
    };

    # One group of services per enabled daemon type, merged into a single set.
    systemd.services =
      let
        # Zero or one merged service sets, depending on the type's enable flag.
        servicesFor = daemonType:
          optional cfg.${daemonType}.enable
            (makeServices daemonType cfg.${daemonType}.daemons);
      in
        mkMerge (concatMap servicesFor [ "mon" "mds" "osd" "rgw" "mgr" ]);

    # The top-level ceph target plus one target per enabled daemon type.
    systemd.targets =
      let
        cephTarget = {
          ceph = {
            description = "Ceph target allowing to start/stop all ceph service instances at once";
            wantedBy = [ "multi-user.target" ];
            unitConfig.StopWhenUnneeded = true;
          };
        };

        # Zero or one target definitions, depending on the type's enable flag.
        targetFor = daemonType:
          optional cfg.${daemonType}.enable (makeTarget daemonType);
      in
        mkMerge ([ cephTarget ] ++ concatMap targetFor [ "mon" "mds" "osd" "rgw" "mgr" ]);

    # Directories created through tmpfiles: the shared ones always, the
    # per-type state directories only for enabled mgr/mon/osd daemons.
    systemd.tmpfiles.rules =
      let
        sharedRules = [
          "d /etc/ceph - ceph ceph - -"
          "d /run/ceph 0770 ceph ceph -"
          "d /var/lib/ceph - ceph ceph - -"
        ];

        # Zero or one rule for the state directory of the given daemon type.
        stateDirRule = daemonType:
          optional cfg.${daemonType}.enable "d /var/lib/ceph/${daemonType} - ceph ceph - -";
      in
        sharedRules ++ concatMap stateDirRule [ "mgr" "mon" "osd" ];
  };
}