summary refs log tree commit diff
path: root/nixos/modules/services/networking/multipath.nix
blob: 1a44184ff6dcc0754251ca85c17f7df192ca8f9b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
{ config, lib, pkgs, ... }: with lib;

# See http://christophe.varoqui.free.fr/usage.html and
# https://github.com/opensvc/multipath-tools/blob/master/multipath/multipath.conf.5

let
  cfg = config.services.multipath;

  # Prefix every non-empty line of `str` with a padding string of width `n`
  # (built from spaces) and join the result with "\n". Empty lines are
  # dropped, so the output never contains blank lines or a trailing newline.
  indentLines = n: str:
    let
      pad = fixedWidthString n " " " ";
      nonEmptyLines = filter (l: l != "") (splitString "\n" str);
    in
    concatMapStringsSep "\n" (l: "${pad}${l}") nonEmptyLines;

  # Wrap `elemType` with an additional `check` predicate and extend its
  # description with `desc`, so the extra constraint is visible wherever the
  # type's description is shown.
  addCheckDesc = desc: elemType: check:
    let
      checkedType = types.addCheck elemType check;
      describedAs = "${elemType.description} (with check: ${desc})";
    in
    checkedType // { description = describedAs; };
  # Lower-case hexadecimal digits; candidates are lower-cased before lookup,
  # so upper-case digits are accepted as well.
  hexChars = stringToCharacters "0123456789abcdef";
  # True iff `s` is non-empty and consists solely of hexadecimal digits.
  # The empty string is rejected explicitly: `all` over an empty list is
  # true, and an empty WWID would be rendered as a bare `wwid` line by
  # mkMultipathBlock.
  isHexString = s: s != "" && all (c: elem c hexChars) (stringToCharacters (toLower s));
  # String type restricted to non-empty hexadecimal strings (used for WWIDs).
  hexStr = addCheckDesc "hexadecimal string" types.str isHexString;

in {

  options.services.multipath = with types; {

    # Master switch: the whole `config` section of this module is wrapped in
    # `mkIf cfg.enable`.
    enable = mkEnableOption "the device mapper multipath (DM-MP) daemon";

    # Package providing the multipath tools. It is added to
    # systemd.packages and environment.systemPackages, and its
    # lib/multipath directory is referenced from the `defaults` section.
    package = mkOption {
      type = package;
      description = "multipath-tools package to use";
      default = pkgs.multipath-tools;
      # Mark the default as a Nix expression (as done for the examples in
      # this file) instead of a plain string.
      defaultText = literalExpression "pkgs.multipath-tools";
    };

    devices = mkOption {
      default = [ ];
      example = literalExpression ''
        [
          {
            vendor = "\"COMPELNT\"";
            product = "\"Compellent Vol\"";
            path_checker = "tur";
            no_path_retry = "queue";
            max_sectors_kb = 256;
          }, ...
        ]
      '';
      description = ''
        This option allows you to define arrays for use in multipath
        groups.
      '';
      type = listOf (submodule {
        options = {

          # Device-matching attributes. `vendor` and `product` declare no
          # default, so every entry of `devices` has to set them; the other
          # options here default to null and are then left out of the
          # generated `device { }` block. Values are written verbatim as
          # `<name> <value>` lines by mkDeviceBlock, so any quoting that
          # multipath.conf needs must be part of the string itself.
          # NOTE(review): the list-level example for `devices` embeds literal
          # double quotes in vendor/product while the per-option examples
          # below do not -- confirm which form is intended.
          vendor = mkOption {
            type = str;
            example = "COMPELNT";
            description = "Regular expression to match the vendor name";
          };

          product = mkOption {
            type = str;
            example = "Compellent Vol";
            description = "Regular expression to match the product name";
          };

          revision = mkOption {
            type = nullOr str;
            default = null;
            description = "Regular expression to match the product revision";
          };

          product_blacklist = mkOption {
            type = nullOr str;
            default = null;
            description = "Products with the given vendor matching this string are blacklisted";
          };

          alias_prefix = mkOption {
            type = nullOr str;
            default = null;
            description = "The user_friendly_names prefix to use for this device type, instead of the default mpath";
          };

          vpd_vendor = mkOption {
            type = nullOr str;
            default = null;
            description = "The vendor specific vpd page information, using the vpd page abbreviation";
          };

          # NOTE(review): multipath.conf(5) appears to list handlers in the
          # form "1 alua", "1 emc", ...; confirm that the bare names emitted
          # here are accepted by multipath.
          hardware_handler = mkOption {
            type = nullOr (enum [ "emc" "rdac" "hp_sw" "alua" "ana" ]);
            default = null;
            description = "The hardware handler to use for this device type";
          };

          # Optional arguments
          # A null value means "do not emit this attribute": mkDeviceBlock
          # filters null values out before rendering. The `# real default`
          # remarks next to `default = null` record the value the original
          # author expects multipath to fall back to in that case.
          path_grouping_policy = mkOption {
            type = nullOr (enum [ "failover" "multibus" "group_by_serial" "group_by_prio" "group_by_node_name" ]);
            default = null; # real default: "failover"
            description = "The default path grouping policy to apply to unspecified multipaths";
          };

          uid_attribute = mkOption {
            type = nullOr str;
            default = null;
            description = "The udev attribute providing a unique path identifier (WWID)";
          };

          getuid_callout = mkOption {
            type = nullOr str;
            default = null;
            description = ''
              (Superseded by uid_attribute) The default program and args to callout
              to obtain a unique path identifier. Should be specified with an absolute path.
            '';
          };

          # The enum members contain literal double quotes, so the rendered
          # line reads e.g. `path_selector "round-robin 0"`.
          path_selector = mkOption {
            type = nullOr (enum [
              ''"round-robin 0"''
              ''"queue-length 0"''
              ''"service-time 0"''
              ''"historical-service-time 0"''
            ]);
            default = null; # real default: "service-time 0"
            description = "The default path selector algorithm to use; they are offered by the kernel multipath target";
          };

          # Unlike its siblings this option is not nullable and defaults to
          # "tur", so a `path_checker` line is rendered for every device.
          path_checker = mkOption {
            type = enum [ "readsector0" "tur" "emc_clariion" "hp_sw" "rdac" "directio" "cciss_tur" "none" ];
            default = "tur";
            description = "The default method used to determine the paths state";
          };

          prio = mkOption {
            type = nullOr (enum [
              "none" "const" "sysfs" "emc" "alua" "ontap" "rdac" "hp_sw" "hds"
              "random" "weightedpath" "path_latency" "ana" "datacore" "iet"
            ]);
            default = null; # real default: "const"
            description = "The name of the path priority routine";
          };

          # Free-form argument string for the prioritizer; left out of the
          # rendered device block when null.
          prio_args = mkOption {
            type = nullOr str;
            default = null;
            description = "Arguments to pass to the prio function";
          };

          # Several options below are typed `nullOr str` although their
          # descriptions talk about numbers (they also take keywords such as
          # "off" or "infinity"); numeric values therefore have to be
          # written as strings, as the descriptions point out.
          features = mkOption {
            type = nullOr str;
            default = null;
            description = "Specify any device-mapper features to be used";
          };

          failback = mkOption {
            type = nullOr str;
            default = null; # real default: "manual"
            description = "Tell multipathd how to manage path group failback. Quote integers as strings";
          };

          rr_weight = mkOption {
            type = nullOr (enum [ "priorities" "uniform" ]);
            default = null; # real default: "uniform"
            description = ''
              If set to priorities the multipath configurator will assign path weights
              as "path prio * rr_min_io".
            '';
          };

          no_path_retry = mkOption {
            type = nullOr str;
            default = null; # real default: "fail"
            description = "Specify what to do when all paths are down. Quote integers as strings";
          };

          # rr_min_io and rr_min_io_rq are genuine integers and are rendered
          # through `toString` in mkDeviceBlock.
          rr_min_io = mkOption {
            type = nullOr int;
            default = null; # real default: 1000
            description = ''
              Number of I/O requests to route to a path before switching to the next in the
              same path group. This is only for Block I/O (BIO) based multipath and
              only apply to round-robin path_selector.
            '';
          };

          rr_min_io_rq = mkOption {
            type = nullOr int;
            default = null; # real default: 1
            description = ''
              Number of I/O requests to route to a path before switching to the next in the
              same path group. This is only for Request based multipath and
              only apply to round-robin path_selector.
            '';
          };

          fast_io_fail_tmo = mkOption {
            type = nullOr str;
            default = null; # real default: 5
            description = ''
              Specify the number of seconds the SCSI layer will wait after a problem has been
              detected on a FC remote port before failing I/O to devices on that remote port.
              This should be smaller than dev_loss_tmo. Setting this to "off" will disable
              the timeout. Quote integers as strings.
            '';
          };

          dev_loss_tmo = mkOption {
            type = nullOr str;
            default = null; # real default: 600
            description = ''
              Specify the number of seconds the SCSI layer will wait after a problem has
              been detected on a FC remote port before removing it from the system. This
              can be set to "infinity" which sets it to the max value of 2147483647
              seconds, or 68 years. It will be automatically adjusted to the overall
              retry interval no_path_retry * polling_interval
              if a number of retries is given with no_path_retry and the
              overall retry interval is longer than the specified dev_loss_tmo value.
              The Linux kernel will cap this value to 600 if fast_io_fail_tmo
              is not set.
            '';
          };

          flush_on_last_del = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "no"
            description = ''
              If set to "yes" multipathd will disable queueing when the last path to a
              device has been deleted.
            '';
          };

          # yes/no switch; null leaves the attribute out of the device block.
          user_friendly_names = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "no"
            description = ''
              If set to "yes", using the bindings file /etc/multipath/bindings
              to assign a persistent and unique alias to the multipath, in the
              form of mpath. If set to "no" use the WWID as the alias. In either
              case this will be overridden by any specific aliases in the
              multipaths section.
            '';
          };

          # yes/no switch; null leaves the attribute out of the device block.
          detect_prio = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "yes"
            description = ''
              If set to "yes", multipath will try to detect if the device supports
              SCSI-3 ALUA. If so, the device will automatically use the sysfs
              prioritizer if the required sysfs attributes access_state and
              preferred_path are supported, or the alua prioritizer if not. If set
              to "no", the prioritizer will be selected as usual.
            '';
          };

          # yes/no switches; null leaves the attribute out of the rendered
          # device block so that multipath applies its own default.
          detect_checker = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "yes"
            description = ''
              If set to "yes", multipath will try to detect if the device supports
              SCSI-3 ALUA. If so, the device will automatically use the tur checker.
              If set to "no", the checker will be selected as usual.
            '';
          };

          deferred_remove = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "no"
            description = ''
              If set to "yes", multipathd will do a deferred remove instead of a
              regular remove when the last path device has been deleted. This means
              that if the multipath device is still in use, it will be freed when
              the last user closes it. If path is added to the multipath device
              before the last user closes it, the deferred remove will be canceled.
            '';
          };

          # Numeric threshold passed as a string (the option is typed str);
          # works together with san_path_err_forget_rate and
          # san_path_err_recovery_time.
          san_path_err_threshold = mkOption {
            type = nullOr str;
            default = null;
            description = ''
              If set to a value greater than 0, multipathd will watch paths and check
              how many times a path has been failed due to errors. If the number of
              failures on a particular path is greater than the san_path_err_threshold,
              then the path will not reinstate till san_path_err_recovery_time. These
              path failures should occur within a san_path_err_forget_rate checks, if
              not we will consider the path is good enough to reinstate.
            '';
          };

          # Number of checks, passed as a string (the option is typed str),
          # within which san_path_err_threshold failures must occur.
          san_path_err_forget_rate = mkOption {
            type = nullOr str;
            default = null;
            description = ''
              If set to a value greater than 0, multipathd will check whether the path
              failures have exceeded the san_path_err_threshold within this many checks
              i.e. san_path_err_forget_rate. If so we will not reinstate the path till
              san_path_err_recovery_time.
            '';
          };

          # Duration in seconds, passed as a string (the option is typed str).
          san_path_err_recovery_time = mkOption {
            type = nullOr str;
            default = null;
            description = ''
              If set to a value greater than 0, multipathd will make sure that when
              path failures have exceeded the san_path_err_threshold within
              san_path_err_forget_rate then the path will be placed in failed state
              for san_path_err_recovery_time duration. Once san_path_err_recovery_time
              has timed out we will reinstate the failed path. san_path_err_recovery_time
              value should be in secs.
            '';
          };

          # The four marginal_path_* options belong together (see their
          # descriptions).
          # NOTE(review): two of them are typed `nullOr int` and two
          # `nullOr str`; all are rendered through `toString`, so the split
          # looks accidental -- confirm before relying on it.
          marginal_path_err_sample_time = mkOption {
            type = nullOr int;
            default = null;
            description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
          };

          marginal_path_err_rate_threshold = mkOption {
            type = nullOr int;
            default = null;
            description = "The error rate threshold as a permillage (1/1000)";
          };

          marginal_path_err_recheck_gap_time = mkOption {
            type = nullOr str;
            default = null;
            description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
          };

          marginal_path_double_failed_time = mkOption {
            type = nullOr str;
            default = null;
            description = "One of the four parameters of supporting path check based on accounting IO error such as intermittent error";
          };

          # Deprecated aliases, kept so that existing settings keep
          # evaluating; their descriptions name the replacements.
          delay_watch_checks = mkOption {
            type = nullOr str;
            default = null;
            description = "This option is deprecated, and mapped to san_path_err_forget_rate";
          };

          delay_wait_checks = mkOption {
            type = nullOr str;
            default = null;
            description = "This option is deprecated, and mapped to san_path_err_recovery_time";
          };

          skip_kpartx = mkOption {
            type = nullOr (enum [ "yes" "no" ]);
            default = null; # real default: "no"
            description = "If set to yes, kpartx will not automatically create partitions on the device";
          };

          max_sectors_kb = mkOption {
            type = nullOr int;
            default = null;
            description = "Sets the max_sectors_kb device parameter on all path devices and the multipath device to the specified value";
          };

          ghost_delay = mkOption {
            type = nullOr int;
            default = null;
            description = "Sets the number of seconds that multipath will wait after creating a device with only ghost paths before marking it ready for use in systemd";
          };

          all_tg_pt = mkOption {
            type = nullOr str;
            default = null;
            description = "Set the 'all targets ports' flag when registering keys with mpathpersist";
          };

        };
      });
    };

    # Free-form section bodies. When non-null, each of the next four options
    # is re-indented with indentLines and wrapped in a section of the same
    # name in the generated multipath.conf; when null the section is omitted.
    # The `defaults` section additionally receives a `multipath_dir` line
    # pointing into cfg.package.
    defaults = mkOption {
      type = nullOr str;
      default = null;
      description = ''
        This section defines default values for attributes which are used
        whenever no values are given in the appropriate device or multipath
        sections.
      '';
    };

    blacklist = mkOption {
      type = nullOr str;
      default = null;
      description = ''
        This section defines which devices should be excluded from the
        multipath topology discovery.
      '';
    };

    blacklist_exceptions = mkOption {
      type = nullOr str;
      default = null;
      description = ''
        This section defines which devices should be included in the
        multipath topology discovery, despite being listed in the
        blacklist section.
      '';
    };

    overrides = mkOption {
      type = nullOr str;
      default = null;
      description = ''
        This section defines values for attributes that should override the
        device-specific settings for all devices.
      '';
    };

    extraConfig = mkOption {
      type = nullOr str;
      default = null;
      description = "Lines to append to default multipath.conf";
    };

    # Typed as a plain string rather than a path.
    extraConfigFile = mkOption {
      type = nullOr str;
      default = null;
      description = "Append an additional file's contents to /etc/multipath.conf";
    };

    # Explicit multipath maps. Only `wwid` and `alias` are written to the
    # generated multipath.conf (see mkMultipathBlock); `array`, `fsType` and
    # `options` are accepted but not consumed by the config section of this
    # module.
    pathGroups = mkOption {
      # Default to "no explicit maps", mirroring `devices`, so that enabling
      # the service without declaring any path group still evaluates.
      default = [ ];
      example = literalExpression ''
        [
          {
            wwid = "360080e500043b35c0123456789abcdef";
            alias = 10001234;
            array = "bigarray.example.com";
            fsType = "zfs"; # optional
            options = "ro"; # optional
          }, ...
        ]
      '';
      description = ''
        This option allows you to define multipath groups as described
        in http://christophe.varoqui.free.fr/usage.html.
      '';
      type = listOf (submodule {
        options = {

          # Required. Rendered through `toString` as the map's alias.
          alias = mkOption {
            type = int;
            example = 1001234;
            description = "The name of the multipath device";
          };

          # Required. Validated by hexStr (hexadecimal digits only).
          wwid = mkOption {
            type = hexStr;
            example = "360080e500043b35c0123456789abcdef";
            description = "The identifier for the multipath device";
          };

          array = mkOption {
            # `nullOr` is needed for the null default to type-check; with a
            # bare `str` type, reading an unset `array` fails evaluation.
            type = nullOr str;
            default = null;
            example = "bigarray.example.com";
            description = "The DNS name of the storage array";
          };

          fsType = mkOption {
            type = nullOr str;
            default = null;
            example = "zfs";
            description = "Type of the filesystem";
          };

          options = mkOption {
            type = nullOr str;
            default = null;
            example = "ro";
            description = "Options used to mount the file system";
          };

        };
      });
    };

  };

  config = mkIf cfg.enable {
    # Render /etc/multipath.conf: a `devices` section built from cfg.devices,
    # the optional free-form sections, a `multipaths` section built from
    # cfg.pathGroups, and finally the verbatim extraConfig lines.
    environment.etc."multipath.conf".text =
      let
        inherit (cfg) defaults blacklist blacklist_exceptions overrides extraConfig;

        # Render one `device { ... }` stanza: every non-null attribute of the
        # entry becomes a `<name> <value>` line. The parameter is named
        # `device` so that it does not shadow the module-level `cfg`.
        mkDeviceBlock = device: let
          setAttrs = lib.filterAttrs (k: v: v != null) device;
          attrs = lib.mapAttrsToList (name: value: "  ${name} ${toString value}") setAttrs;
        in ''
          device {
          ${lib.concatStringsSep "\n" attrs}
          }
        '';
        devices = lib.concatMapStringsSep "\n" mkDeviceBlock cfg.devices;

        # Render one `multipath { ... }` stanza; only wwid and alias are used.
        mkMultipathBlock = m: ''
          multipath {
            wwid ${m.wwid}
            alias ${toString m.alias}
          }
        '';
        multipaths = lib.concatMapStringsSep "\n" mkMultipathBlock cfg.pathGroups;

      in ''
        devices {
        ${indentLines 2 devices}
        }

        ${optionalString (defaults != null) ''
          defaults {
          ${indentLines 2 defaults}
            multipath_dir ${cfg.package}/lib/multipath
          }
        ''}
        ${optionalString (blacklist != null) ''
          blacklist {
          ${indentLines 2 blacklist}
          }
        ''}
        ${optionalString (blacklist_exceptions != null) ''
          blacklist_exceptions {
          ${indentLines 2 blacklist_exceptions}
          }
        ''}
        ${optionalString (overrides != null) ''
          overrides {
          ${indentLines 2 overrides}
          }
        ''}
        multipaths {
        ${indentLines 2 multipaths}
        }
      ''
      # Honour `extraConfig` ("Lines to append to default multipath.conf").
      # Nothing is appended when it is null, so the generated file is
      # unchanged for configurations that do not set it.
      # NOTE(review): `extraConfigFile` is declared as well but is still not
      # consumed here; reading it at evaluation time would require the file
      # to exist when the configuration is built -- decide on the intended
      # semantics before wiring it up.
      + optionalString (extraConfig != null) "\n${extraConfig}\n";

    # Hand the package to systemd so that the unit files it ships are used.
    systemd.packages = [ cfg.package ];

    # Install the multipath tools into the system profile and load the
    # device-mapper multipath modules in the booted system.
    environment.systemPackages = [ cfg.package ];
    boot.kernelModules = [ "dm-multipath" "dm-service-time" ];

    # We do not have systemd in stage-1 boot so must invoke `multipathd`
    # with the `-1` argument which disables systemd calls. Invoke `multipath`
    # to display the multipath mappings in the output of `journalctl -b`.
    # NOTE(review): the text above mentions `-1`, but the command below
    # passes `-s`; confirm against multipathd(8) which flag is intended.
    boot.initrd.kernelModules = [ "dm-multipath" "dm-service-time" ];
    boot.initrd.postDeviceCommands = ''
      modprobe -a dm-multipath dm-service-time
      multipathd -s
      (set -x && sleep 1 && multipath -ll)
    '';
  };
}