From 7df55477fd04b1443051f767fd77ea42cb50ed59 Mon Sep 17 00:00:00 2001 From: Charles Duffy Date: Sun, 14 Oct 2018 00:40:37 -0500 Subject: bees: init at 0.6.1 Introduce an extent-layer (as opposed to the existing file-level) deduplication system for btrfs. This provides a means of finding similarities within non-identical files, when they contain identical, aligned blocks. --- pkgs/tools/filesystems/bees/bees-service-wrapper | 223 +++++++++++++++++++++++ pkgs/tools/filesystems/bees/default.nix | 69 +++++++ pkgs/top-level/all-packages.nix | 2 + 3 files changed, 294 insertions(+) create mode 100755 pkgs/tools/filesystems/bees/bees-service-wrapper create mode 100644 pkgs/tools/filesystems/bees/default.nix diff --git a/pkgs/tools/filesystems/bees/bees-service-wrapper b/pkgs/tools/filesystems/bees/bees-service-wrapper new file mode 100755 index 00000000000..8ef46afc18f --- /dev/null +++ b/pkgs/tools/filesystems/bees/bees-service-wrapper @@ -0,0 +1,223 @@ +#!@bash@/bin/bash +PATH=@bash@/bin:@coreutils@/bin:@utillinux@/bin:@btrfsProgs@/bin:$PATH +beesd_bin=@bees@/lib/bees/bees +# PLEASE KEEP NIX-ISMS ABOVE THIS LINE TO EASE UPSTREAM MERGE +#!/usr/bin/env bash + +shopt -s extglob + +# Upstream wrapper requires UUID to be used for configuration. + +# However, when declaratively describing a host, we may not know its UUID, and +# shouldn't need to persist something that will differ between hosts built from +# the same configuration template. + +# Thus, for using bees from NixOS, we have our own wrapper, which supports not +# just UUID but any specification permitted by findmnt + +[[ $bees_debug ]] && { PS4=':${BASH_SOURCE##*/}:$LINENO+'; set -x; } + +usage() { + cat >&2 <&2; exit 1; } + +allConfigNames=( blockdev fsSpec home idxSize idxSizeMB mntDir runDir status verbosity workDir ) + +# Alternate names for configuration values; "bees_" will always be prepended +declare -A altConfigNames=( + # from original bees wrapper + [BEESHOME]=home + [BEESSTATUS]=status + [MNT_DIR]=mntDir + [UUID]=uuid + [WORK_DIR]=runDir + [DB_SIZE]=idxSize +) + +# legacy bees config files can be arbitrary shell scripts, so we need to actually evaluate them +sandboxedConfigFileEval() { + bash_exe=$(type -P bash) || exit + PATH=/var/empty ENV='' BASH_ENV='' AL128K="$((128*1024))" AL16M="$((16*1024*1024))" "$bash_exe" -r ${bees_debug+-x} \ + -c 'eval "$(&2; for var; do [[ ${!var} ]] && printf "%q=%s\\0" "$var" "${!var}"; done' \ + "${!altConfigNames[@]}" "${allConfigNames[@]}" \ + <"$1" +} + +readConfigFileIfExists() { + local line + [[ -s $1 ]] || return 1 + while IFS= read -r -d '' line; do + line=${line%%+([[:space:]])"#"*} + [[ $line ]] || continue + [[ $line = *=* ]] || { + printf 'WARNING: Config file line not recognized: %q\n' "$line" >&2 + continue + } + set_option "$line" + done < <(sandboxedConfigFileEval "$1") +} + +set_option() { + local k v + k="${1%%=*}" v="${1#*=}" + [[ ${altConfigNames[$k]} ]] && k=${altConfigNames[$k]} + printf -v "bees_$k" %s "$v" +} + +uuid_re='^[[:xdigit:]]{8}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{4}-[[:xdigit:]]{12}$' + +# Shared code for setting configuration used by other operations. +# +# Reads from global associative array "opts" containing options passed in as +# key=value pairs on the command line, looks for config-file overrides, and +# sets individual global variables. +_setup() { + declare fstype + bees_fsSpec=$1; shift + + # Look for file-based configuration, additional to honoring configuration on the command line + bees_config_dir="${bees_config_dir:-/etc/bees}" + if [[ $bees_fsSpec =~ $uuid_re ]]; then + bees_uuid=$bees_fsSpec + # If our spec looks like a bare UUID, and no config file exists in the new + # format, fall back to legacy config file search mechanism (grep; ewww). + if ! readConfigFileIfExists "$bees_config_dir/UUID=$bees_fsSpec.conf"; then + # Legacy approach to finding a config file: Grep for a *.conf file + # containing the UUID within its text. Permitting spaces around the "=" + # appears to be a bug, but is retained for compatibility with the + # original upstream script. + allConfFiles=( "$bees_config_dir"/*.conf ) + if (( ${#allConfFiles[@]} )); then + # in read or readarray with -d '', the NUL terminating the empty string is used as delimiter character. + readarray -d '' -t matchingConfFiles < <(grep -E -l -Z "^[^#]*UUID[[:space:]]*=[[:space:]]*" "${allConfFiles[@]}") + else + matchingConfFiles=( ) + fi + if (( ${#matchingConfFiles[@]} == 1 )); then + # Exactly one configuration file exists in our target directory with a reference to the UUID given. + bees_config_file=${matchingConfFiles[0]} + readConfigFileIfExists "$bees_config_file" + echo "NOTE: Please consider renaming $bees_config_file to $bees_config_dir/UUID=$bees_fsSpec" >&2 + echo " ...and passing UUID=$bees_fsSpec on startup." >&2 + elif (( ${#matchingConfFiles[@]} > 1 )); then + # The legacy wrapper would silently use the first file and ignore + # others, but... no. + echo "ERROR: Passed a bare UUID, but multiple configuration files match it:" >&2 + printf ' - %q\n' "${matchingConfFiles[@]}" >&2 + die "Unable to continue." + fi + fi + else + # For a non-UUID fsSpec that is not a path, look only for a config file + # exactly matching its text. + # + # (Passing a mount point as a fsSpec is only supported with the new + # wrapper; all key=value pairs can be passed on the command line in this + # mode, so config file support is not needed). + [[ $bees_fsSpec = */* ]] || readConfigFileIfExists "$bees_config_dir/$bees_fsSpec.conf" + fi + + [[ $bees_uuid ]] || { + # if bees_uuid is not in our .conf file, look it up with findmnt + read -r bees_uuid fstype < <(findmnt -n -o uuid,fstype "$bees_fsSpec") && [[ $fstype ]] || exit + [[ $fstype = btrfs ]] || die "Device type is $fstype, not btrfs" + } + + [[ $bees_uuid = */* ]] || readConfigFileIfExists "$bees_config_dir/UUID=$bees_uuid.conf" + + # Honor any values read from config files above; otherwise, set defaults. + bees_workDir="${bees_workDir:-.beeshome}" + bees_runDir="${bees_runDir:-/run/bees}" + bees_mntDir="${bees_mntDir:-$bees_runDir/mnt/$bees_uuid}" + bees_home="${bees_home:-$bees_mntDir/$bees_workDir}" + bees_status="${bees_status:-${bees_runDir}/$bees_uuid.status}" + bees_verbosity="${bees_verbosity:-6}" + bees_idxSizeMB="${bees_idxSizeMB:-1024}" + bees_idxSize=${bees_idxSize:-"$(( bees_idxSizeMB * 1024 * 1024 ))"} + bees_blockdev=${bees_blockdev:-"/dev/disk/by-uuid/$bees_uuid"} + + [[ -b $bees_blockdev ]] || die "Block device $bees_blockdev missing" + (( bees_idxSize % (16 * 1024 * 1024) == 0 )) || die "DB size must be divisible by 16MB" +} + +do_run() { + local db old_db_size + + _setup "$1"; shift + mkdir -p -- "$bees_mntDir" || exit + + # subvol id 5 is reserved for the root subvolume of a btrfs filesystem. + mountpoint -q "$bees_mntDir" || mount -osubvolid=5 -- "$bees_blockdev" "$bees_mntDir" || exit + if [[ -d $bees_home ]]; then + btrfs subvolume show "$bees_home" >/dev/null 2>&1 || die "$bees_home exists but is not a subvolume" + else + btrfs subvolume create "$bees_home" || exit + sync # workaround for Zygo/bees#93 + fi + db=$bees_home/beeshash.dat + touch -- "$db" + + old_db_size=$(stat -c %s -- "$db") + new_db_size=$bees_idxSize + + if (( old_db_size != new_db_size )); then + rm -f -- "$bees_home"/beescrawl."$bees_uuid".dat + truncate -s "$new_db_size" -- "$db" || exit + fi + chmod 700 -- "$bees_home" + + # BEESSTATUS and BEESHOME are the only variables handled by the legacy + # wrapper for which getenv() is called in C code. + BEESSTATUS=$bees_status BEESHOME=$bees_home exec "${beesd_bin:-/lib/bees/bees}" \ + --verbose "$bees_verbosity" \ + "$@" "$bees_mntDir" || exit +} + +do_cleanup() { + _setup "$1"; shift + mountpoint -q "$bees_mntDir" && umount -l -- "$bees_mntDir" || exit +} + +(( $# >= 2 )) || usage +declare -f "do_$1" >/dev/null 2>&1 || usage +mode=$1; shift # must be a do_* function; currently "run" or "cleanup" + +declare -a args=( "$1" ); shift # pass first argument (config-name|fsSpec) through literally + +# parse other arguments as key=value pairs, or pass them through literally if they do not match that form. +# similarly, any option after "--" will be passed through literally. +while (( $# )); do + if [[ $1 = *=* ]]; then + set_option "$1" + elif [[ $1 = -- ]]; then + shift + args+=( "$@" ) + break + else + args+=( "$1" ) + fi + shift +done + +"do_$mode" "${args[@]}" diff --git a/pkgs/tools/filesystems/bees/default.nix b/pkgs/tools/filesystems/bees/default.nix new file mode 100644 index 00000000000..c43962cb075 --- /dev/null +++ b/pkgs/tools/filesystems/bees/default.nix @@ -0,0 +1,69 @@ +{ stdenv, runCommand, makeWrapper, fetchFromGitHub, bash, btrfs-progs, coreutils, pythonPackages, utillinux }: + +let + + version = "0.6.1"; + sha256 = "0h7idclmhyp14mq6786x7f2237vqpn70gyi88ik4g70xl84yfgyh"; + + bees = stdenv.mkDerivation rec { + name = "bees-${version}"; + inherit version; + + src = fetchFromGitHub { + owner = "Zygo"; + repo = "bees"; + rev = "v${version}"; + inherit sha256; + }; + + buildInputs = [ + btrfs-progs # for btrfs/ioctl.h + utillinux # for uuid.h + ]; + + nativeBuildInputs = [ + pythonPackages.markdown # documentation build + ]; + + preBuild = '' + git() { if [[ $1 = describe ]]; then echo ${version}; else command git "$@"; fi; } + export -f git + ''; + + postBuild = '' + unset -f git + ''; + + buildFlags = [ + "ETC_PREFIX=/var/run/bees/configs" + ]; + + makeFlags = [ + "SHELL=bash" + "PREFIX=$(out)" + "ETC_PREFIX=$(out)/etc" + "BEES_VERSION=${version}" + "SYSTEMD_SYSTEM_UNIT_DIR=$(out)/etc/systemd/system" + ]; + + meta = with stdenv.lib; { + homepage = "https://github.com/Zygo/bees"; + description = "Block-oriented BTRFS deduplication service"; + license = licenses.gpl3; + platforms = platforms.linux; + maintainers = with maintainers; [ chaduffy ]; + longDescription = "Best-Effort Extent-Same: bees finds not just identical files, but also identical extents within files that differ"; + }; + }; + +in + +runCommand "bees-service-${version}" { + inherit bash bees coreutils utillinux; + btrfsProgs = btrfs-progs; # needs to be a valid shell variable name +} '' + mkdir -p -- "$out/bin" + substituteAll ${./bees-service-wrapper} "$out"/bin/bees-service-wrapper + chmod +x "$out"/bin/bees-service-wrapper + ln -s ${bees}/bin/beesd "$out"/bin/beesd +'' diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix index 09f566ccd7f..c7fe0a978c6 100644 --- a/pkgs/top-level/all-packages.nix +++ b/pkgs/top-level/all-packages.nix @@ -21893,6 +21893,8 @@ with pkgs; beep = callPackage ../misc/beep { }; + bees = callPackage ../tools/filesystems/bees { }; + blackbird = callPackage ../misc/themes/blackbird { }; bootil = callPackage ../development/libraries/bootil { }; -- cgit 1.4.1