diff options
9 files changed, 114 insertions, 107 deletions
diff --git a/doc/languages-frameworks/cuda.section.md b/doc/languages-frameworks/cuda.section.md index fccf66bf79d..27bae33bc71 100644 --- a/doc/languages-frameworks/cuda.section.md +++ b/doc/languages-frameworks/cuda.section.md @@ -32,3 +32,22 @@ mypkg = let }}); in callPackage { inherit cudaPackages; }; ``` + +The CUDA NVCC compiler requires flags to determine which hardware you +want to target in terms of SASS (real hardware) or PTX (JIT kernels). + +Nixpkgs tries to support real architecture defaults based on the +CUDA toolkit version with PTX support for future hardware. Experienced +users may optimize this configuration for a variety of reasons such as +reducing binary size and compile time, supporting legacy hardware, or +optimizing for specific hardware. + +You may provide capabilities to add support or reduce binary size through +`config` using `cudaCapabilities = [ "6.0" "7.0" ];` and +`cudaForwardCompat = true;` if you want PTX support for future hardware. + +Please consult [GPUs supported](https://en.wikipedia.org/wiki/CUDA#GPUs_supported) +for your specific card(s). + +Library maintainers should consult [NVCC Docs](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/) +and release notes for their software package. diff --git a/pkgs/applications/science/math/mxnet/default.nix b/pkgs/applications/science/math/mxnet/default.nix index 9a1b550d882..dcba888ce2f 100644 --- a/pkgs/applications/science/math/mxnet/default.nix +++ b/pkgs/applications/science/math/mxnet/default.nix @@ -2,11 +2,10 @@ , opencv3, gtest, blas, gomp, llvmPackages, perl , cudaSupport ? config.cudaSupport or false, cudaPackages ? {}, nvidia_x11 , cudnnSupport ? cudaSupport -, cudaCapabilities ? 
[ "3.7" "5.0" "6.0" "7.0" "7.5" "8.0" "8.6" ] }: let - inherit (cudaPackages) cudatoolkit cudnn; + inherit (cudaPackages) cudatoolkit cudaFlags cudnn; in assert cudnnSupport -> cudaSupport; @@ -51,7 +50,7 @@ stdenv.mkDerivation rec { "-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743 "-DCUDA_ARCH_NAME=All" "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc" - "-DMXNET_CUDA_ARCH=${lib.concatStringsSep ";" cudaCapabilities}" + "-DMXNET_CUDA_ARCH=${cudaFlags.cudaCapabilitiesSemiColonString}" ] else [ "-DUSE_CUDA=OFF" ]) ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF"; diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix index 862c8316799..c11f12b118a 100644 --- a/pkgs/development/compilers/cudatoolkit/extension.nix +++ b/pkgs/development/compilers/cudatoolkit/extension.nix @@ -10,6 +10,8 @@ final: prev: let ### Add classic cudatoolkit package cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion}); + cudaFlags = final.callPackage ./flags.nix {}; + in { - inherit cudatoolkit; + inherit cudatoolkit cudaFlags; } diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix new file mode 100644 index 00000000000..24f653ded29 --- /dev/null +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -0,0 +1,78 @@ +{ config +, lib +, cudatoolkit +}: +let + + # Flags are determined based on your CUDA toolkit by default. You may benefit + # from improved performance, reduced file size, or greater hardware support by + # passing a configuration based on your specific GPU environment. 
+ # + # config.cudaCapabilities: list of hardware generations to support (e.g., "8.0") + # config.cudaForwardCompat: bool for compatibility with future GPU generations + # + # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351 + + defaultCudaCapabilities = rec { + cuda9 = [ + "3.0" + "3.5" + "5.0" + "5.2" + "6.0" + "6.1" + "7.0" + ]; + + cuda10 = cuda9 ++ [ + "7.5" + ]; + + cuda11 = [ + "3.5" + "5.0" + "5.2" + "6.0" + "6.1" + "7.0" + "7.5" + "8.0" + "8.6" + ]; + + }; + + cudaMicroarchitectureNames = { + "3" = "Kepler"; + "5" = "Maxwell"; + "6" = "Pascal"; + "7" = "Volta"; + "8" = "Ampere"; + "9" = "Hopper"; + }; + + defaultCudaArchList = defaultCudaCapabilities."cuda${lib.versions.major cudatoolkit.version}"; + cudaRealCapabilities = config.cudaCapabilities or defaultCudaArchList; + capabilitiesForward = "${lib.last cudaRealCapabilities}+PTX"; + + dropDot = ver: builtins.replaceStrings ["."] [""] ver; + + archMapper = feat: map (ver: "${feat}_${dropDot ver}"); + gencodeMapper = feat: map (ver: "-gencode=arch=compute_${dropDot ver},code=${feat}_${dropDot ver}"); + cudaRealArchs = archMapper "sm" cudaRealCapabilities; + cudaPTXArchs = archMapper "compute" cudaRealCapabilities; + cudaArchs = cudaRealArchs ++ [ (lib.last cudaPTXArchs) ]; + + cudaArchNames = lib.unique (map (v: cudaMicroarchitectureNames.${lib.versions.major v}) cudaRealCapabilities); + cudaCapabilities = cudaRealCapabilities ++ lib.optional (config.cudaForwardCompat or true) capabilitiesForward; + cudaGencode = gencodeMapper "sm" cudaRealCapabilities ++ lib.optionals (config.cudaForwardCompat or true) (gencodeMapper "compute" [ (lib.last cudaPTXArchs) ]); + + cudaCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaCapabilities; + cudaCapabilitiesSemiColonString = lib.strings.concatStringsSep ";" cudaCapabilities; + cudaRealCapabilitiesCommaString = lib.strings.concatStringsSep "," cudaRealCapabilities; + +in +{ + inherit cudaArchs cudaArchNames 
cudaCapabilities cudaCapabilitiesCommaString cudaCapabilitiesSemiColonString + cudaRealCapabilities cudaRealCapabilitiesCommaString cudaGencode cudaRealArchs cudaPTXArchs; +} diff --git a/pkgs/development/libraries/science/math/magma/default.nix b/pkgs/development/libraries/science/math/magma/default.nix index 05d7d4fa184..06b4e12d04e 100644 --- a/pkgs/development/libraries/science/math/magma/default.nix +++ b/pkgs/development/libraries/science/math/magma/default.nix @@ -1,7 +1,7 @@ { lib, stdenv, fetchurl, cmake, gfortran, ninja, cudaPackages, libpthreadstubs, lapack, blas }: let - inherit (cudaPackages) cudatoolkit; + inherit (cudaPackages) cudatoolkit cudaFlags; in assert let majorIs = lib.versions.major cudatoolkit.version; @@ -10,36 +10,6 @@ assert let majorIs = lib.versions.major cudatoolkit.version; let version = "2.6.2"; - # We define a specific set of CUDA compute capabilities here, - # because CUDA 11 does not support compute capability 3.0. Also, - # we use it to enable newer capabilities that are not enabled - # by magma by default. The list of supported architectures - # can be found in magma's top-level CMakeLists.txt. 
- cudaCapabilities = rec { - cuda9 = [ - "Kepler" # 3.0, 3.5 - "Maxwell" # 5.0 - "Pascal" # 6.0 - "Volta" # 7.0 - ]; - - cuda10 = [ - "Turing" # 7.5 - ] ++ cuda9; - - cuda11 = [ - "sm_35" # sm_30 is not supported by CUDA 11 - "Maxwell" # 5.0 - "Pascal" # 6.0 - "Volta" # 7.0 - "Turing" # 7.5 - "Ampere" # 8.0 - ]; - }; - - capabilityString = lib.strings.concatStringsSep "," - cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}"; - in stdenv.mkDerivation { pname = "magma"; inherit version; @@ -53,7 +23,9 @@ in stdenv.mkDerivation { buildInputs = [ cudatoolkit libpthreadstubs lapack blas ]; - cmakeFlags = [ "-DGPU_TARGET=${capabilityString}" ]; + cmakeFlags = [ + "-DGPU_TARGET=${builtins.concatStringsSep "," cudaFlags.cudaRealArchs}" + ]; doCheck = false; diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index 37bfe4d739f..4018655cc48 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -41,7 +41,6 @@ , zlib # CUDA flags: -, cudaCapabilities ? [ "sm_35" "sm_50" "sm_60" "sm_70" "sm_75" "compute_80" ] , cudaSupport ? false , cudaPackages ? 
{} @@ -50,7 +49,7 @@ }: let - inherit (cudaPackages) cudatoolkit cudnn nccl; + inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; pname = "jaxlib"; version = "0.3.22"; @@ -165,7 +164,7 @@ let build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}" build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}" build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}" - build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${lib.concatStringsSep "," cudaCapabilities}" + build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${cudaFlags.cudaRealCapabilitiesCommaString}" '' + '' CFG ''; diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix index 39461fd7953..a549fe393b9 100644 --- a/pkgs/development/python-modules/tensorflow/default.nix +++ b/pkgs/development/python-modules/tensorflow/default.nix @@ -22,8 +22,6 @@ , tensorboardSupport ? true # XLA without CUDA is broken , xlaSupport ? cudaSupport -# Default from ./configure script -, cudaCapabilities ? [ "sm_35" "sm_50" "sm_60" "sm_70" "sm_75" "compute_80" ] , sse42Support ? stdenv.hostPlatform.sse4_2Support , avx2Support ? stdenv.hostPlatform.avx2Support , fmaSupport ? 
stdenv.hostPlatform.fmaSupport @@ -32,7 +30,7 @@ }: let - inherit (cudaPackages) cudatoolkit cudnn nccl; + inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; in assert cudaSupport -> cudatoolkit != null @@ -305,7 +303,7 @@ let TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}"; GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin"; GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc"; - TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities; + TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArchs; postPatch = '' # bazel 3.3 should work just as well as bazel 3.1 diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 887738f2c5f..17ecd3f280b 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -3,7 +3,6 @@ mklDnnSupport ? true, useSystemNccl ? true, MPISupport ? false, mpi, buildDocs ? false, - cudaArchList ? 
null, # Native build inputs cmake, util-linux, linkFarm, symlinkJoin, which, pybind11, removeReferencesTo, @@ -33,7 +32,7 @@ isPy3k, pythonOlder }: let - inherit (cudaPackages) cudatoolkit cudnn nccl; + inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl; in # assert that everything needed for cuda is present and that the correct cuda versions are used @@ -52,64 +51,6 @@ let paths = [ cudatoolkit.out cudatoolkit.lib nccl.dev nccl.out ]; }; - # Give an explicit list of supported architectures for the build, See: - # - pytorch bug report: https://github.com/pytorch/pytorch/issues/23573 - # - pytorch-1.2.0 build on nixpks: https://github.com/NixOS/nixpkgs/pull/65041 - # - # This list was selected by omitting the TORCH_CUDA_ARCH_LIST parameter, - # observing the fallback option (which selected all architectures known - # from cudatoolkit_10_0, pytorch-1.2, and python-3.6), and doing a binary - # searching to find offending architectures. - # - # NOTE: Because of sandboxing, this derivation can't auto-detect the hardware's - # cuda architecture, so there is also now a problem around new architectures - # not being supported until explicitly added to this derivation. - # - # FIXME: CMake is throwing the following warning on python-1.2: - # - # ``` - # CMake Warning at cmake/public/utils.cmake:172 (message): - # In the future we will require one to explicitly pass TORCH_CUDA_ARCH_LIST - # to cmake instead of implicitly setting it as an env variable. This will - # become a FATAL_ERROR in future version of pytorch. - # ``` - # If this is causing problems for your build, this derivation may have to strip - # away the standard `buildPythonPackage` and use the - # [*Adjust Build Options*](https://github.com/pytorch/pytorch/tree/v1.2.0#adjust-build-options-optional) - # instructions. This will also add more flexibility around configurations - # (allowing FBGEMM to be built in pytorch-1.1), and may future proof this - # derivation. 
- brokenArchs = [ "3.0" ]; # this variable is only used as documentation. - - cudaCapabilities = rec { - cuda9 = [ - "3.5" - "5.0" - "5.2" - "6.0" - "6.1" - "7.0" - "7.0+PTX" # I am getting a "undefined architecture compute_75" on cuda 9 - # which leads me to believe this is the final cuda-9-compatible architecture. - ]; - - cuda10 = cuda9 ++ [ - "7.5" - "7.5+PTX" # < most recent architecture as of cudatoolkit_10_0 and pytorch-1.2.0 - ]; - - cuda11 = cuda10 ++ [ - "8.0" - "8.0+PTX" # < CUDA toolkit 11.0 - "8.6" - "8.6+PTX" # < CUDA toolkit 11.1 - ]; - }; - final_cudaArchList = - if !cudaSupport || cudaArchList != null - then cudaArchList - else cudaCapabilities."cuda${lib.versions.major cudatoolkit.version}"; - # Normally libcuda.so.1 is provided at runtime by nvidia-x11 via # LD_LIBRARY_PATH=/run/opengl-driver/lib. We only use the stub # libcuda.so from cudatoolkit for running tests, so that we don’t have @@ -153,7 +94,7 @@ in buildPythonPackage rec { ]; preConfigure = lib.optionalString cudaSupport '' - export TORCH_CUDA_ARCH_LIST="${lib.strings.concatStringsSep ";" final_cudaArchList}" + export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}" export CC=${cudatoolkit.cc}/bin/gcc CXX=${cudatoolkit.cc}/bin/g++ '' + lib.optionalString (cudaSupport && cudnn != null) '' export CUDNN_INCLUDE_DIR=${cudnn}/include @@ -308,7 +249,6 @@ in buildPythonPackage rec { passthru = { inherit cudaSupport cudaPackages; - cudaArchList = final_cudaArchList; # At least for 1.10.2 `torch.fft` is unavailable unless BLAS provider is MKL. This attribute allows for easy detection of its availability. 
blasProvider = blas.provider; }; diff --git a/pkgs/development/python-modules/torchvision/default.nix b/pkgs/development/python-modules/torchvision/default.nix index 223ef3f1d86..212401efe54 100644 --- a/pkgs/development/python-modules/torchvision/default.nix +++ b/pkgs/development/python-modules/torchvision/default.nix @@ -15,7 +15,7 @@ }: let - inherit (torch.cudaPackages) cudatoolkit cudnn; + inherit (torch.cudaPackages) cudatoolkit cudaFlags cudnn; cudatoolkit_joined = symlinkJoin { name = "${cudatoolkit.name}-unsplit"; @@ -45,7 +45,7 @@ in buildPythonPackage rec { propagatedBuildInputs = [ numpy pillow torch scipy ]; preBuild = lib.optionalString cudaSupport '' - export TORCH_CUDA_ARCH_LIST="${cudaArchStr}" + export TORCH_CUDA_ARCH_LIST="${cudaFlags.cudaCapabilitiesSemiColonString}" export FORCE_CUDA=1 ''; |