python3Packages.tensorflow: fix `GLIBCXX_3.4.30' not found

Make tensorflow (and a bunch of other things) use a CUDA-compatible toolchain. Introduces cudaPackages.backendStdenv.
author: Someone Serge <sergei.kozlukov@aalto.fi> 2023-02-27 16:28:07 +0200
committer: Someone Serge <sergei.kozlukov@aalto.fi> 2023-03-04 01:03:51 +0200
commit: 5f4bdbe6c387bf740025581d94bbfba9a887c76f (patch)
tree: 99182b77f71e23c59a39dea0c73de0eb43c6b328
parent: d378cc6fb23d67f3d9f86c39051f810c563789ca (diff)
download: nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.gz
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.bz2
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.lz
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.xz
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.zst
nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.zip
8 files changed, 88 insertions, 69 deletions
diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix
index a94f6fbdaf7..e6d7cbc377c 100644
--- a/pkgs/development/compilers/cudatoolkit/common.nix
+++ b/pkgs/development/compilers/cudatoolkit/common.nix
@@ -11,7 +11,7 @@ args@
 , fetchurl
 , fontconfig
 , freetype
-, gcc
+, gcc # :: String
 , gdk-pixbuf
 , glib
 , glibc
@@ -22,13 +22,13 @@ args@
 , perl
 , python3
 , requireFile
-, stdenv
+, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
 , unixODBC
 , xorg
 , zlib
 }:
 
-stdenv.mkDerivation rec {
+backendStdenv.mkDerivation rec {
   pname = "cudatoolkit";
   inherit version runPatches;
 
@@ -146,37 +146,24 @@ stdenv.mkDerivation rec {
 
     # Fix builds with newer glibc version
     sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"
-
-    # Ensure that cmake can find CUDA.
+  '' +
+  # Point NVCC at a compatible compiler
+  # FIXME: redist cuda_nvcc copy-pastes this code
+  # Refer to comments in the overrides for cuda_nvcc for explanation
+  # CUDA_TOOLKIT_ROOT_DIR is legacy,
+  # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+  ''
     mkdir -p $out/nix-support
-    echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook
-
-    # Set the host compiler to be used by nvcc.
-    # FIXME: redist cuda_nvcc copy-pastes this code
-
-    # For CMake-based projects:
-    # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
-    # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
-    # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
-
-    # For non-CMake projects:
-    # FIXME: results in "incompatible redefinition" warnings ...but we keep
-    # both this and cmake variables until we come up with a more general
-    # solution
-    # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
-
     cat <<EOF >> $out/nix-support/setup-hook
-
-    cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'
-    cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${gcc}/bin'
+    cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
+    cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
+    cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
     if [ -z "\''${CUDAHOSTCXX-}" ]; then
-      export CUDAHOSTCXX=${gcc}/bin;
+      export CUDAHOSTCXX=${backendStdenv.cc}/bin;
     fi
-
-    export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${gcc}/bin'
+    export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
     EOF
 
-
     # Move some libraries to the lib output so that programs that
     # depend on them don't pull in this entire monstrosity.
     mkdir -p $lib/lib
@@ -212,11 +199,10 @@ stdenv.mkDerivation rec {
 
       # The path to libstdc++ and such
       #
-      # NB:
-      # 1. "gcc" (gcc-wrapper) here is what's exposed as cudaPackages.cudatoolkit.cc
-      # 2. "gcc.cc" is the unwrapped gcc
-      # 3. "gcc.cc.lib" is one of its outputs
-      "${gcc.cc.lib}/lib64"
+      # `backendStdenv` is the cuda-compatible toolchain that we pick in
+      # extension.nix; we hand it to NVCC to use as a back-end, and we link
+      # cudatoolkit's binaries against its libstdc++
+      "${backendStdenv.cc.cc.lib}/lib64"
 
       "$out/jre/lib/amd64/jli"
       "$out/lib64"
@@ -286,7 +272,7 @@ stdenv.mkDerivation rec {
     popd
   '';
   passthru = {
-    cc = gcc;
+    cc = backendStdenv.cc;
     majorMinorVersion = lib.versions.majorMinor version;
     majorVersion = lib.versions.majorMinor version;
   };
diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix
index c11f12b118a..72cab97f8ff 100644
--- a/pkgs/development/compilers/cudatoolkit/extension.nix
+++ b/pkgs/development/compilers/cudatoolkit/extension.nix
@@ -7,11 +7,24 @@ final: prev: let
   # Version info for the classic cudatoolkit packages that contain everything that is in redist.
   cudatoolkitVersions = final.lib.importTOML ./versions.toml;
 
+  finalVersion = cudatoolkitVersions.${final.cudaVersion};
+
+  # Exposed as cudaPackages.backendStdenv.
+  # We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
+  # Instead, it's the back-end toolchain for nvcc to use.
+  # We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
+  # Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
+  backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";
+
   ### Add classic cudatoolkit package
-  cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
+  cudatoolkit = buildCudaToolkitPackage (finalVersion // { inherit backendStdenv; });
 
   cudaFlags = final.callPackage ./flags.nix {};
 
-in {
-  inherit cudatoolkit cudaFlags;
+in
+{
+  inherit
+    backendStdenv
+    cudatoolkit
+    cudaFlags;
 }
diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
index 3bf9184eefa..1b216ee625a 100644
--- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
@@ -1,5 +1,5 @@
 { lib
-, stdenv
+, backendStdenv
 , fetchurl
 , autoPatchelfHook
 , autoAddOpenGLRunpathHook
@@ -11,7 +11,7 @@ attrs:
 let
   arch = "linux-x86_64";
 in
-stdenv.mkDerivation {
+backendStdenv.mkDerivation {
   inherit pname;
   inherit (attrs) version;
 
@@ -33,11 +33,8 @@ stdenv.mkDerivation {
     # autoPatchelfHook will search for a libstdc++ and we're giving it a
     # "compatible" libstdc++ from the same toolchain that NVCC uses.
     #
-    # E.g. it might happen that stdenv=gcc12Stdenv, but we build against cuda11
-    # that only "supports" gcc11. Linking against gcc12's libraries we might
-    # sometimes actually sometimes encounter dynamic linkage errors at runtime
     # NB: We don't actually know if this is the right thing to do
-    cudatoolkit.cc.cc.lib
+    backendStdenv.cc.cc.lib
   ];
 
   dontBuild = true;
@@ -51,7 +48,7 @@ stdenv.mkDerivation {
     runHook postInstall
   '';
 
-  passthru.stdenv = stdenv;
+  passthru.stdenv = backendStdenv;
 
   meta = {
     description = attrs.name;
diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
index 663af1db763..96b782d8c99 100644
--- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
@@ -24,7 +24,7 @@ in
 
   cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs:
     let
-      inherit (prev.cudatoolkit) cc;
+      inherit (prev.backendStdenv) cc;
     in
     {
       # Point NVCC at a compatible compiler
@@ -44,7 +44,6 @@ in
       postInstall = (oldAttrs.postInstall or "") + ''
         mkdir -p $out/nix-support
         cat <<EOF >> $out/nix-support/setup-hook
-        cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
         cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin'
         cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin'
         if [ -z "\''${CUDAHOSTCXX-}" ]; then
diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix
index d4e1f641a95..b2844ae6b07 100644
--- a/pkgs/development/libraries/science/math/cudnn/generic.nix
+++ b/pkgs/development/libraries/science/math/cudnn/generic.nix
@@ -1,11 +1,11 @@
 {
-  stdenv,
+  backendStdenv,
   lib,
   zlib,
   useCudatoolkitRunfile ? false,
   cudaVersion,
   cudaMajorVersion,
-  cudatoolkit, # if cuda>=11: only used for .cc
+  cudatoolkit, # For cuda < 11
   libcublas ? null, # cuda <11 doesn't ship redist packages
   autoPatchelfHook,
   autoAddOpenGLRunpathHook,
@@ -26,7 +26,7 @@
   maxCudaVersion,
 }:
 assert useCudatoolkitRunfile || (libcublas != null); let
-  inherit (cudatoolkit) cc;
+  inherit (backendStdenv) cc;
   inherit (lib) lists strings trivial versions;
 
   # majorMinorPatch :: String -> String
@@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let
     then cudatoolkit
     else libcublas;
 in
-  stdenv.mkDerivation {
+  backendStdenv.mkDerivation {
     pname = "cudatoolkit-${cudaMajorVersion}-cudnn";
     version = versionTriple;
 
diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix
index 3447087051f..31090f715c2 100644
--- a/pkgs/development/libraries/science/math/tensorrt/generic.nix
+++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix
@@ -1,5 +1,5 @@
 { lib
-, stdenv
+, backendStdenv
 , requireFile
 , autoPatchelfHook
 , autoAddOpenGLRunpathHook
@@ -18,7 +18,7 @@
 assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
   "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})";
 
-stdenv.mkDerivation rec {
+backendStdenv.mkDerivation rec {
   pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt";
   version = fullVersion;
   src = requireFile rec {
@@ -45,7 +45,7 @@ stdenv.mkDerivation rec {
 
   # Used by autoPatchelfHook
   buildInputs = [
-    cudatoolkit.cc.cc.lib # libstdc++
+    backendStdenv.cc.cc.lib # libstdc++
     cudatoolkit
     cudnn
   ];
@@ -74,6 +74,8 @@ stdenv.mkDerivation rec {
         "$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}"
     '';
 
+  passthru.stdenv = backendStdenv;
+
   meta = with lib; {
     # Check that the cudatoolkit version satisfies our min/max constraints (both
     # inclusive). We mark the package as broken if it fails to satisfies the
diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix
index f18a924c31f..adc7b1c1e0b 100644
--- a/pkgs/development/python-modules/tensorflow/default.nix
+++ b/pkgs/development/python-modules/tensorflow/default.nix
@@ -32,6 +32,26 @@
 }:
 
 let
+  originalStdenv = stdenv;
+in
+let
+  # Tensorflow looks at many toolchain-related variables which may diverge.
+  #
+  # Toolchain for cuda-enabled builds.
+  # We want to achieve two things:
+  # 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11)
+  # 2. Normal C++ files should be compiled with the same toolchain,
+  #    to avoid potential weird dynamic linkage errors at runtime.
+  #    This may not be necessary though
+  #
+  # Toolchain for Darwin:
+  # clang 7 fails to emit a symbol for
+  # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
+  # translation units, so the build fails at link time
+  stdenv =
+    if cudaSupport then cudaPackages.backendStdenv
+    else if originalStdenv.isDarwin then llvmPackages_11.stdenv
+    else originalStdenv;
   inherit (cudaPackages) cudatoolkit cudnn nccl;
 in
 
@@ -44,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport);
 let
   withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
 
+  # FIXME: migrate to redist cudaPackages
   cudatoolkit_joined = symlinkJoin {
     name = "${cudatoolkit.name}-merged";
     paths = [
@@ -56,10 +77,13 @@ let
     ];
   };
 
+  # Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
+  # The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX,
+  # but that path must contain cc as well, so we merge them
   cudatoolkit_cc_joined = symlinkJoin {
-    name = "${cudatoolkit.cc.name}-merged";
+    name = "${stdenv.cc.name}-merged";
     paths = [
-      cudatoolkit.cc
+      stdenv.cc
       binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
     ];
   };
@@ -175,12 +199,7 @@ let
     '';
   }) else _bazel-build;
 
-  _bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin {
-    # clang 7 fails to emit a symbol for
-    # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
-    # translation units, so the build fails at link time
-    stdenv = llvmPackages_11.stdenv;
-  })) {
+  _bazel-build = buildBazelPackage.override { inherit stdenv; } {
     name = "${pname}-${version}";
     bazel = bazel_5;
 
@@ -211,12 +230,13 @@ let
       flatbuffers-core
       giflib
       grpc
-      icu
+      # Necessary to fix the "`GLIBCXX_3.4.30' not found" error
+      (icu.override { inherit stdenv; })
       jsoncpp
       libjpeg_turbo
       libpng
       lmdb-core
-      pybind11
+      (pybind11.overridePythonAttrs (_: { inherit stdenv; }))
       snappy
       sqlite
     ] ++ lib.optionals cudaSupport [
@@ -301,10 +321,12 @@ let
 
     TF_NEED_CUDA = tfFeature cudaSupport;
     TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
-    GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
-    GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
     TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
 
+    # Needed even when we override stdenv: e.g. for ar
+    GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
+    GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc";
+
     postPatch = ''
       # bazel 3.3 should work just as well as bazel 3.1
       rm -f .bazelversion
diff --git a/pkgs/test/cuda/cuda-library-samples/generic.nix b/pkgs/test/cuda/cuda-library-samples/generic.nix
index e01664bab31..e9a481c94a7 100644
--- a/pkgs/test/cuda/cuda-library-samples/generic.nix
+++ b/pkgs/test/cuda/cuda-library-samples/generic.nix
@@ -1,4 +1,4 @@
-{ lib, stdenv, fetchFromGitHub
+{ lib, backendStdenv, fetchFromGitHub
 , cmake, addOpenGLRunpath
 , cudatoolkit
 , cutensor
@@ -35,13 +35,13 @@ let
 in
 
 {
-  cublas = stdenv.mkDerivation (commonAttrs // {
+  cublas = backendStdenv.mkDerivation (commonAttrs // {
     pname = "cuda-library-samples-cublas";
 
     src = "${src}/cuBLASLt";
   });
 
-  cusolver = stdenv.mkDerivation (commonAttrs // {
+  cusolver = backendStdenv.mkDerivation (commonAttrs // {
     pname = "cuda-library-samples-cusolver";
 
     src = "${src}/cuSOLVER";
@@ -49,7 +49,7 @@ in
     sourceRoot = "cuSOLVER/gesv";
   });
 
-  cutensor = stdenv.mkDerivation (commonAttrs // {
+  cutensor = backendStdenv.mkDerivation (commonAttrs // {
     pname = "cuda-library-samples-cutensor";
 
     src = "${src}/cuTENSOR";
author	Someone Serge <sergei.kozlukov@aalto.fi>	2023-02-27 16:28:07 +0200
committer	Someone Serge <sergei.kozlukov@aalto.fi>	2023-03-04 01:03:51 +0200
commit	5f4bdbe6c387bf740025581d94bbfba9a887c76f (patch)
tree	99182b77f71e23c59a39dea0c73de0eb43c6b328
parent	d378cc6fb23d67f3d9f86c39051f810c563789ca (diff)
download	nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.gz nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.bz2 nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.lz nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.xz nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.tar.zst nixpkgs-5f4bdbe6c387bf740025581d94bbfba9a887c76f.zip