summary refs log tree commit diff
path: root/pkgs/development/libraries/opencv
diff options
context:
space:
mode:
author Samuel Ainsworth <skainsworth@gmail.com> 2023-03-29 21:36:52 -0400
committer GitHub <noreply@github.com> 2023-03-29 21:36:52 -0400
commit 2cc30d5125baace5109dcce88b085426ff0057bc (patch)
tree 28a745b3c197cf4279b4da397e2a5066baec2ca9 /pkgs/development/libraries/opencv
parent 80baf3a2dee3d7429f129355c19b695d168b56c3 (diff)
parent cd16931110f1e5647308c338454eab030a66c791 (diff)
download nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar.gz
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar.bz2
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar.lz
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar.xz
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.tar.zst
nixpkgs-2cc30d5125baace5109dcce88b085426ff0057bc.zip
Merge pull request #221370 from ConnorBaker/feat/opencv-use-cudaPackages-staging-reland
opencv: misc CUDA-related updates and fixes; add enableLto
Diffstat (limited to 'pkgs/development/libraries/opencv')
-rw-r--r-- pkgs/development/libraries/opencv/4.x.nix 68
1 files changed, 58 insertions, 10 deletions
diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix
index a9f7b0304e8..496412dda32 100644
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@@ -37,9 +37,14 @@
 , enableContrib ? true
 
 , enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
-, cudaPackages ? { }
+, enableCublas ? enableCuda
+, enableCudnn ? false # NOTE: CUDNN has a large impact on closure size so we disable it by default
+, enableCufft ? enableCuda
+, cudaPackages ? {}
+, symlinkJoin
 , nvidia-optical-flow-sdk
 
+, enableLto ? true
 , enableUnfree ? false
 , enableIpp ? false
 , enablePython ? false
@@ -79,9 +84,6 @@
 }:
 
 let
-  inherit (cudaPackages) cudatoolkit;
-  inherit (cudaPackages.cudaFlags) cudaCapabilities;
-
   version = "4.7.0";
 
   src = fetchFromGitHub {
@@ -227,6 +229,33 @@ let
   #multithreaded openblas conflicts with opencv multithreading, which manifest itself in hung tests
   #https://github.com/xianyi/OpenBLAS/wiki/Faq/4bded95e8dc8aadc70ce65267d1093ca7bdefc4c#multi-threaded
   openblas_ = blas.provider.override { singleThreaded = true; };
+
+  inherit (cudaPackages) backendStdenv cudaFlags cudaVersion; # backendStdenv.cc is passed to CMake as the C/C++ compiler for CUDA builds; cudaVersion is embedded in the symlinkJoin names
+  inherit (cudaFlags) cudaCapabilities; # NOTE(review): not referenced in the visible hunks — presumably used by CMake flags further down; confirm
+
+  cuda-common-redist = with cudaPackages; [ # CUDA packages shared by cuda-native-redist and cuda-redist
+    cuda_cccl # provides the <thrust/*> headers
+    libnpp # provides npp.h
+  ] ++ lib.optionals enableCublas [ # appended only when enableCublas is true (defaults to enableCuda)
+    libcublas # provides cublas_v2.h
+  ] ++ lib.optionals enableCudnn [ # appended only when enableCudnn is true (off by default because of closure size)
+    cudnn # provides cudnn.h
+  ] ++ lib.optionals enableCufft [ # appended only when enableCufft is true (defaults to enableCuda)
+    libcufft # provides cufft.h
+  ];
+
+  cuda-native-redist = symlinkJoin { # single joined tree that is added to nativeBuildInputs when enableCuda is true
+    name = "cuda-native-redist-${cudaVersion}"; # store name carries the CUDA version
+    paths = with cudaPackages; [
+      cuda_cudart # provides cuda_runtime.h
+      cuda_nvcc # NOTE(review): presumably supplies the nvcc compiler driver — confirm
+    ] ++ cuda-common-redist; # plus every package from the shared list
+   };
+
+  cuda-redist = symlinkJoin { # single joined tree that is added to buildInputs when enableCuda is true
+    name = "cuda-redist-${cudaVersion}"; # store name carries the CUDA version
+    paths = cuda-common-redist; # only the shared list: unlike cuda-native-redist, no cuda_cudart or cuda_nvcc
+   };
 in
 
 stdenv.mkDerivation {
@@ -298,17 +327,18 @@ stdenv.mkDerivation {
     ++ lib.optionals enableTesseract [ tesseract leptonica ]
     ++ lib.optional enableTbb tbb
     ++ lib.optionals stdenv.isDarwin [ bzip2 AVFoundation Cocoa VideoDecodeAcceleration CoreMedia MediaToolbox ]
-    ++ lib.optionals enableDocs [ doxygen graphviz-nox ];
+    ++ lib.optionals enableDocs [ doxygen graphviz-nox ]
+    ++ lib.optionals enableCuda [ cuda-redist ];
 
   propagatedBuildInputs = lib.optional enablePython pythonPackages.numpy
-    ++ lib.optionals enableCuda [ cudatoolkit nvidia-optical-flow-sdk ];
+    ++ lib.optionals enableCuda [ nvidia-optical-flow-sdk ];
 
   nativeBuildInputs = [ cmake pkg-config unzip ]
   ++ lib.optionals enablePython [
     pythonPackages.pip
     pythonPackages.wheel
     pythonPackages.setuptools
-  ];
+  ] ++ lib.optionals enableCuda [ cuda-native-redist ];
 
   env.NIX_CFLAGS_COMPILE = lib.optionalString enableEXR "-I${ilmbase.dev}/include/OpenEXR";
 
@@ -338,12 +368,30 @@ stdenv.mkDerivation {
     (opencvFlag "OPENEXR" enableEXR)
     (opencvFlag "OPENJPEG" enableJPEG2000)
     "-DWITH_JASPER=OFF" # OpenCV falls back to a vendored copy of Jasper when OpenJPEG is disabled
-    (opencvFlag "CUDA" enableCuda)
-    (opencvFlag "CUBLAS" enableCuda)
     (opencvFlag "TBB" enableTbb)
+
+    # CUDA options
+    (opencvFlag "CUDA" enableCuda)
+    (opencvFlag "CUDA_FAST_MATH" enableCuda)
+    (opencvFlag "CUBLAS" enableCublas)
+    (opencvFlag "CUDNN" enableCudnn)
+    (opencvFlag "CUFFT" enableCufft)
+
+    # LTO options
+    (opencvFlag "ENABLE_LTO" enableLto)
+    (opencvFlag "ENABLE_THIN_LTO" (
+      enableLto && (
+        # Only clang supports thin LTO, so we must either be using clang through the stdenv,
+        stdenv.cc.isClang ||
+          # or through the backend stdenv.
+          (enableCuda && backendStdenv.cc.isClang)
+      )
+    ))
   ] ++ lib.optionals enableCuda [
     "-DCUDA_FAST_MATH=ON"
-    "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
+    # We need to set the C and C++ host compilers for CUDA to the same compiler.
+    "-DCMAKE_C_COMPILER=${backendStdenv.cc}/bin/cc"
+    "-DCMAKE_CXX_COMPILER=${backendStdenv.cc}/bin/c++"
     "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
 
     # OpenCV respects at least three variables: