Diffstat (limited to 'pkgs/tools/misc/ollama')
 pkgs/tools/misc/ollama/default.nix             | 37 ++++++++++++++++-------
 pkgs/tools/misc/ollama/disable-gqa.patch       | 15 +++++++++++
 pkgs/tools/misc/ollama/set-llamacpp-path.patch | 23 ++++++++++++++
 3 files changed, 64 insertions(+), 11 deletions(-)
diff --git a/pkgs/tools/misc/ollama/default.nix b/pkgs/tools/misc/ollama/default.nix
index be186402488..30be00d72a1 100644
--- a/pkgs/tools/misc/ollama/default.nix
+++ b/pkgs/tools/misc/ollama/default.nix
@@ -1,35 +1,50 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
+, llama-cpp
 , stdenv
-, darwin
 }:
 
 buildGoModule rec {
   pname = "ollama";
-  version = "0.0.17";
+  version = "0.1.7";
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-idsFcjsRD1zPmG742gnYQJcgSWDA2DLMHksCFNe2GiY=";
+    hash = "sha256-rzcuRU2qcYTMo/GxiSHwJYnvA9samfWlztMEhOGzbRg=";
   };
 
-  buildInputs = lib.optionals stdenv.isDarwin (with darwin.apple_sdk_11_0.frameworks; [
-    Accelerate
-    MetalPerformanceShaders
-    MetalKit
-  ]);
+  patches = [
+    # disable passing the deprecated gqa flag to llama-cpp-server
+    # see https://github.com/ggerganov/llama.cpp/issues/2975
+    ./disable-gqa.patch
 
-  vendorHash = "sha256-IgEf/WOc1eNGCif1fViIFxbgZAd6mHBqfxcaqH/WvGg=";
+    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
+    ./set-llamacpp-path.patch
+  ];
 
-  ldflags = [ "-s" "-w" ];
+  postPatch = ''
+    substituteInPlace llm/llama.go \
+      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
+  '';
+
+  vendorHash = "sha256-Qt5QVqRkwK61BJPVhFWtox6b9E8BpAIseNB0yhh+/90=";
+
+  ldflags = [
+    "-s"
+    "-w"
+    "-X=github.com/jmorganca/ollama/version.Version=${version}"
+    "-X=github.com/jmorganca/ollama/server.mode=release"
+  ];
 
   meta = with lib; {
     description = "Get up and running with large language models locally";
     homepage = "https://github.com/jmorganca/ollama";
     license = licenses.mit;
-    maintainers = with maintainers; [ dit7ya ];
+    mainProgram = "ollama";
+    maintainers = with maintainers; [ dit7ya elohmeier ];
+    platforms = platforms.unix;
   };
 }
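Note on the new ldflags: the -X linker flags stamp build metadata into package-level string variables at link time. A minimal sketch of how the version flag lands, assuming the upstream version package declares a plain string variable (the default value shown here is an assumption for illustration, not copied from the ollama source):

// Assumed shape of github.com/jmorganca/ollama/version. The flag
// -X=github.com/jmorganca/ollama/version.Version=0.1.7 rewrites
// Version at link time, so `ollama --version` reports the packaged
// release instead of this placeholder default.
package version

var Version = "0.0.0"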
diff --git a/pkgs/tools/misc/ollama/disable-gqa.patch b/pkgs/tools/misc/ollama/disable-gqa.patch
new file mode 100644
index 00000000000..b54440cd3d5
--- /dev/null
+++ b/pkgs/tools/misc/ollama/disable-gqa.patch
@@ -0,0 +1,15 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index 0b460e9..b79e04a 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
+ 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
+ 	}
+ 
+-	if opts.NumGQA > 0 {
+-		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
+-	}
+-
+ 	if len(adapters) > 0 {
+ 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
+ 		params = append(params, "--lora", adapters[0])
diff --git a/pkgs/tools/misc/ollama/set-llamacpp-path.patch b/pkgs/tools/misc/ollama/set-llamacpp-path.patch
new file mode 100644
index 00000000000..e90e552bab4
--- /dev/null
+++ b/pkgs/tools/misc/ollama/set-llamacpp-path.patch
@@ -0,0 +1,23 @@
+diff --git a/llm/llama.go b/llm/llama.go
+index f23d5d8..6563550 100644
+--- a/llm/llama.go
++++ b/llm/llama.go
+@@ -25,7 +25,6 @@ import (
+ 	"github.com/jmorganca/ollama/api"
+ )
+ 
+-//go:embed llama.cpp/*/build/*/bin/*
+ var llamaCppEmbed embed.FS
+ 
+ type ModelRunner struct {
+@@ -33,6 +32,10 @@ type ModelRunner struct {
+ }
+ 
+ func chooseRunners(workDir, runnerType string) []ModelRunner {
++	return []ModelRunner{
++		{Path: "@llamaCppServer@"},
++	}
++
+ 	buildPath := path.Join("llama.cpp", runnerType, "build")
+ 	var runners []string
+
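The inserted early return makes the embed-and-extract logic that follows it unreachable, which is the point of the patch: the runner binary no longer ships inside the Go executable. Combined with the substituteInPlace step in postPatch, which replaces the literal @llamaCppServer@ token with the llama-cpp store path, chooseRunners effectively reduces to the sketch below (the store path is illustrative; the real one is pinned by the llama-cpp derivation at build time):

// Effective chooseRunners after set-llamacpp-path.patch plus the
// postPatch substitution: every model is served by the packaged
// llama-cpp-server rather than an embedded binary.
func chooseRunners(workDir, runnerType string) []ModelRunner {
	return []ModelRunner{
		{Path: "/nix/store/<hash>-llama-cpp/bin/llama-cpp-server"},
	}
}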