From a5e0fc9f53c0def4328e5d4075ea1d0ef3f1433e Mon Sep 17 00:00:00 2001
From: Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Date: Fri, 20 Jun 2025 21:28:23 +0300
Subject: [PATCH] Experimental inference settings

---
 .../eu/svjatoslav/alyverkko_cli/AiTask.java     | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java
index 8df6bd7..59e4aa7 100644
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java
@@ -239,19 +239,17 @@ public class AiTask {
                 "--model " + mailQuery.model.filesystemPath,
                 "--threads " + configuration.getThreadCount(),
                 "--threads-batch " + configuration.getBatchThreadCount(),
-                "--top-k 40",
-                "--top-p 0.95",
-                "--min-p 0.1",
-                "--repeat-penalty 1.1",
-                "--dry-multiplier 0.5",
+                "--top-k 1", "--top-p 0", // Activates true greedy decoding, now the recommended way to force determinism in recent llama.cpp builds
+                "--min-p 0",
+                "--samplers \"dry;top_k;temperature\"",
+                "--repeat-penalty 1.05",
+                "--dry-multiplier 0.4", // Low-ish dry-multiplier adds a soft anti-repetition guard without wrecking logic
                 "--presence-penalty 0.1",
                 "--mirostat 0",
-             //   "--cache-type-k q8_0",
-             //   "--cache-type-v q8_0",
                 "--no-display-prompt",
                 "--no-warmup",
                 "--flash-attn",
-                "--temp 0.6",
+                "--temp 0",
                 "--ctx-size " + mailQuery.model.contextSizeTokens,
                 "--batch-size 512",
                 "--no-conversation",
@@ -259,6 +257,9 @@ public class AiTask {
                 "--file " + inputFile
         );
 
+        //   "--cache-type-k q8_0",
+        //   "--cache-type-v q8_0",
+
     }
 
     /**
-- 
2.20.1