From a5e0fc9f53c0def4328e5d4075ea1d0ef3f1433e Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Fri, 20 Jun 2025 21:28:23 +0300 Subject: [PATCH] Experimental inference settings --- .../eu/svjatoslav/alyverkko_cli/AiTask.java | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java index 8df6bd7..59e4aa7 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java @@ -239,19 +239,17 @@ public class AiTask { "--model " + mailQuery.model.filesystemPath, "--threads " + configuration.getThreadCount(), "--threads-batch " + configuration.getBatchThreadCount(), - "--top-k 40", - "--top-p 0.95", - "--min-p 0.1", - "--repeat-penalty 1.1", - "--dry-multiplier 0.5", + "--top-k 1", "--top-p 0", // Activates true greedy decoding, now the recommended way to force determinism in recent llama.cpp builds + "--min-p 0", + "--samplers \"dry;top_k;temperature\"", + "--repeat-penalty 1.05", + "--dry-multiplier 0.4", // Low‐ish dry-multiplier adds a soft anti-repetition guard without wrecking logic "--presence-penalty 0.1", "--mirostat 0", - // "--cache-type-k q8_0", - // "--cache-type-v q8_0", "--no-display-prompt", "--no-warmup", "--flash-attn", - "--temp 0.6", + "--temp 0", "--ctx-size " + mailQuery.model.contextSizeTokens, "--batch-size 512", "--no-conversation", @@ -259,6 +257,9 @@ public class AiTask { "--file " + inputFile ); + // "--cache-type-k q8_0", + // "--cache-type-v q8_0", + } /** -- 2.20.1