Update llama-cpp CLI arguments

author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>

Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)

committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>

Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java

index fe360fe..9bb8bb7 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java
@@ -240,16 +240,17 @@ public class AiTask {
                  "--threads " + configuration.getThreadCount(),
                  "--threads-batch " + configuration.getBatchThreadCount(),
  
-                "--top-k 1", // Restricts token selection to the K tokens with the highest probabilities.
-                             // 1 mean true greedy decoding.
+                "--top-k 20",   // Restricts token selection to the K tokens with the highest probabilities.
  
-                "--top-p 0",   // Restricts token selection to the smallest possible set
-                               // of tokens whose cumulative probability exceeds the specified
-                               // threshold P.
+                "--top-p 0.95", // Restricts token selection to the smallest possible set
+                // of tokens whose cumulative probability exceeds the specified
+                // threshold P.
  
-                "--min-p 0",    // Filters the vocabulary to include only tokens whose
-                                // probability is at least a certain fraction (Min P) of the
-                                // probability of the most likely token.
+                "--min-p 0.1",  // Filters the vocabulary to include only tokens whose
+                // probability is at least a certain fraction (Min P) of the
+                // probability of the most likely token.
+
+                // "--chat-format qwen3",   // Disabled. Would ensure that the model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on.
  
                  // Avoid getting stuck in a forever repetition loop
                  "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive
@@ -258,19 +259,19 @@ public class AiTask {
                  "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
  
                  "--presence-penalty 0", // In a code we want the model to reuse the same variable names,
-                                        // keywords, and syntax consistently. A presence penalty,
-                                        // even a small 0.1, could cause the model to needlessly
-                                        // rename variables.
+                // keywords, and syntax consistently. A presence penalty,
+                // even a small 0.1, could cause the model to needlessly
+                // rename variables.
  
                  "--mirostat 0", // Disable mirostat
  
                  "--no-display-prompt",
                  "--no-warmup",
-                "--flash-attn",
-                "--temp 0", // Coding tasks need precision, not randomness
+                "--flash-attn on",
+                "--temp 0.6",
                  "--ctx-size " + mailQuery.model.contextSizeTokens,
                  "--batch-size 512",
-                "--no-conversation",
+                "--single-turn", // run conversation for a single turn only, then exit when done
                  "-n -1",
                  "--file " + inputFile
          );
author	Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
	Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)
committer	Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
	Tue, 4 Nov 2025 19:30:25 +0000 (21:30 +0200)