"--model " + mailQuery.model.filesystemPath,
"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 5", "--top-p 0.3",
- "--min-p 0",
- "--repeat-penalty 1.05",
- "--dry-multiplier 0.4", // Low‐ish dry-multiplier adds a soft anti-repetition guard without wrecking logic
- "--presence-penalty 0.1",
- "--mirostat 0",
+
+ "--top-k 1", // Restricts token selection to the K tokens with the highest probabilities.
+ // 1 means true greedy decoding.
+
+ "--top-p 0", // Restricts token selection to the smallest possible set
+ // of tokens whose cumulative probability exceeds the specified
+ // threshold P.
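+ // With P set to 0, only the single most probable token survives the filter.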
+
+ "--min-p 0", // Filters the vocabulary to include only tokens whose
+ // probability is at least a certain fraction (Min P) of the
+ // probability of the most likely token.
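+ // 0 disables the Min P filter.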
+
+ // Avoid getting stuck in an endless repetition loop
+ "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive
+ "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition
+
+ "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
+
+ "--presence-penalty 0", // In a code we want the model to reuse the same variable names,
+ // keywords, and syntax consistently. A presence penalty,
+ // even a small 0.1, could cause the model to needlessly
+ // rename variables.
+
+ "--mirostat 0", // Disable mirostat
+
"--no-display-prompt",
"--no-warmup",
"--flash-attn",
- "--temp 0.1",
+ "--temp 0", // Coding tasks need precision, not randomness
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--no-conversation",
"--file " + inputFile
);
- // "--cache-type-k q8_0",
- // "--cache-type-v q8_0",
+ // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable
+ // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable
}