From: Svjatoslav Agejenko Date: Tue, 4 Nov 2025 19:30:25 +0000 (+0200) Subject: Update llama-cpp CLI arguments X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=commitdiff_plain;h=848bc53db0def086a2c81df9c78dfcfeb87ff282;p=alyverkko-cli.git Update llama-cpp CLI arguments --- diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java index fe360fe..9bb8bb7 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java @@ -240,16 +240,17 @@ public class AiTask { "--threads " + configuration.getThreadCount(), "--threads-batch " + configuration.getBatchThreadCount(), - "--top-k 1", // Restricts token selection to the K tokens with the highest probabilities. - // 1 mean true greedy decoding. + "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities. - "--top-p 0", // Restricts token selection to the smallest possible set - // of tokens whose cumulative probability exceeds the specified - // threshold P. + "--top-p 0.95", // Restricts token selection to the smallest possible set + // of tokens whose cumulative probability exceeds the specified + // threshold P. - "--min-p 0", // Filters the vocabulary to include only tokens whose - // probability is at least a certain fraction (Min P) of the - // probability of the most likely token. + "--min-p 0.1", // Filters the vocabulary to include only tokens whose + // probability is at least a certain fraction (Min P) of the + // probability of the most likely token. + + // "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on // Avoid getting stuck in a forever repetition loop "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive @@ -258,19 +259,19 @@ public class AiTask { "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence. "--presence-penalty 0", // In a code we want the model to reuse the same variable names, - // keywords, and syntax consistently. A presence penalty, - // even a small 0.1, could cause the model to needlessly - // rename variables. + // keywords, and syntax consistently. A presence penalty, + // even a small 0.1, could cause the model to needlessly + // rename variables. "--mirostat 0", // Disable mirostat "--no-display-prompt", "--no-warmup", - "--flash-attn", - "--temp 0", // Coding tasks need precision, not randomness + "--flash-attn on", + "--temp 0.6", "--ctx-size " + mailQuery.model.contextSizeTokens, "--batch-size 512", - "--no-conversation", + "--single-turn", // run conversation for a single turn only, then exit when done "-n -1", "--file " + inputFile );