"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 1", // Restricts token selection to the K tokens with the highest probabilities.
- // 1 mean true greedy decoding.
+ "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities.
- "--top-p 0", // Restricts token selection to the smallest possible set
- // of tokens whose cumulative probability exceeds the specified
- // threshold P.
+ "--top-p 0.95", // Restricts token selection to the smallest possible set
+ // of tokens whose cumulative probability exceeds the specified
+ // threshold P.
- "--min-p 0", // Filters the vocabulary to include only tokens whose
- // probability is at least a certain fraction (Min P) of the
- // probability of the most likely token.
+ "--min-p 0.1", // Filters the vocabulary to include only tokens whose
+ // probability is at least a certain fraction (Min P) of the
+ // probability of the most likely token.
+
+ // "--chat-format qwen3", // Ensure that the model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on
// Avoid getting stuck in an endless repetition loop
"--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive
"--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
"--presence-penalty 0", // In a code we want the model to reuse the same variable names,
- // keywords, and syntax consistently. A presence penalty,
- // even a small 0.1, could cause the model to needlessly
- // rename variables.
+ // keywords, and syntax consistently. A presence penalty,
+ // even a small 0.1, could cause the model to needlessly
+ // rename variables.
"--mirostat 0", // Disable mirostat
"--no-display-prompt",
"--no-warmup",
- "--flash-attn",
- "--temp 0", // Coding tasks need precision, not randomness
+ "--flash-attn on",
+ "--temp 0.6",
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
- "--no-conversation",
+ "--single-turn", // run conversation for a single turn only, then exit when done
"-n -1",
"--file " + inputFile
);