"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities.
+ //"--top-k 20", // Restricts token selection to the K tokens with the highest probabilities.
"--top-p 0.95", // Restricts token selection to the smallest possible set
// of tokens whose cumulative probability exceeds the specified
// threshold P.
- "--min-p 0.1", // Filters the vocabulary to include only tokens whose
+ //"--min-p 0.1", // Filters the vocabulary to include only tokens whose
// probability is at least a certain fraction (Min P) of the
// probability of the most likely token.
// "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on
// Avoid getting stuck in an endless repetition loop
- "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive
+ "--repeat-penalty 1.1", // Very little penalty, because computer code is often repetitive
"--repeat-last-n 512", // Last n tokens to consider for penalizing repetition
- "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
+ //"--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
"--presence-penalty 0", // In code we want the model to reuse the same variable names,
// keywords, and syntax consistently. A presence penalty
// would discourage that reuse, so it is disabled (0).
"--no-display-prompt",
"--no-warmup",
"--flash-attn on",
- "--temp 0.6",
+ "--temp 0.85",
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--single-turn", // run conversation for a single turn only, then exit when done