"--model " + mailQuery.model.filesystemPath,
"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 1", "--top-p 0", // Activates true greedy decoding, now the recommended way to force determinism in recent llama.cpp builds
+ "--top-k 5", "--top-p 0.3",
"--min-p 0",
- "--samplers \"top_k\"",
"--repeat-penalty 1.05",
"--dry-multiplier 0.4", // Low‐ish dry-multiplier adds a soft anti-repetition guard without wrecking logic
"--presence-penalty 0.1",
"--no-display-prompt",
"--no-warmup",
"--flash-attn",
- "--temp 0",
+ "--temp 0.1",
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--no-conversation",