"--model " + mailQuery.model.filesystemPath,
"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 40",
- "--top-p 0.95",
- "--min-p 0.1",
- "--repeat-penalty 1.1",
- "--dry-multiplier 0.5",
+ "--top-k 1", "--top-p 0", // Activates true greedy decoding, now the recommended way to force determinism in recent llama.cpp builds
+ "--min-p 0",
+ "--samplers \"dry;top_k;temperature\"",
+ "--repeat-penalty 1.05",
+ "--dry-multiplier 0.4", // Low‐ish dry-multiplier adds a soft anti-repetition guard without wrecking logic
"--presence-penalty 0.1",
"--mirostat 0",
- // "--cache-type-k q8_0",
- // "--cache-type-v q8_0",
"--no-display-prompt",
"--no-warmup",
"--flash-attn",
- "--temp 0.6",
+ "--temp 0",
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--no-conversation",
"--file " + inputFile
);
+ // "--cache-type-k q8_0",
+ // "--cache-type-v q8_0",
+
}
/**