"--model " + mailQuery.model.filesystemPath,
"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 5", "--top-p 0.3",
- "--min-p 0",
- "--repeat-penalty 1.05",
- "--dry-multiplier 0.4", // Low‐ish dry-multiplier adds a soft anti-repetition guard without wrecking logic
- "--presence-penalty 0.1",
- "--mirostat 0",
+
+ "--top-k 1", // Restricts token selection to the K tokens with the highest probabilities.
+ // 1 means true greedy decoding.
+
+ "--top-p 0", // Restricts token selection to the smallest possible set
+ // of tokens whose cumulative probability exceeds the specified
+ // threshold P.
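+ // With P set to 0, only the single most probable token survives the filter.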
+
+ "--min-p 0", // Filters the vocabulary to include only tokens whose
+ // probability is at least a certain fraction (Min P) of the
+ // probability of the most likely token.
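+ // 0 disables the Min P filter.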
+
+ // Avoid getting stuck in an endless repetition loop
+ "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive
+ "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition
+
+ "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence.
+
+ "--presence-penalty 0", // In a code we want the model to reuse the same variable names,
+ // keywords, and syntax consistently. A presence penalty,
+ // even a small 0.1, could cause the model to needlessly
+ // rename variables.
+
+ "--mirostat 0", // Disable mirostat
+
"--no-display-prompt",
"--no-warmup",
"--flash-attn",
- "--temp 0.1",
+ "--temp 0", // Coding tasks need precision, not randomness
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--no-conversation",
"--file " + inputFile
);
- // "--cache-type-k q8_0",
- // "--cache-type-v q8_0",
+ // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable
+ // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable
}