"--threads " + configuration.getThreadCount(),
"--threads-batch " + configuration.getBatchThreadCount(),
- "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities.
- // 1 mean true greedy decoding.
+ "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities.
- "--top-p 0.95", // Restricts token selection to the smallest possible set
- // of tokens whose cumulative probability exceeds the specified
- // threshold P.
+ "--top-p 0.95", // Restricts token selection to the smallest possible set
+ // of tokens whose cumulative probability exceeds the specified
+ // threshold P.
- "--min-p 0", // Filters the vocabulary to include only tokens whose
+ "--min-p 0.1", // Filters the vocabulary to include only tokens whose
// probability is at least a certain fraction (Min P) of the
// probability of the most likely token.
"--no-display-prompt",
"--no-warmup",
"--flash-attn",
- "--temp 0", // Coding tasks need precision, not randomness
+ "--temp 0.6",
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 512",
"--single-turn", // run conversation for a single turn only, then exit when done