/**
* Calculates the effective temperature using the override hierarchy:
* the skill's value first, then the model's value, then the global default.
*
* @return the first non-null temperature found in that order.
*         NOTE(review): the global default is returned without a null check, so with the
*         boxed {@code Float} return type the result is presumably {@code null} when no
*         default is configured — confirm against the configuration class.
*/
- public float getEffectiveTemperature() {
+ public Float getEffectiveTemperature() {
// 1. A temperature defined by the skill (when a skill is set) has the highest priority
- if (skill != null && skill.getTemperature() != null) {
- return skill.getTemperature();
- }
+ if (skill != null && skill.getTemperature() != null) return skill.getTemperature();
// 2. Otherwise use the temperature specified by the model, if any
- if (model.temperature != null) {
- return model.temperature;
- }
+ if (model.temperature != null) return model.temperature;
// 3. Otherwise fall back to the global default from the configuration
return configuration.getDefaultTemperature();
*
* @return the applicable top-p value for this query
*/
- public float getEffectiveTopP() {
+ public Float getEffectiveTopP() {
// Resolution order: skill value, then model value, then global default.
// NOTE(review): the final fallback is returned unchecked, so the boxed result is
// presumably null when no global default is configured — confirm.

// 1. A top-p defined by the skill (when a skill is set) has the highest priority
- if (skill != null && skill.getTopP() != null) {
- return skill.getTopP();
- }
+ if (skill != null && skill.getTopP() != null) return skill.getTopP();
// 2. Otherwise use the top-p specified by the model, if any
- if (model.topP != null) {
- return model.topP;
- }
+ if (model.topP != null) return model.topP;
// 3. Otherwise fall back to the global default from the configuration
return configuration.getDefaultTopP();
*
* @return the applicable repeat penalty for this query
*/
- public float getEffectiveRepeatPenalty() {
+ public Float getEffectiveRepeatPenalty() {
// Resolution order: skill value, then model value, then global default.
// NOTE(review): the final fallback is returned unchecked, so the boxed result is
// presumably null when no global default is configured — confirm.

// 1. A repeat penalty defined by the skill (when a skill is set) has the highest priority
- if (skill != null && skill.getRepeatPenalty() != null) {
- return skill.getRepeatPenalty();
- }
+ if (skill != null && skill.getRepeatPenalty() != null) return skill.getRepeatPenalty();
// 2. Otherwise use the repeat penalty specified by the model, if any
- if (model.repeatPenalty != null) {
- return model.repeatPenalty;
- }
+ if (model.repeatPenalty != null) return model.repeatPenalty;
// 3. Otherwise fall back to the global default from the configuration
return configuration.getDefaultRepeatPenalty();
}
/**
* Resolves the effective top-k value: the skill's value when a skill is set and
* defines one, otherwise the model's value, otherwise the global default from the
* configuration.
*
* @return the first non-null top-k found in that order.
*         NOTE(review): the global default is returned unchecked, so the boxed result is
*         presumably {@code null} when no default is configured — confirm.
*/
- public float getEffectiveTopK() {
+ public Float getEffectiveTopK() {
// Skill override wins; the else-branches below are only reached without one.
if (skill != null && skill.getTopK() != null) return skill.getTopK();
- else if (model.topK != null) return model.topK.floatValue();
- else return configuration.getDefaultTopK().floatValue();
+ else if (model.topK != null) return model.topK;
+ else return configuration.getDefaultTopK();
}
/**
* Resolves the effective min-p value: the skill's value when a skill is set and
* defines one, otherwise the model's value, otherwise the global default from the
* configuration.
*
* @return the first non-null min-p found in that order.
*         NOTE(review): the global default is returned unchecked, so the boxed result is
*         presumably {@code null} when no default is configured — confirm.
*/
- public float getEffectiveMinP() {
+ public Float getEffectiveMinP() {
// Skill override wins; the else-branches below are only reached without one.
if (skill != null && skill.getMinP() != null) return skill.getMinP();
- else if (model.minP != null) return model.minP.floatValue();
- else return configuration.getDefaultMinP().floatValue();
+ else if (model.minP != null) return model.minP;
+ else return configuration.getDefaultMinP();
}
import java.io.*;
import java.nio.file.Files;
+import java.util.ArrayList;
import static eu.svjatoslav.alyverkko_cli.Main.configuration;
import static java.lang.String.join;
int niceValue = 10; // niceness level for background tasks
String executablePath = configuration.getLlamaCliPath().getAbsolutePath();
- return join(" ",
- "nice", "-n", Integer.toString(niceValue),
- executablePath,
- "--model " + task.model.filesystemPath,
- "--threads " + configuration.getThreadCount(),
- "--threads-batch " + configuration.getBatchThreadCount(),
+ // Collect the arguments in a list so that sampling flags whose effective value
+ // is null can be left off the command line entirely.
+ ArrayList<String> args = new ArrayList<>();
+ args.add("nice -n " + niceValue);
+ args.add(executablePath);
+ args.add("--model " + task.model.filesystemPath);
+ args.add("--threads " + configuration.getThreadCount());
+ args.add("--threads-batch " + configuration.getBatchThreadCount());
- // Restricts token selection to the smallest possible set of tokens whose cumulative probability
- // exceeds the specified threshold P.
- "--top-p " + task.getEffectiveTopP(),
+ // Top-p: restricts token selection to the smallest possible set of tokens whose
+ // cumulative probability exceeds the specified threshold P.
+ Float topP = task.getEffectiveTopP();
+ if (topP != null) args.add("--top-p " + topP);
- "--repeat-penalty " + task.getEffectiveRepeatPenalty(),
+ // Top-k: restricts token selection to the K tokens with the highest probabilities.
+ Float topK = task.getEffectiveTopK();
+ if (topK != null) args.add("--top-k " + topK);
- // Restricts token selection to the K tokens with the highest probabilities.
- "--top-k " + task.getEffectiveTopK(),
+ // Min-p: keeps only tokens whose probability is at least a certain fraction
+ // (Min P) of the probability of the most likely token.
+ Float minP = task.getEffectiveMinP();
+ if (minP != null) args.add("--min-p " + minP);
- // Filters the vocabulary to include only tokens whose
- // probability is at least a certain fraction (Min P) of the
- // probability of the most likely token.
- "--min-p " + task.getEffectiveMinP(),
+ Float repetitionPenalty = task.getEffectiveRepeatPenalty();
+ if (repetitionPenalty != null) args.add("--repeat-penalty " + repetitionPenalty);
- // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on
- // "--chat-format qwen3",
+ // Last n tokens to consider for penalizing repetition
+ args.add("--repeat-last-n 512");
- // Last n tokens to consider for penalizing repetition
- "--repeat-last-n 512",
+ args.add("--mirostat 0"); // Disable mirostat
+ args.add("--no-display-prompt");
+ args.add("--no-warmup");
+ args.add("--flash-attn on");
- // Controls the strength of the penalty for a detected repetition sequence.
- //"--dry-multiplier 0.1",
+ // Emit exactly the value that was null-checked instead of querying the task a second time.
+ Float temperature = task.getEffectiveTemperature();
+ if (temperature != null) args.add("--temp " + temperature);
- // In a code we want the model to reuse the same variable names, keywords, and syntax consistently.
- // A presence penalty, even a small 0.1, could cause the model to needlessly rename variables.
- //"--presence-penalty 1",
+ args.add("--ctx-size " + task.model.contextSizeTokens);
+ args.add("--batch-size 512");
+ args.add("--single-turn"); // run conversation for a single turn only, then exit when done
+ args.add("-n -1");
+ args.add("--file " + inputFile);
- "--mirostat 0", // Disable mirostat
+ // Not enabled yet: both cache-type options might save RAM, but it still needs to be
+ // tested whether the precision loss is acceptable.
+ // args.add("--cache-type-k q8_0");
+ // args.add("--cache-type-v q8_0");
+ return join(" ", args);
- "--no-display-prompt",
- "--no-warmup",
- "--flash-attn on",
- "--temp " + task.getEffectiveTemperature(),
- "--ctx-size " + task.model.contextSizeTokens,
- "--batch-size 512",
- "--single-turn", // run conversation for a single turn only, then exit when done
- "-n -1",
- "--file " + inputFile
-
- // "--cache-type-k q8_0",
- // might save RAM, need to test if precision loss is acceptable
-
- // might save RAM, need to test if precision loss is acceptable
- // "--cache-type-v q8_0",
-
- );
}