public class AiTask {
// Literal marker "ASSISTANT:"; presumably marks where the model's reply begins in the raw output — TODO confirm against the parsing code.
public static final String AI_RESPONSE_MARKER = "ASSISTANT:";
// Literal prefix "llm_load_print_meta: "; judging by the name, it identifies llama.cpp's model meta-information log lines — usage is not visible here, confirm.
private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: ";
// Executable name constant; this change switches its value from "main" to "llama-cli" (presumably the llama.cpp CLI binary — confirm against the installed build).
- public static final String MAIN_EXECUTABLE_NAME = "main";
+ public static final String MAIN_EXECUTABLE_NAME = "llama-cli";
// Query this task works on; its model.contextSizeTokens value is passed as the "--ctx-size" argument. Package-private and non-final.
MailQuery mailQuery;
// Sampling temperature, concatenated into the "--temp" argument. Boxed Float, so it may be null — NOTE(review): confirm it is always set.
private final Float temperature;
"--threads-batch " + configuration.getBatchThreadCount(),
"--mirostat 2",
"--log-disable",
+ "--flash-attn",
"--temp " + temperature,
"--ctx-size " + mailQuery.model.contextSizeTokens,
"--batch-size 8",