From b5cab1b69d2caa333e76d3a14aa476aea8e5175a Mon Sep 17 00:00:00 2001 From: Svjatoslav Agejenko Date: Mon, 1 Jul 2024 00:53:15 +0300 Subject: [PATCH] Enable flash attention. Use new binary name. --- src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java index 2beb037..09c5f4e 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java @@ -11,7 +11,7 @@ import static java.lang.String.join; public class AiTask { public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; - public static final String MAIN_EXECUTABLE_NAME = "main"; + public static final String MAIN_EXECUTABLE_NAME = "llama-cli"; MailQuery mailQuery; private final Float temperature; @@ -153,6 +153,7 @@ public class AiTask { "--threads-batch " + configuration.getBatchThreadCount(), "--mirostat 2", "--log-disable", + "--flash-attn", "--temp " + temperature, "--ctx-size " + mailQuery.model.contextSizeTokens, "--batch-size 8", -- 2.20.1