From: Svjatoslav Agejenko Date: Sun, 30 Jun 2024 21:53:15 +0000 (+0300) Subject: Enable flash attention. Use new binary name. X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=commitdiff_plain;h=b5cab1b69d2caa333e76d3a14aa476aea8e5175a;p=alyverkko-cli.git Enable flash attention. Use new binary name. --- diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java index 2beb037..09c5f4e 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java @@ -11,7 +11,7 @@ import static java.lang.String.join; public class AiTask { public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; - public static final String MAIN_EXECUTABLE_NAME = "main"; + public static final String MAIN_EXECUTABLE_NAME = "llama-cli"; MailQuery mailQuery; private final Float temperature; @@ -153,6 +153,7 @@ public class AiTask { "--threads-batch " + configuration.getBatchThreadCount(), "--mirostat 2", "--log-disable", + "--flash-attn", "--temp " + temperature, "--ctx-size " + mailQuery.model.contextSizeTokens, "--batch-size 8",