Enable flash attention. Use new binary name.
author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sun, 30 Jun 2024 21:53:15 +0000 (00:53 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sun, 30 Jun 2024 21:53:15 +0000 (00:53 +0300)
src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java

index 2beb037..09c5f4e 100644 (file)
@@ -11,7 +11,7 @@ import static java.lang.String.join;
 public class AiTask {
     public static final String AI_RESPONSE_MARKER = "ASSISTANT:";
     private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: ";
-    public static final String MAIN_EXECUTABLE_NAME = "main";
+    public static final String MAIN_EXECUTABLE_NAME = "llama-cli";
 
     MailQuery mailQuery;
     private final Float temperature;
@@ -153,6 +153,7 @@ public class AiTask {
                 "--threads-batch " + configuration.getBatchThreadCount(),
                 "--mirostat 2",
                 "--log-disable",
+                "--flash-attn",
                 "--temp " + temperature,
                 "--ctx-size " + mailQuery.model.contextSizeTokens,
                 "--batch-size 8",