From: Svjatoslav Agejenko Date: Thu, 21 Aug 2025 20:41:19 +0000 (+0300) Subject: Add important findings X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=commitdiff_plain;h=da1b701b4ffef3413da400d2eb3d4881d79ce871;p=alyverkko-cli.git Add important findings --- diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java deleted file mode 100644 index 896f69b..0000000 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ /dev/null @@ -1,294 +0,0 @@ -package eu.svjatoslav.alyverkko_cli; - -import eu.svjatoslav.alyverkko_cli.commands.mail_correspondant.MailQuery; - -import java.io.*; -import java.nio.file.Files; - -import static eu.svjatoslav.alyverkko_cli.Main.configuration; -import static java.lang.String.join; - -/** - *

- * Executes AI inference tasks through the llama.cpp CLI. This class handles the complete workflow
- * from prompt construction to response formatting, including temporary file management and process execution.
- *
- * Key processing steps:
- *
- *   1. Build standardized input prompt
- *   2. Create a temporary input file
- *   3. Execute llama.cpp with appropriate parameters
- *   4. Capture and filter output
- *   5. Perform cleanup operations
- *
Temperature settings, context size, and thread counts are all derived from the current configuration. - * The response is formatted to match org-mode conventions while preserving original conversation structure. - */ -public class AiTask { - - /** - * Marker for the AI's response block, used in the constructed prompt string. - */ - public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; - - /** - * Marker used by llama.cpp to print metadata. We monitor and display these lines. - */ - private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; - - /** - * The mail query defining system prompt, user prompt, and which model to use. - */ - private final MailQuery mailQuery; - - /** - * The temperature (creativity factor) for the AI. - */ - private final Float temperature; - - /** - * Temporary file used as input to the llama.cpp CLI. - */ - private File inputFile; - - /** - * Creates a new AI task with a given mail query. - * - * @param mailQuery the mail query containing model and prompts. - */ - public AiTask(MailQuery mailQuery) { - this.mailQuery = mailQuery; - this.temperature = configuration.getDefaultTemperature(); - } - - /** - * Builds the prompt text that is fed to llama.cpp, including the system prompt, - * the user prompt, and an "ASSISTANT:" marker signifying where the AI response begins. - * - * @return a string containing the fully prepared query prompt. - */ - private String buildAiQuery() { - return mailQuery.systemPrompt.replace("", mailQuery.userPrompt); - } - - /** - * Runs the AI query by constructing the prompt, writing it to a temp file, - * invoking llama.cpp, collecting output, and performing any final cleanup. - * - * @return the AI's response in a format suitable for appending back into - * the conversation file. - * @throws InterruptedException if the process is interrupted. - * @throws IOException if reading/writing the file fails or the process fails to start. - */ - public String runAiQuery() throws InterruptedException, IOException { - try { - // Record the start time of the query - mailQuery.startTimeMillis = System.currentTimeMillis(); - - // Build input prompt - initializeInputFile(buildAiQuery()); - - // Prepare a process builder - ProcessBuilder processBuilder = new ProcessBuilder(); - processBuilder.command(getCliCommand().split("\\s+")); // Splitting the command string into tokens - - // Start process - Process process = processBuilder.start(); - - // Handle process's error stream - handleErrorThread(process); - - // Handle the process's output stream - StringBuilder result = new StringBuilder(); - Thread outputThread = handleResultThread(process, result); - - // Wait for the process to finish - process.waitFor(); - - // Wait for the output thread to finish reading - outputThread.join(); - - // Record the end time of the query - mailQuery.endTimeMillis = System.currentTimeMillis(); - - // Clean up the AI response: remove partial prompt text, end-of-text marker, etc. - return cleanupAiResponse(result.toString()); - } finally { - deleteTemporaryFile(); - } - } - - /** - * Creates a temporary file for the AI input and writes the prompt to it. - * - * @param aiQuery the final prompt string for the AI to process. - * @throws IOException if file creation or writing fails. - */ - private void initializeInputFile(String aiQuery) throws IOException { - inputFile = createTemporaryFile(); - Files.write(inputFile.toPath(), aiQuery.getBytes()); - } - - /** - * Creates a temporary file that will be used for the AI prompt input. 
- * - * @return a new {@link File} referencing the created temporary file. - * @throws IOException if the file could not be created. - */ - private File createTemporaryFile() throws IOException { - File file = Files.createTempFile("ai-inference", ".tmp").toFile(); - file.deleteOnExit(); - return file; - } - - /** - * Cleans up the AI response by removing the partial text before the - * AI response marker and after the end-of-text marker, if specified. - * - * @param result the raw output from llama.cpp. - * @return the cleaned AI response. - */ - private String cleanupAiResponse(String result) { - - // remove text after the end of text marker if it exists - if (mailQuery.model.endOfTextMarker != null) { - int endOfTextMarkerIndex = result.indexOf(mailQuery.model.endOfTextMarker); - if (endOfTextMarkerIndex != -1) { - result = result.substring(0, endOfTextMarkerIndex); - } - } - - return result; - } - - /** - * Returns the full command string used to run the AI inference via llama.cpp. - * - * @return a string representing the command and all arguments. - */ - private String getCliCommand() { - int niceValue = 10; // niceness level for background tasks - String executablePath = configuration.getLlamaCliPath().getAbsolutePath(); - - return join(" ", - "nice", "-n", Integer.toString(niceValue), - executablePath, - "--model " + mailQuery.model.filesystemPath, - "--threads " + configuration.getThreadCount(), - "--threads-batch " + configuration.getBatchThreadCount(), - - "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities. - - "--top-p 0.95", // Restricts token selection to the smallest possible set - // of tokens whose cumulative probability exceeds the specified - // threshold P. - - "--min-p 0.1", // Filters the vocabulary to include only tokens whose - // probability is at least a certain fraction (Min P) of the - // probability of the most likely token. - - // "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on - - // Avoid getting stuck in a forever repetition loop - "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive - "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition - - "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence. - - "--presence-penalty 0", // In a code we want the model to reuse the same variable names, - // keywords, and syntax consistently. A presence penalty, - // even a small 0.1, could cause the model to needlessly - // rename variables. - - "--mirostat 0", // Disable mirostat - - "--no-display-prompt", - "--no-warmup", - "--flash-attn", - "--temp 0.6", - "--ctx-size " + mailQuery.model.contextSizeTokens, - "--batch-size 512", - "--single-turn", // run conversation for a single turn only, then exit when done - "-n -1", - "--file " + inputFile - ); - - // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable - // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable - - } - - /** - * Spawns a new Thread to handle the error stream from llama.cpp, - * printing lines that contain metadata or errors to the console. - * - * @param process the process whose error stream is consumed. 
- */ - private static void handleErrorThread(Process process) { - Thread errorThread = new Thread(() -> { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { - String line; - while ((line = reader.readLine()) != null) { - handleErrorStreamLine(line); - } - } catch (IOException e) { - System.err.println("Error reading error stream: " + e.getMessage()); - } - }); - errorThread.start(); - } - - /** - * Decides what to do with each line from the error stream: - * if it matches the llama.cpp meta-info marker, print it normally; - * otherwise print as an error. - * - * @param line a line from the llama.cpp error stream. - */ - private static void handleErrorStreamLine(String line) { - if (line.startsWith(LLAMA_CPP_META_INFO_MARKER)) { - // Print the meta-info to the console in normal color - System.out.println(line.substring(LLAMA_CPP_META_INFO_MARKER.length())); - } else { - // Print actual error lines in red - Utils.printRedMessageToConsole(line); - } - } - - /** - * Consumes the standard output (inference result) from the - * llama.cpp process, storing it into a result buffer for further - * cleanup, while simultaneously printing it to the console. - * - * @param process the AI inference process. - * @param result a string builder to accumulate the final result. - * @return the thread that is reading the output stream. - */ - private static Thread handleResultThread(Process process, StringBuilder result) { - Thread outputThread = new Thread(() -> { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { - String aiResultLine; - while ((aiResultLine = reader.readLine()) != null) { - System.out.print("AI: " + aiResultLine + "\n"); // Show each line in real-time - result.append(aiResultLine).append("\n"); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - outputThread.start(); - return outputThread; - } - - /** - * Deletes the temporary input file once processing is complete. - */ - private void deleteTemporaryFile() { - if (inputFile != null && inputFile.exists()) { - try { - Files.delete(inputFile.toPath()); - } catch (IOException e) { - System.err.println("Failed to delete temporary file: " + e.getMessage()); - } - } - } -} diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java new file mode 100644 index 0000000..29e5c0f --- /dev/null +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java @@ -0,0 +1,297 @@ +package eu.svjatoslav.alyverkko_cli.commands.mail_correspondant; + +import eu.svjatoslav.alyverkko_cli.Utils; + +import java.io.*; +import java.nio.file.Files; + +import static eu.svjatoslav.alyverkko_cli.Main.configuration; +import static java.lang.String.join; + +/** + * + * TODO: what if directory disappeared that contained original input file ? Response cannot be written back anymore. + * + *

+ * Executes AI inference tasks through the llama.cpp CLI. This class handles the complete workflow
+ * from prompt construction to response formatting, including temporary file management and process execution.
+ *
+ * Key processing steps:
+ *
+ *   1. Build standardized input prompt
+ *   2. Create a temporary input file
+ *   3. Execute llama.cpp with appropriate parameters
+ *   4. Capture and filter output
+ *   5. Perform cleanup operations
+ *
Temperature settings, context size, and thread counts are all derived from the current configuration. + * The response is formatted to match org-mode conventions while preserving original conversation structure. + */ +public class AiTask { + + /** + * Marker for the AI's response block, used in the constructed prompt string. + */ + public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; + + /** + * Marker used by llama.cpp to print metadata. We monitor and display these lines. + */ + private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; + + /** + * The mail query defining system prompt, user prompt, and which model to use. + */ + private final MailQuery mailQuery; + + /** + * The temperature (creativity factor) for the AI. + */ + private final Float temperature; + + /** + * Temporary file used as input to the llama.cpp CLI. + */ + private File inputFile; + + /** + * Creates a new AI task with a given mail query. + * + * @param mailQuery the mail query containing model and prompts. + */ + public AiTask(MailQuery mailQuery) { + this.mailQuery = mailQuery; + this.temperature = configuration.getDefaultTemperature(); + } + + /** + * Builds the prompt text that is fed to llama.cpp, including the system prompt, + * the user prompt, and an "ASSISTANT:" marker signifying where the AI response begins. + * + * @return a string containing the fully prepared query prompt. + */ + private String buildAiQuery() { + return mailQuery.systemPrompt.replace("", mailQuery.userPrompt); + } + + /** + * Runs the AI query by constructing the prompt, writing it to a temp file, + * invoking llama.cpp, collecting output, and performing any final cleanup. + * + * @return the AI's response in a format suitable for appending back into + * the conversation file. + * @throws InterruptedException if the process is interrupted. + * @throws IOException if reading/writing the file fails or the process fails to start. + */ + public String runAiQuery() throws InterruptedException, IOException { + try { + // Record the start time of the query + mailQuery.startTimeMillis = System.currentTimeMillis(); + + // Build input prompt + initializeInputFile(buildAiQuery()); + + // Prepare a process builder + ProcessBuilder processBuilder = new ProcessBuilder(); + processBuilder.command(getCliCommand().split("\\s+")); // Splitting the command string into tokens + + // Start process + Process process = processBuilder.start(); + + // Handle process's error stream + handleErrorThread(process); + + // Handle the process's output stream + StringBuilder result = new StringBuilder(); + Thread outputThread = handleResultThread(process, result); + + // Wait for the process to finish + process.waitFor(); + + // Wait for the output thread to finish reading + outputThread.join(); + + // Record the end time of the query + mailQuery.endTimeMillis = System.currentTimeMillis(); + + // Clean up the AI response: remove partial prompt text, end-of-text marker, etc. + return cleanupAiResponse(result.toString()); + } finally { + deleteTemporaryFile(); + } + } + + /** + * Creates a temporary file for the AI input and writes the prompt to it. + * + * @param aiQuery the final prompt string for the AI to process. + * @throws IOException if file creation or writing fails. + */ + private void initializeInputFile(String aiQuery) throws IOException { + inputFile = createTemporaryFile(); + Files.write(inputFile.toPath(), aiQuery.getBytes()); + } + + /** + * Creates a temporary file that will be used for the AI prompt input. 
+ * + * @return a new {@link File} referencing the created temporary file. + * @throws IOException if the file could not be created. + */ + private File createTemporaryFile() throws IOException { + File file = Files.createTempFile("ai-inference", ".tmp").toFile(); + file.deleteOnExit(); + return file; + } + + /** + * Cleans up the AI response by removing the partial text before the + * AI response marker and after the end-of-text marker, if specified. + * + * @param result the raw output from llama.cpp. + * @return the cleaned AI response. + */ + private String cleanupAiResponse(String result) { + + // remove text after the end of text marker if it exists + if (mailQuery.model.endOfTextMarker != null) { + int endOfTextMarkerIndex = result.indexOf(mailQuery.model.endOfTextMarker); + if (endOfTextMarkerIndex != -1) { + result = result.substring(0, endOfTextMarkerIndex); + } + } + + return result; + } + + /** + * Returns the full command string used to run the AI inference via llama.cpp. + * + * @return a string representing the command and all arguments. + */ + private String getCliCommand() { + int niceValue = 10; // niceness level for background tasks + String executablePath = configuration.getLlamaCliPath().getAbsolutePath(); + + return join(" ", + "nice", "-n", Integer.toString(niceValue), + executablePath, + "--model " + mailQuery.model.filesystemPath, + "--threads " + configuration.getThreadCount(), + "--threads-batch " + configuration.getBatchThreadCount(), + + "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities. + + "--top-p 0.95", // Restricts token selection to the smallest possible set + // of tokens whose cumulative probability exceeds the specified + // threshold P. + + "--min-p 0.1", // Filters the vocabulary to include only tokens whose + // probability is at least a certain fraction (Min P) of the + // probability of the most likely token. + + // "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on + + // Avoid getting stuck in a forever repetition loop + "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive + "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition + + "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence. + + "--presence-penalty 0", // In a code we want the model to reuse the same variable names, + // keywords, and syntax consistently. A presence penalty, + // even a small 0.1, could cause the model to needlessly + // rename variables. + + "--mirostat 0", // Disable mirostat + + "--no-display-prompt", + "--no-warmup", + "--flash-attn", + "--temp 0.6", + "--ctx-size " + mailQuery.model.contextSizeTokens, + "--batch-size 512", + "--single-turn", // run conversation for a single turn only, then exit when done + "-n -1", + "--file " + inputFile + ); + + // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable + // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable + + } + + /** + * Spawns a new Thread to handle the error stream from llama.cpp, + * printing lines that contain metadata or errors to the console. + * + * @param process the process whose error stream is consumed. 
+ */ + private static void handleErrorThread(Process process) { + Thread errorThread = new Thread(() -> { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { + String line; + while ((line = reader.readLine()) != null) { + handleErrorStreamLine(line); + } + } catch (IOException e) { + System.err.println("Error reading error stream: " + e.getMessage()); + } + }); + errorThread.start(); + } + + /** + * Decides what to do with each line from the error stream: + * if it matches the llama.cpp meta-info marker, print it normally; + * otherwise print as an error. + * + * @param line a line from the llama.cpp error stream. + */ + private static void handleErrorStreamLine(String line) { + if (line.startsWith(LLAMA_CPP_META_INFO_MARKER)) { + // Print the meta-info to the console in normal color + System.out.println(line.substring(LLAMA_CPP_META_INFO_MARKER.length())); + } else { + // Print actual error lines in red + Utils.printRedMessageToConsole(line); + } + } + + /** + * Consumes the standard output (inference result) from the + * llama.cpp process, storing it into a result buffer for further + * cleanup, while simultaneously printing it to the console. + * + * @param process the AI inference process. + * @param result a string builder to accumulate the final result. + * @return the thread that is reading the output stream. + */ + private static Thread handleResultThread(Process process, StringBuilder result) { + Thread outputThread = new Thread(() -> { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + String aiResultLine; + while ((aiResultLine = reader.readLine()) != null) { + System.out.print("AI: " + aiResultLine + "\n"); // Show each line in real-time + result.append(aiResultLine).append("\n"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + outputThread.start(); + return outputThread; + } + + /** + * Deletes the temporary input file once processing is complete. + */ + private void deleteTemporaryFile() { + if (inputFile != null && inputFile.exists()) { + try { + Files.delete(inputFile.toPath()); + } catch (IOException e) { + System.err.println("Failed to delete temporary file: " + e.getMessage()); + } + } + } +} diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java index d279166..8c37285 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java @@ -21,6 +21,8 @@ import static java.nio.file.StandardWatchEventKinds.*; /** + * TODO: What happens when directory gets renamed ? Will event listener reindex all files inside it for processing ? + * * The MailCorrespondentCommand continuously monitors a specified mail * directory for new or modified text files, checks if they have a * "TOCOMPUTE:" marker, and if so, adds them to a priority queue to be
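
---

The TODO added to MailCorrespondentCommand asks whether renaming a directory causes the files inside it to be reindexed. With java.nio.file.WatchService (which the StandardWatchEventKinds import indicates this class uses), it does not: on typical platforms a rename surfaces only as ENTRY_DELETE for the old name plus ENTRY_CREATE for the new name in the watched parent, and no events are delivered for the renamed directory's contents. The listener therefore has to rescan and re-register on directory creation. A minimal sketch of that rescan, assuming a WatchService-based loop; the class and method names are hypothetical and not part of this commit:

    import java.io.IOException;
    import java.nio.file.*;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.function.Consumer;

    import static java.nio.file.StandardWatchEventKinds.*;

    public final class DirectoryRescanner {

        private final WatchService watchService;

        public DirectoryRescanner(WatchService watchService) {
            this.watchService = watchService;
        }

        /**
         * Registers the given root and every subdirectory with the watch
         * service, and feeds each regular file to the supplied handler so
         * pre-existing mail files (e.g. ones carrying the TOCOMPUTE: marker)
         * get re-queued after a rename or move.
         */
        public void registerAndRescan(Path root, Consumer<Path> enqueue) throws IOException {
            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
                    // A rename delivers no events for the directory's contents,
                    // so every (re)appearing directory must be registered anew.
                    dir.register(watchService, ENTRY_CREATE, ENTRY_MODIFY, ENTRY_DELETE);
                    return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                    enqueue.accept(file);
                    return FileVisitResult.CONTINUE;
                }
            });
        }
    }

In the watch loop, an ENTRY_CREATE event whose resolved path is a directory would trigger registerAndRescan for that subtree.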
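The other new TODO, at the top of the relocated AiTask, notes that the response cannot be written back if the directory containing the original input file has disappeared. One way to keep the potentially expensive inference result from being lost is to check the parent directory before writing and fall back elsewhere. A sketch under that assumption, with a hypothetical helper name and fallback location:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    /**
     * Writes the AI response next to the conversation file when possible;
     * if the parent directory has vanished, rescues the response into a
     * fallback directory instead of throwing the result away.
     * (A race between the existence check and the write remains possible.)
     */
    static void writeResponseSafely(Path conversationFile, String response, Path fallbackDir) throws IOException {
        Path parent = conversationFile.getParent();
        if (parent != null && Files.isDirectory(parent)) {
            Files.write(conversationFile, response.getBytes());
            return;
        }
        Files.createDirectories(fallbackDir);
        Path rescued = fallbackDir.resolve(conversationFile.getFileName());
        Files.write(rescued, response.getBytes());
        System.err.println("Original directory vanished; response saved to " + rescued);
    }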
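Separately, runAiQuery() tokenizes the command with getCliCommand().split("\\s+") after getCliCommand() joined flags and values with spaces ("--model " + path, "--file " + inputFile), so a model path or temp directory containing whitespace would be split into broken tokens. A common remedy is to build the argument vector directly and hand the list to ProcessBuilder; a sketch of such a method on AiTask, with a hypothetical name and the flag list abbreviated:

    // (requires java.util.List and java.util.ArrayList imports)
    // Build the argv list directly instead of join(" ") followed by split("\\s+"),
    // so paths containing whitespace survive intact.
    private List<String> getCliCommandTokens() {
        List<String> cmd = new ArrayList<>();
        cmd.add("nice"); cmd.add("-n"); cmd.add("10");
        cmd.add(configuration.getLlamaCliPath().getAbsolutePath());
        cmd.add("--model");   cmd.add(String.valueOf(mailQuery.model.filesystemPath));
        cmd.add("--threads"); cmd.add(Integer.toString(configuration.getThreadCount()));
        cmd.add("--file");    cmd.add(inputFile.getAbsolutePath());
        // ... remaining sampling flags appended pairwise in the same way ...
        return cmd;
    }

    // Usage: Process process = new ProcessBuilder(getCliCommandTokens()).start();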