From: Svjatoslav Agejenko Date: Thu, 21 Aug 2025 20:41:19 +0000 (+0300) Subject: Add important findings X-Git-Url: http://www2.svjatoslav.eu/gitweb/?a=commitdiff_plain;h=da1b701b4ffef3413da400d2eb3d4881d79ce871;p=alyverkko-cli.git Add important findings --- diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java deleted file mode 100644 index 896f69b..0000000 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/AiTask.java +++ /dev/null @@ -1,294 +0,0 @@ -package eu.svjatoslav.alyverkko_cli; - -import eu.svjatoslav.alyverkko_cli.commands.mail_correspondant.MailQuery; - -import java.io.*; -import java.nio.file.Files; - -import static eu.svjatoslav.alyverkko_cli.Main.configuration; -import static java.lang.String.join; - -/** - *

- * Executes AI inference tasks through the llama.cpp CLI. This class handles the complete workflow
- * from prompt construction to response formatting, including temporary file management and process execution.
- *
- * Key processing steps:
- *
- *   1. Build standardized input prompt
- *   2. Create a temporary input file
- *   3. Execute llama.cpp with appropriate parameters
- *   4. Capture and filter output
- *   5. Perform cleanup operations
- *
Temperature settings, context size, and thread counts are all derived from the current configuration. - * The response is formatted to match org-mode conventions while preserving original conversation structure. - */ -public class AiTask { - - /** - * Marker for the AI's response block, used in the constructed prompt string. - */ - public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; - - /** - * Marker used by llama.cpp to print metadata. We monitor and display these lines. - */ - private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; - - /** - * The mail query defining system prompt, user prompt, and which model to use. - */ - private final MailQuery mailQuery; - - /** - * The temperature (creativity factor) for the AI. - */ - private final Float temperature; - - /** - * Temporary file used as input to the llama.cpp CLI. - */ - private File inputFile; - - /** - * Creates a new AI task with a given mail query. - * - * @param mailQuery the mail query containing model and prompts. - */ - public AiTask(MailQuery mailQuery) { - this.mailQuery = mailQuery; - this.temperature = configuration.getDefaultTemperature(); - } - - /** - * Builds the prompt text that is fed to llama.cpp, including the system prompt, - * the user prompt, and an "ASSISTANT:" marker signifying where the AI response begins. - * - * @return a string containing the fully prepared query prompt. - */ - private String buildAiQuery() { - return mailQuery.systemPrompt.replace("", mailQuery.userPrompt); - } - - /** - * Runs the AI query by constructing the prompt, writing it to a temp file, - * invoking llama.cpp, collecting output, and performing any final cleanup. - * - * @return the AI's response in a format suitable for appending back into - * the conversation file. - * @throws InterruptedException if the process is interrupted. - * @throws IOException if reading/writing the file fails or the process fails to start. - */ - public String runAiQuery() throws InterruptedException, IOException { - try { - // Record the start time of the query - mailQuery.startTimeMillis = System.currentTimeMillis(); - - // Build input prompt - initializeInputFile(buildAiQuery()); - - // Prepare a process builder - ProcessBuilder processBuilder = new ProcessBuilder(); - processBuilder.command(getCliCommand().split("\\s+")); // Splitting the command string into tokens - - // Start process - Process process = processBuilder.start(); - - // Handle process's error stream - handleErrorThread(process); - - // Handle the process's output stream - StringBuilder result = new StringBuilder(); - Thread outputThread = handleResultThread(process, result); - - // Wait for the process to finish - process.waitFor(); - - // Wait for the output thread to finish reading - outputThread.join(); - - // Record the end time of the query - mailQuery.endTimeMillis = System.currentTimeMillis(); - - // Clean up the AI response: remove partial prompt text, end-of-text marker, etc. - return cleanupAiResponse(result.toString()); - } finally { - deleteTemporaryFile(); - } - } - - /** - * Creates a temporary file for the AI input and writes the prompt to it. - * - * @param aiQuery the final prompt string for the AI to process. - * @throws IOException if file creation or writing fails. - */ - private void initializeInputFile(String aiQuery) throws IOException { - inputFile = createTemporaryFile(); - Files.write(inputFile.toPath(), aiQuery.getBytes()); - } - - /** - * Creates a temporary file that will be used for the AI prompt input. 
- * - * @return a new {@link File} referencing the created temporary file. - * @throws IOException if the file could not be created. - */ - private File createTemporaryFile() throws IOException { - File file = Files.createTempFile("ai-inference", ".tmp").toFile(); - file.deleteOnExit(); - return file; - } - - /** - * Cleans up the AI response by removing the partial text before the - * AI response marker and after the end-of-text marker, if specified. - * - * @param result the raw output from llama.cpp. - * @return the cleaned AI response. - */ - private String cleanupAiResponse(String result) { - - // remove text after the end of text marker if it exists - if (mailQuery.model.endOfTextMarker != null) { - int endOfTextMarkerIndex = result.indexOf(mailQuery.model.endOfTextMarker); - if (endOfTextMarkerIndex != -1) { - result = result.substring(0, endOfTextMarkerIndex); - } - } - - return result; - } - - /** - * Returns the full command string used to run the AI inference via llama.cpp. - * - * @return a string representing the command and all arguments. - */ - private String getCliCommand() { - int niceValue = 10; // niceness level for background tasks - String executablePath = configuration.getLlamaCliPath().getAbsolutePath(); - - return join(" ", - "nice", "-n", Integer.toString(niceValue), - executablePath, - "--model " + mailQuery.model.filesystemPath, - "--threads " + configuration.getThreadCount(), - "--threads-batch " + configuration.getBatchThreadCount(), - - "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities. - - "--top-p 0.95", // Restricts token selection to the smallest possible set - // of tokens whose cumulative probability exceeds the specified - // threshold P. - - "--min-p 0.1", // Filters the vocabulary to include only tokens whose - // probability is at least a certain fraction (Min P) of the - // probability of the most likely token. - - // "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on - - // Avoid getting stuck in a forever repetition loop - "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive - "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition - - "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence. - - "--presence-penalty 0", // In a code we want the model to reuse the same variable names, - // keywords, and syntax consistently. A presence penalty, - // even a small 0.1, could cause the model to needlessly - // rename variables. - - "--mirostat 0", // Disable mirostat - - "--no-display-prompt", - "--no-warmup", - "--flash-attn", - "--temp 0.6", - "--ctx-size " + mailQuery.model.contextSizeTokens, - "--batch-size 512", - "--single-turn", // run conversation for a single turn only, then exit when done - "-n -1", - "--file " + inputFile - ); - - // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable - // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable - - } - - /** - * Spawns a new Thread to handle the error stream from llama.cpp, - * printing lines that contain metadata or errors to the console. - * - * @param process the process whose error stream is consumed. 
- */ - private static void handleErrorThread(Process process) { - Thread errorThread = new Thread(() -> { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { - String line; - while ((line = reader.readLine()) != null) { - handleErrorStreamLine(line); - } - } catch (IOException e) { - System.err.println("Error reading error stream: " + e.getMessage()); - } - }); - errorThread.start(); - } - - /** - * Decides what to do with each line from the error stream: - * if it matches the llama.cpp meta-info marker, print it normally; - * otherwise print as an error. - * - * @param line a line from the llama.cpp error stream. - */ - private static void handleErrorStreamLine(String line) { - if (line.startsWith(LLAMA_CPP_META_INFO_MARKER)) { - // Print the meta-info to the console in normal color - System.out.println(line.substring(LLAMA_CPP_META_INFO_MARKER.length())); - } else { - // Print actual error lines in red - Utils.printRedMessageToConsole(line); - } - } - - /** - * Consumes the standard output (inference result) from the - * llama.cpp process, storing it into a result buffer for further - * cleanup, while simultaneously printing it to the console. - * - * @param process the AI inference process. - * @param result a string builder to accumulate the final result. - * @return the thread that is reading the output stream. - */ - private static Thread handleResultThread(Process process, StringBuilder result) { - Thread outputThread = new Thread(() -> { - try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { - String aiResultLine; - while ((aiResultLine = reader.readLine()) != null) { - System.out.print("AI: " + aiResultLine + "\n"); // Show each line in real-time - result.append(aiResultLine).append("\n"); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - }); - outputThread.start(); - return outputThread; - } - - /** - * Deletes the temporary input file once processing is complete. - */ - private void deleteTemporaryFile() { - if (inputFile != null && inputFile.exists()) { - try { - Files.delete(inputFile.toPath()); - } catch (IOException e) { - System.err.println("Failed to delete temporary file: " + e.getMessage()); - } - } - } -} diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java new file mode 100644 index 0000000..29e5c0f --- /dev/null +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/AiTask.java @@ -0,0 +1,297 @@ +package eu.svjatoslav.alyverkko_cli.commands.mail_correspondant; + +import eu.svjatoslav.alyverkko_cli.Utils; + +import java.io.*; +import java.nio.file.Files; + +import static eu.svjatoslav.alyverkko_cli.Main.configuration; +import static java.lang.String.join; + +/** + * + * TODO: what if directory disappeared that contained original input file ? Response cannot be written back anymore. + * + *

+ * Executes AI inference tasks through the llama.cpp CLI. This class handles the complete workflow
+ * from prompt construction to response formatting, including temporary file management and process execution.
+ *
+ * Key processing steps:
+ *
+ *   1. Build standardized input prompt
+ *   2. Create a temporary input file
+ *   3. Execute llama.cpp with appropriate parameters
+ *   4. Capture and filter output
+ *   5. Perform cleanup operations
+ *
Temperature settings, context size, and thread counts are all derived from the current configuration. + * The response is formatted to match org-mode conventions while preserving original conversation structure. + */ +public class AiTask { + + /** + * Marker for the AI's response block, used in the constructed prompt string. + */ + public static final String AI_RESPONSE_MARKER = "ASSISTANT:"; + + /** + * Marker used by llama.cpp to print metadata. We monitor and display these lines. + */ + private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: "; + + /** + * The mail query defining system prompt, user prompt, and which model to use. + */ + private final MailQuery mailQuery; + + /** + * The temperature (creativity factor) for the AI. + */ + private final Float temperature; + + /** + * Temporary file used as input to the llama.cpp CLI. + */ + private File inputFile; + + /** + * Creates a new AI task with a given mail query. + * + * @param mailQuery the mail query containing model and prompts. + */ + public AiTask(MailQuery mailQuery) { + this.mailQuery = mailQuery; + this.temperature = configuration.getDefaultTemperature(); + } + + /** + * Builds the prompt text that is fed to llama.cpp, including the system prompt, + * the user prompt, and an "ASSISTANT:" marker signifying where the AI response begins. + * + * @return a string containing the fully prepared query prompt. + */ + private String buildAiQuery() { + return mailQuery.systemPrompt.replace("", mailQuery.userPrompt); + } + + /** + * Runs the AI query by constructing the prompt, writing it to a temp file, + * invoking llama.cpp, collecting output, and performing any final cleanup. + * + * @return the AI's response in a format suitable for appending back into + * the conversation file. + * @throws InterruptedException if the process is interrupted. + * @throws IOException if reading/writing the file fails or the process fails to start. + */ + public String runAiQuery() throws InterruptedException, IOException { + try { + // Record the start time of the query + mailQuery.startTimeMillis = System.currentTimeMillis(); + + // Build input prompt + initializeInputFile(buildAiQuery()); + + // Prepare a process builder + ProcessBuilder processBuilder = new ProcessBuilder(); + processBuilder.command(getCliCommand().split("\\s+")); // Splitting the command string into tokens + + // Start process + Process process = processBuilder.start(); + + // Handle process's error stream + handleErrorThread(process); + + // Handle the process's output stream + StringBuilder result = new StringBuilder(); + Thread outputThread = handleResultThread(process, result); + + // Wait for the process to finish + process.waitFor(); + + // Wait for the output thread to finish reading + outputThread.join(); + + // Record the end time of the query + mailQuery.endTimeMillis = System.currentTimeMillis(); + + // Clean up the AI response: remove partial prompt text, end-of-text marker, etc. + return cleanupAiResponse(result.toString()); + } finally { + deleteTemporaryFile(); + } + } + + /** + * Creates a temporary file for the AI input and writes the prompt to it. + * + * @param aiQuery the final prompt string for the AI to process. + * @throws IOException if file creation or writing fails. + */ + private void initializeInputFile(String aiQuery) throws IOException { + inputFile = createTemporaryFile(); + Files.write(inputFile.toPath(), aiQuery.getBytes()); + } + + /** + * Creates a temporary file that will be used for the AI prompt input. 
+ * + * @return a new {@link File} referencing the created temporary file. + * @throws IOException if the file could not be created. + */ + private File createTemporaryFile() throws IOException { + File file = Files.createTempFile("ai-inference", ".tmp").toFile(); + file.deleteOnExit(); + return file; + } + + /** + * Cleans up the AI response by removing the partial text before the + * AI response marker and after the end-of-text marker, if specified. + * + * @param result the raw output from llama.cpp. + * @return the cleaned AI response. + */ + private String cleanupAiResponse(String result) { + + // remove text after the end of text marker if it exists + if (mailQuery.model.endOfTextMarker != null) { + int endOfTextMarkerIndex = result.indexOf(mailQuery.model.endOfTextMarker); + if (endOfTextMarkerIndex != -1) { + result = result.substring(0, endOfTextMarkerIndex); + } + } + + return result; + } + + /** + * Returns the full command string used to run the AI inference via llama.cpp. + * + * @return a string representing the command and all arguments. + */ + private String getCliCommand() { + int niceValue = 10; // niceness level for background tasks + String executablePath = configuration.getLlamaCliPath().getAbsolutePath(); + + return join(" ", + "nice", "-n", Integer.toString(niceValue), + executablePath, + "--model " + mailQuery.model.filesystemPath, + "--threads " + configuration.getThreadCount(), + "--threads-batch " + configuration.getBatchThreadCount(), + + "--top-k 20", // Restricts token selection to the K tokens with the highest probabilities. + + "--top-p 0.95", // Restricts token selection to the smallest possible set + // of tokens whose cumulative probability exceeds the specified + // threshold P. + + "--min-p 0.1", // Filters the vocabulary to include only tokens whose + // probability is at least a certain fraction (Min P) of the + // probability of the most likely token. + + // "--chat-format qwen3", // Ensure that model sees the <|im_start|>system … / <|im_start|>user … markup it was trained on + + // Avoid getting stuck in a forever repetition loop + "--repeat-penalty 1.05", // Very little penalty, because computer code is often repetitive + "--repeat-last-n 512", // Last n tokens to consider for penalizing repetition + + "--dry-multiplier 0.1", // Controls the strength of the penalty for a detected repetition sequence. + + "--presence-penalty 0", // In a code we want the model to reuse the same variable names, + // keywords, and syntax consistently. A presence penalty, + // even a small 0.1, could cause the model to needlessly + // rename variables. + + "--mirostat 0", // Disable mirostat + + "--no-display-prompt", + "--no-warmup", + "--flash-attn", + "--temp 0.6", + "--ctx-size " + mailQuery.model.contextSizeTokens, + "--batch-size 512", + "--single-turn", // run conversation for a single turn only, then exit when done + "-n -1", + "--file " + inputFile + ); + + // "--cache-type-k q8_0", might save RAM, need to test precision loss is acceptable + // "--cache-type-v q8_0", might save RAM, need to test precision loss is acceptable + + } + + /** + * Spawns a new Thread to handle the error stream from llama.cpp, + * printing lines that contain metadata or errors to the console. + * + * @param process the process whose error stream is consumed. 
+ */ + private static void handleErrorThread(Process process) { + Thread errorThread = new Thread(() -> { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) { + String line; + while ((line = reader.readLine()) != null) { + handleErrorStreamLine(line); + } + } catch (IOException e) { + System.err.println("Error reading error stream: " + e.getMessage()); + } + }); + errorThread.start(); + } + + /** + * Decides what to do with each line from the error stream: + * if it matches the llama.cpp meta-info marker, print it normally; + * otherwise print as an error. + * + * @param line a line from the llama.cpp error stream. + */ + private static void handleErrorStreamLine(String line) { + if (line.startsWith(LLAMA_CPP_META_INFO_MARKER)) { + // Print the meta-info to the console in normal color + System.out.println(line.substring(LLAMA_CPP_META_INFO_MARKER.length())); + } else { + // Print actual error lines in red + Utils.printRedMessageToConsole(line); + } + } + + /** + * Consumes the standard output (inference result) from the + * llama.cpp process, storing it into a result buffer for further + * cleanup, while simultaneously printing it to the console. + * + * @param process the AI inference process. + * @param result a string builder to accumulate the final result. + * @return the thread that is reading the output stream. + */ + private static Thread handleResultThread(Process process, StringBuilder result) { + Thread outputThread = new Thread(() -> { + try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) { + String aiResultLine; + while ((aiResultLine = reader.readLine()) != null) { + System.out.print("AI: " + aiResultLine + "\n"); // Show each line in real-time + result.append(aiResultLine).append("\n"); + } + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + outputThread.start(); + return outputThread; + } + + /** + * Deletes the temporary input file once processing is complete. + */ + private void deleteTemporaryFile() { + if (inputFile != null && inputFile.exists()) { + try { + Files.delete(inputFile.toPath()); + } catch (IOException e) { + System.err.println("Failed to delete temporary file: " + e.getMessage()); + } + } + } +} diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java index d279166..8c37285 100644 --- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java +++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/mail_correspondant/MailCorrespondentCommand.java @@ -21,6 +21,8 @@ import static java.nio.file.StandardWatchEventKinds.*; /** + * TODO: What happens when directory gets renamed ? Will event listener reindex all files inside it for processing ? + * * The MailCorrespondentCommand continuously monitors a specified mail * directory for new or modified text files, checks if they have a * "TOCOMPUTE:" marker, and if so, adds them to a priority queue to be
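
---

The TODO added to MailCorrespondentCommand asks whether renaming a directory causes the files inside it to be reindexed. With java.nio.file.WatchService (which the StandardWatchEventKinds import indicates this class uses), it does not: on typical platforms a rename surfaces only as ENTRY_DELETE for the old name plus ENTRY_CREATE for the new name in the watched parent, and no events are delivered for the renamed directory's contents. The listener therefore has to rescan and re-register on directory creation. A minimal sketch of that rescan, assuming a WatchService-based loop; the class and method names are hypothetical and not part of this commit:

    import java.io.IOException;
    import java.nio.file.*;
    import java.nio.file.attribute.BasicFileAttributes;
    import java.util.function.Consumer;

    import static java.nio.file.StandardWatchEventKinds.*;

    public final class DirectoryRescanner {

        private final WatchService watchService;

        public DirectoryRescanner(WatchService watchService) {
            this.watchService = watchService;
        }

        /**
         * Registers the given root and every subdirectory with the watch
         * service, and feeds each regular file to the supplied handler so
         * pre-existing mail files (e.g. ones carrying the TOCOMPUTE: marker)
         * get re-queued after a rename or move.
         */
        public void registerAndRescan(Path root, Consumer<Path> enqueue) throws IOException {
            Files.walkFileTree(root, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
                    // A rename delivers no events for the directory's contents,
                    // so every (re)appearing directory must be registered anew.
                    dir.register(watchService, ENTRY_CREATE, ENTRY_MODIFY, ENTRY_DELETE);
                    return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                    enqueue.accept(file);
                    return FileVisitResult.CONTINUE;
                }
            });
        }
    }

In the watch loop, an ENTRY_CREATE event whose resolved path is a directory would trigger registerAndRescan for that subtree.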
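The other new TODO, at the top of the relocated AiTask, notes that the response cannot be written back if the directory containing the original input file has disappeared. One way to keep the potentially expensive inference result from being lost is to check the parent directory before writing and fall back elsewhere. A sketch under that assumption, with a hypothetical helper name and fallback location:

    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Path;

    /**
     * Writes the AI response next to the conversation file when possible;
     * if the parent directory has vanished, rescues the response into a
     * fallback directory instead of throwing the result away.
     * (A race between the existence check and the write remains possible.)
     */
    static void writeResponseSafely(Path conversationFile, String response, Path fallbackDir) throws IOException {
        Path parent = conversationFile.getParent();
        if (parent != null && Files.isDirectory(parent)) {
            Files.write(conversationFile, response.getBytes());
            return;
        }
        Files.createDirectories(fallbackDir);
        Path rescued = fallbackDir.resolve(conversationFile.getFileName());
        Files.write(rescued, response.getBytes());
        System.err.println("Original directory vanished; response saved to " + rescued);
    }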
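Separately, runAiQuery() tokenizes the command with getCliCommand().split("\\s+") after getCliCommand() joined flags and values with spaces ("--model " + path, "--file " + inputFile), so a model path or temp directory containing whitespace would be split into broken tokens. A common remedy is to build the argument vector directly and hand the list to ProcessBuilder; a sketch of such a method on AiTask, with a hypothetical name and the flag list abbreviated:

    // (requires java.util.List and java.util.ArrayList imports)
    // Build the argv list directly instead of join(" ") followed by split("\\s+"),
    // so paths containing whitespace survive intact.
    private List<String> getCliCommandTokens() {
        List<String> cmd = new ArrayList<>();
        cmd.add("nice"); cmd.add("-n"); cmd.add("10");
        cmd.add(configuration.getLlamaCliPath().getAbsolutePath());
        cmd.add("--model");   cmd.add(String.valueOf(mailQuery.model.filesystemPath));
        cmd.add("--threads"); cmd.add(Integer.toString(configuration.getThreadCount()));
        cmd.add("--file");    cmd.add(inputFile.getAbsolutePath());
        // ... remaining sampling flags appended pairwise in the same way ...
        return cmd;
    }

    // Usage: Process process = new ProcessBuilder(getCliCommandTokens()).start();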