feat(api): migrate from llama-cli subprocess to llama-server REST API

author Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sun, 17 May 2026 20:22:45 +0000 (23:22 +0300)
committer Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
Sun, 17 May 2026 20:22:45 +0000 (23:22 +0300)

diff --git a/doc/examples/skills/default.yaml b/doc/examples/skills/default.yaml

index 452b7f7..42d533d 100644 (file)
--- a/doc/examples/skills/default.yaml
+++ b/doc/examples/skills/default.yaml
@@ -1,12 +1,3 @@
-prompt: |
-  <|im_start|>system
+system_prompt: |
    User will provide you with task that needs to be solved along with
    existing relevant information. You must provide well reasoned solution.
-  <|im_end|>
-  <|im_start|>user
-  /think Solve following problem:
-
-  <TASK-FILE>
-
-  <|im_end|>
-  <|im_start|>assistant
diff --git a/doc/examples/skills/summary.yaml b/doc/examples/skills/summary.yaml

index 6530a26..530e523 100644 (file)
--- a/doc/examples/skills/summary.yaml
+++ b/doc/examples/skills/summary.yaml
@@ -1,18 +1,7 @@
-prompt: |
-  <|im_start|>system
-
+system_prompt: |
    User is curious about the world and wants to understand how it
    works. User is finding various news, blogs and stories on the internet
    that look promising but user does not have time to read them in depth.
    Your task is to analyze user provided story and write the summary that
    preserves the most valuable events, facts, conclusions or take-aways
    from it.
-
-  <|im_end|>
-  <|im_start|>user
-  /think Summarize following:
-
-  <TASK-FILE>
-
-  <|im_end|>
-  <|im_start|>assistant
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/ListModelsCommand.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/ListModelsCommand.java

index 63b6cab..a7e0438 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/ListModelsCommand.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/ListModelsCommand.java
@@ -44,7 +44,7 @@ public class ListModelsCommand implements Command {
              return;
          }
  
-        System.out.println("Listing models in directory: " + configuration.getModelsDirectory());
+        System.out.println("Available models:");
          configuration.printModels();
      }
  }
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/WizardCommand.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/WizardCommand.java

index 0d8c2b2..e1b4832 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/WizardCommand.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/WizardCommand.java
@@ -5,31 +5,24 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
  import eu.svjatoslav.alyverkko_cli.Command;
  import eu.svjatoslav.alyverkko_cli.Utils;
  import eu.svjatoslav.alyverkko_cli.configuration.Configuration;
-import eu.svjatoslav.alyverkko_cli.configuration.Model;
  import eu.svjatoslav.commons.cli_helper.parameter_parser.Parser;
  import eu.svjatoslav.commons.cli_helper.parameter_parser.parameter.FileOption;
  
  import java.io.*;
  import java.nio.file.*;
-import java.util.ArrayList;
-import java.util.List;
  
  import static eu.svjatoslav.alyverkko_cli.Utils.printRedMessageToConsole;
  import static eu.svjatoslav.commons.cli_helper.CLIHelper.*;
  
  /**
   * <p>Interactive configuration wizard that helps users validate and fix their configuration files.
- * It performs system checks and offers to fix any missing or invalid paths, discovers new models,
- * and updates the configuration accordingly.
+ * It performs system checks and offers to fix any missing or invalid paths, and updates the configuration accordingly.
   * <p>Key workflow steps:
   * <ol>
   *   <li>Load or create configuration</li>
   *   <li>Validate core directory paths</li>
- *   <li>Discover and annotate new models</li>
   *   <li>Save updated configuration</li>
   * </ol>
- * <p>When handling split models (.gguf files with part numbering), the wizard automatically
- * detects base models and only adds part-1 files to the configuration.
   */
  public class WizardCommand implements Command {
  
@@ -50,7 +43,6 @@ public class WizardCommand implements Command {
      private File configurationFile;
  
      private boolean configurationUpdated = false;
-    private boolean modelsUpdated = false;
  
      @Override
      public String getCommandName() {
@@ -71,13 +63,7 @@ public class WizardCommand implements Command {
  
          checkAndFixGeneralParameters();
  
-        fixModelEntries();
-
          trySaveConfiguration();
-
-        if (modelsUpdated) {
-            System.out.println("Configuration has been updated. Please open the configuration file in a text editor to review and adjust model settings as needed.");
-        }
      }
  
      private void loadOrCreateConfiguration() throws IOException {
@@ -131,20 +117,10 @@ public class WizardCommand implements Command {
          configuration.setTasksDirectory(
                  checkDirectory(
                          configuration.getTasksDirectory(),
-                        "Mail directory",
+                        "Tasks directory",
                          true,
-                        "The mail directory is where the AI will look for tasks to solve. " +
-                                "It should be a directory that you can write to. Please specify new mail directory path.",
-                        true)
-        );
-
-        configuration.setModelsDirectory(
-                checkDirectory(
-                        configuration.getModelsDirectory(),
-                        "Models directory",
-                        null,
-                        "The models directory is where the AI model files (*.gguf) are stored. " +
-                                "Please specify new models directory path.",
+                        "The tasks directory is where the AI will look for tasks to solve. " +
+                                "It should be a directory that you can write to. Please specify new tasks directory path.",
                          true)
          );
  
@@ -158,50 +134,15 @@ public class WizardCommand implements Command {
                          true)
          );
  
-        configuration.setLlamaCliPath(
-                checkFile(
-                        configuration.getLlamaCliPath(),
-                        "llama.cpp project llama-cli executable file path",
-                        "The llama-cli is commandline engine that runs GGUF language models. " +
-                                "Usually it is located under build/bin/ directory within llama.cpp project.",
-                        true,
-                        true)
-        );
-
-        // Default_temperature
-        Float temperature = configuration.getDefaultTemperature();
-        if (temperature == null || temperature < 0f || temperature > 3f) {
-            configuration.setDefaultTemperature(askFloat(
-                    "Enter default temperature (0-3). Lower => more deterministic, higher => more creative.",
-                    temperature,
-                    0f, 3f, false
-            ));
-        }
-        // Thread_count
-        Integer threadCount = configuration.getThreadCount();
-        if (threadCount == null || threadCount < 1) {
-            int defaultThreadCount = Runtime.getRuntime().availableProcessors() / 2;
-            if (defaultThreadCount < 1) defaultThreadCount = 1;
-            configuration.setThreadCount(askInteger(
-                    "Enter number of CPU threads for AI generation. Typically RAM bandwidth gets saturated " +
-                            "first and becomes bottleneck before all CPU cores can get fully utilized. So for 12 core CPU" +
-                            " it might be enough to set 6 threads. Increasing this number higher yields diminishing returns.",
-                    defaultThreadCount,
-                    1, null, false
+        // Default_timeout_millis
+        Long defaultTimeoutMillis = configuration.getDefaultTimeoutMillis();
+        if (defaultTimeoutMillis == null || defaultTimeoutMillis < 0) {
+            configuration.setDefaultTimeoutMillis(askLong(
+                    "Enter default timeout in milliseconds (0 = no timeout).",
+                    0L,
+                    0L, null, false
              ));
          }
-
-        // Batch thread count
-        Integer batchThreadCount = configuration.getBatchThreadCount();
-        if (batchThreadCount == null || batchThreadCount < 1) {
-            int defaultThreadCount = Runtime.getRuntime().availableProcessors();
-            configuration.setBatchThreadCount(
-                    askInteger(
-                            "\nEnter number of CPU threads for input prompt processing (all cores is often fine).",
-                            defaultThreadCount,
-                            1, null, false
-                    ));
-        }
      }
  
      /**
@@ -266,57 +207,6 @@ public class WizardCommand implements Command {
          }
      }
  
-    /**
-     * Validates a file path and prompts user to fix if needed.
-     * @param file current file value
-     * @param fileName name to display to user
-     * @param mustExist if file must exist (null = no check)
-     * @param explanation message to show user when prompting
-     * @param executable if file must be executable (null = no check)
-     * @return validated file path
-     */
-    private File checkFile(
-            File file,
-            String fileName,
-            String explanation,
-            Boolean mustExist,
-            Boolean executable) {
-
-        while (true) {
-            boolean allOk = true;
-            if (file == null) {
-                System.out.println(fileName + " is not defined.");
-                allOk = false;
-                file = askFile(explanation, null, mustExist, null, null, executable, false);
-            }
-
-            if (mustExist != null && mustExist && !file.exists()) {
-                System.out.println(fileName + " does not exist: " + file.getAbsolutePath());
-                allOk = false;
-                file = askFile(explanation, null, mustExist, null, null, executable, false);
-            }
-
-            if (!file.isFile()) {
-                System.out.println(fileName + " is not a file: " + file.getAbsolutePath());
-                allOk = false;
-                file = askFile(explanation, null, null, null, null, executable, false);
-            }
-
-            if (executable != null && executable && !file.canExecute()) {
-                System.out.println(fileName + " is not executable: " + file.getAbsolutePath());
-                allOk = false;
-                file = askFile(explanation, null, true, null, null, executable, false);
-            }
-
-            if (allOk) {
-                System.out.println(fileName + " is: " + file.getAbsolutePath());
-                return file;
-            }
-
-            configurationUpdated = true;
-        }
-    }
-
      private static File getNewDirectoryPath(String directoryName, Boolean writable) {
          return askDirectory(directoryName, null, null, null, writable, null, false);
      }
@@ -356,126 +246,4 @@ public class WizardCommand implements Command {
          }
      }
  
-    /**
-     * Generates an alias from a .gguf filename by removing non-alphanumeric chars.
-     */
-    private String suggestAlias(String filePath) {
-        String fileName = new File(filePath).getName();
-        // Remove .gguf extension
-        fileName = fileName.replaceFirst("\\.gguf$", "");
-        // Check if it's a split model
-        if (fileName.matches(".*-\\d{5}-of-\\d{5}")) {
-            // Extract base name by removing the part numbers
-            fileName = fileName.replaceFirst("-\\d{5}-of-\\d{5}", "");
-        }
-        // Replace non-alphanumeric characters with hyphens
-        String alias = fileName.replaceAll("[^a-zA-Z0-9]", "-").toLowerCase();
-        // Normalize hyphens and trim leading/trailing hyphens
-        return alias.replaceAll("-+", "-").replaceAll("^-|-$", "");
-    }
-
-    private void fixModelEntries() {
-        File modelsDir = configuration.getModelsDirectory();
-        if (modelsDir == null || !modelsDir.exists() || !modelsDir.isDirectory()) {
-            System.out.println("Models directory is not properly configured. Skipping model checks.");
-            return;
-        }
-
-        List<Model> existingModels = configuration.getModels();
-        if (existingModels == null) {
-            existingModels = new ArrayList<>();
-            configuration.setModels(existingModels);
-            configurationUpdated = true;
-        }
-
-        annotateMissingModels();
-
-        discoverNewModels();
-    }
-
-    private void discoverNewModels() {
-        // List all .gguf files in models directory
-        File[] files = configuration.getModelsDirectory().listFiles((dir, name) -> name.endsWith(".gguf"));
-        if (files == null) return;
-
-        for (File file : files) {
-            String relativePath = configuration.getModelsDirectory().toPath().relativize(file.toPath()).toString();
-            if (isExistingModel(relativePath)) continue;
-
-            processPotentialNewModelFile(file, relativePath);
-        }
-    }
-
-    private boolean isExistingModel(String relativePath) {
-        return configuration.getModels().stream()
-                .anyMatch(m -> m.getFilesystemPath().equals(relativePath));
-    }
-
-    private void processPotentialNewModelFile(File file, String relativePath) {
-        // Check if it's a split model
-        if (isSplitModel(file.getName())) {
-            handleSplitModel(file, relativePath);
-        } else {
-            addNewModel(relativePath);
-        }
-    }
-
-    private boolean isSplitModel(String fileName) {
-        return fileName.matches(".*-\\d{5}-of-\\d{5}\\.gguf");
-    }
-
-    private void handleSplitModel(File file, String relativePath) {
-        String baseName = relativePath.replaceFirst("-\\d{5}-of-\\d{5}\\.gguf", "");
-        if (configuration.getModels().stream().anyMatch(m -> m.getAlias().startsWith(baseName))) {
-            return;
-        }
-
-        // Extract part number
-        String partNumberStr = relativePath.replaceAll(".*-(\\d{5}-of-\\d{5}\\.gguf)", "$1");
-        int partNumber = Integer.parseInt(partNumberStr.split("-of-")[0]);
-        if (partNumber == 1) {
-            addNewModel(relativePath);
-        }
-    }
-
-    private void addNewModel(String relativePath) {
-        Model newModel = getNewModel(relativePath);
-        configuration.getModels().add(newModel);
-        System.out.println("Added new model: " + newModel.getAlias() + " (" + newModel.getFilesystemPath() + ")");
-        configurationUpdated = true;
-        modelsUpdated = true;
-    }
-
-    private Model getNewModel(String relativePath) {
-        String suggestedAlias = suggestAlias(relativePath);
-        Model newModel = new Model();
-        newModel.setAlias(suggestedAlias + "-new");
-        newModel.setFilesystemPath(relativePath);
-        newModel.setContextSizeTokens(32768); // Default context size
-        newModel.setEndOfTextMarker(null);  // Default end marker
-        return newModel;
-    }
-
-    private void annotateMissingModels() {
-        // Process existing models to add/remove -missing suffix
-        for (Model model : configuration.getModels()) {
-            File modelFile = new File(configuration.getModelsDirectory(), model.getFilesystemPath());
-            if (!modelFile.exists()) {
-                if (!model.getAlias().endsWith("-missing")) {
-                    model.setAlias(model.getAlias() + "-missing");
-                    System.out.println("Marked model as missing: " + model.getAlias());
-                    configurationUpdated = true;
-                    modelsUpdated = true;
-                }
-            } else {
-                if (model.getAlias().endsWith("-missing")) {
-                    model.setAlias(model.getAlias().replaceFirst("-missing$", ""));
-                    System.out.println("Removed -missing suffix from model: " + model.getAlias());
-                    configurationUpdated = true;
-                    modelsUpdated = true;
-                }
-            }
-        }
-    }
-
  }
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/Task.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/Task.java

index 1cd2865..c2b7c3b 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/Task.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/Task.java
@@ -13,15 +13,11 @@ public class Task {
  
      /**
       * The system prompt text that sets the context or role for the AI.
-     * This is often used to establish rules or background instructions
-     * for how the assistant should behave.
       */
      public String systemPrompt;
  
      /**
-     * The name of the specific skill or capability that the AI should utilize
-     * when processing the query. This can help determine the focus or purpose
-     * of the response generated by the AI.
+     * The name of the specific skill or capability that the AI should utilize.
       */
      public String skillName;
  
@@ -52,82 +48,15 @@ public class Task {
       */
      public int priority;
  
-
-    /**
-     * Returns a string containing a summary of the {@link Task} object.
-     *
-     * @return a string with the system prompt, user prompt, and model info.
-     */
      @Override
      public String toString() {
-        return "MailQuery{" +
+        return "Task{" +
                  "systemPrompt='" + systemPrompt + '\'' +
                  ", userPrompt='" + userPrompt + '\'' +
                  ", model=" + model +
                  '}';
      }
  
-    /**
-     * Calculate effective temperature based on override hierarchy.
-     */
-    public Float getEffectiveTemperature() {
-        // 1. Skill-specific temperature has priority
-        if (skill != null && skill.getTemperature() != null) return skill.getTemperature();
-
-        // 2. If not in skill, check if model specifies it
-        if (model.getTemperature() != null) return model.getTemperature();
-
-        // 3. Fall back to global default
-        return configuration.getDefaultTemperature();
-    }
-
-    /**
-     * Calculates effective top-p value using hierarchy:
-     * skill-specific → model-specific → global default
-     *
-     * @return the applicable top-p value for this query
-     */
-    public Float getEffectiveTopP() {
-        // Skill-specific has the highest priority
-        if (skill != null && skill.getTopP() != null) return skill.getTopP();
-
-        // Model-specific next
-        if (model.getTopP() != null) return model.getTopP();
-
-        // Global default as fallback
-        return configuration.getDefaultTopP();
-    }
-
-    /**
-     * Calculates effective repeat penalty using hierarchy:
-     * skill-specific → model-specific → global default
-     *
-     * @return the applicable repeat penalty for this query
-     */
-    public Float getEffectiveRepeatPenalty() {
-        // Skill-specific has the highest priority
-        if (skill != null && skill.getRepeatPenalty() != null) return skill.getRepeatPenalty();
-
-        // Model-specific next
-        if (model.getRepeatPenalty() != null) return model.getRepeatPenalty();
-
-        // Global default as fallback
-        return configuration.getDefaultRepeatPenalty();
-    }
-
-
-    public Float getEffectiveTopK() {
-        if (skill != null && skill.getTopK() != null) return skill.getTopK();
-        else if (model.getTopK() != null) return model.getTopK();
-        else return configuration.getDefaultTopK();
-    }
-
-    public Float getEffectiveMinP() {
-        if (skill != null && skill.getMinP() != null) return skill.getMinP();
-        else if (model.getMinP() != null) return model.getMinP();
-        else return configuration.getDefaultMinP();
-    }
-
      /**
       * Calculates the effective timeout in milliseconds using the following hierarchy:
       * <ol>
@@ -135,23 +64,14 @@ public class Task {
       *   <li>Model-specific timeout</li>
       *   <li>Global default timeout (lowest priority)</li>
       * </ol>
-     *
-     * @return the effective timeout in milliseconds, or null if no timeout is configured.
       */
      public Long getEffectiveTimeoutMillis() {
-        // Skill-specific has the highest priority
          if (skill != null && skill.getTimeoutMillis() != null) {
              return skill.getTimeoutMillis();
          }
-
-        // Model-specific next
          if (model.getTimeoutMillis() != null) {
              return model.getTimeoutMillis();
          }
-
-        // Global default as fallback
          return configuration.getDefaultTimeoutMillis();
      }
-
-
  }
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskPriorityQueue.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskPriorityQueue.java

index e64bdb1..06bc811 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskPriorityQueue.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskPriorityQueue.java
@@ -32,6 +32,10 @@ public class TaskPriorityQueue {
          tasks.put(filePath, priority);
      }
  
+    public boolean isEmpty() {
+        return tasks.isEmpty();
+    }
+
      public void remove(Path filePath) {
          tasks.remove(filePath);
      }
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcess.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcess.java

index 9235d3e..66b0ea8 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcess.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcess.java
@@ -1,317 +1,97 @@
  package eu.svjatoslav.alyverkko_cli.commands.task_processor;
  
-import eu.svjatoslav.alyverkko_cli.Utils;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
  
  import java.io.*;
-import java.nio.file.Files;
-import java.util.ArrayList;
-import java.util.concurrent.TimeUnit;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.util.HashMap;
+import java.util.Map;
  
  import static eu.svjatoslav.alyverkko_cli.Main.configuration;
-import static java.lang.String.join;
  
  /**
- *
- * TODO: what if directory disappeared that contained original input file ? Response cannot be written back anymore.
- *
- * <p>Executes AI inference tasks through llama.cpp CLI. This class handles the complete workflow
- * from prompt construction to response formatting, including temporary file management and process execution.
- * <p>Key processing steps:
- * <ol>
- *   <li>Build standardized input prompt</li>
- *   <li>Create a temporary input file</li>
- *   <li>Execute llama.cpp with appropriate parameters</li>
- *   <li>Capture and filter output</li>
- *   <li>Perform cleanup operations</li>
- * </ol>
- * 
- * <p>Temperature settings, context size, and thread counts are all derived from the current configuration.
- * The response is formatted to match org-mode conventions while preserving the original conversation structure.
+ * Executes AI inference tasks through llama-server REST API.
   */
  public class TaskProcess {
  
-    /**
-     * Marker used by llama.cpp to print metadata. We monitor and display these lines.
-     */
-    private static final String LLAMA_CPP_META_INFO_MARKER = "llm_load_print_meta: ";
-
-    /**
-     * The mail query defining system prompt, user prompt, and which model to use.
-     */
      private final Task task;
  
-    /**
-     * Temporary file used as input to the llama.cpp CLI.
-     */
-    private File inputFile;
-
-    /**
-     * Creates a new AI task with a given mail query.
-     *
-     * @param task         the mail query containing model and prompts.
-     */
      public TaskProcess(Task task) {
          this.task = task;
      }
  
-    /**
-     * Builds the prompt text that is fed to llama.cpp, including the system prompt,
-     * the user prompt, and an "ASSISTANT:" marker signifying where the AI response begins.
-     *
-     * @return a string containing the fully prepared query prompt.
-     */
-    private String buildAiQuery() {
-        return task.systemPrompt.replace("<TASK-FILE>", task.userPrompt);
-    }
-
-
-    /**
-     * Runs the AI query by constructing the prompt, writing it to a temp file,
-     * invoking llama.cpp, collecting output, and performing any final cleanup.
-     *
-     * @return the AI's response in a format suitable for appending back into
-     *         the conversation file.
-     * @throws InterruptedException if the process is interrupted.
-     * @throws IOException if reading/writing the file fails or the process fails to start.
-     */
-    public String runAiQuery() throws InterruptedException, IOException {
-        try {
-            // Record the start time of the query
-            task.startTimeMillis = System.currentTimeMillis();
-
-            // Build input prompt
-            initializeInputFile(buildAiQuery());
+    public String runAiQuery() throws IOException {
+        task.startTimeMillis = System.currentTimeMillis();
  
-            // Prepare a process builder
-            ProcessBuilder processBuilder = new ProcessBuilder();
-            processBuilder.command(getCliCommand().split("\\s+")); // Splitting the command string into tokens
+        String serverUrl = configuration.getServerUrl();
+        URL url = new URL(serverUrl + "/v1/chat/completions");
+        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
+        conn.setRequestMethod("POST");
+        conn.setRequestProperty("Content-Type", "application/json");
+        conn.setDoOutput(true);
  
-            // Start process
-            Process process = processBuilder.start();
-
-            // Handle process's error stream
-            handleErrorThread(process);
-
-            // Handle the process's output stream
-            StringBuilder result = new StringBuilder();
-            Thread outputThread = handleResultThread(process, result);
-
-            // Get effective timeout value
-            Long timeoutMillis = task.getEffectiveTimeoutMillis();
-            boolean isTimedOut = false;
-
-            if (timeoutMillis != null && timeoutMillis > 0) {
-                try {
-                    if (!process.waitFor(timeoutMillis, TimeUnit.MILLISECONDS)) {
-                        process.destroyForcibly();
-                        isTimedOut = true;
-                    }
-                } catch (InterruptedException e) {
-                    Thread.currentThread().interrupt();
-                    process.destroyForcibly();
-                    isTimedOut = true;
-                }
-            } else {
-                process.waitFor();
-            }
+        Long timeoutMillis = task.getEffectiveTimeoutMillis();
+        if (timeoutMillis != null && timeoutMillis > 0) {
+            conn.setConnectTimeout(timeoutMillis.intValue());
+            conn.setReadTimeout(timeoutMillis.intValue());
+        }
  
-            // Wait for the output thread to finish reading
-            outputThread.join();
+        String requestBody = buildRequestBody();
+        try (OutputStream os = conn.getOutputStream()) {
+            os.write(requestBody.getBytes(StandardCharsets.UTF_8));
+        }
  
-            // Record the end time of the query
-            task.endTimeMillis = System.currentTimeMillis();
+        int responseCode = conn.getResponseCode();
+        InputStream is = (responseCode >= 200 && responseCode < 300)
+                ? conn.getInputStream()
+                : conn.getErrorStream();
  
-            // Clean up the AI response: remove partial prompt text, end-of-text marker, etc.
-            String cleanedResponse = cleanupAiResponse(result.toString());
-            if (isTimedOut) {
-                cleanedResponse += "\nTERMINATED BY TIMEOUT";
+        StringBuilder response = new StringBuilder();
+        try (BufferedReader reader = new BufferedReader(new InputStreamReader(is, StandardCharsets.UTF_8))) {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                response.append(line);
              }
-
-            return cleanedResponse;
-        } finally {
-            deleteTemporaryFile();
          }
-    }
-
-    /**
-     * Creates a temporary file for the AI input and writes the prompt to it.
-     *
-     * @param aiQuery the final prompt string for the AI to process.
-     * @throws IOException if file creation or writing fails.
-     */
-    private void initializeInputFile(String aiQuery) throws IOException {
-        inputFile = createTemporaryFile();
-        Files.write(inputFile.toPath(), aiQuery.getBytes());
-    }
-
-    /**
-     * Creates a temporary file that will be used for the AI prompt input.
-     *
-     * @return a new {@link File} referencing the created temporary file.
-     * @throws IOException if the file could not be created.
-     */
-    private File createTemporaryFile() throws IOException {
-        File file = Files.createTempFile("ai-inference", ".tmp").toFile();
-        file.deleteOnExit();
-        return file;
-    }
  
-    /**
-     * Cleans up the AI response by removing the partial text before the
-     * AI response marker and after the end-of-text marker, if specified.
-     *
-     * @param result the raw output from llama.cpp.
-     * @return the cleaned AI response.
-     */
-    private String cleanupAiResponse(String result) {
+        task.endTimeMillis = System.currentTimeMillis();
  
-        // remove text after the end of text marker if it exists
-        if (task.model.getEndOfTextMarker() != null) {
-            int endOfTextMarkerIndex = result.indexOf(task.model.getEndOfTextMarker());
-            if (endOfTextMarkerIndex != -1) {
-                result = result.substring(0, endOfTextMarkerIndex);
-            }
+        if (responseCode < 200 || responseCode >= 300) {
+            throw new IOException("HTTP " + responseCode + ": " + response);
          }
  
-        return result;
+        return parseResponse(response.toString());
      }
  
-    /**
-     * Returns the full command string used to run the AI inference via llama.cpp.
-     *
-     * @return a string representing the command and all arguments.
-     */
-    private String getCliCommand() {
-        int niceValue = 10; // niceness level for background tasks
-        String executablePath = configuration.getLlamaCliPath().getAbsolutePath();
-
-        ArrayList <String> args = new ArrayList<>();
-        args.add("nice -n " + niceValue);
-        args.add(executablePath);
-        args.add("--model " +  configuration.getModelFullFilesystemPath(task.model));
-        args.add("--threads " + configuration.getThreadCount());
-        args.add("--threads-batch " + configuration.getBatchThreadCount());
+    private String buildRequestBody() throws IOException {
+        Map<String, Object> body = new HashMap<>();
+        body.put("model", task.model.getAlias());
  
-        Float topP = task.getEffectiveTopP();
-        if (topP != null) args.add("--top-p " + topP);
+        Map<String, String> systemMessage = new HashMap<>();
+        systemMessage.put("role", "system");
+        systemMessage.put("content", task.systemPrompt);
  
-        Float topK = task.getEffectiveTopK();
-        if (topK != null) args.add("--top-k " + topK);
+        Map<String, String> userMessage = new HashMap<>();
+        userMessage.put("role", "user");
+        userMessage.put("content", task.userPrompt);
  
-        Float minP = task.getEffectiveMinP();
-        if (minP != null) args.add("--min-p " + minP);
-
-        Float repetitionPenalty = task.getEffectiveRepeatPenalty();
-        if (repetitionPenalty != null) args.add("--repeat-penalty " + repetitionPenalty);
-
-        args.add("--repeat-last-n 512");
-
-        args.add("--mirostat 0");
-        args.add("--no-display-prompt");
-        args.add("--no-warmup");
-        args.add("--flash-attn on");
-
-        // By default, llama.cpp converts escape sequence like "\n" into newline before feeding it to AI.
-        // This causes issues if your input to AI is a computer program that has those escape codes within strings.
-        // So escaping must be disabled.
-        args.add("--no-escape");
-
-        Float temperature = task.getEffectiveTemperature();
-        if (temperature != null) args.add("--temp " + task.getEffectiveTemperature());
-
-        args.add("--ctx-size " + task.model.getContextSizeTokens());
-        args.add("--batch-size 512");
-
-        // Maps AI model from filesystem to RAM without preloading it in advance.
-        // Reduces RAM usage and speeds up startup.
-        args.add("--mmap");
-
-        args.add("--single-turn");
-        args.add("-n -1");
-        args.add("--file " + inputFile);
-
-        return join(" ", args);
-
-        // "--cache-type-k q8_0",
-        // might save RAM, need to test if precision loss is acceptable
-
-        // might save RAM, need to test it if precision loss is acceptable
-        // "--cache-type-v q8_0",
+        body.put("messages", new Object[]{systemMessage, userMessage});
  
+        ObjectMapper mapper = new ObjectMapper();
+        return mapper.writeValueAsString(body);
      }
  
-    /**
-     * Spawns a new Thread to handle the error stream from llama.cpp,
-     * printing lines that contain metadata or errors to the console.
-     *
-     * @param process the process whose error stream is consumed.
-     */
-    private static void handleErrorThread(Process process) {
-        Thread errorThread = new Thread(() -> {
-            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getErrorStream()))) {
-                String line;
-                while ((line = reader.readLine()) != null) {
-                    handleErrorStreamLine(line);
-                }
-            } catch (IOException e) {
-                System.err.println("Error reading error stream: " + e.getMessage());
-            }
-        });
-        errorThread.start();
-    }
-
-    /**
-     * Decides what to do with each line from the error stream:
-     * if it matches the llama.cpp meta-info marker, print it normally;
-     * otherwise print as an error.
-     *
-     * @param line a line from the llama.cpp error stream.
-     */
-    private static void handleErrorStreamLine(String line) {
-        if (line.startsWith(LLAMA_CPP_META_INFO_MARKER)) {
-            // Print the meta-info to the console in normal color
-            System.out.println(line.substring(LLAMA_CPP_META_INFO_MARKER.length()));
-        } else {
-            // Print actual error lines in red
-            Utils.printRedMessageToConsole(line);
-        }
-    }
-
-    /**
-     * Consumes the standard output (inference result) from the
-     * llama.cpp process, storing it into a result buffer for further
-     * cleanup, while simultaneously printing it to the console.
-     *
-     * @param process the AI inference process.
-     * @param result  a string builder to accumulate the final result.
-     * @return the thread that is reading the output stream.
-     */
-    private static Thread handleResultThread(Process process, StringBuilder result) {
-        Thread outputThread = new Thread(() -> {
-            try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
-                String aiResultLine;
-                while ((aiResultLine = reader.readLine()) != null) {
-                    System.out.print("AI: " + aiResultLine + "\n"); // Show each line in real-time
-                    result.append(aiResultLine).append("\n");
-                }
-            } catch (IOException e) {
-                throw new RuntimeException(e);
-            }
-        });
-        outputThread.start();
-        return outputThread;
-    }
-
-    /**
-     * Deletes the temporary input file once processing is complete.
-     */
-    private void deleteTemporaryFile() {
-        if (inputFile != null && inputFile.exists()) {
-            try {
-                Files.delete(inputFile.toPath());
-            } catch (IOException e) {
-                System.err.println("Failed to delete temporary file: " + e.getMessage());
-            }
+    private String parseResponse(String json) throws IOException {
+        ObjectMapper mapper = new ObjectMapper();
+        JsonNode root = mapper.readTree(json);
+        JsonNode choices = root.path("choices");
+        if (choices.isArray() && choices.size() > 0) {
+            JsonNode message = choices.get(0).path("message");
+            return message.path("content").asText();
          }
+        throw new IOException("Unexpected response format: " + json);
      }
  }
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcessorCommand.java b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcessorCommand.java

index 3789b6f..8644f9e 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcessorCommand.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcessorCommand.java
@@ -5,6 +5,7 @@ import eu.svjatoslav.alyverkko_cli.configuration.Model;
  import eu.svjatoslav.alyverkko_cli.configuration.SkillConfig;
  import eu.svjatoslav.commons.cli_helper.parameter_parser.Parser;
  import eu.svjatoslav.commons.cli_helper.parameter_parser.parameter.FileOption;
+import eu.svjatoslav.commons.cli_helper.parameter_parser.parameter.StringOption;
  
  import java.io.File;
  import java.io.IOException;
@@ -52,6 +53,12 @@ public class TaskProcessorCommand implements Command {
              .addAliases("--config", "-c")
              .mustExist();
  
+    /**
+     * When set, process all pending tasks and exit instead of running forever.
+     */
+    public StringOption onceOption = parser.add(new StringOption("Process all pending tasks and exit"))
+            .addAliases("--once");
+
      /**
       * The WatchService instance for monitoring file system changes in
       * the mail directory.
@@ -111,6 +118,11 @@ public class TaskProcessorCommand implements Command {
              Path nextTask = taskQueue.poll();
              if (nextTask != null) processTask(nextTask);
  
+            if (onceOption.isPresent() && taskQueue.isEmpty()) {
+                System.out.println("--once specified and no more tasks. Exiting.");
+                break;
+            }
+
              // Check for filesystem events
              WatchKey key = directoryWatcher.poll();
  
@@ -238,7 +250,7 @@ public class TaskProcessorCommand implements Command {
              String aiGeneratedResponse = aiTask.runAiQuery();
  
              saveAiResponseToFile(file, task, aiGeneratedResponse);
-        } catch (IOException | InterruptedException | RuntimeException e) {
+        } catch (IOException | RuntimeException e) {
              e.printStackTrace();
          }
      }
@@ -304,7 +316,7 @@ public class TaskProcessorCommand implements Command {
          // Set system prompt
          result.skillName = fileProcessingSettings.getOrDefault("skill", "default");
          SkillConfig skill = configuration.getSkillByName(result.skillName);
-        result.systemPrompt = skill.getPrompt();
+        result.systemPrompt = skill.getSystemPrompt();
          result.skill = skill;
  
          // Set AI model using hierarchy: TOCOMPUTE > skill config > default
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/Configuration.java b/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/Configuration.java

index 018109f..aedb4b4 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/Configuration.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/Configuration.java
@@ -16,8 +16,8 @@ import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
   * This class is serialized to YAML format for user editing and persistence.
   * <p>Configuration parameters include:
   * <ul>
- *   <li>Model and prompt directories</li>
- *   <li>Performance tuning parameters</li>
+ *   <li>Task and prompt directories</li>
+ *   <li>Server connection settings</li>
   *   <li>Model-specific configurations</li>
   * </ul>
   * <p>All paths are resolved relative to the user's home directory by default, but can be customized. The class provides
@@ -33,62 +33,11 @@ public class Configuration {
      private File tasksDirectory;
  
      /**
-     * Directory that contains AI model files in GGUF format.
+     * URL of the running llama.cpp server (OpenAI-compatible API).
+     * Example: http://localhost:8081
       */
-    @JsonProperty("models_directory")
-    private File modelsDirectory;
-
-    /**
-     * The default "temperature" used by the AI for creative/deterministic
-     * tradeoff. Ranges roughly between 0 and 3.
-     */
-    @JsonProperty("default_temperature")
-    private Float defaultTemperature;
-
-    /**
-     * Default top-p value used when not specified elsewhere.
-     * Controls diversity of sampling (0.0-1.0, where 1.0 means no restriction).
-     */
-    @JsonProperty("default_top_p")
-    private Float defaultTopP;
-
-    /**
-     * Default global Top-K value used when not specified elsewhere.
-     */
-    @JsonProperty("default_top_k")
-    private Float defaultTopK = 30f;
-
-    /**
-     * Global minimum-p threshold default (applies if none set).
-     */
-    @JsonProperty("default_min_p")
-    private Float defaultMinP = 0f;
-
-    /**
-     * Default repeat penalty used when not specified elsewhere.
-     * Controls repetition reduction (typically 0.8-2.0, where 1.0 means no penalty).
-     */
-    @JsonProperty("default_repeat_penalty")
-    private Float defaultRepeatPenalty;
-
-    /**
-     * The filesystem path to the llama-cli executable, which processes
-     * AI tasks via llama.cpp.
-     */
-    @JsonProperty("llama_cli_path")
-    private File llamaCliPath;
-
-    /**
-     * Number of CPU threads used for input prompt processing.
-     */
-    @JsonProperty("batch_thread_count")
-    private Integer batchThreadCount;
-
-    /**
-     * Number of CPU threads used for AI inference.
-     */
-    @JsonProperty("thread_count")
-    private Integer threadCount;
+    @JsonProperty("server_url")
+    private String serverUrl = "http://localhost:8081";
  
      /**
       * Directory containing text prompt files. Each file is a separate
@@ -139,7 +88,7 @@ public class Configuration {
      }
  
      public String getModelFullFilesystemPath(Model model) {
-        return new File(modelsDirectory, model.getFilesystemPath()).getAbsolutePath();
+        return model.getFilesystemPath();
      }
  
      public Optional<Model> findModelByAlias(String modelAlias) {
diff --git a/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/SkillConfig.java b/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/SkillConfig.java

index f260938..adb92f8 100644 (file)
--- a/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/SkillConfig.java
+++ b/src/main/java/eu/svjatoslav/alyverkko_cli/configuration/SkillConfig.java
@@ -6,7 +6,8 @@ import lombok.Data;
  @Data
  public class SkillConfig {
  
-    private String prompt;
+    @JsonProperty("system_prompt")
+    private String systemPrompt;
  
      private Float temperature;
author	Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
	Sun, 17 May 2026 20:22:45 +0000 (23:22 +0300)
committer	Svjatoslav Agejenko <svjatoslav@svjatoslav.eu>
	Sun, 17 May 2026 20:22:45 +0000 (23:22 +0300)
doc/examples/skills/default.yaml		patch \| blob \| history
doc/examples/skills/summary.yaml		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/ListModelsCommand.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/WizardCommand.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/Task.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskPriorityQueue.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcess.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/commands/task_processor/TaskProcessorCommand.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/configuration/Configuration.java		patch \| blob \| history
src/main/java/eu/svjatoslav/alyverkko_cli/configuration/SkillConfig.java		patch \| blob \| history