From ab2770bdd919beb278fd55ea9042786773225cad Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Thu, 9 Oct 2025 21:50:39 +0200
Subject: [PATCH] Add documentation for remote node deployment and configuration

---
 README.md                             |  7 ++++-
 docs/getting-started/configuration.md | 38 ++++++++++++++++++++-------
 docs/getting-started/installation.md  |  8 ++++++
 docs/user-guide/api-reference.md      | 33 +++++++++++++++++++++++
 docs/user-guide/managing-instances.md | 24 +++++++++++++----
 docs/user-guide/troubleshooting.md    | 24 +++++++++++++++++
 6 files changed, 118 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 0f27290..b452ebe 100644
--- a/README.md
+++ b/README.md
@@ -23,7 +23,12 @@
 ### ⚡ Smart Operations
 - **Instance Monitoring**: Health checks, auto-restart, log management
 - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-- **Environment Variables**: Set custom environment variables per instance for advanced configuration
+- **Environment Variables**: Set custom environment variables per instance for advanced configuration
+
+### 🔗 Remote Instance Deployment
+- **Remote Node Support**: Deploy instances on remote hosts
+- **Central Management**: Manage remote instances from a single dashboard
+- **Seamless Routing**: Automatic request routing to remote instances
 
 ![Dashboard Screenshot](docs/images/dashboard.png)
 
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
index be4fc6d..c43efc6 100644
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -70,6 +70,10 @@ auth:
   inference_keys: []              # Keys for inference endpoints
   require_management_auth: true   # Require auth for management endpoints
   management_keys: []             # Keys for management endpoints
+
+local_node: "main"                # Name of the local node (default: "main")
+nodes:                            # Node configuration for multi-node deployment
+  main:                           # Default local node (empty config)
 ```
 
 ## Configuration Files
@@ -235,18 +239,32 @@ auth:
   management_keys: []             # List of valid management API keys
 ```
 
-**Environment Variables:**
-- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
-- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
-- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
-- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
+**Environment Variables:**
+- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
+- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
+- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
+- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
 
-## Command Line Options
+### Remote Node Configuration
 
-View all available command line options:
+llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
 
-```bash
-llamactl --help
+```yaml
+local_node: "main"                # Name of the local node (default: "main")
+nodes:                            # Node configuration map
+  main:                           # Local node (empty address means local)
+    address: ""                   # Not used for local node
+    api_key: ""                   # Not used for local node
+  worker1:                        # Remote worker node
+    address: "http://192.168.1.10:8080"
+    api_key: "worker1-api-key"    # Management API key for authentication
 ```
 
-You can also override configuration using command line flags when starting llamactl.
+**Node Configuration Fields:**
+- `local_node`: Specifies which node in the `nodes` map represents the local node
+- `nodes`: Map of node configurations
+  - `address`: HTTP/HTTPS URL of the remote node (empty for local node)
+  - `api_key`: Management API key for authenticating with the remote node
+
+**Environment Variables:**
+- `LLAMACTL_LOCAL_NODE` - Name of the local node
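+
+For example, if several hosts share a similar configuration file, a worker might override only the local node name via the environment instead of editing `local_node` (the node name below is illustrative):
+
+```bash
+# On the worker1 host: treat the "worker1" entry in the nodes map as the local node
+LLAMACTL_LOCAL_NODE=worker1 llamactl
+```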
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index f64146f..04e0dfd 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -157,6 +157,12 @@ cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
 ```
 
+## Remote Node Installation
+
+For deployments with remote nodes:
+- Install llamactl on each node using any of the methods above
+- Configure API keys for authentication between nodes
+
 ## Verification
 
 Verify your installation by checking the version:
@@ -168,3 +174,5 @@ llamactl --version
 ```
 
 ## Next Steps
 
 Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
+
+For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
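+
+As a minimal sketch (node names, addresses, and keys below are illustrative), the worker node needs management authentication enabled, and the main node references the same key in its `nodes` map:
+
+```yaml
+# Worker node (worker1): require management auth so the main node must authenticate
+auth:
+  require_management_auth: true
+  management_keys: ["worker1-api-key"]
+
+# Main node: register the worker with the matching key
+local_node: "main"
+nodes:
+  main:                           # Local node (empty config)
+  worker1:
+    address: "http://192.168.1.10:8080"
+    api_key: "worker1-api-key"
+```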
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
index 26e01e4..472cd0b 100644
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -126,6 +126,7 @@ POST /api/v1/instances/{name}
 - `on_demand_start`: Start instance when receiving requests
 - `idle_timeout`: Idle timeout in minutes
 - `environment`: Environment variables as key-value pairs
+- `nodes`: Array containing a single node name on which to deploy the instance (for remote deployments)
 
 See [Managing Instances](managing-instances.md) for complete configuration options.
 
@@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
   http://localhost:8080/api/v1/instances/my-model
 ```
 
+### Remote Node Instance Example
+
+```bash
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/v1/instances/remote-model \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-api-key" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-2-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
+
+# Check status of remote instance
+curl -H "Authorization: Bearer your-api-key" \
+  http://localhost:8080/api/v1/instances/remote-model
+
+# Use remote instance with OpenAI-compatible API
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-inference-api-key" \
+  -d '{
+    "model": "remote-model",
+    "messages": [
+      {"role": "user", "content": "Hello from remote node!"}
+    ]
+  }'
+```
+
 ### Using the Proxy Endpoint
 
 You can also directly proxy requests to the llama-server instance:
diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md
index 824c4fe..b02de2d 100644
--- a/docs/user-guide/managing-instances.md
+++ b/docs/user-guide/managing-instances.md
@@ -39,26 +39,27 @@ Each instance is displayed as a card showing:
 
 1. Click the **"Create Instance"** button on the dashboard
 2. Enter a unique **Name** for your instance (only required field)
-3. **Choose Backend Type**:
+3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
+4. **Choose Backend Type**:
    - **llama.cpp**: For GGUF models using llama-server
    - **MLX**: For MLX-optimized models (macOS only)
    - **vLLM**: For distributed serving and high-throughput inference
-4. Configure model source:
+5. Configure model source:
    - **For llama.cpp**: GGUF model path or HuggingFace repo
    - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
    - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
-5. Configure optional instance management settings:
+6. Configure optional instance management settings:
    - **Auto Restart**: Automatically restart instance on failure
    - **Max Restarts**: Maximum number of restart attempts
    - **Restart Delay**: Delay in seconds between restart attempts
    - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
    - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
    - **Environment Variables**: Set custom environment variables for the instance process
-6. Configure backend-specific options:
+7. Configure backend-specific options:
    - **llama.cpp**: Threads, context size, GPU layers, port, etc.
    - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
    - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
-7. Click **"Create"** to save the instance
+8. Click **"Create"** to save the instance
 
 ### Via API
 
@@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
       "gpu_layers": 32
     }
   }'
+
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/instances/remote-llama \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
 ```
 
 ## Start Instance
@@ -227,3 +240,4 @@ Check the health status of your instances:
 ```bash
 curl http://localhost:8080/api/instances/{name}/proxy/health
 ```
+
diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md
index 5608139..4b7a507 100644
--- a/docs/user-guide/troubleshooting.md
+++ b/docs/user-guide/troubleshooting.md
@@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama
   http://localhost:8080/api/v1/instances
 ```
 
+## Remote Node Issues
+
+### Node Configuration
+
+**Problem:** Remote instances are not visible or cannot be managed
+
+**Solutions:**
+1. **Verify node configuration:**
+   ```yaml
+   local_node: "main"  # Must match a key in nodes map
+   nodes:
+     main:
+       address: ""  # Empty for local node
+     worker1:
+       address: "http://worker1.internal:8080"
+       api_key: "secure-key"  # Must match worker1's management key
+   ```
+
+2. **Test remote node connectivity:**
+   ```bash
+   curl -H "Authorization: Bearer remote-node-key" \
+     http://remote-node:8080/api/v1/instances
+   ```
+
 ## Debugging and Logs
 
 ### Viewing Instance Logs