Merge pull request #60 from lordmathis/lordmathis-patch-1

Update docs.yaml
2025-11-06 00:54:23 +00:00 · 2025-10-09 22:31:38 +02:00 · 2025-10-09 22:29:23 +02:00 · 2025-10-09 22:23:27 +02:00 · 2025-10-09 22:18:53 +02:00 · 2025-10-09 22:10:40 +02:00
42 changed files with 2680 additions and 912 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,45 @@
+# Git and version control
+.git/
+.gitignore
+
+# Documentation
+*.md
+docs/
+
+# Development files
+.vscode/
+.idea/
+
+# Build artifacts
+webui/node_modules/
+webui/dist/
+webui/.next/
+*.log
+*.tmp
+
+# Data directories
+data/
+models/
+logs/
+
+# Test files
+*_test.go
+**/*_test.go
+
+# CI/CD
+.github/
+
+# Local configuration
+llamactl.yaml
+config.yaml
+.env
+.env.local
+
+# OS files
+.DS_Store
+Thumbs.db
+
+# Backup files
+*.bak
+*.backup
+*~
--- a/.github/workflows/codeql.yaml
+++ b/.github/workflows/codeql.yaml
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,4 +1,4 @@
-name: Build and Deploy Documentation
+name: User Docs

 on:
  push:
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # llamactl

-![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
+![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)

 **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**

@@ -25,6 +25,11 @@
 - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
 - **Environment Variables**: Set custom environment variables per instance for advanced configuration

+### 🔗 Remote Instance Deployment
+- **Remote Node Support**: Deploy instances on remote hosts
+- **Central Management**: Manage remote instances from a single dashboard
+- **Seamless Routing**: Automatic request routing to remote instances  
+
 ![Dashboard Screenshot](docs/images/dashboard.png)

 ## Quick Start
@@ -95,7 +100,30 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```

-### Option 2: Build from Source
+### Option 2: Docker (No local backend installation required)
+
+```bash
+# Clone repository and build Docker images
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+mkdir -p data/llamacpp data/vllm models
+
+# Build and start llamactl with llama.cpp CUDA backend
+docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
+
+# Build and start llamactl with vLLM CUDA backend
+docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
+
+# Build from source using multi-stage build
+docker build -f docker/Dockerfile.source -t llamactl:source .
+```
+
+**Features:** CUDA support, automatic latest release installation, no backend dependencies.
+**Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
+
+For detailed Docker setup and configuration, see the [Installation Guide](docs/getting-started/installation.md).
+
+### Option 3: Build from Source
 Requires Go 1.24+ and Node.js 22+
 ```bash
 git clone https://github.com/lordmathis/llamactl.git
@@ -147,9 +175,9 @@ pip install vllm
 # Or use Docker - no local installation required
 ```

-## Docker Support
+## Backend Docker Support

-llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
+llamactl can run backends in Docker containers:

 ```yaml
 backends:
@@ -174,6 +202,7 @@ server:
  host: "0.0.0.0"                # Server host to bind to
  port: 8080                     # Server port to bind to
  allowed_origins: ["*"]         # Allowed CORS origins (default: all)
+  allowed_headers: ["*"]         # Allowed CORS headers (default: all)
  enable_swagger: false          # Enable Swagger UI for API docs

 backends:
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -58,7 +58,7 @@ func main() {
 	}

 	// Initialize the instance manager
-	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
+	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes)

 	// Create a new handler with the instance manager
 	handler := server.NewHandler(instanceManager, cfg)
--- a/docker/Dockerfile.llamacpp
+++ b/docker/Dockerfile.llamacpp
@@ -0,0 +1,23 @@
+FROM ghcr.io/ggml-org/llama.cpp:server-cuda
+
+# Use bash with pipefail so a failed download aborts the build; install curl for fetching llamactl
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/*
+
+# Download and install the latest llamactl release (-f makes curl fail on HTTP errors such as API rate limits)
+RUN LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
+    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
+    mv llamactl /usr/local/bin/ && \
+    chmod +x /usr/local/bin/llamactl
+
+# Set working directory (WORKDIR creates /data if it does not exist)
+WORKDIR /data
+
+# Expose the default llamactl port
+EXPOSE 8080
+
+ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
+ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
+
+# Set llamactl as the entrypoint
+ENTRYPOINT ["llamactl"]
--- a/docker/Dockerfile.source
+++ b/docker/Dockerfile.source
@@ -0,0 +1,64 @@
+# WebUI build stage (building the web UI requires Node.js 22+)
+FROM node:22-alpine AS webui-builder
+
+WORKDIR /webui
+
+# Copy webui package files
+COPY webui/package*.json ./
+
+# Install dependencies
+RUN npm ci
+
+# Copy webui source
+COPY webui/ ./
+
+# Build webui
+RUN npm run build
+
+# Go build stage
+FROM golang:1.24-alpine AS builder
+
+# Install build dependencies
+RUN apk add --no-cache git ca-certificates
+
+# Set working directory
+WORKDIR /build
+
+# Copy go mod files
+COPY go.mod go.sum ./
+
+# Download dependencies
+RUN go mod download
+
+# Copy source code
+COPY cmd/ ./cmd/
+COPY pkg/ ./pkg/
+COPY apidocs/ ./apidocs/
+COPY webui/webui.go ./webui/
+
+# Copy built webui from webui-builder
+COPY --from=webui-builder /webui/dist ./webui/dist
+
+# Build the application
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
+
+# Final stage (base image pinned for reproducible builds)
+FROM alpine:3.21
+
+# Install runtime dependencies
+RUN apk --no-cache add ca-certificates
+
+# Create data directory
+RUN mkdir -p /data
+
+# Set working directory
+WORKDIR /data
+
+# Copy binary from builder
+COPY --from=builder /build/llamactl /usr/local/bin/llamactl
+
+# Expose the default port
+EXPOSE 8080
+
+# Set llamactl as the entrypoint
+ENTRYPOINT ["llamactl"]
--- a/docker/Dockerfile.vllm
+++ b/docker/Dockerfile.vllm
@@ -0,0 +1,20 @@
+FROM vllm/vllm-openai:latest
+
+# Use bash with pipefail so a failed download aborts the build; install curl for fetching llamactl
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl && rm -rf /var/lib/apt/lists/*
+
+# Download and install the latest llamactl release (-f makes curl fail on HTTP errors such as API rate limits)
+RUN LATEST_VERSION=$(curl -fsSL https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
+    curl -fsSL "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
+    mv llamactl /usr/local/bin/ && \
+    chmod +x /usr/local/bin/llamactl
+
+# Set working directory (WORKDIR creates /data if it does not exist)
+WORKDIR /data
+
+# Expose the default llamactl port
+EXPOSE 8080
+
+# Set llamactl as the entrypoint
+ENTRYPOINT ["llamactl"]
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -0,0 +1,56 @@
+version: '3.8'
+
+services:
+  llamactl-llamacpp:
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.llamacpp
+    image: llamactl:llamacpp-cuda
+    container_name: llamactl-llamacpp
+    ports:
+      - "8080:8080"
+    volumes:
+      - ../data/llamacpp:/data  # Paths are relative to this file; ../ is the repository root
+      - ../models:/models  # Mount models directory
+      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
+    environment:
+      # Set data directory for persistence
+      - LLAMACTL_DATA_DIR=/data
+      # Docker mode for nested backend containers (set to true if needed)
+      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
+
+  llamactl-vllm:
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.vllm
+    image: llamactl:vllm-cuda
+    container_name: llamactl-vllm
+    ports:
+      - "8081:8080"  # Use different port to avoid conflicts
+    volumes:
+      - ../data/vllm:/data  # Paths are relative to this file; ../ is the repository root
+      - ../models:/models  # Mount models directory
+      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
+    environment:
+      # Set data directory for persistence
+      - LLAMACTL_DATA_DIR=/data
+      # Docker mode for nested backend containers (set to true if needed)
+      - LLAMACTL_VLLM_DOCKER_ENABLED=false
+      # Expose all GPUs (CUDA_VISIBLE_DEVICES only accepts device indices or UUIDs, not "all")
+      - NVIDIA_VISIBLE_DEVICES=all
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+    restart: unless-stopped
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -17,6 +17,7 @@ server:
  host: "0.0.0.0"                # Server host to bind to
  port: 8080                     # Server port to bind to
  allowed_origins: ["*"]         # Allowed CORS origins (default: all)
+  allowed_headers: ["*"]         # Allowed CORS headers (default: all)
  enable_swagger: false          # Enable Swagger UI for API docs

 backends:
@@ -29,6 +30,7 @@ backends:
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
+    response_headers: {}         # Additional response headers to send with responses

  vllm:
    command: "vllm"
@@ -39,11 +41,13 @@ backends:
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
+    response_headers: {}         # Additional response headers to send with responses

  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {}              # Environment variables for the backend process
+    response_headers: {}         # Additional response headers to send with responses

 instances:
  port_range: [8000, 9000]       # Port range for instances
@@ -66,6 +70,10 @@ auth:
  inference_keys: []             # Keys for inference endpoints
  require_management_auth: true  # Require auth for management endpoints
  management_keys: []            # Keys for management endpoints
+
+local_node: "main"               # Name of the local node (default: "main")
+nodes:                           # Node configuration for multi-node deployment
+  main:                          # Default local node (empty config)
 ```

 ## Configuration Files
@@ -101,6 +109,7 @@ server:
  host: "0.0.0.0"         # Server host to bind to (default: "0.0.0.0")
  port: 8080              # Server port to bind to (default: 8080)
  allowed_origins: ["*"]  # CORS allowed origins (default: ["*"])
+  allowed_headers: ["*"]  # CORS allowed headers (default: ["*"])
  enable_swagger: false   # Enable Swagger UI (default: false)
 ```

@@ -122,34 +131,40 @@ backends:
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
+    response_headers: {}         # Additional response headers to send with responses

  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {}              # Environment variables for the backend process
    docker:
-      enabled: false
+      enabled: false             # Enable Docker runtime (default: false)
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
+    response_headers: {}         # Additional response headers to send with responses

  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {}              # Environment variables for the backend process
    # MLX does not support Docker
+    response_headers: {}         # Additional response headers to send with responses
 ```

 **Backend Configuration Fields:**
 - `command`: Executable name/path for the backend
 - `args`: Default arguments prepended to all instances
 - `environment`: Environment variables for the backend process (optional)
+- `response_headers`: Additional response headers to send with responses (optional)
 - `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)

+> If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required for NGINX to stream responses properly without buffering.
+
 **Environment Variables:**

 **LlamaCpp Backend:**
@@ -160,6 +175,7 @@ backends:
 - `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"

 **VLLM Backend:**
 - `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -169,11 +185,13 @@ backends:
 - `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"

 **MLX Backend:**
 - `LLAMACTL_MLX_COMMAND` - MLX executable command
 - `LLAMACTL_MLX_ARGS` - Space-separated default arguments
 - `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"

 ### Instance Configuration

@@ -227,12 +245,26 @@ auth:
 - `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
 - `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys

-## Command Line Options
+### Remote Node Configuration

-View all available command line options:
+llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.

-```bash
-llamactl --help
+```yaml
+local_node: "main"               # Name of the local node (default: "main")
+nodes:                           # Node configuration map
+  main:                          # Local node (empty address means local)
+    address: ""                  # Not used for local node
+    api_key: ""                  # Not used for local node
+  worker1:                       # Remote worker node
+    address: "http://192.168.1.10:8080"
+    api_key: "worker1-api-key"   # Management API key for authentication
 ```

-You can also override configuration using command line flags when starting llamactl.
+**Node Configuration Fields:**
+- `local_node`: Specifies which node in the `nodes` map represents the local node
+- `nodes`: Map of node configurations
+  - `address`: HTTP/HTTPS URL of the remote node (empty for local node)
+  - `api_key`: Management API key for authenticating with the remote node
+
+**Environment Variables:**
+- `LLAMACTL_LOCAL_NODE` - Name of the local node
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -71,7 +71,72 @@ sudo mv llamactl /usr/local/bin/
 # Windows - Download from releases page
 ```

-### Option 2: Build from Source
+### Option 2: Docker
+
+llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
+
+**Available Dockerfiles (CUDA):**
+- **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
+- **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
+- **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
+
+**Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
+
+#### Using Docker Compose
+
+```bash
+# Clone the repository
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl
+
+# Create directories for data and models
+mkdir -p data/llamacpp data/vllm models
+
+# Start llamactl with llama.cpp backend
+docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
+
+# Or start llamactl with vLLM backend
+docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
+```
+
+Access the dashboard at:
+- llamactl with llama.cpp: http://localhost:8080
+- llamactl with vLLM: http://localhost:8081
+
+#### Using Docker Build and Run
+
+**llamactl with llama.cpp CUDA:**
+```bash
+docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
+docker run -d \
+  --name llamactl-llamacpp \
+  --gpus all \
+  -p 8080:8080 \
+  -v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
+  llamactl:llamacpp-cuda
+```
+
+**llamactl with vLLM CUDA:**
+```bash
+docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
+docker run -d \
+  --name llamactl-vllm \
+  --gpus all \
+  -p 8080:8080 \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
+  llamactl:vllm-cuda
+```
+
+**llamactl built from source:**
+```bash
+docker build -f docker/Dockerfile.source -t llamactl:source .
+docker run -d \
+  --name llamactl \
+  -p 8080:8080 \
+  llamactl:source
+```
+
+### Option 3: Build from Source

 Requirements:
 - Go 1.24 or later
@@ -92,6 +157,12 @@ cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
 ```

+## Remote Node Installation
+
+For deployments with remote nodes:
+- Install llamactl on each node using any of the methods above
+- Configure API keys for authentication between nodes
+
 ## Verification

 Verify your installation by checking the version:
@@ -103,3 +174,5 @@ llamactl --version
 ## Next Steps

 Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
+
+For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -126,6 +126,7 @@ POST /api/v1/instances/{name}
 - `on_demand_start`: Start instance when receiving requests
 - `idle_timeout`: Idle timeout in minutes
 - `environment`: Environment variables as key-value pairs
+- `nodes`: Array containing a single node name to deploy the instance to (for remote deployments)

 See [Managing Instances](managing-instances.md) for complete configuration options.

@@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model
 ```

+### Remote Node Instance Example
+
+```bash
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/v1/instances/remote-model \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-api-key" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-2-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
+
+# Check status of remote instance
+curl -H "Authorization: Bearer your-api-key" \
+  http://localhost:8080/api/v1/instances/remote-model
+
+# Use remote instance with OpenAI-compatible API
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-inference-api-key" \
+  -d '{
+    "model": "remote-model",
+    "messages": [
+      {"role": "user", "content": "Hello from remote node!"}
+    ]
+  }'
+```
+
 ### Using the Proxy Endpoint

 You can also directly proxy requests to the llama-server instance:
--- a/docs/user-guide/managing-instances.md
+++ b/docs/user-guide/managing-instances.md
@@ -39,26 +39,27 @@ Each instance is displayed as a card showing:

 1. Click the **"Create Instance"** button on the dashboard
 2. Enter a unique **Name** for your instance (only required field)
-3. **Choose Backend Type**:
+3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
+4. **Choose Backend Type**:
    - **llama.cpp**: For GGUF models using llama-server
    - **MLX**: For MLX-optimized models (macOS only)
    - **vLLM**: For distributed serving and high-throughput inference
-4. Configure model source:
+5. Configure model source:
    - **For llama.cpp**: GGUF model path or HuggingFace repo
    - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
    - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
-5. Configure optional instance management settings:
+6. Configure optional instance management settings:
    - **Auto Restart**: Automatically restart instance on failure
    - **Max Restarts**: Maximum number of restart attempts
    - **Restart Delay**: Delay in seconds between restart attempts
    - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
    - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
    - **Environment Variables**: Set custom environment variables for the instance process
-6. Configure backend-specific options:
+7. Configure backend-specific options:
    - **llama.cpp**: Threads, context size, GPU layers, port, etc.
    - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
    - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
-7. Click **"Create"** to save the instance  
+8. Click **"Create"** to save the instance  

 ### Via API

@@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
      "gpu_layers": 32
    }
  }'
+
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/instances/remote-llama \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
 ```

 ## Start Instance
@@ -227,3 +240,4 @@ Check the health status of your instances:
 ```bash
 curl http://localhost:8080/api/instances/{name}/proxy/health
 ```
+
--- a/docs/user-guide/troubleshooting.md
+++ b/docs/user-guide/troubleshooting.md
@@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama.
     http://localhost:8080/api/v1/instances
   ```

+## Remote Node Issues
+
+### Node Configuration
+
+**Problem:** Remote instances not appearing or cannot be managed
+
+**Solutions:**
+1. **Verify node configuration:**
+   ```yaml
+   local_node: "main"  # Must match a key in nodes map
+   nodes:
+     main:
+       address: ""     # Empty for local node
+     worker1:
+       address: "http://worker1.internal:8080"
+       api_key: "secure-key"  # Must match worker1's management key
+   ```
+
+2. **Test remote node connectivity:**
+   ```bash
+   curl -H "Authorization: Bearer remote-node-key" \
+     http://remote-node:8080/api/v1/instances
+   ```
+
 ## Debugging and Logs

 ### Viewing Instance Logs
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -17,6 +17,7 @@ type BackendSettings struct {
 	Args            []string          `yaml:"args"`
 	Environment     map[string]string `yaml:"environment,omitempty"`
 	Docker          *DockerSettings   `yaml:"docker,omitempty"`
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }

 // DockerSettings contains Docker-specific configuration
@@ -40,6 +41,8 @@ type AppConfig struct {
 	Backends   BackendConfig         `yaml:"backends"`
 	Instances  InstancesConfig       `yaml:"instances"`
 	Auth       AuthConfig            `yaml:"auth"`
+	LocalNode  string                `yaml:"local_node,omitempty"`
+	Nodes      map[string]NodeConfig `yaml:"nodes,omitempty"`
 	Version    string                `yaml:"-"`
 	CommitHash string                `yaml:"-"`
 	BuildTime  string                `yaml:"-"`
@@ -56,8 +59,14 @@ type ServerConfig struct {
 	// Allowed origins for CORS (e.g., "http://localhost:3000")
 	AllowedOrigins []string `yaml:"allowed_origins"`

+	// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
+	AllowedHeaders []string `yaml:"allowed_headers"`
+
 	// Enable Swagger UI for API documentation
 	EnableSwagger bool `yaml:"enable_swagger"`
+
+	// Response headers to send with responses
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }

 // InstancesConfig contains instance management configuration
@@ -121,6 +130,11 @@ type AuthConfig struct {
 	ManagementKeys []string `yaml:"management_keys"`
 }

+type NodeConfig struct {
+	Address string `yaml:"address"`           // HTTP/HTTPS URL of the node; empty for the local node
+	APIKey  string `yaml:"api_key,omitempty"` // Management API key used to authenticate with the node
+}
+
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
@@ -132,8 +146,13 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			Host:           "0.0.0.0",
 			Port:           8080,
 			AllowedOrigins: []string{"*"}, // Default to allow all origins
+			AllowedHeaders: []string{"*"}, // Default to allow all headers
 			EnableSwagger:  false,
 		},
+		LocalNode: "main",
+		Nodes: map[string]NodeConfig{
+			"main": {}, // Local node with empty config
+		},
 		Backends: BackendConfig{
 			LlamaCpp: BackendSettings{
 				Command:     "llama-server",
@@ -337,6 +356,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
+			cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
+	}

 	// vLLM backend
 	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +405,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.VLLM.ResponseHeaders == nil {
+			cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
+	}

 	// MLX backend
 	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +425,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.MLX.ResponseHeaders == nil {
+			cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
+	}

 	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -443,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
 	if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
 		cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
 	}
+
+	// Local node config
+	if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
+		cfg.LocalNode = localNode
+	}
 }

 // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -481,6 +523,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
 	}
 }

+// parseHeaders parses response headers in format "KEY1=value1;KEY2=value2" into envMap.
+// Names and values are trimmed; pairs without "=" or with an empty name are skipped.
+func parseHeaders(envString string, envMap map[string]string) {
+	for _, headerPair := range strings.Split(envString, ";") {
+		name, value, found := strings.Cut(headerPair, "=")
+		name = strings.TrimSpace(name)
+		if !found || name == "" {
+			continue // malformed pair: no "=" separator or no header name
+		}
+		envMap[name] = strings.TrimSpace(value)
+	}
+}
+
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
 	switch runtime.GOOS {
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
 		t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
 	}
 }
+
+func TestLoadConfig_LocalNode(t *testing.T) {
+	t.Run("default local node", func(t *testing.T) {
+		cfg, err := config.LoadConfig("nonexistent-file.yaml")
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "main" {
+			t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
+		}
+	})
+
+	t.Run("local node from file", func(t *testing.T) {
+		tempDir := t.TempDir()
+		configFile := filepath.Join(tempDir, "test-config.yaml")
+
+		configContent := `
+local_node: "worker1"
+nodes:
+  worker1:
+    address: ""
+  worker2:
+    address: "http://192.168.1.10:8080"
+    api_key: "test-key"
+`
+
+		err := os.WriteFile(configFile, []byte(configContent), 0644)
+		if err != nil {
+			t.Fatalf("Failed to write test config file: %v", err)
+		}
+
+		cfg, err := config.LoadConfig(configFile)
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "worker1" {
+			t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
+		}
+
+		// Verify nodes map (includes default "main" + worker1 + worker2)
+		if len(cfg.Nodes) != 3 {
+			t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
+		}
+
+		// Verify local node exists and is empty
+		localNode, exists := cfg.Nodes["worker1"]
+		if !exists {
+			t.Error("Expected local node 'worker1' to exist in nodes map")
+		}
+		if localNode.Address != "" {
+			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
+		}
+		if localNode.APIKey != "" {
+			t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
+		}
+
+		// Verify remote node
+		remoteNode, exists := cfg.Nodes["worker2"]
+		if !exists {
+			t.Error("Expected remote node 'worker2' to exist in nodes map")
+		}
+		if remoteNode.Address != "http://192.168.1.10:8080" {
+			t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
+		}
+
+		// Verify default main node still exists
+		_, exists = cfg.Nodes["main"]
+		if !exists {
+			t.Error("Expected default 'main' node to still exist in nodes map")
+		}
+	})
+
+	t.Run("custom local node name in config", func(t *testing.T) {
+		tempDir := t.TempDir()
+		configFile := filepath.Join(tempDir, "test-config.yaml")
+
+		configContent := `
+local_node: "primary"
+nodes:
+  primary:
+    address: ""
+  worker1:
+    address: "http://192.168.1.10:8080"
+`
+
+		err := os.WriteFile(configFile, []byte(configContent), 0644)
+		if err != nil {
+			t.Fatalf("Failed to write test config file: %v", err)
+		}
+
+		cfg, err := config.LoadConfig(configFile)
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "primary" {
+			t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
+		}
+
+		// Verify nodes map includes default "main" + primary + worker1
+		if len(cfg.Nodes) != 3 {
+			t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
+		}
+
+		localNode, exists := cfg.Nodes["primary"]
+		if !exists {
+			t.Error("Expected local node 'primary' to exist in nodes map")
+		}
+		if localNode.Address != "" {
+			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
+		}
+	})
+
+	t.Run("local node from environment variable", func(t *testing.T) {
+		// t.Setenv restores the previous value (or unsets it) when the subtest finishes
+		t.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
+
+		cfg, err := config.LoadConfig("nonexistent-file.yaml")
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "custom-node" {
+			t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
+		}
+	})
+}
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -171,6 +171,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 		return nil, fmt.Errorf("instance %s has no options set", i.Name)
 	}

+	// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
+	if len(i.options.Nodes) > 0 {
+		return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
+	}
+
 	var host string
 	var port int
 	switch i.options.BackendType {
@@ -198,6 +203,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {

 	proxy := httputil.NewSingleHostReverseProxy(targetURL)

+	var responseHeaders map[string]string
+	switch i.options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
+	case backends.BackendTypeVllm:
+		responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
+	case backends.BackendTypeMlxLm:
+		responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
+	}
 	proxy.ModifyResponse = func(resp *http.Response) error {
 		// Remove CORS headers from llama-server response to avoid conflicts
 		// llamactl will add its own CORS headers
@@ -207,6 +221,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 		resp.Header.Del("Access-Control-Allow-Credentials")
 		resp.Header.Del("Access-Control-Max-Age")
 		resp.Header.Del("Access-Control-Expose-Headers")
+
+		for key, value := range responseHeaders {
+			resp.Header.Set(key, value)
+		}
 		return nil
 	}

@@ -272,5 +290,24 @@ func (i *Process) UnmarshalJSON(data []byte) error {
 		i.options = aux.Options
 	}

+	// Initialize fields that are not serialized
+	if i.timeProvider == nil {
+		i.timeProvider = realTimeProvider{}
+	}
+	if i.logger == nil && i.globalInstanceSettings != nil {
+		i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
+	}
+
 	return nil
 }
+
+// IsRemote reports whether the instance's options name at least one node.
+// It returns false when the instance has no options set.
+func (i *Process) IsRemote() bool {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+
+	return i.options != nil && len(i.options.Nodes) > 0
+}
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"log"
 	"net/http"
+	"os"
 	"os/exec"
 	"runtime"
 	"syscall"
@@ -37,6 +38,9 @@ func (i *Process) Start() error {
 	// Initialize last request time to current time when starting
 	i.lastRequestTime.Store(i.timeProvider.Now().Unix())

+	// Create context before building command (needed for CommandContext)
+	i.ctx, i.cancel = context.WithCancel(context.Background())
+
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
@@ -47,8 +51,6 @@ func (i *Process) Start() error {
 	if cmdErr != nil {
 		return fmt.Errorf("failed to build command: %w", cmdErr)
 	}
-
-	i.ctx, i.cancel = context.WithCancel(context.Background())
 	i.cmd = cmd

 	if runtime.GOOS != "windows" {
@@ -383,7 +385,11 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {

 	// Create the exec.Cmd
 	cmd := exec.CommandContext(i.ctx, command, args...)
-	cmd.Env = []string{}
+
+	// Start with host environment variables
+	cmd.Env = os.Environ()
+
+	// Add/override with backend-specific environment variables
 	for k, v := range env {
 		cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
 	}
--- a/pkg/instance/options.go
+++ b/pkg/instance/options.go
@@ -27,6 +27,8 @@ type CreateInstanceOptions struct {
 	BackendType    backends.BackendType `json:"backend_type"`
 	BackendOptions map[string]any       `json:"backend_options,omitempty"`

+	Nodes []string `json:"nodes,omitempty"`
+
 	// Backend-specific options
 	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
 	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -6,6 +6,7 @@ import (
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"log"
+	"net/http"
 	"os"
 	"path/filepath"
 	"strings"
@@ -25,10 +26,22 @@ type InstanceManager interface {
 	StopInstance(name string) (*instance.Process, error)
 	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Process, error)
-	GetInstanceLogs(name string) (string, error)
+	GetInstanceLogs(name string, numLines int) (string, error)
 	Shutdown()
 }

+// RemoteManager describes the instance operations that are addressed to a
+// specific node. Every method receives the configuration of the target node.
+type RemoteManager interface {
+	// Queries
+	ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
+	GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
+
+	// Create, update and delete
+	CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	DeleteRemoteInstance(node *config.NodeConfig, name string) error
+
+	// Lifecycle
+	StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+}
+
 type instanceManager struct {
 	mu               sync.RWMutex
 	instances        map[string]*instance.Process
@@ -42,13 +55,26 @@ type instanceManager struct {
 	shutdownChan   chan struct{}
 	shutdownDone   chan struct{}
 	isShutdown     bool
+
+	// Remote instance management
+	httpClient        *http.Client
+	instanceNodeMap   map[string]*config.NodeConfig // Maps instance name to its node config
+	nodeConfigMap     map[string]*config.NodeConfig // Maps node name to node config for quick lookup
 }

 // NewInstanceManager creates a new instance of InstanceManager.
-func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
+func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
+
+	// Build node config map for quick lookup
+	nodeConfigMap := make(map[string]*config.NodeConfig)
+	for name := range nodesConfig {
+		nodeCopy := nodesConfig[name]
+		nodeConfigMap[name] = &nodeCopy
+	}
+
 	im := &instanceManager{
 		instances:        make(map[string]*instance.Process),
 		runningInstances: make(map[string]struct{}),
@@ -59,6 +85,13 @@ func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig con
 		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
 		shutdownChan:   make(chan struct{}),
 		shutdownDone:   make(chan struct{}),
+
+		httpClient: &http.Client{
+			Timeout: 30 * time.Second,
+		},
+
+		instanceNodeMap: make(map[string]*config.NodeConfig),
+		nodeConfigMap:   nodeConfigMap,
 	}

 	// Load existing instances from disk
@@ -238,18 +271,36 @@ func (im *instanceManager) loadInstance(name, path string) error {
 		return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
 	}

-	statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
+	options := persistedInstance.GetOptions()
+
+	// Check if this is a remote instance
+	isRemote := options != nil && len(options.Nodes) > 0
+
+	var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
+	if !isRemote {
+		// Only set status callback for local instances
+		statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
 			im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
 		}
+	}

 	// Create new inst using NewInstance (handles validation, defaults, setup)
-	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)

 	// Restore persisted fields that NewInstance doesn't set
 	inst.Created = persistedInstance.Created
 	inst.SetStatus(persistedInstance.Status)

-	// Check for port conflicts and add to maps
+	// Handle remote instance mapping
+	if isRemote {
+		nodeName := options.Nodes[0]
+		nodeConfig, exists := im.nodeConfigMap[nodeName]
+		if !exists {
+			return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
+		}
+		im.instanceNodeMap[name] = nodeConfig
+	} else {
+		// Check for port conflicts only for local instances
 		if inst.GetPort() > 0 {
 			port := inst.GetPort()
 			if im.ports[port] {
@@ -257,34 +308,58 @@ func (im *instanceManager) loadInstance(name, path string) error {
 			}
 			im.ports[port] = true
 		}
+	}

 	im.instances[name] = inst
 	return nil
 }

 // autoStartInstances starts instances that were running when persisted and have auto-restart enabled
+// For instances with auto-restart disabled, it sets their status to Stopped
 func (im *instanceManager) autoStartInstances() {
 	im.mu.RLock()
 	var instancesToStart []*instance.Process
+	var instancesToStop []*instance.Process
 	for _, inst := range im.instances {
 		if inst.IsRunning() && // Was running when persisted
 			inst.GetOptions() != nil &&
-			inst.GetOptions().AutoRestart != nil &&
-			*inst.GetOptions().AutoRestart {
+			inst.GetOptions().AutoRestart != nil {
+			if *inst.GetOptions().AutoRestart {
 				instancesToStart = append(instancesToStart, inst)
+			} else {
+				// Instance was running but auto-restart is disabled, mark as stopped
+				instancesToStop = append(instancesToStop, inst)
+			}
 		}
 	}
 	im.mu.RUnlock()

+	// Stop instances that have auto-restart disabled
+	for _, inst := range instancesToStop {
+		log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
+		inst.SetStatus(instance.Stopped)
+	}
+
+	// Start instances that have auto-restart enabled
 	for _, inst := range instancesToStart {
 		log.Printf("Auto-starting instance %s", inst.Name)
 		// Reset running state before starting (since Start() expects stopped instance)
 		inst.SetStatus(instance.Stopped)
+
+		// Check if this is a remote instance
+		if node := im.getNodeForInstance(inst); node != nil {
+			// Remote instance - use StartRemoteInstance
+			if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
+				log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
+			}
+		} else {
+			// Local instance - call Start() directly
 			if err := inst.Start(); err != nil {
 				log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
 			}
 		}
 	}
+}

 func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
 	im.mu.Lock()
@@ -296,3 +371,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
 		delete(im.runningInstances, name)
 	}
 }
+
+// getNodeForInstance looks up the node configuration recorded for inst.
+// It returns nil when inst is not remote or when instanceNodeMap holds no
+// entry for the instance's name.
+// NOTE(review): instanceNodeMap is read here without taking im.mu; confirm
+// that callers provide whatever synchronization is required.
+func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
+	if inst.IsRemote() {
+		if node, ok := im.instanceNodeMap[inst.Name]; ok {
+			return node
+		}
+	}
+
+	return nil
+}
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
 		TimeoutCheckInterval: 5,
 	}

-	mgr := manager.NewInstanceManager(backendConfig, cfg)
+	mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
 	}

 	// Test instance persistence on creation
-	manager1 := manager.NewInstanceManager(backendConfig, cfg)
+	manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
 	}

 	// Test loading instances from disk
-	manager2 := manager.NewInstanceManager(backendConfig, cfg)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	instances, err := manager2.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
@@ -207,5 +207,68 @@ func createTestManager() manager.InstanceManager {
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
-	return manager.NewInstanceManager(backendConfig, cfg)
+	return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
+}
+
+// TestAutoRestartDisabledInstanceStatus checks that an instance persisted in the
+// Running state with auto-restart disabled is reported as Stopped once a new
+// manager loads it from the instances directory.
+func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
+	tempDir := t.TempDir()
+
+	backendConfig := config.BackendConfig{
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
+		},
+	}
+
+	cfg := config.InstancesConfig{
+		PortRange:            [2]int{8000, 9000},
+		InstancesDir:         tempDir,
+		MaxInstances:         10,
+		TimeoutCheckInterval: 5,
+	}
+
+	// Create first manager and instance with auto-restart disabled
+	manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
+
+	autoRestart := false
+	options := &instance.CreateInstanceOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		AutoRestart: &autoRestart,
+		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			Model: "/path/to/model.gguf",
+			Port:  8080,
+		},
+	}
+
+	inst, err := manager1.CreateInstance("test-instance", options)
+	if err != nil {
+		t.Fatalf("CreateInstance failed: %v", err)
+	}
+
+	// Simulate instance being in running state when persisted
+	// (this would happen if the instance was running when llamactl was stopped)
+	inst.SetStatus(instance.Running)
+
+	// Shutdown first manager
+	manager1.Shutdown()
+
+	// Create second manager (simulating restart of llamactl)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
+	// Deferred so the second manager is shut down even when a check below
+	// aborts the test with t.Fatalf.
+	defer manager2.Shutdown()
+
+	// Get the loaded instance
+	loadedInst, err := manager2.GetInstance("test-instance")
+	if err != nil {
+		t.Fatalf("GetInstance failed: %v", err)
+	}
+
+	// The instance should be marked as Stopped, not Running
+	// because auto-restart is disabled
+	if loadedInst.IsRunning() {
+		t.Errorf("Expected instance with auto-restart disabled to be stopped after manager restart, but it was running")
+	}
+
+	if loadedInst.GetStatus() != instance.Stopped {
+		t.Errorf("Expected instance status to be Stopped, got %v", loadedInst.GetStatus())
+	}
 }
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -3,6 +3,7 @@ package manager
 import (
 	"fmt"
 	"llamactl/pkg/backends"
+	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/validation"
 	"os"
@@ -11,16 +12,65 @@ import (

 type MaxRunningInstancesError error

+// updateLocalInstanceFromRemote copies the remote instance's options, status and
+// creation time onto the local stub, keeping the stub's own Nodes list so that it
+// continues to be tracked as a remote instance. It does nothing when either
+// instance is nil or the remote instance has no options.
+func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
+	if localInst == nil || remoteInst == nil {
+		return
+	}
+
+	remoteOptions := remoteInst.GetOptions()
+	if remoteOptions == nil {
+		return
+	}
+
+	// Take a private copy of the stub's node list before its options are replaced.
+	var nodes []string
+	if current := localInst.GetOptions(); current != nil && len(current.Nodes) > 0 {
+		nodes = append(make([]string, 0, len(current.Nodes)), current.Nodes...)
+	}
+
+	// Shallow-copy the remote options, then swap in the preserved node list.
+	merged := *remoteOptions
+	merged.Nodes = nodes
+
+	localInst.SetOptions(&merged)
+	localInst.Status = remoteInst.Status
+	localInst.Created = remoteInst.Created
+}
+
 // ListInstances returns a list of all instances managed by the instance manager.
+// For remote instances, this fetches the live state from remote nodes and updates local stubs.
 func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
 	im.mu.RLock()
-	defer im.mu.RUnlock()
-
-	instances := make([]*instance.Process, 0, len(im.instances))
+	localInstances := make([]*instance.Process, 0, len(im.instances))
 	for _, inst := range im.instances {
-		instances = append(instances, inst)
+		localInstances = append(localInstances, inst)
 	}
-	return instances, nil
+	im.mu.RUnlock()
+
+	// Update remote instances with live state
+	for _, inst := range localInstances {
+		if node := im.getNodeForInstance(inst); node != nil {
+			remoteInst, err := im.GetRemoteInstance(node, inst.Name)
+			if err != nil {
+				// Log error but continue with stale data
+				// Don't fail the entire list operation due to one remote failure
+				continue
+			}
+
+			// Update the local stub with all remote data (preserving Nodes)
+			im.mu.Lock()
+			im.updateLocalInstanceFromRemote(inst, remoteInst)
+			im.mu.Unlock()
+		}
+	}
+
+	return localInstances, nil
 }

 // CreateInstance creates a new instance with the given options and returns it.
@@ -43,16 +93,56 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 	im.mu.Lock()
 	defer im.mu.Unlock()

-	// Check max instances limit after acquiring the lock
-	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-	}
-
-	// Check if instance with this name already exists
+	// Check if instance with this name already exists (must be globally unique)
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
 	}

+	// Check if this is a remote instance
+	isRemote := len(options.Nodes) > 0
+	var nodeConfig *config.NodeConfig
+
+	if isRemote {
+		// Validate that the node exists
+		nodeName := options.Nodes[0] // Use first node for now
+		var exists bool
+		nodeConfig, exists = im.nodeConfigMap[nodeName]
+		if !exists {
+			return nil, fmt.Errorf("node %s not found", nodeName)
+		}
+
+		// Create the remote instance on the remote node
+		remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
+		if err != nil {
+			return nil, err
+		}
+
+		// Create a local stub that preserves the Nodes field for tracking
+		// We keep the original options (with Nodes) so IsRemote() works correctly
+		inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, nil)
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+
+		// Add to local tracking maps (but don't count towards limits)
+		im.instances[name] = inst
+		im.instanceNodeMap[name] = nodeConfig
+
+		// Persist the remote instance locally for tracking across restarts
+		if err := im.persistInstance(inst); err != nil {
+			return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
+		}
+
+		return inst, nil
+	}
+
+	// Local instance creation
+	// Check max instances limit for local instances only
+	localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
+	if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+	}
+
 	// Assign and validate port for backend-specific options
 	if err := im.assignAndValidatePort(options); err != nil {
 		return nil, err
@@ -73,28 +163,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 }

 // GetInstance retrieves an instance by its name.
+// For remote instances, this fetches the live state from the remote node and updates the local stub.
 func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	defer im.mu.RUnlock()
+	inst, exists := im.instances[name]
+	im.mu.RUnlock()

-	instance, exists := im.instances[name]
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	return instance, nil
+
+	// Check if instance is remote and fetch live state
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.GetRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		// Return the local stub (preserving Nodes field)
+		return inst, nil
+	}
+
+	return inst, nil
 }

 // UpdateInstance updates the options of an existing instance and returns it.
 // If the instance is running, it will be restarted to apply the new options.
 func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()

 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}

+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.UpdateRemoteInstance(node, name, options)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		// Persist the updated remote instance locally
+		im.mu.Lock()
+		defer im.mu.Unlock()
+		if err := im.persistInstance(inst); err != nil {
+			return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
+		}
+
+		return inst, nil
+	}
+
 	if options == nil {
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}
@@ -105,55 +235,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
 	}

 	// Check if instance is running before updating options
-	wasRunning := instance.IsRunning()
+	wasRunning := inst.IsRunning()

 	// If the instance is running, stop it first
 	if wasRunning {
-		if err := instance.Stop(); err != nil {
+		if err := inst.Stop(); err != nil {
 			return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
 		}
 	}

 	// Now update the options while the instance is stopped
-	instance.SetOptions(options)
+	inst.SetOptions(options)

 	// If it was running before, start it again with the new options
 	if wasRunning {
-		if err := instance.Start(); err != nil {
+		if err := inst.Start(); err != nil {
 			return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
 		}
 	}

 	im.mu.Lock()
 	defer im.mu.Unlock()
-	if err := im.persistInstance(instance); err != nil {
+	if err := im.persistInstance(inst); err != nil {
 		return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
 	}

-	return instance, nil
+	return inst, nil
 }

 // DeleteInstance removes stopped instance by its name.
 func (im *instanceManager) DeleteInstance(name string) error {
 	im.mu.Lock()
-	defer im.mu.Unlock()
+	inst, exists := im.instances[name]
+	im.mu.Unlock()

-	instance, exists := im.instances[name]
 	if !exists {
 		return fmt.Errorf("instance with name %s not found", name)
 	}

-	if instance.IsRunning() {
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		err := im.DeleteRemoteInstance(node, name)
+		if err != nil {
+			return err
+		}
+
+		// Clean up local tracking
+		im.mu.Lock()
+		defer im.mu.Unlock()
+		delete(im.instances, name)
+		delete(im.instanceNodeMap, name)
+
+		// Delete the instance's config file if persistence is enabled
+		// Re-validate instance name for security (defense in depth)
+		validatedName, err := validation.ValidateInstanceName(name)
+		if err != nil {
+			return fmt.Errorf("invalid instance name for file deletion: %w", err)
+		}
+		instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
+		if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
+			return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
+		}
+
+		return nil
+	}
+
+	if inst.IsRunning() {
 		return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
 	}

-	delete(im.ports, instance.GetPort())
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	delete(im.ports, inst.GetPort())
 	delete(im.instances, name)

 	// Delete the instance's config file if persistence is enabled
-	instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
+	// Re-validate instance name for security (defense in depth)
+	validatedName, err := validation.ValidateInstanceName(inst.Name)
+	if err != nil {
+		return fmt.Errorf("invalid instance name for file deletion: %w", err)
+	}
+	instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
 	if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
-		return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
+		return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
 	}

 	return nil
@@ -163,33 +328,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
 // If the instance is already running, it returns an error.
 func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
-	maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()

 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if instance.IsRunning() {
-		return instance, fmt.Errorf("instance with name %s is already running", name)
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.StartRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
 		}

+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
+	}
+
+	if inst.IsRunning() {
+		return inst, fmt.Errorf("instance with name %s is already running", name)
+	}
+
+	// Check max running instances limit for local instances only
+	im.mu.RLock()
+	localRunningCount := 0
+	for instName := range im.runningInstances {
+		if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
+			localRunningCount++
+		}
+	}
+	maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+	im.mu.RUnlock()
+
 	if maxRunningExceeded {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
 	}

-	if err := instance.Start(); err != nil {
+	if err := inst.Start(); err != nil {
 		return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
 	}

 	im.mu.Lock()
 	defer im.mu.Unlock()
-	err := im.persistInstance(instance)
+	err := im.persistInstance(inst)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}

-	return instance, nil
+	return inst, nil
 }

 func (im *instanceManager) IsMaxRunningInstancesReached() bool {
@@ -206,51 +397,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
 // StopInstance stops a running instance and returns it.
 func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()

 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if !instance.IsRunning() {
-		return instance, fmt.Errorf("instance with name %s is already stopped", name)
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.StopRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
 		}

-	if err := instance.Stop(); err != nil {
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
+	}
+
+	if !inst.IsRunning() {
+		return inst, fmt.Errorf("instance with name %s is already stopped", name)
+	}
+
+	if err := inst.Stop(); err != nil {
 		return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
 	}

 	im.mu.Lock()
 	defer im.mu.Unlock()
-	err := im.persistInstance(instance)
+	err := im.persistInstance(inst)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}

-	return instance, nil
+	return inst, nil
 }

 // RestartInstance stops and then starts an instance, returning the updated instance.
 func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
-	instance, err := im.StopInstance(name)
+	im.mu.RLock()
+	inst, exists := im.instances[name]
+	im.mu.RUnlock()
+
+	if !exists {
+		return nil, fmt.Errorf("instance with name %s not found", name)
+	}
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.RestartRemoteInstance(node, name)
 		if err != nil {
 			return nil, err
 		}
-	return im.StartInstance(instance.Name)
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
+	}
+
+	inst, err := im.StopInstance(name)
+	if err != nil {
+		return nil, err
+	}
+	return im.StartInstance(inst.Name)
 }

 // GetInstanceLogs retrieves the logs for a specific instance by its name.
-func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
+func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
 	im.mu.RLock()
-	_, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()

 	if !exists {
 		return "", fmt.Errorf("instance with name %s not found", name)
 	}

-	// TODO: Implement actual log retrieval logic
-	return fmt.Sprintf("Logs for instance %s", name), nil
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		return im.GetRemoteInstanceLogs(node, name, numLines)
+	}
+
+	// Get logs from the local instance
+	return inst.GetLogs(numLines)
 }

 // getPortFromOptions extracts the port from backend-specific options
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
-	limitedManager := manager.NewInstanceManager(backendConfig, cfg)
+	limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})

 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {
--- a/pkg/manager/remote_ops.go
+++ b/pkg/manager/remote_ops.go
@@ -0,0 +1,243 @@
+package manager
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"net/http"
+	"strings"
+)
+
+// stripNodesFromOptions returns a copy of options whose Nodes field is nil, so
+// that a remote node receiving the options does not try to route the request
+// onward again. The input is left untouched; nil input yields nil.
+//
+// The copy is a plain struct assignment, so any reference-typed fields other
+// than Nodes still point at the same underlying data as the original.
+func (im *instanceManager) stripNodesFromOptions(options *instance.CreateInstanceOptions) *instance.CreateInstanceOptions {
+	if options == nil {
+		return nil
+	}
+
+	stripped := new(instance.CreateInstanceOptions)
+	*stripped = *options
+	stripped.Nodes = nil // the receiving node must treat the instance as local
+
+	return stripped
+}
+
+// makeRemoteRequest builds and sends an HTTP request to a remote node.
+//
+// path is appended to nodeConfig.Address. Trailing slashes on the address are
+// trimmed first, so "http://host:8080/" and "http://host:8080" produce the same
+// URL instead of one containing a doubled slash. A non-nil body is JSON-encoded
+// and sent with a JSON Content-Type; *instance.CreateInstanceOptions bodies have
+// their Nodes field stripped before encoding. When nodeConfig.APIKey is set it
+// is sent as a Bearer token.
+//
+// On success the caller owns resp.Body and is responsible for closing it. An
+// error is returned if nodeConfig is nil, the body cannot be marshaled, the
+// request cannot be constructed, or the HTTP client fails.
+func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
+	if nodeConfig == nil {
+		return nil, fmt.Errorf("node configuration is nil")
+	}
+
+	var reqBody io.Reader
+	if body != nil {
+		// Strip nodes from CreateInstanceOptions to prevent routing loops
+		if options, ok := body.(*instance.CreateInstanceOptions); ok {
+			body = im.stripNodesFromOptions(options)
+		}
+
+		jsonData, err := json.Marshal(body)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal request body: %w", err)
+		}
+		reqBody = bytes.NewBuffer(jsonData)
+	}
+
+	// Normalize the base address so a configured trailing slash does not
+	// yield "//api/..." once the absolute path is appended.
+	url := strings.TrimRight(nodeConfig.Address, "/") + path
+	req, err := http.NewRequest(method, url, reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	if body != nil {
+		req.Header.Set("Content-Type", "application/json")
+	}
+
+	if nodeConfig.APIKey != "" {
+		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
+	}
+
+	resp, err := im.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute request: %w", err)
+	}
+
+	return resp, nil
+}
+
+// parseRemoteResponse consumes and closes resp.Body, then interprets the reply.
+//
+// Any status outside the 2xx range is turned into an error that embeds the
+// status code and the raw body. For a 2xx reply the body is JSON-decoded into
+// result when result is non-nil; a nil result means the body is read and
+// discarded.
+func parseRemoteResponse(resp *http.Response, result any) error {
+	defer resp.Body.Close()
+
+	payload, readErr := io.ReadAll(resp.Body)
+	if readErr != nil {
+		return fmt.Errorf("failed to read response body: %w", readErr)
+	}
+
+	if code := resp.StatusCode; code < http.StatusOK || code >= http.StatusMultipleChoices {
+		return fmt.Errorf("API request failed with status %d: %s", code, string(payload))
+	}
+
+	// Callers that only care about the status pass a nil result.
+	if result == nil {
+		return nil
+	}
+
+	if decodeErr := json.Unmarshal(payload, result); decodeErr != nil {
+		return fmt.Errorf("failed to unmarshal response: %w", decodeErr)
+	}
+	return nil
+}
+
+// ListRemoteInstances fetches every instance known to the remote node.
+//
+// It sends GET /api/v1/instances/ and decodes the JSON reply into a slice of
+// instance.Process pointers. Transport failures and non-2xx replies are
+// returned as errors.
+func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodGet, "/api/v1/instances/", nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var remote []*instance.Process
+	if err = parseRemoteResponse(resp, &remote); err != nil {
+		return nil, err
+	}
+	return remote, nil
+}
+
+// CreateRemoteInstance asks the remote node to create the named instance.
+//
+// It sends POST /api/v1/instances/{name}/ with options as the JSON body and
+// returns the instance decoded from the reply. Transport failures and non-2xx
+// replies are returned as errors.
+func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodPost, endpoint, options)
+	if err != nil {
+		return nil, err
+	}
+
+	created := new(instance.Process)
+	if err := parseRemoteResponse(resp, created); err != nil {
+		return nil, err
+	}
+	return created, nil
+}
+
+// GetRemoteInstance looks up a single instance on the remote node by name.
+//
+// It sends GET /api/v1/instances/{name}/ and returns the instance decoded from
+// the reply. Transport failures and non-2xx replies are returned as errors.
+func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	found := new(instance.Process)
+	if err := parseRemoteResponse(resp, found); err != nil {
+		return nil, err
+	}
+	return found, nil
+}
+
+// UpdateRemoteInstance pushes new options for an existing instance to the
+// remote node.
+//
+// It sends PUT /api/v1/instances/{name}/ with options as the JSON body and
+// returns the instance decoded from the reply. Transport failures and non-2xx
+// replies are returned as errors.
+func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodPut, endpoint, options)
+	if err != nil {
+		return nil, err
+	}
+
+	updated := new(instance.Process)
+	if err := parseRemoteResponse(resp, updated); err != nil {
+		return nil, err
+	}
+	return updated, nil
+}
+
+// DeleteRemoteInstance removes the named instance from the remote node.
+//
+// It sends DELETE /api/v1/instances/{name}/ and reports transport failures and
+// non-2xx replies as errors.
+func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodDelete, "/api/v1/instances/"+name+"/", nil)
+	if err != nil {
+		return err
+	}
+
+	// Nothing is decoded from the reply; only the status code is checked.
+	return parseRemoteResponse(resp, nil)
+}
+
+// StartRemoteInstance asks the remote node to start the named instance.
+//
+// It sends POST /api/v1/instances/{name}/start without a body and returns the
+// instance decoded from the reply. Transport failures and non-2xx replies are
+// returned as errors.
+func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/start"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodPost, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	started := new(instance.Process)
+	if err := parseRemoteResponse(resp, started); err != nil {
+		return nil, err
+	}
+	return started, nil
+}
+
+// StopRemoteInstance asks the remote node to stop the named instance.
+//
+// It sends POST /api/v1/instances/{name}/stop without a body and returns the
+// instance decoded from the reply. Transport failures and non-2xx replies are
+// returned as errors.
+func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/stop"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodPost, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	stopped := new(instance.Process)
+	if err := parseRemoteResponse(resp, stopped); err != nil {
+		return nil, err
+	}
+	return stopped, nil
+}
+
+// RestartRemoteInstance asks the remote node to restart the named instance.
+//
+// It sends POST /api/v1/instances/{name}/restart without a body and returns
+// the instance decoded from the reply. Transport failures and non-2xx replies
+// are returned as errors.
+func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	endpoint := "/api/v1/instances/" + name + "/restart"
+
+	resp, err := im.makeRemoteRequest(nodeConfig, http.MethodPost, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	restarted := new(instance.Process)
+	if err := parseRemoteResponse(resp, restarted); err != nil {
+		return nil, err
+	}
+	return restarted, nil
+}
+
+// GetRemoteInstanceLogs retrieves logs for an instance from the remote node.
+//
+// It sends GET /api/v1/instances/{name}/logs?lines={numLines} and returns an
+// error for transport failures and for any non-2xx status. The reply body is
+// returned verbatim unless it is a JSON object carrying a "logs" key, in which
+// case that value is returned instead. Key presence, not non-emptiness, is
+// what selects the JSON form, so a wrapped empty log yields "" rather than the
+// raw JSON text.
+func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
+	resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
+	if err != nil {
+		return "", err
+	}
+
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Logs endpoint might return plain text or JSON
+	// Try to parse as JSON first (in case it's wrapped in a response object).
+	// A pointer field distinguishes "key absent" from "key present but empty".
+	var logResponse struct {
+		Logs *string `json:"logs"`
+	}
+	if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != nil {
+		return *logResponse.Logs, nil
+	}
+
+	// Otherwise, return as plain text
+	return string(body), nil
+}
--- a/pkg/manager/remote_ops_test.go
+++ b/pkg/manager/remote_ops_test.go
@@ -0,0 +1,39 @@
+package manager
+
+import (
+	"llamactl/pkg/backends"
+	"llamactl/pkg/instance"
+	"testing"
+)
+
+// TestStripNodesFromOptions verifies that stripNodesFromOptions tolerates nil
+// input, clears Nodes on the returned copy, keeps the other fields, and leaves
+// the caller's options untouched.
+func TestStripNodesFromOptions(t *testing.T) {
+	mgr := &instanceManager{}
+
+	// nil in, nil out
+	if got := mgr.stripNodesFromOptions(nil); got != nil {
+		t.Errorf("Expected nil, got %+v", got)
+	}
+
+	// Populated options: Nodes must be dropped, everything else kept
+	input := &instance.CreateInstanceOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		Nodes:       []string{"node1", "node2"},
+		Environment: map[string]string{"TEST": "value"},
+	}
+
+	got := mgr.stripNodesFromOptions(input)
+
+	if got.Nodes != nil {
+		t.Errorf("Expected Nodes to be nil, got %+v", got.Nodes)
+	}
+	if got.BackendType != backends.BackendTypeLlamaCpp {
+		t.Errorf("Expected BackendType preserved")
+	}
+	if got.Environment["TEST"] != "value" {
+		t.Errorf("Expected Environment preserved")
+	}
+
+	// The caller's struct must still carry both nodes
+	if len(input.Nodes) != 2 {
+		t.Errorf("Original options should not be modified")
+	}
+}
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {

 	// Identify instances that should timeout
 	for _, inst := range im.instances {
+		// Skip remote instances - they are managed by their respective nodes
+		if inst.IsRemote() {
+			continue
+		}
+
 		if inst.ShouldTimeout() {
 			timeoutInstances = append(timeoutInstances, inst.Name)
 		}
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
 			continue
 		}

+		// Skip remote instances - they are managed by their respective nodes
+		if inst.IsRemote() {
+			continue
+		}
+
 		if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
 			continue // Skip instances without idle timeout
 		}
--- a/pkg/manager/timeout_test.go
+++ b/pkg/manager/timeout_test.go
@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
 		MaxInstances:         5,
 	}

-	manager := manager.NewInstanceManager(backendConfig, cfg)
+	manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -1,795 +1,29 @@
 package server

 import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"llamactl/pkg/backends"
-	"llamactl/pkg/backends/llamacpp"
-	"llamactl/pkg/backends/mlx"
-	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
-	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"net/http"
-	"os/exec"
-	"strconv"
-	"strings"
-
-	"github.com/go-chi/chi/v5"
+	"net/http/httputil"
+	"sync"
+	"time"
 )

 type Handler struct {
 	InstanceManager manager.InstanceManager
 	cfg             config.AppConfig
+	httpClient      *http.Client
+	remoteProxies   map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
+	remoteProxiesMu sync.RWMutex
 }

 func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 	return &Handler{
 		InstanceManager: im,
 		cfg:             cfg,
-	}
-}
-
-// VersionHandler godoc
-// @Summary Get llamactl version
-// @Description Returns the version of the llamactl command
-// @Tags version
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Version information"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /version [get]
-func (h *Handler) VersionHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Content-Type", "text/plain")
-		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
-	}
-}
-
-// LlamaServerHelpHandler godoc
-// @Summary Get help for llama server
-// @Description Returns the help text for the llama server command
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Help text"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/help [get]
-func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		helpCmd := exec.Command("llama-server", "--help")
-		output, err := helpCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// LlamaServerVersionHandler godoc
-// @Summary Get version of llama server
-// @Description Returns the version of the llama server command
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Version information"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/version [get]
-func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		versionCmd := exec.Command("llama-server", "--version")
-		output, err := versionCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// LlamaServerListDevicesHandler godoc
-// @Summary List available devices for llama server
-// @Description Returns a list of available devices for the llama server
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "List of devices"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/devices [get]
-func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		listCmd := exec.Command("llama-server", "--list-devices")
-		output, err := listCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// ListInstances godoc
-// @Summary List all instances
-// @Description Returns a list of all instances managed by the server
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Success 200 {array} instance.Process "List of instances"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances [get]
-func (h *Handler) ListInstances() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		instances, err := h.InstanceManager.ListInstances()
-		if err != nil {
-			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(instances); err != nil {
-			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// CreateInstance godoc
-// @Summary Create and start a new instance
-// @Description Creates a new instance with the provided configuration options
-// @Tags instances
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
-// @Success 201 {object} instance.Process "Created instance details"
-// @Failure 400 {string} string "Invalid request body"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [post]
-func (h *Handler) CreateInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		var options instance.CreateInstanceOptions
-		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.CreateInstance(name, &options)
-		if err != nil {
-			http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusCreated)
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// GetInstance godoc
-// @Summary Get details of a specific instance
-// @Description Returns the details of a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [get]
-func (h *Handler) GetInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// UpdateInstance godoc
-// @Summary Update an instance's configuration
-// @Description Updates the configuration of a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
-// @Success 200 {object} instance.Process "Updated instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [put]
-func (h *Handler) UpdateInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		var options instance.CreateInstanceOptions
-		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.UpdateInstance(name, &options)
-		if err != nil {
-			http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// StartInstance godoc
-// @Summary Start a stopped instance
-// @Description Starts a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Started instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/start [post]
-func (h *Handler) StartInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.StartInstance(name)
-		if err != nil {
-			// Check if error is due to maximum running instances limit
-			if _, ok := err.(manager.MaxRunningInstancesError); ok {
-				http.Error(w, err.Error(), http.StatusConflict)
-				return
-			}
-
-			http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// StopInstance godoc
-// @Summary Stop a running instance
-// @Description Stops a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Stopped instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/stop [post]
-func (h *Handler) StopInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.StopInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// RestartInstance godoc
-// @Summary Restart a running instance
-// @Description Restarts a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Restarted instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/restart [post]
-func (h *Handler) RestartInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.RestartInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// DeleteInstance godoc
-// @Summary Delete an instance
-// @Description Stops and removes a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Success 204 "No Content"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [delete]
-func (h *Handler) DeleteInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		if err := h.InstanceManager.DeleteInstance(name); err != nil {
-			http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.WriteHeader(http.StatusNoContent)
-	}
-}
-
-// GetInstanceLogs godoc
-// @Summary Get logs from a specific instance
-// @Description Returns the logs from a specific instance by name with optional line limit
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Param lines query string false "Number of lines to retrieve (default: all lines)"
-// @Produces text/plain
-// @Success 200 {string} string "Instance logs"
-// @Failure 400 {string} string "Invalid name format or lines parameter"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/logs [get]
-func (h *Handler) GetInstanceLogs() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		lines := r.URL.Query().Get("lines")
-		if lines == "" {
-			lines = "-1"
-		}
-
-		num_lines, err := strconv.Atoi(lines)
-		if err != nil {
-			http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		logs, err := inst.GetLogs(num_lines)
-		if err != nil {
-			http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write([]byte(logs))
-	}
-}
-
-// ProxyToInstance godoc
-// @Summary Proxy requests to a specific instance
-// @Description Forwards HTTP requests to the llama-server instance running on a specific port
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Success 200 "Request successfully proxied to instance"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Failure 503 {string} string "Instance is not running"
-// @Router /instances/{name}/proxy [get]
-// @Router /instances/{name}/proxy [post]
-func (h *Handler) ProxyToInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		if !inst.IsRunning() {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
-		}
-
-		// Get the cached proxy for this instance
-		proxy, err := inst.GetProxy()
-		if err != nil {
-			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
-		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
-		proxyPath := r.URL.Path[len(prefix):]
-
-		// Ensure the proxy path starts with "/"
-		if !strings.HasPrefix(proxyPath, "/") {
-			proxyPath = "/" + proxyPath
-		}
-
-		// Update the last request time for the instance
-		inst.UpdateLastRequestTime()
-
-		// Modify the request to remove the proxy prefix
-		originalPath := r.URL.Path
-		r.URL.Path = proxyPath
-
-		// Set forwarded headers
-		r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
-		r.Header.Set("X-Forwarded-Proto", "http")
-
-		// Restore original path for logging purposes
-		defer func() {
-			r.URL.Path = originalPath
-		}()
-
-		// Forward the request using the cached proxy
-		proxy.ServeHTTP(w, r)
-	}
-}
-
-// OpenAIListInstances godoc
-// @Summary List instances in OpenAI-compatible format
-// @Description Returns a list of instances in a format compatible with OpenAI API
-// @Tags openai
-// @Security ApiKeyAuth
-// @Produces json
-// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /v1/models [get]
-func (h *Handler) OpenAIListInstances() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		instances, err := h.InstanceManager.ListInstances()
-		if err != nil {
-			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		openaiInstances := make([]OpenAIInstance, len(instances))
-		for i, inst := range instances {
-			openaiInstances[i] = OpenAIInstance{
-				ID:      inst.Name,
-				Object:  "model",
-				Created: inst.Created,
-				OwnedBy: "llamactl",
-			}
-		}
-
-		openaiResponse := OpenAIListInstancesResponse{
-			Object: "list",
-			Data:   openaiInstances,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
-			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// OpenAIProxy godoc
-// @Summary OpenAI-compatible proxy endpoint
-// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
-// @Tags openai
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Success 200 "OpenAI response"
-// @Failure 400 {string} string "Invalid request body or instance name"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /v1/ [post]
-func (h *Handler) OpenAIProxy() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		// Read the entire body first
-		bodyBytes, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, "Failed to read request body", http.StatusBadRequest)
-			return
-		}
-		r.Body.Close()
-
-		// Parse the body to extract instance name
-		var requestBody map[string]any
-		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		modelName, ok := requestBody["model"].(string)
-		if !ok || modelName == "" {
-			http.Error(w, "Instance name is required", http.StatusBadRequest)
-			return
-		}
-
-		// Route to the appropriate inst based on instance name
-		inst, err := h.InstanceManager.GetInstance(modelName)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		if !inst.IsRunning() {
-			allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
-			if !allowOnDemand {
-				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-				return
-			}
-
-			if h.InstanceManager.IsMaxRunningInstancesReached() {
-				if h.cfg.Instances.EnableLRUEviction {
-					err := h.InstanceManager.EvictLRUInstance()
-					if err != nil {
-						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
-						return
-					}
-				} else {
-					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
-					return
-				}
-			}
-
-			// If on-demand start is enabled, start the instance
-			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
-				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
-				return
-			}
-
-			// Wait for the instance to become healthy before proceeding
-			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
-				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
-				return
-			}
-		}
-
-		proxy, err := inst.GetProxy()
-		if err != nil {
-			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Update last request time for the instance
-		inst.UpdateLastRequestTime()
-
-		// Recreate the request body from the bytes we read
-		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-		r.ContentLength = int64(len(bodyBytes))
-
-		proxy.ServeHTTP(w, r)
-	}
-}
-
-// ParseCommandRequest represents the request body for command parsing
-type ParseCommandRequest struct {
-	Command string `json:"command"`
-}
-
-// ParseLlamaCommand godoc
-// @Summary Parse llama-server command
-// @Description Parses a llama-server command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Failure 500 {object} map[string]string "Internal Server Error"
-// @Router /backends/llama-cpp/parse-command [post]
-func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-		llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-		options := &instance.CreateInstanceOptions{
-			BackendType:        backends.BackendTypeLlamaCpp,
-			LlamaServerOptions: llamaOptions,
-		}
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
-	}
-}
-
-// ParseMlxCommand godoc
-// @Summary Parse mlx_lm.server command
-// @Description Parses MLX-LM server command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Router /backends/mlx/parse-command [post]
-func (h *Handler) ParseMlxCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-		
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-		
-		mlxOptions, err := mlx.ParseMlxCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-		
-		// Currently only support mlx_lm backend type
-		backendType := backends.BackendTypeMlxLm
-		
-		options := &instance.CreateInstanceOptions{
-			BackendType:      backendType,
-			MlxServerOptions: mlxOptions,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
-	}
-}
-
-// ParseVllmCommand godoc
-// @Summary Parse vllm serve command
-// @Description Parses a vLLM serve command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Router /backends/vllm/parse-command [post]
-func (h *Handler) ParseVllmCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-
-		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-
-		backendType := backends.BackendTypeVllm
-
-		options := &instance.CreateInstanceOptions{
-			BackendType:       backendType,
-			VllmServerOptions: vllmOptions,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
+		httpClient: &http.Client{
+			Timeout: 30 * time.Second,
+		},
+		remoteProxies: make(map[string]*httputil.ReverseProxy),
 	}
 }
--- a/pkg/server/handlers_backends.go
+++ b/pkg/server/handlers_backends.go
@@ -0,0 +1,320 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"llamactl/pkg/backends"
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends/mlx"
+	"llamactl/pkg/backends/vllm"
+	"llamactl/pkg/instance"
+	"net/http"
+	"os/exec"
+	"strings"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// ParseCommandRequest represents the request body for command parsing
+type ParseCommandRequest struct {
+	// Command is the raw command-line string to parse. The parse-command
+	// handlers reject it when it is empty or whitespace-only.
+	Command string `json:"command"`
+}
+
+// LlamaCppProxy returns a handler that forwards requests under
+// "/llama-cpp/<name>" to the llama.cpp instance called <name>.
+//
+// A stopped instance is rejected with 503 unless both the onDemandStart
+// argument and the instance's own OnDemandStart option are enabled. In that
+// case the instance is started (evicting the LRU instance first when the
+// running-instance limit is reached and eviction is enabled) and the handler
+// waits for it to become healthy before proxying.
+func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// The instance name is taken from the "name" URL parameter.
+		instanceName := chi.URLParam(r, "name")
+		if instanceName == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		target, lookupErr := h.InstanceManager.GetInstance(instanceName)
+		if lookupErr != nil {
+			http.Error(w, "Invalid instance: "+lookupErr.Error(), http.StatusBadRequest)
+			return
+		}
+
+		opts := target.GetOptions()
+		if opts == nil {
+			http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
+			return
+		}
+		if opts.BackendType != backends.BackendTypeLlamaCpp {
+			http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
+			return
+		}
+
+		if !target.IsRunning() {
+			// On-demand start must be allowed by the route AND by the instance.
+			startAllowed := onDemandStart && opts.OnDemandStart != nil && *opts.OnDemandStart
+			if !startAllowed {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
+
+			// Make room for the new instance if the running limit is reached.
+			if h.InstanceManager.IsMaxRunningInstancesReached() {
+				if !h.cfg.Instances.EnableLRUEviction {
+					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+					return
+				}
+				if evictErr := h.InstanceManager.EvictLRUInstance(); evictErr != nil {
+					http.Error(w, "Cannot start Instance, failed to evict instance "+evictErr.Error(), http.StatusInternalServerError)
+					return
+				}
+			}
+
+			if _, startErr := h.InstanceManager.StartInstance(instanceName); startErr != nil {
+				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
+				return
+			}
+
+			// Block until healthy, bounded by the configured on-demand start timeout.
+			if healthErr := target.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); healthErr != nil {
+				http.Error(w, "Instance failed to become healthy: "+healthErr.Error(), http.StatusServiceUnavailable)
+				return
+			}
+		}
+
+		backendProxy, proxyErr := target.GetProxy()
+		if proxyErr != nil {
+			http.Error(w, "Failed to get proxy: "+proxyErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Drop the "/llama-cpp/<name>" routing prefix from the forwarded path.
+		routePrefix := fmt.Sprintf("/llama-cpp/%s", instanceName)
+		r.URL.Path = strings.TrimPrefix(r.URL.Path, routePrefix)
+
+		// Record activity on the instance before handing the request over.
+		target.UpdateLastRequestTime()
+
+		backendProxy.ServeHTTP(w, r)
+	}
+}
+
+// parseCommandErrorResponse is the JSON error payload shared by the
+// parse-command handlers.
+type parseCommandErrorResponse struct {
+	Error   string `json:"error"`
+	Details string `json:"details,omitempty"`
+}
+
+// writeParseCommandError sends a JSON error payload with the given HTTP status.
+func writeParseCommandError(w http.ResponseWriter, status int, code, details string) {
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(status)
+	// Best effort: nothing more can be done if writing the error body fails.
+	_ = json.NewEncoder(w).Encode(parseCommandErrorResponse{Error: code, Details: details})
+}
+
+// newParseCommandHandler builds the handler shared by every parse-command
+// endpoint. It decodes a ParseCommandRequest, rejects empty commands, turns
+// the command into instance options via toOptions and writes them as JSON.
+// Errors returned by toOptions are reported as 400 "parse_error".
+func newParseCommandHandler(toOptions func(command string) (*instance.CreateInstanceOptions, error)) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+			writeParseCommandError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" {
+			writeParseCommandError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		options, err := toOptions(req.Command)
+		if err != nil {
+			writeParseCommandError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeParseCommandError(w, http.StatusInternalServerError, "encode_error", err.Error())
+		}
+	}
+}
+
+// ParseLlamaCommand godoc
+// @Summary Parse llama-server command
+// @Description Parses a llama-server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Failure 500 {object} map[string]string "Internal Server Error"
+// @Router /backends/llama-cpp/parse-command [post]
+func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
+	return newParseCommandHandler(func(command string) (*instance.CreateInstanceOptions, error) {
+		llamaOptions, err := llamacpp.ParseLlamaCommand(command)
+		if err != nil {
+			return nil, err
+		}
+		return &instance.CreateInstanceOptions{
+			BackendType:        backends.BackendTypeLlamaCpp,
+			LlamaServerOptions: llamaOptions,
+		}, nil
+	})
+}
+
+// ParseMlxCommand godoc
+// @Summary Parse mlx_lm.server command
+// @Description Parses MLX-LM server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Failure 500 {object} map[string]string "Internal Server Error"
+// @Router /backends/mlx/parse-command [post]
+func (h *Handler) ParseMlxCommand() http.HandlerFunc {
+	return newParseCommandHandler(func(command string) (*instance.CreateInstanceOptions, error) {
+		mlxOptions, err := mlx.ParseMlxCommand(command)
+		if err != nil {
+			return nil, err
+		}
+		// Currently only support mlx_lm backend type
+		return &instance.CreateInstanceOptions{
+			BackendType:      backends.BackendTypeMlxLm,
+			MlxServerOptions: mlxOptions,
+		}, nil
+	})
+}
+
+// ParseVllmCommand godoc
+// @Summary Parse vllm serve command
+// @Description Parses a vLLM serve command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Failure 500 {object} map[string]string "Internal Server Error"
+// @Router /backends/vllm/parse-command [post]
+func (h *Handler) ParseVllmCommand() http.HandlerFunc {
+	return newParseCommandHandler(func(command string) (*instance.CreateInstanceOptions, error) {
+		vllmOptions, err := vllm.ParseVllmCommand(command)
+		if err != nil {
+			return nil, err
+		}
+		return &instance.CreateInstanceOptions{
+			BackendType:       backends.BackendTypeVllm,
+			VllmServerOptions: vllmOptions,
+		}, nil
+	})
+}
+
+// LlamaServerHelpHandler godoc
+// @Summary Get help for llama server
+// @Description Returns the help text for the llama server command
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "Help text"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/help [get]
+func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Tie the subprocess to the request so it is interrupted if the
+		// request is cancelled before the command finishes.
+		helpCmd := exec.CommandContext(r.Context(), "llama-server", "--help")
+		output, err := helpCmd.CombinedOutput()
+		if err != nil {
+			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		// A write failure here means the client is gone; nothing to report.
+		_, _ = w.Write(output)
+	}
+}
+
+// LlamaServerVersionHandler godoc
+// @Summary Get version of llama server
+// @Description Returns the version of the llama server command
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/version [get]
+func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Tie the subprocess to the request so it is interrupted if the
+		// request is cancelled before the command finishes.
+		versionCmd := exec.CommandContext(r.Context(), "llama-server", "--version")
+		output, err := versionCmd.CombinedOutput()
+		if err != nil {
+			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		// A write failure here means the client is gone; nothing to report.
+		_, _ = w.Write(output)
+	}
+}
+
+// LlamaServerListDevicesHandler godoc
+// @Summary List available devices for llama server
+// @Description Returns a list of available devices for the llama server
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "List of devices"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/devices [get]
+func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Tie the subprocess to the request so it is interrupted if the
+		// request is cancelled before the command finishes.
+		listCmd := exec.CommandContext(r.Context(), "llama-server", "--list-devices")
+		output, err := listCmd.CombinedOutput()
+		if err != nil {
+			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		// A write failure here means the client is gone; nothing to report.
+		_, _ = w.Write(output)
+	}
+}
--- a/pkg/server/handlers_instances.go
+++ b/pkg/server/handlers_instances.go
@@ -0,0 +1,445 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/manager"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"strconv"
+	"strings"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// ListInstances godoc
+// @Summary List all instances
+// @Description Returns a list of all instances managed by the server
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produce json
+// @Success 200 {array} instance.Process "List of instances"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances [get]
+func (h *Handler) ListInstances() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instances, err := h.InstanceManager.ListInstances()
+		if err != nil {
+			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(instances); err != nil {
+			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// CreateInstance godoc
+// @Summary Create and start a new instance
+// @Description Creates a new instance with the provided configuration options
+// @Tags instances
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 201 {object} instance.Process "Created instance details"
+// @Failure 400 {string} string "Invalid request body"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [post]
+func (h *Handler) CreateInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		var options instance.CreateInstanceOptions
+		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.CreateInstance(name, &options)
+		if err != nil {
+			http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Serialize before committing the 201 status: once WriteHeader has
+		// been called an encoding failure can no longer be reported as 500.
+		payload, err := json.Marshal(inst)
+		if err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusCreated)
+		// The trailing newline keeps the body identical to json.Encoder output.
+		_, _ = w.Write(append(payload, '\n'))
+	}
+}
+
+// GetInstance godoc
+// @Summary Get details of a specific instance
+// @Description Returns the details of a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [get]
+func (h *Handler) GetInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		// Lookup failures are reported as a client error (400).
+		inst, err := h.InstanceManager.GetInstance(name)
+		if err != nil {
+			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(inst); err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// UpdateInstance godoc
+// @Summary Update an instance's configuration
+// @Description Updates the configuration of a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 200 {object} instance.Process "Updated instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [put]
+func (h *Handler) UpdateInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		var options instance.CreateInstanceOptions
+		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.UpdateInstance(name, &options)
+		if err != nil {
+			http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(inst); err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// StartInstance godoc
+// @Summary Start a stopped instance
+// @Description Starts a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Started instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 409 {string} string "Maximum number of running instances reached"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/start [post]
+func (h *Handler) StartInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.StartInstance(name)
+		if err != nil {
+			// Hitting the running-instance limit is a conflict (409), not a
+			// server fault.
+			if _, ok := err.(manager.MaxRunningInstancesError); ok {
+				http.Error(w, err.Error(), http.StatusConflict)
+				return
+			}
+
+			http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(inst); err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// StopInstance godoc
+// @Summary Stop a running instance
+// @Description Stops a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Stopped instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/stop [post]
+func (h *Handler) StopInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.StopInstance(name)
+		if err != nil {
+			http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(inst); err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// RestartInstance godoc
+// @Summary Restart a running instance
+// @Description Restarts a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produce json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Restarted instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/restart [post]
+func (h *Handler) RestartInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.RestartInstance(name)
+		if err != nil {
+			http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(inst); err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// DeleteInstance godoc
+// @Summary Delete an instance
+// @Description Stops and removes a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Success 204 "No Content"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [delete]
+func (h *Handler) DeleteInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if instanceName == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
+		if deleteErr != nil {
+			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Success carries no body.
+		w.WriteHeader(http.StatusNoContent)
+	}
+}
+
+// GetInstanceLogs godoc
+// @Summary Get logs from a specific instance
+// @Description Returns the logs from a specific instance by name with optional line limit
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Param lines query string false "Number of lines to retrieve (default: all lines)"
+// @Produce text/plain
+// @Success 200 {string} string "Instance logs"
+// @Failure 400 {string} string "Invalid name format or lines parameter"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/logs [get]
+func (h *Handler) GetInstanceLogs() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		lines := r.URL.Query().Get("lines")
+		numLines := -1 // Default to all lines
+		if lines != "" {
+			parsedLines, err := strconv.Atoi(lines)
+			if err != nil {
+				http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
+				return
+			}
+			numLines = parsedLines
+		}
+
+		// Use the instance manager which handles both local and remote instances
+		logs, err := h.InstanceManager.GetInstanceLogs(name, numLines)
+		if err != nil {
+			http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "text/plain")
+		// A write failure here means the client is gone; nothing to report.
+		_, _ = w.Write([]byte(logs))
+	}
+}
+
+// ProxyToInstance godoc
+// @Summary Proxy requests to a specific instance
+// @Description Forwards HTTP requests to the llama-server instance running on a specific port
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Success 200 "Request successfully proxied to instance"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is not running"
+// @Router /instances/{name}/proxy [get]
+// @Router /instances/{name}/proxy [post]
+func (h *Handler) ProxyToInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.GetInstance(name)
+		if err != nil {
+			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Remote instances are forwarded to their node instead of a local proxy.
+		if inst.IsRemote() {
+			h.RemoteInstanceProxy(w, r, name, inst)
+			return
+		}
+
+		if !inst.IsRunning() {
+			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+			return
+		}
+
+		// Get the cached proxy for this instance
+		proxy, err := inst.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
+		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
+		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
+
+		// Update the last request time for the instance
+		inst.UpdateLastRequestTime()
+
+		// Set forwarded headers. On server requests net/http moves the Host
+		// header into r.Host and removes it from r.Header, so r.Host is the
+		// only place the original host can be read from.
+		r.Header.Set("X-Forwarded-Host", r.Host)
+		forwardedProto := "http"
+		if r.TLS != nil {
+			// r.TLS is non-nil only when the request arrived over TLS.
+			forwardedProto = "https"
+		}
+		r.Header.Set("X-Forwarded-Proto", forwardedProto)
+
+		// Forward the request using the cached proxy
+		proxy.ServeHTTP(w, r)
+	}
+}
+
+// RemoteInstanceProxy forwards the request to the node hosting a remote
+// instance. The node is the first entry of the instance's Nodes option. One
+// reverse proxy is built per node name and cached in h.remoteProxies, and the
+// node's API key, when set, is sent as a Bearer Authorization header.
+func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
+	opts := inst.GetOptions()
+	if opts == nil || len(opts.Nodes) == 0 {
+		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
+		return
+	}
+	node := opts.Nodes[0]
+
+	// Fast path: reuse the proxy already built for this node.
+	h.remoteProxiesMu.RLock()
+	cached, found := h.remoteProxies[node]
+	h.remoteProxiesMu.RUnlock()
+	if found {
+		cached.ServeHTTP(w, r)
+		return
+	}
+
+	// Slow path: build a proxy from the node's configuration.
+	nodeCfg, known := h.cfg.Nodes[node]
+	if !known {
+		http.Error(w, fmt.Sprintf("Node %s not found", node), http.StatusInternalServerError)
+		return
+	}
+
+	target, parseErr := url.Parse(nodeCfg.Address)
+	if parseErr != nil {
+		http.Error(w, "Failed to parse node address: "+parseErr.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	created := httputil.NewSingleHostReverseProxy(target)
+
+	// Wrap the default director so outgoing requests carry the node's API key.
+	baseDirector := created.Director
+	token := nodeCfg.APIKey // captured by the closure below
+	created.Director = func(out *http.Request) {
+		baseDirector(out)
+		if token != "" {
+			out.Header.Set("Authorization", fmt.Sprintf("Bearer %s", token))
+		}
+	}
+
+	// Store the proxy under the node name for later requests.
+	h.remoteProxiesMu.Lock()
+	h.remoteProxies[node] = created
+	h.remoteProxiesMu.Unlock()
+
+	created.ServeHTTP(w, r)
+}
--- a/pkg/server/handlers_nodes.go
+++ b/pkg/server/handlers_nodes.go
@@ -0,0 +1,79 @@
+package server
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// NodeResponse represents a sanitized node configuration for API responses.
+// It carries only the node address and is serialized as JSON.
+type NodeResponse struct {
+	// Address is the node's address, emitted under the "address" JSON key.
+	Address string `json:"address"`
+}
+
+// ListNodes godoc
+// @Summary List all configured nodes
+// @Description Returns a map of all nodes configured in the server (node name -> node config)
+// @Tags nodes
+// @Security ApiKeyAuth
+// @Produce json
+// @Success 200 {object} map[string]NodeResponse "Map of nodes"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /nodes [get]
+func (h *Handler) ListNodes() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Build the name -> NodeResponse map; only the address of each
+		// configured node is copied into the response.
+		sanitized := make(map[string]NodeResponse, len(h.cfg.Nodes))
+		for nodeName, cfg := range h.cfg.Nodes {
+			sanitized[nodeName] = NodeResponse{Address: cfg.Address}
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(sanitized)
+		if encodeErr == nil {
+			return
+		}
+		http.Error(w, "Failed to encode nodes: "+encodeErr.Error(), http.StatusInternalServerError)
+	}
+}
+
+// GetNode godoc
+// @Summary Get details of a specific node
+// @Description Returns the details of a specific node by name
+// @Tags nodes
+// @Security ApiKeyAuth
+// @Produce json
+// @Param name path string true "Node Name"
+// @Success 200 {object} NodeResponse "Node details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 404 {string} string "Node not found"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /nodes/{name} [get]
+func (h *Handler) GetNode() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// The node name comes from the {name} URL parameter.
+		nodeName := chi.URLParam(r, "name")
+		if len(nodeName) == 0 {
+			http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		cfg, found := h.cfg.Nodes[nodeName]
+		if !found {
+			http.Error(w, "Node not found", http.StatusNotFound)
+			return
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+
+		// Only the address is copied into the sanitized response.
+		payload := NodeResponse{Address: cfg.Address}
+		if encodeErr := json.NewEncoder(w).Encode(payload); encodeErr != nil {
+			http.Error(w, "Failed to encode node: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
--- a/pkg/server/handlers_openai.go
+++ b/pkg/server/handlers_openai.go
@@ -0,0 +1,206 @@
+package server
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"llamactl/pkg/instance"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+)
+
+// OpenAIListInstances godoc
+// @Summary List instances in OpenAI-compatible format
+// @Description Returns a list of instances in a format compatible with OpenAI API
+// @Tags openai
+// @Security ApiKeyAuth
+// @Produce json
+// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /v1/models [get]
+func (h *Handler) OpenAIListInstances() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		all, listErr := h.InstanceManager.ListInstances()
+		if listErr != nil {
+			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Present every instance as an OpenAI "model" entry keyed by its name.
+		models := make([]OpenAIInstance, 0, len(all))
+		for _, inst := range all {
+			models = append(models, OpenAIInstance{
+				ID:      inst.Name,
+				Object:  "model",
+				Created: inst.Created,
+				OwnedBy: "llamactl",
+			})
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+
+		payload := OpenAIListInstancesResponse{Object: "list", Data: models}
+		if encodeErr := json.NewEncoder(w).Encode(payload); encodeErr != nil {
+			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// OpenAIProxy godoc
+// @Summary OpenAI-compatible proxy endpoint
+// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
+// @Tags openai
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Success 200 "OpenAI response"
+// @Failure 400 {string} string "Invalid request body or instance name"
+// @Failure 409 {string} string "Maximum number of running instances reached"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is not running or failed to become healthy"
+// @Router /v1/ [post]
+func (h *Handler) OpenAIProxy() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Read the entire body first; it is needed both to pick the target
+		// instance and to replay the request to that instance afterwards.
+		bodyBytes, err := io.ReadAll(r.Body)
+		if err != nil {
+			http.Error(w, "Failed to read request body", http.StatusBadRequest)
+			return
+		}
+		r.Body.Close()
+
+		// Parse the body to extract instance name
+		var requestBody map[string]any
+		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+
+		// The "model" field must be a non-empty string; it is used as the instance name.
+		modelName, ok := requestBody["model"].(string)
+		if !ok || modelName == "" {
+			http.Error(w, "Instance name is required", http.StatusBadRequest)
+			return
+		}
+
+		// Route to the appropriate inst based on instance name
+		inst, err := h.InstanceManager.GetInstance(modelName)
+		if err != nil {
+			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		// Check if this is a remote instance
+		if inst.IsRemote() {
+			// Restore the body for the remote proxy
+			r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			h.RemoteOpenAIProxy(w, r, modelName, inst)
+			return
+		}
+
+		// Local instance that is not running: start it only when the
+		// instance's OnDemandStart option is explicitly set to true.
+		if !inst.IsRunning() {
+			options := inst.GetOptions()
+			allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
+			if !allowOnDemand {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
+
+			// At the running-instance limit: evict the LRU instance when
+			// eviction is enabled, otherwise reject with 409.
+			if h.InstanceManager.IsMaxRunningInstancesReached() {
+				if h.cfg.Instances.EnableLRUEviction {
+					err := h.InstanceManager.EvictLRUInstance()
+					if err != nil {
+						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
+						return
+					}
+				} else {
+					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+					return
+				}
+			}
+
+			// If on-demand start is enabled, start the instance
+			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+				return
+			}
+
+			// Wait for the instance to become healthy before proceeding
+			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout taken from the Instances.OnDemandStartTimeout config value
+				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+				return
+			}
+		}
+
+		proxy, err := inst.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Update last request time for the instance
+		inst.UpdateLastRequestTime()
+
+		// Recreate the request body from the bytes we read
+		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+		r.ContentLength = int64(len(bodyBytes))
+
+		proxy.ServeHTTP(w, r)
+	}
+}
+
+// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance.
+//
+// The target node is the first entry of the instance's Nodes option. A reverse
+// proxy is built once per node name and cached in h.remoteProxies; later
+// requests for the same node reuse it. When the node has an API key configured,
+// the outgoing Authorization header is replaced with "Bearer <key>".
+//
+// Responds with 500 when the instance has no node, the node is not present in
+// the configuration, or the node address is not a usable absolute URL.
+// The modelName parameter is currently unused by this function.
+func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
+	// Get the node name from instance options
+	options := inst.GetOptions()
+	if options == nil || len(options.Nodes) == 0 {
+		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
+		return
+	}
+
+	// Only the first configured node is used for routing.
+	nodeName := options.Nodes[0]
+
+	// Fast path: reuse a cached proxy for this node
+	h.remoteProxiesMu.RLock()
+	proxy, exists := h.remoteProxies[nodeName]
+	h.remoteProxiesMu.RUnlock()
+
+	if !exists {
+		// Find node configuration
+		nodeConfig, found := h.cfg.Nodes[nodeName]
+		if !found {
+			http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
+			return
+		}
+
+		// Create reverse proxy to remote node
+		targetURL, err := url.Parse(nodeConfig.Address)
+		if err != nil {
+			http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		// url.Parse accepts schemeless input such as "host:8080" (parsed as
+		// scheme "host" with no host). Reject it here instead of caching a
+		// proxy that can never reach the node.
+		if targetURL.Scheme == "" || targetURL.Host == "" {
+			http.Error(w, fmt.Sprintf("Node %s address must include a scheme and host", nodeName), http.StatusInternalServerError)
+			return
+		}
+
+		newProxy := httputil.NewSingleHostReverseProxy(targetURL)
+
+		// Modify request before forwarding
+		originalDirector := newProxy.Director
+		apiKey := nodeConfig.APIKey // Capture for closure
+		newProxy.Director = func(req *http.Request) {
+			originalDirector(req)
+			// Add API key if configured
+			if apiKey != "" {
+				req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
+			}
+		}
+
+		// Cache the proxy. Re-check under the write lock so that concurrent
+		// first requests all end up sharing a single proxy.
+		h.remoteProxiesMu.Lock()
+		if cached, ok := h.remoteProxies[nodeName]; ok {
+			proxy = cached
+		} else {
+			h.remoteProxies[nodeName] = newProxy
+			proxy = newProxy
+		}
+		h.remoteProxiesMu.Unlock()
+	}
+
+	// Forward the request using the cached proxy
+	proxy.ServeHTTP(w, r)
+}
--- a/pkg/server/handlers_system.go
+++ b/pkg/server/handlers_system.go
@@ -0,0 +1,22 @@
+package server
+
+import (
+	"fmt"
+	"net/http"
+)
+
+// VersionHandler godoc
+// @Summary Get llamactl version
+// @Description Returns the version of the llamactl command
+// @Tags version
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /version [get]
+func (h *Handler) VersionHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Three-line plain-text report: version, commit hash and build time from the config.
+		report := fmt.Sprintf(
+			"Version: %s\nCommit: %s\nBuild Time: %s\n",
+			h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime,
+		)
+
+		w.Header().Set("Content-Type", "text/plain")
+		_, _ = w.Write([]byte(report))
+	}
+}
--- a/pkg/server/routes.go
+++ b/pkg/server/routes.go
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	r.Use(cors.Handler(cors.Options{
 		AllowedOrigins:   handler.cfg.Server.AllowedOrigins,
 		AllowedMethods:   []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
-		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
+		AllowedHeaders:   handler.cfg.Server.AllowedHeaders,
 		ExposedHeaders:   []string{"Link"},
 		AllowCredentials: false,
 		MaxAge:           300,
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			})
 		})

+		// Node management endpoints
+		r.Route("/nodes", func(r chi.Router) {
+			r.Get("/", handler.ListNodes()) // List all nodes
+
+			r.Route("/{name}", func(r chi.Router) {
+				r.Get("/", handler.GetNode())
+			})
+		})
+
 		// Instance management endpoints
 		r.Route("/instances", func(r chi.Router) {
 			r.Get("/", handler.ListInstances()) // List all instances
@@ -103,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {

 	})

+	r.Route("/llama-cpp/{name}", func(r chi.Router) {
+
+		// Public Routes
+		// Allow llama-cpp server to serve its own WebUI if it is running.
+		// Don't auto start the server since it can be accessed without an API key
+		r.Get("/", handler.LlamaCppProxy(false))
+
+		// Private Routes
+		r.Group(func(r chi.Router) {
+
+			if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
+				r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
+			}
+
+			// This handler auto-starts the server if it's not running
+			llamaCppHandler := handler.LlamaCppProxy(true)
+
+			// llama.cpp server specific proxy endpoints
+			r.Get("/props", llamaCppHandler)
+			// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
+			r.Get("/slots", llamaCppHandler)
+			r.Post("/apply-template", llamaCppHandler)
+			r.Post("/completion", llamaCppHandler)
+			r.Post("/detokenize", llamaCppHandler)
+			r.Post("/embeddings", llamaCppHandler)
+			r.Post("/infill", llamaCppHandler)
+			r.Post("/metrics", llamaCppHandler)
+			r.Post("/props", llamaCppHandler)
+			r.Post("/reranking", llamaCppHandler)
+			r.Post("/tokenize", llamaCppHandler)
+
+			// OpenAI-compatible proxy endpoint
+			// Handles all POST requests to /v1/*, including:
+			//   - /v1/completions
+			//   - /v1/chat/completions
+			//   - /v1/embeddings
+			//   - /v1/rerank
+			//   - /v1/reranking
+			// llamaCppHandler is used here because some users of llama.cpp endpoints depend
+			// on "model" field being optional, and handler.OpenAIProxy requires it.
+			r.Post("/v1/*", llamaCppHandler)
+		})
+
+	})
+
 	// Serve WebUI files
 	if err := webui.SetupWebUI(r); err != nil {
 		fmt.Printf("Failed to set up WebUI: %v\n", err)
--- a/webui/src/components/InstanceDialog.tsx
+++ b/webui/src/components/InstanceDialog.tsx
@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
    // Clean up undefined values to avoid sending empty fields
    const cleanOptions: CreateInstanceOptions = {};
    Object.entries(formData).forEach(([key, value]) => {
-      if (key === 'backend_options' && value && typeof value === 'object') {
+      if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
        // Handle backend_options specially - clean nested object
        const cleanBackendOptions: any = {};
        Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -123,8 +123,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
        if (Object.keys(cleanBackendOptions).length > 0) {
          (cleanOptions as any)[key] = cleanBackendOptions;
        }
-      } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
-        // Handle arrays - don't include empty arrays
+      } else if (value !== undefined && value !== null) {
+        // Skip empty strings
+        if (typeof value === 'string' && value.trim() === "") {
+          return;
+        }
+        // Skip empty arrays
        if (Array.isArray(value) && value.length === 0) {
          return;
        }
--- a/webui/src/components/instance/InstanceSettingsCard.tsx
+++ b/webui/src/components/instance/InstanceSettingsCard.tsx
@@ -1,4 +1,4 @@
-import React from 'react'
+import React, { useState, useEffect } from 'react'
 import type { CreateInstanceOptions } from '@/types/instance'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Label } from '@/components/ui/label'
@@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
 import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
+import SelectInput from '@/components/form/SelectInput'
+import { nodesApi, type NodesMap } from '@/lib/api'

 interface InstanceSettingsCardProps {
  instanceName: string
@@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
  onNameChange,
  onChange
 }) => {
+  const [nodes, setNodes] = useState<NodesMap>({})
+  const [loadingNodes, setLoadingNodes] = useState(true)
+
+  useEffect(() => {
+    const fetchNodes = async () => {
+      try {
+        const fetchedNodes = await nodesApi.list()
+        setNodes(fetchedNodes)
+
+        // Auto-select first node if none selected
+        const nodeNames = Object.keys(fetchedNodes)
+        if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
+          onChange('nodes', [nodeNames[0]])
+        }
+      } catch (error) {
+        console.error('Failed to fetch nodes:', error)
+      } finally {
+        setLoadingNodes(false)
+      }
+    }
+
+    void fetchNodes()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
+
+  const nodeOptions = Object.keys(nodes).map(nodeName => ({
+    value: nodeName,
+    label: nodeName
+  }))
+
+  const handleNodeChange = (value: string | undefined) => {
+    if (value) {
+      onChange('nodes', [value])
+    } else {
+      onChange('nodes', undefined)
+    }
+  }
+
+  const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
+
  return (
    <Card>
      <CardHeader>
@@ -50,6 +92,18 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
          </p>
        </div>

+        {/* Node Selection */}
+        {!loadingNodes && Object.keys(nodes).length > 0 && (
+          <SelectInput
+            id="node"
+            label="Node"
+            value={selectedNode}
+            onChange={handleNodeChange}
+            options={nodeOptions}
+            description="Select the node where the instance will run (default: main node)"
+          />
+        )}
+
        {/* Auto Restart Configuration */}
        <AutoRestartConfiguration
          formData={formData}
--- a/webui/src/contexts/AuthContext.tsx
+++ b/webui/src/contexts/AuthContext.tsx
@@ -1,4 +1,4 @@
-import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
+import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'

 interface AuthContextState {
  isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
  // Validate API key by making a test request
  const validateApiKey = async (key: string): Promise<boolean> => {
    try {
-      const response = await fetch('/api/v1/instances', {
+      const response = await fetch(document.baseURI + 'api/v1/instances', {
        headers: {
          'Authorization': `Bearer ${key}`,
          'Content-Type': 'application/json'
--- a/webui/src/lib/tests/api.test.ts
+++ b/webui/src/lib/tests/api.test.ts
@@ -1,5 +1,5 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
 import { instancesApi } from '@/lib/api'
+import { beforeEach, describe, expect, it, vi } from 'vitest'

 // Mock fetch globally
 const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
  })

  it('converts HTTP errors to meaningful messages', async () => {
-    mockFetch.mockResolvedValue({
+    const mockResponse = {
      ok: false,
      status: 409,
-      text: () => Promise.resolve('Instance already exists')
-    })
+      text: () => Promise.resolve('Instance already exists'),
+      clone: function() { return this }
+    }
+    mockFetch.mockResolvedValue(mockResponse)

    await expect(instancesApi.create('existing', {}))
      .rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
  })

  it('handles empty error responses gracefully', async () => {
-    mockFetch.mockResolvedValue({
+    const mockResponse = {
      ok: false,
      status: 500,
-      text: () => Promise.resolve('')
-    })
+      text: () => Promise.resolve(''),
+      clone: function() { return this }
+    }
+    mockFetch.mockResolvedValue(mockResponse)

    await expect(instancesApi.list())
      .rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
    await instancesApi.getLogs('test-instance', 100)

    expect(mockFetch).toHaveBeenCalledWith(
-      '/api/v1/instances/test-instance/logs?lines=100',
+      expect.stringMatching(
+        /^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
+      ),
      expect.any(Object)
    )
  })
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -1,7 +1,10 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
 import { handleApiError } from "./errorUtils";

-const API_BASE = "/api/v1";
+// Adding baseURI as a prefix to support being served behind a subpath
+// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
+// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
+export const API_BASE = document.baseURI + "api/v1";

 // Base API call function with error handling
 async function apiCall<T>(
@@ -46,12 +49,9 @@ async function apiCall<T>(
    } else {
      // Handle empty responses for JSON endpoints
      const contentLength = response.headers.get('content-length');
-      if (contentLength === '0' || contentLength === null) {
-        const text = await response.text();
-        if (text.trim() === '') {
+      if (contentLength === '0') {
        return {} as T; // Return empty object for empty JSON responses
      }
-      }
      const data = await response.json() as T;
      return data;
    }
@@ -103,6 +103,22 @@ export const backendsApi = {
  },
 };

+// Node API types
+export interface NodeResponse {
+  address: string;
+}
+
+export type NodesMap = Record<string, NodeResponse>;
+
+// Node API functions
+export const nodesApi = {
+  // GET /nodes - returns map of node name to NodeResponse
+  list: () => apiCall<NodesMap>("/nodes"),
+
+  // GET /nodes/{name}
+  get: (name: string) => apiCall<NodeResponse>(`/nodes/${name}`),
+};
+
 // Instance API functions
 export const instancesApi = {
  // GET /instances
--- a/webui/src/lib/errorUtils.ts
+++ b/webui/src/lib/errorUtils.ts
@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
  }

  if (!response.ok) {
-    const errorMessage = await parseErrorResponse(response)
+    // Clone the response before reading to avoid consuming the body stream
+    const errorMessage = await parseErrorResponse(response.clone())
    throw new Error(errorMessage)
  }
 }
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({
  // Backend configuration
  backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
  backend_options: BackendOptionsSchema.optional(),
+
+  // Node configuration
+  nodes: z.array(z.string()).optional(),
 })

 // Re-export types and schemas from backend files
--- a/webui/vite.config.ts
+++ b/webui/vite.config.ts
@@ -21,4 +21,6 @@ export default defineConfig({
    setupFiles: ['./src/test/setup.ts'],
    css: true,
  },
+  // ensures relative asset paths to support being served behind a subpath
+  base: "./"
 })
Author	SHA1	Message	Date
Matúš Námešný	c3037f914d	Merge pull request #60 from lordmathis/lordmathis-patch-1 Update docs.yaml	2025-10-09 22:31:38 +02:00
Matúš Námešný	81266b4bc4	Update docs.yaml	2025-10-09 22:29:23 +02:00
Matúš Námešný	a31af94e7b	Merge pull request #59 from lordmathis/feat/multi-host feat: Implement multi node support	2025-10-09 22:23:27 +02:00
LordMathis	9ee0a184b3	Re-validate instance name in DeleteInstance for improved security	2025-10-09 22:18:53 +02:00
LordMathis	5436c28a1f	Add instance name validation before deletion for security	2025-10-09 22:10:40 +02:00
LordMathis	73b9dd5bc7	Rename workflows for consistency	2025-10-09 21:53:14 +02:00
LordMathis	f61e8dad5c	Add User Docs badge to README	2025-10-09 21:51:38 +02:00
LordMathis	ab2770bdd9	Add documentation for remote node deployment and configuration	2025-10-09 21:50:39 +02:00
LordMathis	e7a6a7003e	Skip remote instances in checkAllTimeouts and EvictLRUInstance methods	2025-10-09 21:13:38 +02:00
LordMathis	2b950ee649	Implement updateLocalInstanceFromRemote to preserve Nodes field when syncing remote instance data	2025-10-09 20:39:21 +02:00
LordMathis	b965b77c18	Prevent remote instances from using local proxy in GetProxy method	2025-10-09 20:24:54 +02:00
LordMathis	8a16a195de	Fix getting remote instance logs	2025-10-09 20:22:32 +02:00
LordMathis	9684a8a09b	Enhance instance management to preserve local state for remote instances	2025-10-09 19:34:52 +02:00
LordMathis	9d5f01d4ae	Auto-select first node in InstanceSettingsCard if none is selected	2025-10-09 19:13:58 +02:00
LordMathis	e281708b20	Enhance auto-start logic to differentiate between remote and local instances	2025-10-09 18:56:23 +02:00
LordMathis	8d9b0c0621	Initialize timeProvider and logger in UnmarshalJSON for Process	2025-10-09 18:56:12 +02:00
LordMathis	6c1a76691d	Improve cleanup of options in InstanceDialog to skip empty strings and arrays	2025-10-09 18:49:36 +02:00
LordMathis	5d958ed283	Fix backend_options cleanup to exclude empty arrays in InstanceDialog	2025-10-09 18:38:33 +02:00
LordMathis	56b95d1243	Refactor InstanceSettingsCard and API types to use NodesMap	2025-10-08 19:52:39 +02:00
LordMathis	688b815ca7	Add LocalNode configuration	2025-10-08 19:43:53 +02:00
LordMathis	7f6725da96	Refactor NodeConfig handling to use a map	2025-10-08 19:24:24 +02:00
LordMathis	3418735204	Add stripNodesFromOptions function to prevent routing loops in remote requests	2025-10-07 20:27:31 +02:00
LordMathis	2f1cf5acdc	Refactor CreateRemoteInstance and UpdateRemoteInstance to directly use options parameter in API requests	2025-10-07 19:57:21 +02:00
LordMathis	01380e6641	Update instance manager tests to use empty NodeConfig slice	2025-10-07 19:18:13 +02:00
LordMathis	6298b03636	Refactor RemoteOpenAIProxy to use cached proxies and restore request body handling	2025-10-07 18:57:08 +02:00
LordMathis	aae3f84d49	Implement caching for remote instance proxies and enhance proxy request handling	2025-10-07 18:44:23 +02:00
LordMathis	554796391b	Remove test config file	2025-10-07 18:05:30 +02:00
LordMathis	16b28bac05	Merge branch 'main' into feat/multi-host	2025-10-07 18:04:24 +02:00
Matúš Námešný	1892dc8315	Merge pull request #57 from BobbyL2k/feat/llama-cpp-proxy feat: Proxy llama.cpp API endpoints via `/llama-cpp/{name}/`	2025-10-06 20:23:44 +02:00
Anuruth Lertpiya	997bd1b063	Changed status code to StatusBadRequest (400) if requested invalid model name.	2025-10-05 14:53:20 +00:00
Anuruth Lertpiya	fa43f9e967	Added support for proxying llama.cpp native API endpoints via `/llama-cpp/{name}/`	2025-10-05 14:28:33 +00:00
Matúš Námešný	db9eebeb8b	Merge pull request #56 from lordmathis/fix/body-already-read Fix double read of json response when content-length header is missing	2025-10-04 22:28:22 +02:00
LordMathis	bd062f8ca0	Mock Response.clone for tests	2025-10-04 22:22:25 +02:00
LordMathis	8ebdb1a183	Fix double read of json response when content-length header is missing	2025-10-04 22:16:28 +02:00
Matúš Námešný	7272212081	Merge pull request #55 from lordmathis/fix/auto-restart fix: Set status to Stopped for instances with auto-restart disabled	2025-10-04 21:45:12 +02:00
Matúš Námešný	035e184789	Merge branch 'main' into fix/auto-restart	2025-10-04 21:22:50 +02:00
LordMathis	d15976e7aa	Implement auto-stop for instances with auto-restart disabled and add corresponding tests	2025-10-04 21:17:55 +02:00
Matúš Námešný	4fa75d9801	Merge pull request #52 from BobbyL2k/feat/config-cors-headers feat: Added support for configuring access-control-request-headers for CORS	2025-10-04 20:45:27 +02:00
Anuruth Lertpiya	0e1bc8a352	Added support for configuring CORS headers	2025-10-04 09:13:40 +00:00
LordMathis	b728a7c6b2	Fix fetchNodes call to ensure proper handling of promise	2025-10-03 10:53:29 +02:00
LordMathis	a491f29483	Add node selection functionality to InstanceSettingsCard and define Node API	2025-10-02 23:18:33 +02:00
LordMathis	670f8ff81b	Split up handlers	2025-10-02 23:11:20 +02:00
LordMathis	da56456504	Add node management endpoints to handle listing and retrieving node details	2025-10-02 22:51:41 +02:00
LordMathis	c30053e51c	Enhance instance loading to support remote instances and handle node configuration	2025-10-01 22:59:45 +02:00
LordMathis	347c58e15f	Enhance instance manager to persist remote instances and update tracking on modifications	2025-10-01 22:58:57 +02:00
LordMathis	2ed67eb672	Add remote instance proxying functionality to handler	2025-10-01 22:17:19 +02:00
LordMathis	0188f82306	Implement remote instance creation and deletion in instance manager	2025-10-01 22:05:18 +02:00
LordMathis	e0f176de10	Enhance instance manager to support remote instance management and update related tests	2025-10-01 20:25:06 +02:00
LordMathis	2759be65a5	Add remote instance management functionality and configuration support	2025-09-30 21:09:05 +02:00
Matúš Námešný	1e5e86d2c3	Merge pull request #50 from lordmathis/feat/docker-image feat: Add Dockerfiles for running llamactl in docker	2025-09-29 21:26:23 +02:00
LordMathis	25d3d70707	Update README and installation guide to reflect Dockerfile paths and add source build instructions	2025-09-29 21:18:13 +02:00
LordMathis	e54cfd006d	Add Dockerfile for building from source	2025-09-29 21:17:40 +02:00
LordMathis	7d39e7ee86	Move docker stuff to a dedicated folder	2025-09-29 21:16:51 +02:00
Matúš Námešný	222d913b4a	Merge pull request #49 from BobbyL2k/feat/reverse-proxy-support Added support for serving behind a reverse proxy	2025-09-29 20:32:11 +02:00
Anuruth Lertpiya	03a7a5d139	Update configration.md with reverse proxy related information	2025-09-29 13:54:15 +00:00
Anuruth Lertpiya	e50660c379	Fixed broken webui tests	2025-09-29 13:38:24 +00:00
Anuruth Lertpiya	5906d89f8d	Added support for serving behind a reverse proxy - Added support for specifying response headers for each backend - Allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx - Updated `configuration.md` to document the new configuration options - Modified Vite config to build with `base: "./"`, making assets be accessed via relative paths - Updated API_BASE to use `document.baseURI`, allowing API calls to be made relative to the base path	2025-09-29 12:43:10 +00:00
LordMathis	cb2d95139f	Setup data dir in Docker and docker-compose	2025-09-28 22:17:38 +02:00
LordMathis	889a8707e7	Refactor Dockerfile and docker-compose to streamline environment variable configuration and remove redundant commands	2025-09-28 22:17:38 +02:00
LordMathis	070c91787d	Add environment variable for llamactl command in Dockerfile	2025-09-28 22:17:38 +02:00
LordMathis	169ee422ec	Update README and installation guide to clarify Docker support and CUDA configuration	2025-09-28 22:17:38 +02:00
LordMathis	bb0176b7f5	Update Dockerfile to use server-cuda image for improved performance	2025-09-28 22:17:38 +02:00
LordMathis	291ec7995f	Update Docker run commands to use cached directories and remove unnecessary environment variables	2025-09-28 22:17:38 +02:00
LordMathis	b940b38e46	Initial support for docker	2025-09-28 22:17:38 +02:00
Matúš Námešný	92cb57e816	Merge pull request #48 from lordmathis/fix/command-environment fix: Pass host environment to instances	2025-09-28 21:40:50 +02:00
LordMathis	0ecd55c354	Start with host environment for instances	2025-09-28 21:37:48 +02:00
Matúš Námešný	b4c17194eb	Merge pull request #47 from lordmathis/fix/nil-context fix: Initialize context before building command	2025-09-28 20:59:30 +02:00
LordMathis	808092decf	Initialize context in Start method for command execution	2025-09-28 20:51:11 +02:00