diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yaml
similarity index 100%
rename from .github/workflows/codeql.yml
rename to .github/workflows/codeql.yaml
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yaml
similarity index 100%
rename from .github/workflows/docs.yml
rename to .github/workflows/docs.yaml
diff --git a/README.md b/README.md
index 0f27290..d9fea15 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # llamactl
 
-![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg)
+![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg)
 
 **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
 
@@ -23,7 +23,12 @@
 ### ⚡ Smart Operations
 - **Instance Monitoring**: Health checks, auto-restart, log management
 - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
-- **Environment Variables**: Set custom environment variables per instance for advanced configuration  
+- **Environment Variables**: Set custom environment variables per instance for advanced configuration
+
+### 🔗 Remote Instance Deployment
+- **Remote Node Support**: Deploy instances on remote hosts
+- **Central Management**: Manage remote instances from a single dashboard
+- **Seamless Routing**: Automatic request routing to remote instances
 
 ![Dashboard Screenshot](docs/images/dashboard.png)
 
diff --git a/cmd/server/main.go b/cmd/server/main.go
index e245ebf..de080c7 100644
--- a/cmd/server/main.go
+++ b/cmd/server/main.go
@@ -58,7 +58,7 @@ func main() {
 	}
 
 	// Initialize the instance manager
-	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
+	instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes)
 
 	// Create a new handler with the instance manager
 	handler := server.NewHandler(instanceManager, cfg)
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
index be4fc6d..c43efc6 100644
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -70,6 +70,10 @@ auth:
   inference_keys: []             # Keys for inference endpoints
   require_management_auth: true  # Require auth for management endpoints
   management_keys: []            # Keys for management endpoints
+
+local_node: "main"               # Name of the local node (default: "main")
+nodes:                           # Node configuration for multi-node deployment
+  main:                          # Default local node (empty config)
 ```
 
 ## Configuration Files
@@ -235,18 +239,32 @@ auth:
   management_keys: []                    # List of valid management API keys
 ```
 
-**Environment Variables:**  
-- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)  
-- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys  
-- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)  
-- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys  
+**Environment Variables:**
+- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false)
+- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys
+- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
+- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
 
-## Command Line Options
+### Remote Node Configuration
 
-View all available command line options:
+llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
 
-```bash
-llamactl --help
+```yaml
+local_node: "main"               # Name of the local node (default: "main")
+nodes:                           # Node configuration map
+  main:                          # Local node (empty address means local)
+    address: ""                  # Not used for local node
+    api_key: ""                  # Not used for local node
+  worker1:                       # Remote worker node
+    address: "http://192.168.1.10:8080"
+    api_key: "worker1-api-key"   # Management API key for authentication
 ```
 
-You can also override configuration using command line flags when starting llamactl.
+**Node Configuration Fields:**
+- `local_node`: Specifies which node in the `nodes` map represents the local node
+- `nodes`: Map of node configurations
+  - `address`: HTTP/HTTPS URL of the remote node (empty for local node)
+  - `api_key`: Management API key for authenticating with the remote node
+
+**Environment Variables:**
+- `LLAMACTL_LOCAL_NODE` - Name of the local node
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
index f64146f..04e0dfd 100644
--- a/docs/getting-started/installation.md
+++ b/docs/getting-started/installation.md
@@ -157,6 +157,12 @@ cd webui && npm ci && npm run build && cd ..
 go build -o llamactl ./cmd/server
 ```
 
+## Remote Node Installation
+
+For deployments with remote nodes:
+- Install llamactl on each node using any of the methods above
+- Set a management API key on each remote node and enter it as that node's `api_key` in the `nodes` configuration of the node that manages it
+
 ## Verification
 
 Verify your installation by checking the version:
@@ -168,3 +174,5 @@ llamactl --version
 ## Next Steps
 
 Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
+
+For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
index 26e01e4..472cd0b 100644
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -126,6 +126,7 @@ POST /api/v1/instances/{name}
 - `on_demand_start`: Start instance when receiving requests
 - `idle_timeout`: Idle timeout in minutes
 - `environment`: Environment variables as key-value pairs
+- `nodes`: Array containing the name of the node to deploy the instance to; only the first entry is used (for remote deployments)
 
 See [Managing Instances](managing-instances.md) for complete configuration options.
 
@@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
   http://localhost:8080/api/v1/instances/my-model
 ```
 
+### Remote Node Instance Example
+
+```bash
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/v1/instances/remote-model \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-api-key" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-2-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
+
+# Check status of remote instance
+curl -H "Authorization: Bearer your-api-key" \
+  http://localhost:8080/api/v1/instances/remote-model
+
+# Use remote instance with OpenAI-compatible API
+curl -X POST http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer your-inference-api-key" \
+  -d '{
+    "model": "remote-model",
+    "messages": [
+      {"role": "user", "content": "Hello from remote node!"}
+    ]
+  }'
+```
+
 ### Using the Proxy Endpoint
 
 You can also directly proxy requests to the llama-server instance:
diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md
index 824c4fe..b02de2d 100644
--- a/docs/user-guide/managing-instances.md
+++ b/docs/user-guide/managing-instances.md
@@ -39,26 +39,27 @@ Each instance is displayed as a card showing:
 
 1. Click the **"Create Instance"** button on the dashboard
 2. Enter a unique **Name** for your instance (only required field)
-3. **Choose Backend Type**:
+3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
+4. **Choose Backend Type**:
     - **llama.cpp**: For GGUF models using llama-server
     - **MLX**: For MLX-optimized models (macOS only)
     - **vLLM**: For distributed serving and high-throughput inference
-4. Configure model source:
+5. Configure model source:
     - **For llama.cpp**: GGUF model path or HuggingFace repo
     - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
     - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
-5. Configure optional instance management settings:
+6. Configure optional instance management settings:
     - **Auto Restart**: Automatically restart instance on failure
     - **Max Restarts**: Maximum number of restart attempts
     - **Restart Delay**: Delay in seconds between restart attempts
     - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
     - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
     - **Environment Variables**: Set custom environment variables for the instance process
-6. Configure backend-specific options:
+7. Configure backend-specific options:
     - **llama.cpp**: Threads, context size, GPU layers, port, etc.
     - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
     - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
-7. Click **"Create"** to save the instance  
+8. Click **"Create"** to save the instance  
 
 ### Via API
 
@@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
       "gpu_layers": 32
     }
   }'
+
+# Create instance on specific remote node
+curl -X POST http://localhost:8080/api/instances/remote-llama \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1"]
+  }'
 ```
 
 ## Start Instance
@@ -227,3 +240,4 @@ Check the health status of your instances:
 ```bash
 curl http://localhost:8080/api/instances/{name}/proxy/health
 ```
+
diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md
index 5608139..4b7a507 100644
--- a/docs/user-guide/troubleshooting.md
+++ b/docs/user-guide/troubleshooting.md
@@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama.
      http://localhost:8080/api/v1/instances
    ```
 
+## Remote Node Issues
+
+### Node Configuration
+
+**Problem:** Remote instances not appearing or cannot be managed
+
+**Solutions:**
+1. **Verify node configuration:**
+   ```yaml
+   local_node: "main"  # Must match a key in nodes map
+   nodes:
+     main:
+       address: ""     # Empty for local node
+     worker1:
+       address: "http://worker1.internal:8080"
+       api_key: "secure-key"  # Must match worker1's management key
+   ```
+
+2. **Test remote node connectivity:**
+   ```bash
+   curl -H "Authorization: Bearer secure-key" \
+     http://worker1.internal:8080/api/v1/instances
+   ```
+
 ## Debugging and Logs
 
 ### Viewing Instance Logs
diff --git a/pkg/config/config.go b/pkg/config/config.go
index ee57cd2..d6ee420 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -37,13 +37,15 @@ type BackendConfig struct {
 
 // AppConfig represents the configuration for llamactl
 type AppConfig struct {
-	Server     ServerConfig    `yaml:"server"`
-	Backends   BackendConfig   `yaml:"backends"`
-	Instances  InstancesConfig `yaml:"instances"`
-	Auth       AuthConfig      `yaml:"auth"`
-	Version    string          `yaml:"-"`
-	CommitHash string          `yaml:"-"`
-	BuildTime  string          `yaml:"-"`
+	Server     ServerConfig          `yaml:"server"`
+	Backends   BackendConfig         `yaml:"backends"`
+	Instances  InstancesConfig       `yaml:"instances"`
+	Auth       AuthConfig            `yaml:"auth"`
+	LocalNode  string                `yaml:"local_node,omitempty"` // name of the entry in Nodes that represents the local node
+	Nodes      map[string]NodeConfig `yaml:"nodes,omitempty"`      // node configurations keyed by node name
+	Version    string                `yaml:"-"`
+	CommitHash string                `yaml:"-"`
+	BuildTime  string                `yaml:"-"`
 }
 
 // ServerConfig contains HTTP server configuration
@@ -128,6 +130,11 @@ type AuthConfig struct {
 	ManagementKeys []string `yaml:"management_keys"`
 }
 
+type NodeConfig struct { // NodeConfig is the value type of the AppConfig.Nodes map
+	Address string `yaml:"address"`           // documented as the HTTP/HTTPS URL of a remote node; empty for the local node
+	APIKey  string `yaml:"api_key,omitempty"` // documented as the management API key used to authenticate with the remote node
+}
+
 // LoadConfig loads configuration with the following precedence:
 // 1. Hardcoded defaults
 // 2. Config file
@@ -142,6 +149,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			AllowedHeaders: []string{"*"}, // Default to allow all headers
 			EnableSwagger:  false,
 		},
+		LocalNode: "main",
+		Nodes: map[string]NodeConfig{
+			"main": {}, // Local node with empty config
+		},
 		Backends: BackendConfig{
 			LlamaCpp: BackendSettings{
 				Command:     "llama-server",
@@ -469,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
 	if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
 		cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
 	}
+
+	// Local node config
+	if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
+		cfg.LocalNode = localNode
+	}
 }
 
 // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index ad800ed..964708e 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
 		t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
 	}
 }
+
+func TestLoadConfig_LocalNode(t *testing.T) {
+	t.Run("default local node", func(t *testing.T) {
+		cfg, err := config.LoadConfig("nonexistent-file.yaml")
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "main" {
+			t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
+		}
+	})
+
+	t.Run("local node from file", func(t *testing.T) {
+		tempDir := t.TempDir()
+		configFile := filepath.Join(tempDir, "test-config.yaml")
+
+		configContent := `
+local_node: "worker1"
+nodes:
+  worker1:
+    address: ""
+  worker2:
+    address: "http://192.168.1.10:8080"
+    api_key: "test-key"
+`
+
+		err := os.WriteFile(configFile, []byte(configContent), 0644)
+		if err != nil {
+			t.Fatalf("Failed to write test config file: %v", err)
+		}
+
+		cfg, err := config.LoadConfig(configFile)
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "worker1" {
+			t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
+		}
+
+		// The default "main" entry is expected alongside worker1 and worker2 from the file
+		if len(cfg.Nodes) != 3 {
+			t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
+		}
+
+		// The local node entry must exist with an empty address and API key
+		localNode, exists := cfg.Nodes["worker1"]
+		if !exists {
+			t.Error("Expected local node 'worker1' to exist in nodes map")
+		}
+		if localNode.Address != "" {
+			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
+		}
+		if localNode.APIKey != "" {
+			t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
+		}
+
+		// The remote node entry must exist with the address given in the file
+		remoteNode, exists := cfg.Nodes["worker2"]
+		if !exists {
+			t.Error("Expected remote node 'worker2' to exist in nodes map")
+		}
+		if remoteNode.Address != "http://192.168.1.10:8080" {
+			t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
+		}
+
+		// The default "main" entry must not be dropped when the file defines other nodes
+		_, exists = cfg.Nodes["main"]
+		if !exists {
+			t.Error("Expected default 'main' node to still exist in nodes map")
+		}
+	})
+
+	t.Run("custom local node name in config", func(t *testing.T) {
+		tempDir := t.TempDir()
+		configFile := filepath.Join(tempDir, "test-config.yaml")
+
+		configContent := `
+local_node: "primary"
+nodes:
+  primary:
+    address: ""
+  worker1:
+    address: "http://192.168.1.10:8080"
+`
+
+		err := os.WriteFile(configFile, []byte(configContent), 0644)
+		if err != nil {
+			t.Fatalf("Failed to write test config file: %v", err)
+		}
+
+		cfg, err := config.LoadConfig(configFile)
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "primary" {
+			t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
+		}
+
+		// The default "main" entry is expected alongside primary and worker1 from the file
+		if len(cfg.Nodes) != 3 {
+			t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
+		}
+
+		localNode, exists := cfg.Nodes["primary"]
+		if !exists {
+			t.Error("Expected local node 'primary' to exist in nodes map")
+		}
+		if localNode.Address != "" {
+			t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
+		}
+	})
+
+	t.Run("local node from environment variable", func(t *testing.T) {
+		os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
+		defer os.Unsetenv("LLAMACTL_LOCAL_NODE") // NOTE(review): unsets instead of restoring a pre-existing value; t.Setenv would restore it
+
+		cfg, err := config.LoadConfig("nonexistent-file.yaml")
+		if err != nil {
+			t.Fatalf("LoadConfig failed: %v", err)
+		}
+
+		if cfg.LocalNode != "custom-node" {
+			t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
+		}
+	})
+}
diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go
index 228f382..dcebef4 100644
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -171,6 +171,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 		return nil, fmt.Errorf("instance %s has no options set", i.Name)
 	}
 
+	// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
+	if len(i.options.Nodes) > 0 {
+		return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
+	}
+
 	var host string
 	var port int
 	switch i.options.BackendType {
@@ -285,5 +290,24 @@ func (i *Process) UnmarshalJSON(data []byte) error {
 		i.options = aux.Options
 	}
 
+	// Initialize fields that are not serialized
+	if i.timeProvider == nil {
+		i.timeProvider = realTimeProvider{}
+	}
+	if i.logger == nil && i.globalInstanceSettings != nil {
+		i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
+	}
+
 	return nil
 }
+
+// IsRemote reports whether the instance's options name at least one node.
+// It returns false when no options are set.
+func (i *Process) IsRemote() bool {
+	i.mu.RLock()
+	defer i.mu.RUnlock()
+
+	// Read the options once; nil options mean there is no node list to inspect.
+	opts := i.options
+	return opts != nil && len(opts.Nodes) > 0
+}
diff --git a/pkg/instance/options.go b/pkg/instance/options.go
index 62181dd..439f426 100644
--- a/pkg/instance/options.go
+++ b/pkg/instance/options.go
@@ -27,6 +27,8 @@ type CreateInstanceOptions struct {
 	BackendType    backends.BackendType `json:"backend_type"`
 	BackendOptions map[string]any       `json:"backend_options,omitempty"`
 
+	Nodes []string `json:"nodes,omitempty"`
+
 	// Backend-specific options
 	LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
 	MlxServerOptions   *mlx.MlxServerOptions        `json:"-"`
diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go
index 725b1a9..b944ef3 100644
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -6,6 +6,7 @@ import (
 	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"log"
+	"net/http"
 	"os"
 	"path/filepath"
 	"strings"
@@ -25,10 +26,22 @@ type InstanceManager interface {
 	StopInstance(name string) (*instance.Process, error)
 	EvictLRUInstance() error
 	RestartInstance(name string) (*instance.Process, error)
-	GetInstanceLogs(name string) (string, error)
+	GetInstanceLogs(name string, numLines int) (string, error)
 	Shutdown()
 }
 
+type RemoteManager interface { // RemoteManager declares per-node instance operations; every method takes the target node's config
+	ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
+	CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
+	DeleteRemoteInstance(node *config.NodeConfig, name string) error
+	StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
+	GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
+}
+
 type instanceManager struct {
 	mu               sync.RWMutex
 	instances        map[string]*instance.Process
@@ -42,13 +55,26 @@ type instanceManager struct {
 	shutdownChan   chan struct{}
 	shutdownDone   chan struct{}
 	isShutdown     bool
+
+	// Remote instance management
+	httpClient        *http.Client
+	instanceNodeMap   map[string]*config.NodeConfig // Maps instance name to its node config
+	nodeConfigMap     map[string]*config.NodeConfig // Maps node name to node config for quick lookup
 }
 
 // NewInstanceManager creates a new instance of InstanceManager.
-func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
+func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig) InstanceManager {
 	if instancesConfig.TimeoutCheckInterval <= 0 {
 		instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
 	}
+
+	// Build node config map for quick lookup
+	nodeConfigMap := make(map[string]*config.NodeConfig)
+	for name := range nodesConfig {
+		nodeCopy := nodesConfig[name]
+		nodeConfigMap[name] = &nodeCopy
+	}
+
 	im := &instanceManager{
 		instances:        make(map[string]*instance.Process),
 		runningInstances: make(map[string]struct{}),
@@ -59,6 +85,13 @@ func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig con
 		timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
 		shutdownChan:   make(chan struct{}),
 		shutdownDone:   make(chan struct{}),
+
+		httpClient: &http.Client{
+			Timeout: 30 * time.Second,
+		},
+
+		instanceNodeMap: make(map[string]*config.NodeConfig),
+		nodeConfigMap:   nodeConfigMap,
 	}
 
 	// Load existing instances from disk
@@ -238,24 +271,43 @@ func (im *instanceManager) loadInstance(name, path string) error {
 		return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
 	}
 
-	statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
-		im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
+	options := persistedInstance.GetOptions()
+
+	// Check if this is a remote instance
+	isRemote := options != nil && len(options.Nodes) > 0
+
+	var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
+	if !isRemote {
+		// Only set status callback for local instances
+		statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
+			im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
+		}
 	}
 
 	// Create new inst using NewInstance (handles validation, defaults, setup)
-	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
+	inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
 
 	// Restore persisted fields that NewInstance doesn't set
 	inst.Created = persistedInstance.Created
 	inst.SetStatus(persistedInstance.Status)
 
-	// Check for port conflicts and add to maps
-	if inst.GetPort() > 0 {
-		port := inst.GetPort()
-		if im.ports[port] {
-			return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
+	// Handle remote instance mapping
+	if isRemote {
+		nodeName := options.Nodes[0]
+		nodeConfig, exists := im.nodeConfigMap[nodeName]
+		if !exists {
+			return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
+		}
+		im.instanceNodeMap[name] = nodeConfig
+	} else {
+		// Check for port conflicts only for local instances
+		if inst.GetPort() > 0 {
+			port := inst.GetPort()
+			if im.ports[port] {
+				return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port)
+			}
+			im.ports[port] = true
 		}
-		im.ports[port] = true
 	}
 
 	im.instances[name] = inst
@@ -293,8 +345,18 @@ func (im *instanceManager) autoStartInstances() {
 		log.Printf("Auto-starting instance %s", inst.Name)
 		// Reset running state before starting (since Start() expects stopped instance)
 		inst.SetStatus(instance.Stopped)
-		if err := inst.Start(); err != nil {
-			log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
+
+		// Check if this is a remote instance
+		if node := im.getNodeForInstance(inst); node != nil {
+			// Remote instance - use StartRemoteInstance
+			if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
+				log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
+			}
+		} else {
+			// Local instance - call Start() directly
+			if err := inst.Start(); err != nil {
+				log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
+			}
 		}
 	}
 }
@@ -309,3 +371,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
 		delete(im.runningInstances, name)
 	}
 }
+
+// getNodeForInstance looks up the node configuration recorded in
+// instanceNodeMap for a remote instance. It returns nil when the instance is
+// not remote or when no mapping is stored under its name.
+//
+// NOTE(review): the map is read without acquiring im.mu here; confirm callers
+// hold the lock or that concurrent writes cannot happen.
+func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
+	if inst.IsRemote() {
+		// Indexing a missing key yields the zero value, i.e. a nil pointer.
+		return im.instanceNodeMap[inst.Name]
+	}
+
+	return nil
+}
diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go
index 3b683d6..e59e2eb 100644
--- a/pkg/manager/manager_test.go
+++ b/pkg/manager/manager_test.go
@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
 		TimeoutCheckInterval: 5,
 	}
 
-	mgr := manager.NewInstanceManager(backendConfig, cfg)
+	mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	if mgr == nil {
 		t.Fatal("NewInstanceManager returned nil")
 	}
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
 	}
 
 	// Test instance persistence on creation
-	manager1 := manager.NewInstanceManager(backendConfig, cfg)
+	manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	options := &instance.CreateInstanceOptions{
 		BackendType: backends.BackendTypeLlamaCpp,
 		LlamaServerOptions: &llamacpp.LlamaServerOptions{
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
 	}
 
 	// Test loading instances from disk
-	manager2 := manager.NewInstanceManager(backendConfig, cfg)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	instances, err := manager2.ListInstances()
 	if err != nil {
 		t.Fatalf("ListInstances failed: %v", err)
@@ -207,7 +207,7 @@ func createTestManager() manager.InstanceManager {
 		DefaultRestartDelay:  5,
 		TimeoutCheckInterval: 5,
 	}
-	return manager.NewInstanceManager(backendConfig, cfg)
+	return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 }
 
 func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
@@ -227,7 +227,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
 	}
 
 	// Create first manager and instance with auto-restart disabled
-	manager1 := manager.NewInstanceManager(backendConfig, cfg)
+	manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 
 	autoRestart := false
 	options := &instance.CreateInstanceOptions{
@@ -252,7 +252,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
 	manager1.Shutdown()
 
 	// Create second manager (simulating restart of llamactl)
-	manager2 := manager.NewInstanceManager(backendConfig, cfg)
+	manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 
 	// Get the loaded instance
 	loadedInst, err := manager2.GetInstance("test-instance")
diff --git a/pkg/manager/operations.go b/pkg/manager/operations.go
index b3c0d13..a8b5c3f 100644
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -3,6 +3,7 @@ package manager
 import (
 	"fmt"
 	"llamactl/pkg/backends"
+	"llamactl/pkg/config"
 	"llamactl/pkg/instance"
 	"llamactl/pkg/validation"
 	"os"
@@ -11,16 +12,65 @@ import (
 
 type MaxRunningInstancesError error
 
+// updateLocalInstanceFromRemote copies the options, status and creation time of
+// remoteInst onto the local stub localInst. The stub's own Nodes list is kept so
+// that it is still identified as a remote instance. The call is a no-op when
+// either instance is nil or the remote instance has no options.
+func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
+	if localInst == nil || remoteInst == nil {
+		return
+	}
+
+	fetched := remoteInst.GetOptions()
+	if fetched == nil {
+		return
+	}
+
+	// Snapshot the stub's node list before its options are replaced.
+	var nodes []string
+	if current := localInst.GetOptions(); current != nil && len(current.Nodes) > 0 {
+		nodes = append(make([]string, 0, len(current.Nodes)), current.Nodes...)
+	}
+
+	// Shallow-copy the remote options, then put the local node list back.
+	merged := *fetched
+	merged.Nodes = nodes
+
+	// Apply the merged options together with the remote status and timestamp.
+	localInst.SetOptions(&merged)
+
+	localInst.Status = remoteInst.Status
+	localInst.Created = remoteInst.Created
+}
+
 // ListInstances returns a list of all instances managed by the instance manager.
+// For remote instances, this fetches the live state from remote nodes and updates local stubs.
 func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
 	im.mu.RLock()
-	defer im.mu.RUnlock()
-
-	instances := make([]*instance.Process, 0, len(im.instances))
+	localInstances := make([]*instance.Process, 0, len(im.instances))
 	for _, inst := range im.instances {
-		instances = append(instances, inst)
+		localInstances = append(localInstances, inst)
 	}
-	return instances, nil
+	im.mu.RUnlock()
+
+	// The read lock has been released; refresh each remote instance with live state
+	for _, inst := range localInstances {
+		if node := im.getNodeForInstance(inst); node != nil {
+			remoteInst, err := im.GetRemoteInstance(node, inst.Name)
+			if err != nil {
+				// NOTE(review): the error is dropped here without being logged; the stale
+				// local data is kept so one remote failure does not fail the whole list
+				continue
+			}
+
+			// Update the local stub with all remote data (preserving Nodes)
+			im.mu.Lock()
+			im.updateLocalInstanceFromRemote(inst, remoteInst)
+			im.mu.Unlock()
+		}
+	}
+
+	return localInstances, nil
 }
 
 // CreateInstance creates a new instance with the given options and returns it.
@@ -43,16 +93,56 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 	im.mu.Lock()
 	defer im.mu.Unlock()
 
-	// Check max instances limit after acquiring the lock
-	if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
-		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
-	}
-
-	// Check if instance with this name already exists
+	// Check if instance with this name already exists (must be globally unique)
 	if im.instances[name] != nil {
 		return nil, fmt.Errorf("instance with name %s already exists", name)
 	}
 
+	// Check if this is a remote instance
+	isRemote := len(options.Nodes) > 0
+	var nodeConfig *config.NodeConfig
+
+	if isRemote {
+		// Validate that the node exists
+		nodeName := options.Nodes[0] // Use first node for now
+		var exists bool
+		nodeConfig, exists = im.nodeConfigMap[nodeName]
+		if !exists {
+			return nil, fmt.Errorf("node %s not found", nodeName)
+		}
+
+		// Create the remote instance on the remote node
+		remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
+		if err != nil {
+			return nil, err
+		}
+
+		// Create a local stub that preserves the Nodes field for tracking
+		// We keep the original options (with Nodes) so IsRemote() works correctly
+		inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, nil)
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+
+		// Add to local tracking maps (but don't count towards limits)
+		im.instances[name] = inst
+		im.instanceNodeMap[name] = nodeConfig
+
+		// Persist the remote instance locally for tracking across restarts
+		if err := im.persistInstance(inst); err != nil {
+			return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
+		}
+
+		return inst, nil
+	}
+
+	// Local instance creation
+	// Check max instances limit for local instances only
+	localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
+	if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
+		return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
+	}
+
 	// Assign and validate port for backend-specific options
 	if err := im.assignAndValidatePort(options); err != nil {
 		return nil, err
@@ -73,28 +163,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
 }
 
 // GetInstance retrieves an instance by its name.
+// For remote instances, this fetches the live state from the remote node and updates the local stub.
 func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	defer im.mu.RUnlock()
+	inst, exists := im.instances[name]
+	im.mu.RUnlock()
 
-	instance, exists := im.instances[name]
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	return instance, nil
+
+	// Check if instance is remote and fetch live state
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.GetRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		// Return the local stub (preserving Nodes field)
+		return inst, nil
+	}
+
+	return inst, nil
 }
 
 // UpdateInstance updates the options of an existing instance and returns it.
 // If the instance is running, it will be restarted to apply the new options.
 func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()
 
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
 
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.UpdateRemoteInstance(node, name, options)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		// Persist the updated remote instance locally
+		im.mu.Lock()
+		defer im.mu.Unlock()
+		if err := im.persistInstance(inst); err != nil {
+			return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
+		}
+
+		return inst, nil
+	}
+
 	if options == nil {
 		return nil, fmt.Errorf("instance options cannot be nil")
 	}
@@ -105,55 +235,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
 	}
 
 	// Check if instance is running before updating options
-	wasRunning := instance.IsRunning()
+	wasRunning := inst.IsRunning()
 
 	// If the instance is running, stop it first
 	if wasRunning {
-		if err := instance.Stop(); err != nil {
+		if err := inst.Stop(); err != nil {
 			return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
 		}
 	}
 
 	// Now update the options while the instance is stopped
-	instance.SetOptions(options)
+	inst.SetOptions(options)
 
 	// If it was running before, start it again with the new options
 	if wasRunning {
-		if err := instance.Start(); err != nil {
+		if err := inst.Start(); err != nil {
 			return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
 		}
 	}
 
 	im.mu.Lock()
 	defer im.mu.Unlock()
-	if err := im.persistInstance(instance); err != nil {
+	if err := im.persistInstance(inst); err != nil {
 		return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
 	}
 
-	return instance, nil
+	return inst, nil
 }
 
 // DeleteInstance removes stopped instance by its name.
 func (im *instanceManager) DeleteInstance(name string) error {
 	im.mu.Lock()
-	defer im.mu.Unlock()
+	inst, exists := im.instances[name]
+	im.mu.Unlock()
 
-	instance, exists := im.instances[name]
 	if !exists {
 		return fmt.Errorf("instance with name %s not found", name)
 	}
 
-	if instance.IsRunning() {
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		err := im.DeleteRemoteInstance(node, name)
+		if err != nil {
+			return err
+		}
+
+		// Clean up local tracking
+		im.mu.Lock()
+		defer im.mu.Unlock()
+		delete(im.instances, name)
+		delete(im.instanceNodeMap, name)
+
+		// Remove the instance's persisted config file; a file that does not exist is not an error
+		// Re-validate instance name for security (defense in depth)
+		validatedName, err := validation.ValidateInstanceName(name)
+		if err != nil {
+			return fmt.Errorf("invalid instance name for file deletion: %w", err)
+		}
+		instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
+		if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
+			return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
+		}
+
+		return nil
+	}
+
+	if inst.IsRunning() {
 		return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
 	}
 
-	delete(im.ports, instance.GetPort())
+	im.mu.Lock()
+	defer im.mu.Unlock()
+
+	delete(im.ports, inst.GetPort())
 	delete(im.instances, name)
 
 	// Delete the instance's config file if persistence is enabled
-	instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
+	// Re-validate instance name for security (defense in depth)
+	validatedName, err := validation.ValidateInstanceName(inst.Name)
+	if err != nil {
+		return fmt.Errorf("invalid instance name for file deletion: %w", err)
+	}
+	instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
 	if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
-		return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
+		return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
 	}
 
 	return nil
@@ -163,33 +328,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
 // If the instance is already running, it returns an error.
 func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
-	maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()
 
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if instance.IsRunning() {
-		return instance, fmt.Errorf("instance with name %s is already running", name)
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.StartRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
 	}
 
+	if inst.IsRunning() {
+		return inst, fmt.Errorf("instance with name %s is already running", name)
+	}
+
+	// Check max running instances limit for local instances only
+	im.mu.RLock()
+	localRunningCount := 0
+	for instName := range im.runningInstances {
+		if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
+			localRunningCount++
+		}
+	}
+	maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
+	im.mu.RUnlock()
+
 	if maxRunningExceeded {
 		return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
 	}
 
-	if err := instance.Start(); err != nil {
+	if err := inst.Start(); err != nil {
 		return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
 	}
 
 	im.mu.Lock()
 	defer im.mu.Unlock()
-	err := im.persistInstance(instance)
+	err := im.persistInstance(inst)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}
 
-	return instance, nil
+	return inst, nil
 }
 
 func (im *instanceManager) IsMaxRunningInstancesReached() bool {
@@ -206,51 +397,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
 // StopInstance stops a running instance and returns it.
 func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
 	im.mu.RLock()
-	instance, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()
 
 	if !exists {
 		return nil, fmt.Errorf("instance with name %s not found", name)
 	}
-	if !instance.IsRunning() {
-		return instance, fmt.Errorf("instance with name %s is already stopped", name)
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.StopRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
 	}
 
-	if err := instance.Stop(); err != nil {
+	if !inst.IsRunning() {
+		return inst, fmt.Errorf("instance with name %s is already stopped", name)
+	}
+
+	if err := inst.Stop(); err != nil {
 		return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
 	}
 
 	im.mu.Lock()
 	defer im.mu.Unlock()
-	err := im.persistInstance(instance)
+	err := im.persistInstance(inst)
 	if err != nil {
 		return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
 	}
 
-	return instance, nil
+	return inst, nil
 }
 
 // RestartInstance stops and then starts an instance, returning the updated instance.
 func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
-	instance, err := im.StopInstance(name)
+	im.mu.RLock()
+	inst, exists := im.instances[name]
+	im.mu.RUnlock()
+
+	if !exists {
+		return nil, fmt.Errorf("instance with name %s not found", name)
+	}
+
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		remoteInst, err := im.RestartRemoteInstance(node, name)
+		if err != nil {
+			return nil, err
+		}
+
+		// Update the local stub with all remote data (preserving Nodes)
+		im.mu.Lock()
+		im.updateLocalInstanceFromRemote(inst, remoteInst)
+		im.mu.Unlock()
+
+		return inst, nil
+	}
+
+	inst, err := im.StopInstance(name)
 	if err != nil {
 		return nil, err
 	}
-	return im.StartInstance(instance.Name)
+	return im.StartInstance(inst.Name)
 }
 
 // GetInstanceLogs retrieves the logs for a specific instance by its name.
-func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
+func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
 	im.mu.RLock()
-	_, exists := im.instances[name]
+	inst, exists := im.instances[name]
 	im.mu.RUnlock()
 
 	if !exists {
 		return "", fmt.Errorf("instance with name %s not found", name)
 	}
 
-	// TODO: Implement actual log retrieval logic
-	return fmt.Sprintf("Logs for instance %s", name), nil
+	// Check if instance is remote and delegate to remote operation
+	if node := im.getNodeForInstance(inst); node != nil {
+		return im.GetRemoteInstanceLogs(node, name, numLines)
+	}
+
+	// Get logs from the local instance
+	return inst.GetLogs(numLines)
 }
 
 // getPortFromOptions extracts the port from backend-specific options
diff --git a/pkg/manager/operations_test.go b/pkg/manager/operations_test.go
index 97358c5..fdeb44f 100644
--- a/pkg/manager/operations_test.go
+++ b/pkg/manager/operations_test.go
@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
 		MaxInstances:         1, // Very low limit for testing
 		TimeoutCheckInterval: 5,
 	}
-	limitedManager := manager.NewInstanceManager(backendConfig, cfg)
+	limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 
 	_, err = limitedManager.CreateInstance("instance1", options)
 	if err != nil {
diff --git a/pkg/manager/remote_ops.go b/pkg/manager/remote_ops.go
new file mode 100644
index 0000000..40b2384
--- /dev/null
+++ b/pkg/manager/remote_ops.go
@@ -0,0 +1,243 @@
+package manager
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"llamactl/pkg/config"
+	"llamactl/pkg/instance"
+	"net/http"
+)
+
+// stripNodesFromOptions creates a copy of the instance options without the Nodes field
+// to prevent routing loops when sending requests to remote nodes
+func (im *instanceManager) stripNodesFromOptions(options *instance.CreateInstanceOptions) *instance.CreateInstanceOptions {
+	if options == nil {
+		return nil
+	}
+
+	// Create a copy of the options struct
+	optionsCopy := *options
+
+	// Clear the Nodes field to prevent the remote node from trying to route further
+	optionsCopy.Nodes = nil
+
+	return &optionsCopy
+}
+
+// makeRemoteRequest is a helper function to make HTTP requests to a remote node
+func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
+	var reqBody io.Reader
+	if body != nil {
+		// Strip nodes from CreateInstanceOptions to prevent routing loops
+		if options, ok := body.(*instance.CreateInstanceOptions); ok {
+			body = im.stripNodesFromOptions(options)
+		}
+
+		jsonData, err := json.Marshal(body)
+		if err != nil {
+			return nil, fmt.Errorf("failed to marshal request body: %w", err)
+		}
+		reqBody = bytes.NewBuffer(jsonData)
+	}
+
+	url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
+	req, err := http.NewRequest(method, url, reqBody)
+	if err != nil {
+		return nil, fmt.Errorf("failed to create request: %w", err)
+	}
+
+	if body != nil {
+		req.Header.Set("Content-Type", "application/json")
+	}
+
+	if nodeConfig.APIKey != "" {
+		req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
+	}
+
+	resp, err := im.httpClient.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("failed to execute request: %w", err)
+	}
+
+	return resp, nil
+}
+
+// parseRemoteResponse is a helper function to parse API responses
+func parseRemoteResponse(resp *http.Response, result any) error {
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	if result != nil {
+		if err := json.Unmarshal(body, result); err != nil {
+			return fmt.Errorf("failed to unmarshal response: %w", err)
+		}
+	}
+
+	return nil
+}
+
+// ListRemoteInstances lists all instances on the remote node
+func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
+	resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var instances []*instance.Process
+	if err := parseRemoteResponse(resp, &instances); err != nil {
+		return nil, err
+	}
+
+	return instances, nil
+}
+
+// CreateRemoteInstance creates a new instance on the remote node
+func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/", name)
+
+	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// GetRemoteInstance retrieves an instance by name from the remote node
+func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/", name)
+	resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// UpdateRemoteInstance updates an existing instance on the remote node
+func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/", name)
+
+	resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// DeleteRemoteInstance deletes an instance from the remote node
+func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
+	path := fmt.Sprintf("/api/v1/instances/%s/", name)
+	resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
+	if err != nil {
+		return err
+	}
+
+	return parseRemoteResponse(resp, nil)
+}
+
+// StartRemoteInstance starts an instance on the remote node
+func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/start", name)
+	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// StopRemoteInstance stops an instance on the remote node
+func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/stop", name)
+	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// RestartRemoteInstance restarts an instance on the remote node
+func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/restart", name)
+	resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var inst instance.Process
+	if err := parseRemoteResponse(resp, &inst); err != nil {
+		return nil, err
+	}
+
+	return &inst, nil
+}
+
+// GetRemoteInstanceLogs retrieves logs for an instance from the remote node
+func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
+	path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
+	resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
+	if err != nil {
+		return "", err
+	}
+
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("failed to read response body: %w", err)
+	}
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
+	}
+
+	// Logs endpoint might return plain text or JSON
+	// Try to parse as JSON first (in case it's wrapped in a response object)
+	var logResponse struct {
+		Logs string `json:"logs"`
+	}
+	if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" {
+		return logResponse.Logs, nil
+	}
+
+	// Otherwise, return as plain text
+	return string(body), nil
+}
diff --git a/pkg/manager/remote_ops_test.go b/pkg/manager/remote_ops_test.go
new file mode 100644
index 0000000..94db40b
--- /dev/null
+++ b/pkg/manager/remote_ops_test.go
@@ -0,0 +1,39 @@
+package manager
+
+import (
+	"llamactl/pkg/backends"
+	"llamactl/pkg/instance"
+	"testing"
+)
+
+func TestStripNodesFromOptions(t *testing.T) {
+	im := &instanceManager{}
+
+	// Test nil case
+	if result := im.stripNodesFromOptions(nil); result != nil {
+		t.Errorf("Expected nil, got %+v", result)
+	}
+
+	// Test main case: nodes should be stripped, other fields preserved
+	options := &instance.CreateInstanceOptions{
+		BackendType: backends.BackendTypeLlamaCpp,
+		Nodes:       []string{"node1", "node2"},
+		Environment: map[string]string{"TEST": "value"},
+	}
+
+	result := im.stripNodesFromOptions(options)
+
+	if result.Nodes != nil {
+		t.Errorf("Expected Nodes to be nil, got %+v", result.Nodes)
+	}
+	if result.BackendType != backends.BackendTypeLlamaCpp {
+		t.Errorf("Expected BackendType preserved")
+	}
+	if result.Environment["TEST"] != "value" {
+		t.Errorf("Expected Environment preserved")
+	}
+	// Original should not be modified
+	if len(options.Nodes) != 2 {
+		t.Errorf("Original options should not be modified")
+	}
+}
diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go
index 0ee9c11..50b1c10 100644
--- a/pkg/manager/timeout.go
+++ b/pkg/manager/timeout.go
@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
 
 	// Identify instances that should timeout
 	for _, inst := range im.instances {
+		// Skip remote instances - they are managed by their respective nodes
+		if inst.IsRemote() {
+			continue
+		}
+
 		if inst.ShouldTimeout() {
 			timeoutInstances = append(timeoutInstances, inst.Name)
 		}
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
 			continue
 		}
 
+		// Skip remote instances - they are managed by their respective nodes
+		if inst.IsRemote() {
+			continue
+		}
+
 		if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
 			continue // Skip instances without idle timeout
 		}
diff --git a/pkg/manager/timeout_test.go b/pkg/manager/timeout_test.go
index 08d500c..55cd781 100644
--- a/pkg/manager/timeout_test.go
+++ b/pkg/manager/timeout_test.go
@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
 		MaxInstances:         5,
 	}
 
-	manager := manager.NewInstanceManager(backendConfig, cfg)
+	manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
 	if manager == nil {
 		t.Fatal("Manager should be initialized with timeout checker")
 	}
diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go
index 8f0b509..9e31df9 100644
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -1,865 +1,29 @@
 package server
 
 import (
-	"bytes"
-	"encoding/json"
-	"fmt"
-	"io"
-	"llamactl/pkg/backends"
-	"llamactl/pkg/backends/llamacpp"
-	"llamactl/pkg/backends/mlx"
-	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
-	"llamactl/pkg/instance"
 	"llamactl/pkg/manager"
 	"net/http"
-	"os/exec"
-	"strconv"
-	"strings"
-
-	"github.com/go-chi/chi/v5"
+	"net/http/httputil"
+	"sync"
+	"time"
 )
 
 type Handler struct {
 	InstanceManager manager.InstanceManager
 	cfg             config.AppConfig
+	httpClient      *http.Client
+	remoteProxies   map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
+	remoteProxiesMu sync.RWMutex
 }
 
 func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
 	return &Handler{
 		InstanceManager: im,
 		cfg:             cfg,
-	}
-}
-
-// VersionHandler godoc
-// @Summary Get llamactl version
-// @Description Returns the version of the llamactl command
-// @Tags version
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Version information"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /version [get]
-func (h *Handler) VersionHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		w.Header().Set("Content-Type", "text/plain")
-		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
-	}
-}
-
-// LlamaServerHelpHandler godoc
-// @Summary Get help for llama server
-// @Description Returns the help text for the llama server command
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Help text"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/help [get]
-func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		helpCmd := exec.Command("llama-server", "--help")
-		output, err := helpCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// LlamaServerVersionHandler godoc
-// @Summary Get version of llama server
-// @Description Returns the version of the llama server command
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "Version information"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/version [get]
-func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		versionCmd := exec.Command("llama-server", "--version")
-		output, err := versionCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// LlamaServerListDevicesHandler godoc
-// @Summary List available devices for llama server
-// @Description Returns a list of available devices for the llama server
-// @Tags backends
-// @Security ApiKeyAuth
-// @Produces text/plain
-// @Success 200 {string} string "List of devices"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /backends/llama-cpp/devices [get]
-func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		listCmd := exec.Command("llama-server", "--list-devices")
-		output, err := listCmd.CombinedOutput()
-		if err != nil {
-			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write(output)
-	}
-}
-
-// ListInstances godoc
-// @Summary List all instances
-// @Description Returns a list of all instances managed by the server
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Success 200 {array} instance.Process "List of instances"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances [get]
-func (h *Handler) ListInstances() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		instances, err := h.InstanceManager.ListInstances()
-		if err != nil {
-			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Marshal to bytes first to set Content-Length header
-		data, err := json.Marshal(instances)
-		if err != nil {
-			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		w.Header().Set("Content-Length", strconv.Itoa(len(data)))
-		w.Write(data)
-	}
-}
-
-// CreateInstance godoc
-// @Summary Create and start a new instance
-// @Description Creates a new instance with the provided configuration options
-// @Tags instances
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
-// @Success 201 {object} instance.Process "Created instance details"
-// @Failure 400 {string} string "Invalid request body"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [post]
-func (h *Handler) CreateInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		var options instance.CreateInstanceOptions
-		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.CreateInstance(name, &options)
-		if err != nil {
-			http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(http.StatusCreated)
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// GetInstance godoc
-// @Summary Get details of a specific instance
-// @Description Returns the details of a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [get]
-func (h *Handler) GetInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// UpdateInstance godoc
-// @Summary Update an instance's configuration
-// @Description Updates the configuration of a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
-// @Success 200 {object} instance.Process "Updated instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [put]
-func (h *Handler) UpdateInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		var options instance.CreateInstanceOptions
-		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.UpdateInstance(name, &options)
-		if err != nil {
-			http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// StartInstance godoc
-// @Summary Start a stopped instance
-// @Description Starts a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Started instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/start [post]
-func (h *Handler) StartInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.StartInstance(name)
-		if err != nil {
-			// Check if error is due to maximum running instances limit
-			if _, ok := err.(manager.MaxRunningInstancesError); ok {
-				http.Error(w, err.Error(), http.StatusConflict)
-				return
-			}
-
-			http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// StopInstance godoc
-// @Summary Stop a running instance
-// @Description Stops a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Stopped instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/stop [post]
-func (h *Handler) StopInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.StopInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// RestartInstance godoc
-// @Summary Restart a running instance
-// @Description Restarts a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Produces json
-// @Param name path string true "Instance Name"
-// @Success 200 {object} instance.Process "Restarted instance details"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/restart [post]
-func (h *Handler) RestartInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.RestartInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(inst); err != nil {
-			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// DeleteInstance godoc
-// @Summary Delete an instance
-// @Description Stops and removes a specific instance by name
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Success 204 "No Content"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name} [delete]
-func (h *Handler) DeleteInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		if err := h.InstanceManager.DeleteInstance(name); err != nil {
-			http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.WriteHeader(http.StatusNoContent)
-	}
-}
-
-// GetInstanceLogs godoc
-// @Summary Get logs from a specific instance
-// @Description Returns the logs from a specific instance by name with optional line limit
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Param lines query string false "Number of lines to retrieve (default: all lines)"
-// @Produces text/plain
-// @Success 200 {string} string "Instance logs"
-// @Failure 400 {string} string "Invalid name format or lines parameter"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /instances/{name}/logs [get]
-func (h *Handler) GetInstanceLogs() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		lines := r.URL.Query().Get("lines")
-		if lines == "" {
-			lines = "-1"
-		}
-
-		num_lines, err := strconv.Atoi(lines)
-		if err != nil {
-			http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		logs, err := inst.GetLogs(num_lines)
-		if err != nil {
-			http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		w.Header().Set("Content-Type", "text/plain")
-		w.Write([]byte(logs))
-	}
-}
-
-// ProxyToInstance godoc
-// @Summary Proxy requests to a specific instance
-// @Description Forwards HTTP requests to the llama-server instance running on a specific port
-// @Tags instances
-// @Security ApiKeyAuth
-// @Param name path string true "Instance Name"
-// @Success 200 "Request successfully proxied to instance"
-// @Failure 400 {string} string "Invalid name format"
-// @Failure 500 {string} string "Internal Server Error"
-// @Failure 503 {string} string "Instance is not running"
-// @Router /instances/{name}/proxy [get]
-// @Router /instances/{name}/proxy [post]
-func (h *Handler) ProxyToInstance() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		if !inst.IsRunning() {
-			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-			return
-		}
-
-		// Get the cached proxy for this instance
-		proxy, err := inst.GetProxy()
-		if err != nil {
-			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
-		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
-		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
-
-		// Update the last request time for the instance
-		inst.UpdateLastRequestTime()
-
-		// Set forwarded headers
-		r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
-		r.Header.Set("X-Forwarded-Proto", "http")
-
-		// Forward the request using the cached proxy
-		proxy.ServeHTTP(w, r)
-	}
-}
-
-// OpenAIListInstances godoc
-// @Summary List instances in OpenAI-compatible format
-// @Description Returns a list of instances in a format compatible with OpenAI API
-// @Tags openai
-// @Security ApiKeyAuth
-// @Produces json
-// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /v1/models [get]
-func (h *Handler) OpenAIListInstances() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		instances, err := h.InstanceManager.ListInstances()
-		if err != nil {
-			http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		openaiInstances := make([]OpenAIInstance, len(instances))
-		for i, inst := range instances {
-			openaiInstances[i] = OpenAIInstance{
-				ID:      inst.Name,
-				Object:  "model",
-				Created: inst.Created,
-				OwnedBy: "llamactl",
-			}
-		}
-
-		openaiResponse := OpenAIListInstancesResponse{
-			Object: "list",
-			Data:   openaiInstances,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
-			http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-	}
-}
-
-// OpenAIProxy godoc
-// @Summary OpenAI-compatible proxy endpoint
-// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
-// @Tags openai
-// @Security ApiKeyAuth
-// @Accept json
-// @Produces json
-// @Success 200 "OpenAI response"
-// @Failure 400 {string} string "Invalid request body or instance name"
-// @Failure 500 {string} string "Internal Server Error"
-// @Router /v1/ [post]
-func (h *Handler) OpenAIProxy() http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-		// Read the entire body first
-		bodyBytes, err := io.ReadAll(r.Body)
-		if err != nil {
-			http.Error(w, "Failed to read request body", http.StatusBadRequest)
-			return
-		}
-		r.Body.Close()
-
-		// Parse the body to extract instance name
-		var requestBody map[string]any
-		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
-			http.Error(w, "Invalid request body", http.StatusBadRequest)
-			return
-		}
-
-		modelName, ok := requestBody["model"].(string)
-		if !ok || modelName == "" {
-			http.Error(w, "Instance name is required", http.StatusBadRequest)
-			return
-		}
-
-		// Route to the appropriate inst based on instance name
-		inst, err := h.InstanceManager.GetInstance(modelName)
-		if err != nil {
-			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
-			return
-		}
-
-		if !inst.IsRunning() {
-			options := inst.GetOptions()
-			allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
-			if !allowOnDemand {
-				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-				return
-			}
-
-			if h.InstanceManager.IsMaxRunningInstancesReached() {
-				if h.cfg.Instances.EnableLRUEviction {
-					err := h.InstanceManager.EvictLRUInstance()
-					if err != nil {
-						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
-						return
-					}
-				} else {
-					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
-					return
-				}
-			}
-
-			// If on-demand start is enabled, start the instance
-			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
-				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
-				return
-			}
-
-			// Wait for the instance to become healthy before proceeding
-			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
-				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
-				return
-			}
-		}
-
-		proxy, err := inst.GetProxy()
-		if err != nil {
-			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Update last request time for the instance
-		inst.UpdateLastRequestTime()
-
-		// Recreate the request body from the bytes we read
-		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
-		r.ContentLength = int64(len(bodyBytes))
-
-		proxy.ServeHTTP(w, r)
-	}
-}
-
-func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
-	return func(w http.ResponseWriter, r *http.Request) {
-
-		// Get the instance name from the URL parameter
-		name := chi.URLParam(r, "name")
-		if name == "" {
-			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
-			return
-		}
-
-		// Route to the appropriate inst based on instance name
-		inst, err := h.InstanceManager.GetInstance(name)
-		if err != nil {
-			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
-			return
-		}
-
-		options := inst.GetOptions()
-		if options == nil {
-			http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
-			return
-		}
-
-		if options.BackendType != backends.BackendTypeLlamaCpp {
-			http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
-			return
-		}
-
-		if !inst.IsRunning() {
-
-			if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
-				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
-				return
-			}
-
-			if h.InstanceManager.IsMaxRunningInstancesReached() {
-				if h.cfg.Instances.EnableLRUEviction {
-					err := h.InstanceManager.EvictLRUInstance()
-					if err != nil {
-						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
-						return
-					}
-				} else {
-					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
-					return
-				}
-			}
-
-			// If on-demand start is enabled, start the instance
-			if _, err := h.InstanceManager.StartInstance(name); err != nil {
-				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
-				return
-			}
-
-			// Wait for the instance to become healthy before proceeding
-			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
-				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
-				return
-			}
-		}
-
-		proxy, err := inst.GetProxy()
-		if err != nil {
-			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
-			return
-		}
-
-		// Strip the "/llama-cpp/<name>" prefix from the request URL
-		prefix := fmt.Sprintf("/llama-cpp/%s", name)
-		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
-
-		// Update the last request time for the instance
-		inst.UpdateLastRequestTime()
-
-		proxy.ServeHTTP(w, r)
-	}
-}
-
-// ParseCommandRequest represents the request body for command parsing
-type ParseCommandRequest struct {
-	Command string `json:"command"`
-}
-
-// ParseLlamaCommand godoc
-// @Summary Parse llama-server command
-// @Description Parses a llama-server command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Failure 500 {object} map[string]string "Internal Server Error"
-// @Router /backends/llama-cpp/parse-command [post]
-func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-		llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-		options := &instance.CreateInstanceOptions{
-			BackendType:        backends.BackendTypeLlamaCpp,
-			LlamaServerOptions: llamaOptions,
-		}
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
-	}
-}
-
-// ParseMlxCommand godoc
-// @Summary Parse mlx_lm.server command
-// @Description Parses MLX-LM server command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Router /backends/mlx/parse-command [post]
-func (h *Handler) ParseMlxCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-
-		mlxOptions, err := mlx.ParseMlxCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-
-		// Currently only support mlx_lm backend type
-		backendType := backends.BackendTypeMlxLm
-
-		options := &instance.CreateInstanceOptions{
-			BackendType:      backendType,
-			MlxServerOptions: mlxOptions,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
-	}
-}
-
-// ParseVllmCommand godoc
-// @Summary Parse vllm serve command
-// @Description Parses a vLLM serve command string into instance options
-// @Tags backends
-// @Security ApiKeyAuth
-// @Accept json
-// @Produce json
-// @Param request body ParseCommandRequest true "Command to parse"
-// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
-// @Failure 400 {object} map[string]string "Invalid request or command"
-// @Router /backends/vllm/parse-command [post]
-func (h *Handler) ParseVllmCommand() http.HandlerFunc {
-	type errorResponse struct {
-		Error   string `json:"error"`
-		Details string `json:"details,omitempty"`
-	}
-	writeError := func(w http.ResponseWriter, status int, code, details string) {
-		w.Header().Set("Content-Type", "application/json")
-		w.WriteHeader(status)
-		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
-	}
-	return func(w http.ResponseWriter, r *http.Request) {
-		var req ParseCommandRequest
-		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
-			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
-			return
-		}
-
-		if strings.TrimSpace(req.Command) == "" {
-			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
-			return
-		}
-
-		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
-		if err != nil {
-			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
-			return
-		}
-
-		backendType := backends.BackendTypeVllm
-
-		options := &instance.CreateInstanceOptions{
-			BackendType:       backendType,
-			VllmServerOptions: vllmOptions,
-		}
-
-		w.Header().Set("Content-Type", "application/json")
-		if err := json.NewEncoder(w).Encode(options); err != nil {
-			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
-		}
+		httpClient: &http.Client{
+			Timeout: 30 * time.Second,
+		},
+		remoteProxies: make(map[string]*httputil.ReverseProxy),
 	}
 }
diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go
new file mode 100644
index 0000000..7d6cab0
--- /dev/null
+++ b/pkg/server/handlers_backends.go
@@ -0,0 +1,320 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"llamactl/pkg/backends"
+	"llamactl/pkg/backends/llamacpp"
+	"llamactl/pkg/backends/mlx"
+	"llamactl/pkg/backends/vllm"
+	"llamactl/pkg/instance"
+	"net/http"
+	"os/exec"
+	"strings"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// ParseCommandRequest is the JSON request body accepted by the backend parse-command endpoints.
+type ParseCommandRequest struct {
+	Command string `json:"command"` // raw command line to parse; handlers reject it when blank
+}
+
+func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Forwards requests under /llama-cpp/<name> to the named llama.cpp instance. A stopped instance
+		// is started first only when onDemandStart and the instance's own OnDemandStart option are both set.
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		// Look up the instance by name; a lookup failure is reported as a bad request
+		inst, err := h.InstanceManager.GetInstance(name)
+		if err != nil {
+			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		options := inst.GetOptions()
+		if options == nil {
+			http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
+			return
+		}
+
+		if options.BackendType != backends.BackendTypeLlamaCpp { // this route only serves llama.cpp instances
+			http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
+			return
+		}
+
+		if !inst.IsRunning() {
+			// Starting on demand requires both the route-level flag and the per-instance option.
+			if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
+
+			if h.InstanceManager.IsMaxRunningInstancesReached() { // at capacity: evict if enabled, else 409
+				if h.cfg.Instances.EnableLRUEviction {
+					err := h.InstanceManager.EvictLRUInstance()
+					if err != nil {
+						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
+						return
+					}
+				} else {
+					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+					return
+				}
+			}
+
+			// On-demand start is permitted at this point, so start the instance
+			if _, err := h.InstanceManager.StartInstance(name); err != nil {
+				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+				return
+			}
+
+			// Wait for the instance to become healthy, bounded by the configured OnDemandStartTimeout
+			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil {
+				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+				return
+			}
+		}
+
+		proxy, err := inst.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Strip the "/llama-cpp/<name>" prefix so the instance sees its own native path
+		prefix := fmt.Sprintf("/llama-cpp/%s", name)
+		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
+
+		// Update the last request time for the instance
+		inst.UpdateLastRequestTime()
+
+		proxy.ServeHTTP(w, r)
+	}
+}
+
+// ParseLlamaCommand godoc
+// @Summary Parse llama-server command
+// @Description Parses a llama-server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Failure 500 {object} map[string]string "Internal Server Error"
+// @Router /backends/llama-cpp/parse-command [post]
+func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
+	type errorResponse struct { // JSON error body: short error code plus optional detail text
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) { // sends status + JSON errorResponse
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil { // body must decode into ParseCommandRequest
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+		if strings.TrimSpace(req.Command) == "" { // whitespace-only commands are rejected as well
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+		llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+		options := &instance.CreateInstanceOptions{ // wrap the parsed options with their backend type
+			BackendType:        backends.BackendTypeLlamaCpp,
+			LlamaServerOptions: llamaOptions,
+		}
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) // NOTE(review): if Encode already wrote to w, this status cannot take effect
+		}
+	}
+}
+
+// ParseMlxCommand godoc
+// @Summary Parse mlx_lm.server command
+// @Description Parses MLX-LM server command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Router /backends/mlx/parse-command [post]
+func (h *Handler) ParseMlxCommand() http.HandlerFunc {
+	type errorResponse struct { // JSON error body: short error code plus optional detail text
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) { // sends status + JSON errorResponse
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil { // body must decode into ParseCommandRequest
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" { // whitespace-only commands are rejected as well
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		mlxOptions, err := mlx.ParseMlxCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		// Only the mlx_lm backend type is currently supported
+		backendType := backends.BackendTypeMlxLm
+
+		options := &instance.CreateInstanceOptions{ // wrap the parsed options with their backend type
+			BackendType:      backendType,
+			MlxServerOptions: mlxOptions,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) // NOTE(review): if Encode already wrote to w, this status cannot take effect
+		}
+	}
+}
+
+// ParseVllmCommand godoc
+// @Summary Parse vllm serve command
+// @Description Parses a vLLM serve command string into instance options
+// @Tags backends
+// @Security ApiKeyAuth
+// @Accept json
+// @Produce json
+// @Param request body ParseCommandRequest true "Command to parse"
+// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
+// @Failure 400 {object} map[string]string "Invalid request or command"
+// @Router /backends/vllm/parse-command [post]
+func (h *Handler) ParseVllmCommand() http.HandlerFunc {
+	type errorResponse struct { // JSON error body: short error code plus optional detail text
+		Error   string `json:"error"`
+		Details string `json:"details,omitempty"`
+	}
+	writeError := func(w http.ResponseWriter, status int, code, details string) { // sends status + JSON errorResponse
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(status)
+		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
+	}
+	return func(w http.ResponseWriter, r *http.Request) {
+		var req ParseCommandRequest
+		if err := json.NewDecoder(r.Body).Decode(&req); err != nil { // body must decode into ParseCommandRequest
+			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
+			return
+		}
+
+		if strings.TrimSpace(req.Command) == "" { // whitespace-only commands are rejected as well
+			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
+			return
+		}
+
+		vllmOptions, err := vllm.ParseVllmCommand(req.Command)
+		if err != nil {
+			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
+			return
+		}
+
+		backendType := backends.BackendTypeVllm
+
+		options := &instance.CreateInstanceOptions{ // wrap the parsed options with their backend type
+			BackendType:       backendType,
+			VllmServerOptions: vllmOptions,
+		}
+
+		w.Header().Set("Content-Type", "application/json")
+		if err := json.NewEncoder(w).Encode(options); err != nil {
+			writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) // NOTE(review): if Encode already wrote to w, this status cannot take effect
+		}
+	}
+}
+
+// LlamaServerHelpHandler godoc
+// @Summary Get help for llama server
+// @Description Returns the combined stdout/stderr output of `llama-server --help`
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "Help text"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/help [get]
+func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		helpCmd := exec.CommandContext(r.Context(), "llama-server", "--help") // bound to the request: killed if it is cancelled
+		output, err := helpCmd.CombinedOutput()
+		if err != nil { // covers a missing binary, a non-zero exit status, and cancellation
+			http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		w.Write(output)
+	}
+}
+
+// LlamaServerVersionHandler godoc
+// @Summary Get version of llama server
+// @Description Returns the combined stdout/stderr output of `llama-server --version`
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/version [get]
+func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		versionCmd := exec.CommandContext(r.Context(), "llama-server", "--version") // bound to the request: killed if it is cancelled
+		output, err := versionCmd.CombinedOutput()
+		if err != nil { // covers a missing binary, a non-zero exit status, and cancellation
+			http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		w.Write(output)
+	}
+}
+
+// LlamaServerListDevicesHandler godoc
+// @Summary List available devices for llama server
+// @Description Returns the combined stdout/stderr output of `llama-server --list-devices`
+// @Tags backends
+// @Security ApiKeyAuth
+// @Produce text/plain
+// @Success 200 {string} string "List of devices"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /backends/llama-cpp/devices [get]
+func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		listCmd := exec.CommandContext(r.Context(), "llama-server", "--list-devices") // bound to the request: killed if it is cancelled
+		output, err := listCmd.CombinedOutput()
+		if err != nil { // covers a missing binary, a non-zero exit status, and cancellation
+			http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "text/plain")
+		w.Write(output)
+	}
+}
diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go
new file mode 100644
index 0000000..be3cf4a
--- /dev/null
+++ b/pkg/server/handlers_instances.go
@@ -0,0 +1,445 @@
+package server
+
+import (
+	"encoding/json"
+	"fmt"
+	"llamactl/pkg/instance"
+	"llamactl/pkg/manager"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+	"strconv"
+	"strings"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// ListInstances godoc
+// @Summary List all instances
+// @Description Returns a list of all instances managed by the server
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produces json
+// @Success 200 {array} instance.Process "List of instances"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances [get]
+func (h *Handler) ListInstances() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		all, listErr := h.InstanceManager.ListInstances()
+		if listErr != nil {
+			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
+			return
+		}
+		// Serialize the list straight onto the response
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(all)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// CreateInstance godoc
+// @Summary Create and start a new instance
+// @Description Creates a new instance with the provided configuration options
+// @Tags instances
+// @Security ApiKeyAuth
+// @Accept json
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 201 {object} instance.Process "Created instance details"
+// @Failure 400 {string} string "Invalid request body"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [post]
+func (h *Handler) CreateInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		var options instance.CreateInstanceOptions
+		if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+		inst, err := h.InstanceManager.CreateInstance(name, &options)
+		if err != nil {
+			http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		// Marshal first: once the 201 header is sent, an encoding failure can no longer become a 500
+		payload, err := json.Marshal(inst)
+		if err != nil {
+			http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		w.WriteHeader(http.StatusCreated)
+		w.Write(append(payload, '\n')) // trailing newline keeps the body identical to Encoder output
+	}
+}
+
+// GetInstance godoc
+// @Summary Get details of a specific instance
+// @Description Returns the details of a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [get]
+func (h *Handler) GetInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// A failed lookup is reported to the client as a bad request
+		found, lookupErr := h.InstanceManager.GetInstance(instanceName)
+		if lookupErr != nil {
+			http.Error(w, "Invalid instance: "+lookupErr.Error(), http.StatusBadRequest)
+			return
+		}
+		// Write the instance back as JSON
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(found)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// UpdateInstance godoc
+// @Summary Update an instance's configuration
+// @Description Updates the configuration of a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Accept json
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
+// @Success 200 {object} instance.Process "Updated instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [put]
+func (h *Handler) UpdateInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// The request body carries the replacement options as JSON
+		opts := new(instance.CreateInstanceOptions)
+		if decodeErr := json.NewDecoder(r.Body).Decode(opts); decodeErr != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+		// Apply the options through the instance manager
+		updated, updateErr := h.InstanceManager.UpdateInstance(instanceName, opts)
+		if updateErr != nil {
+			http.Error(w, "Failed to update instance: "+updateErr.Error(), http.StatusInternalServerError)
+			return
+		}
+		// Reply with the instance as returned by the manager
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(updated)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// StartInstance godoc
+// @Summary Start a stopped instance
+// @Description Starts a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Started instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/start [post]
+func (h *Handler) StartInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// Ask the manager to start the instance
+		started, startErr := h.InstanceManager.StartInstance(instanceName)
+		if startErr != nil {
+			// Hitting the running-instance limit is a conflict; anything else is a server error
+			switch startErr.(type) {
+			case manager.MaxRunningInstancesError:
+				http.Error(w, startErr.Error(), http.StatusConflict)
+			default:
+				http.Error(w, "Failed to start instance: "+startErr.Error(), http.StatusInternalServerError)
+			}
+			return
+		}
+		// Reply with the instance as returned by the manager
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(started)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// StopInstance godoc
+// @Summary Stop a running instance
+// @Description Stops a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Stopped instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/stop [post]
+func (h *Handler) StopInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// Ask the manager to stop the instance
+		stopped, stopErr := h.InstanceManager.StopInstance(instanceName)
+		if stopErr != nil {
+			http.Error(w, "Failed to stop instance: "+stopErr.Error(), http.StatusInternalServerError)
+			return
+		}
+		// Reply with the instance as returned by the manager
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(stopped)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// RestartInstance godoc
+// @Summary Restart a running instance
+// @Description Restarts a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Instance Name"
+// @Success 200 {object} instance.Process "Restarted instance details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/restart [post]
+func (h *Handler) RestartInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// Ask the manager to restart the instance
+		restarted, restartErr := h.InstanceManager.RestartInstance(instanceName)
+		if restartErr != nil {
+			http.Error(w, "Failed to restart instance: "+restartErr.Error(), http.StatusInternalServerError)
+			return
+		}
+		// Reply with the instance as returned by the manager
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(restarted)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instance: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// DeleteInstance godoc
+// @Summary Delete an instance
+// @Description Stops and removes a specific instance by name
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Success 204 "No Content"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name} [delete]
+func (h *Handler) DeleteInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+		// Deletion has no response body; success is signalled by the 204 status alone
+		deleteErr := h.InstanceManager.DeleteInstance(instanceName)
+		if deleteErr != nil {
+			http.Error(w, "Failed to delete instance: "+deleteErr.Error(), http.StatusInternalServerError)
+			return
+		}
+		w.WriteHeader(http.StatusNoContent)
+	}
+}
+
+// GetInstanceLogs godoc
+// @Summary Get logs from a specific instance
+// @Description Returns the logs from a specific instance by name with optional line limit
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Param lines query string false "Number of lines to retrieve (default: all lines)"
+// @Produces text/plain
+// @Success 200 {string} string "Instance logs"
+// @Failure 400 {string} string "Invalid name format or lines parameter"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /instances/{name}/logs [get]
+func (h *Handler) GetInstanceLogs() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instanceName := chi.URLParam(r, "name")
+		if len(instanceName) == 0 {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		// Without a "lines" query value the limit stays at -1, which requests all lines
+		limit := -1
+		if raw := r.URL.Query().Get("lines"); raw != "" {
+			parsed, convErr := strconv.Atoi(raw)
+			if convErr != nil {
+				http.Error(w, "Invalid lines parameter: "+convErr.Error(), http.StatusBadRequest)
+				return
+			}
+			limit = parsed
+		}
+
+		// Log retrieval goes through the instance manager, which handles local and remote instances
+		logText, logErr := h.InstanceManager.GetInstanceLogs(instanceName, limit)
+		if logErr != nil {
+			http.Error(w, "Failed to get logs: "+logErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		w.Header().Set("Content-Type", "text/plain")
+		w.Write([]byte(logText))
+	}
+}
+
+// ProxyToInstance godoc
+// @Summary Proxy requests to a specific instance
+// @Description Forwards HTTP requests to the llama-server instance running on a specific port
+// @Tags instances
+// @Security ApiKeyAuth
+// @Param name path string true "Instance Name"
+// @Success 200 "Request successfully proxied to instance"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 500 {string} string "Internal Server Error"
+// @Failure 503 {string} string "Instance is not running"
+// @Router /instances/{name}/proxy [get]
+// @Router /instances/{name}/proxy [post]
+func (h *Handler) ProxyToInstance() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		name := chi.URLParam(r, "name")
+		if name == "" {
+			http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		inst, err := h.InstanceManager.GetInstance(name)
+		if err != nil {
+			http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Remote instances are served through the proxy for their node
+		if inst.IsRemote() {
+			h.RemoteInstanceProxy(w, r, name, inst)
+			return
+		}
+
+		if !inst.IsRunning() {
+			http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+			return
+		}
+
+		// Get the cached proxy for this instance
+		proxy, err := inst.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
+		prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
+		r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
+
+		// Update the last request time for the instance
+		inst.UpdateLastRequestTime()
+
+		// Set forwarded headers; net/http moves the incoming Host header into r.Host
+		r.Header.Set("X-Forwarded-Host", r.Host)
+		r.Header.Set("X-Forwarded-Proto", "http")
+
+		// Forward the request using the cached proxy
+		proxy.ServeHTTP(w, r)
+	}
+}
+
+// RemoteInstanceProxy forwards a request to the node that hosts a remote instance
+func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
+	// The first entry of the instance's node list identifies the target node
+	opts := inst.GetOptions()
+	if opts == nil || len(opts.Nodes) == 0 {
+		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
+		return
+	}
+	nodeName := opts.Nodes[0]
+
+	// Reuse the reverse proxy cached for this node, if there is one
+	h.remoteProxiesMu.RLock()
+	proxy, cached := h.remoteProxies[nodeName]
+	h.remoteProxiesMu.RUnlock()
+	if cached {
+		proxy.ServeHTTP(w, r)
+		return
+	}
+
+	// No proxy yet: look the node up in the server configuration
+	nodeConfig, known := h.cfg.Nodes[nodeName]
+	if !known {
+		http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
+		return
+	}
+
+	// The node address becomes the reverse proxy target
+	targetURL, parseErr := url.Parse(nodeConfig.Address)
+	if parseErr != nil {
+		http.Error(w, "Failed to parse node address: "+parseErr.Error(), http.StatusInternalServerError)
+		return
+	}
+	proxy = httputil.NewSingleHostReverseProxy(targetURL)
+
+	// Wrap the default director so outgoing requests carry the node's API key
+	baseDirector := proxy.Director
+	apiKey := nodeConfig.APIKey // captured by the director closure below
+	proxy.Director = func(req *http.Request) {
+		baseDirector(req)
+		if apiKey == "" {
+			return // no key configured, leave the headers as they are
+		}
+		req.Header.Set("Authorization", "Bearer "+apiKey)
+	}
+
+	// Store the proxy under the node name for subsequent requests
+	h.remoteProxiesMu.Lock()
+	h.remoteProxies[nodeName] = proxy
+	h.remoteProxiesMu.Unlock()
+
+	// Hand the request over to the newly built proxy
+	proxy.ServeHTTP(w, r)
+}
diff --git a/pkg/server/handlers_nodes.go b/pkg/server/handlers_nodes.go
new file mode 100644
index 0000000..98a4b43
--- /dev/null
+++ b/pkg/server/handlers_nodes.go
@@ -0,0 +1,79 @@
+package server
+
+import (
+	"encoding/json"
+	"net/http"
+
+	"github.com/go-chi/chi/v5"
+)
+
+// NodeResponse is the node representation returned by the API; it carries only the node address
+type NodeResponse struct {
+	Address string `json:"address"` // copied from the node configuration's Address field
+}
+
+// ListNodes godoc
+// @Summary List all configured nodes
+// @Description Returns a map of all nodes configured in the server (node name -> node config)
+// @Tags nodes
+// @Security ApiKeyAuth
+// @Produces json
+// @Success 200 {object} map[string]NodeResponse "Map of nodes"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /nodes [get]
+func (h *Handler) ListNodes() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Build the name -> NodeResponse map from the configured nodes,
+		// copying only the address of each one
+		sanitized := make(map[string]NodeResponse, len(h.cfg.Nodes))
+		for nodeName, nodeCfg := range h.cfg.Nodes {
+			sanitized[nodeName] = NodeResponse{Address: nodeCfg.Address}
+		}
+
+		// Emit the map as JSON
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(sanitized)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode nodes: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// GetNode godoc
+// @Summary Get details of a specific node
+// @Description Returns the details of a specific node by name
+// @Tags nodes
+// @Security ApiKeyAuth
+// @Produces json
+// @Param name path string true "Node Name"
+// @Success 200 {object} NodeResponse "Node details"
+// @Failure 400 {string} string "Invalid name format"
+// @Failure 404 {string} string "Node not found"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /nodes/{name} [get]
+func (h *Handler) GetNode() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		nodeName := chi.URLParam(r, "name")
+		if len(nodeName) == 0 {
+			http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
+			return
+		}
+
+		nodeCfg, found := h.cfg.Nodes[nodeName]
+		if !found {
+			http.Error(w, "Node not found", http.StatusNotFound)
+			return
+		}
+
+		// Only the address is copied into the response;
+		// the remaining node configuration is not exposed
+		payload := NodeResponse{Address: nodeCfg.Address}
+
+		// Emit the node as JSON
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(payload)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode node: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
diff --git a/pkg/server/handlers_openai.go b/pkg/server/handlers_openai.go
new file mode 100644
index 0000000..c6e56e9
--- /dev/null
+++ b/pkg/server/handlers_openai.go
@@ -0,0 +1,206 @@
+package server
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"llamactl/pkg/instance"
+	"net/http"
+	"net/http/httputil"
+	"net/url"
+)
+
+// OpenAIListInstances godoc
+// @Summary List instances in OpenAI-compatible format
+// @Description Returns a list of instances in a format compatible with OpenAI API
+// @Tags openai
+// @Security ApiKeyAuth
+// @Produces json
+// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /v1/models [get]
+func (h *Handler) OpenAIListInstances() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		instances, listErr := h.InstanceManager.ListInstances()
+		if listErr != nil {
+			http.Error(w, "Failed to list instances: "+listErr.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Each instance becomes one "model" entry whose ID is the instance name
+		models := make([]OpenAIInstance, 0, len(instances))
+		for _, inst := range instances {
+			models = append(models, OpenAIInstance{
+				ID:      inst.Name,
+				Object:  "model",
+				Created: inst.Created,
+				OwnedBy: "llamactl",
+			})
+		}
+
+		listing := OpenAIListInstancesResponse{
+			Data:   models,
+			Object: "list",
+		}
+		w.Header().Set("Content-Type", "application/json")
+		encodeErr := json.NewEncoder(w).Encode(listing)
+		if encodeErr != nil {
+			http.Error(w, "Failed to encode instances: "+encodeErr.Error(), http.StatusInternalServerError)
+		}
+	}
+}
+
+// OpenAIProxy godoc
+// @Summary OpenAI-compatible proxy endpoint
+// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
+// @Tags openai
+// @Security ApiKeyAuth
+// @Accept json
+// @Produces json
+// @Success 200 "OpenAI response"
+// @Failure 400 {string} string "Invalid request body or instance name"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /v1/ [post]
+func (h *Handler) OpenAIProxy() http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		// Buffer the whole body: it is parsed here for routing and replayed to the backend below
+		bodyBytes, err := io.ReadAll(r.Body)
+		if err != nil {
+			http.Error(w, "Failed to read request body", http.StatusBadRequest)
+			return
+		}
+		r.Body.Close()
+
+		// The "model" field of the JSON body names the target instance
+		var requestBody map[string]any
+		if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
+			http.Error(w, "Invalid request body", http.StatusBadRequest)
+			return
+		}
+
+		modelName, ok := requestBody["model"].(string)
+		if !ok || modelName == "" {
+			http.Error(w, "Instance name is required", http.StatusBadRequest)
+			return
+		}
+
+		// Look up the instance whose name matches the requested model
+		inst, err := h.InstanceManager.GetInstance(modelName)
+		if err != nil {
+			http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
+			return
+		}
+
+		// Remote instances are delegated to the remote proxy; no local start/health handling applies
+		if inst.IsRemote() {
+			// Restore the already-consumed body so the remote proxy can forward it
+			r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+			h.RemoteOpenAIProxy(w, r, modelName, inst)
+			return
+		}
+
+		if !inst.IsRunning() {
+			options := inst.GetOptions()
+			allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
+			if !allowOnDemand {
+				http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
+				return
+			}
+
+			if h.InstanceManager.IsMaxRunningInstancesReached() {
+				if h.cfg.Instances.EnableLRUEviction {
+					err := h.InstanceManager.EvictLRUInstance()
+					if err != nil {
+						http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
+						return
+					}
+				} else {
+					http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
+					return
+				}
+			}
+
+			// On-demand start is allowed and the running-instance limit has been handled: start it
+			if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
+				http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
+				return
+			}
+
+			// Block until the instance reports healthy, or give up with 503
+			if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout comes from the instances config
+				http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
+				return
+			}
+		}
+
+		proxy, err := inst.GetProxy()
+		if err != nil {
+			http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
+			return
+		}
+
+		// Record this request's time on the instance
+		inst.UpdateLastRequestTime()
+
+		// Rebuild the body and its length from the buffered bytes before forwarding
+		r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
+		r.ContentLength = int64(len(bodyBytes))
+
+		proxy.ServeHTTP(w, r)
+	}
+}
+
+// RemoteOpenAIProxy forwards an OpenAI-compatible request to the node hosting a remote instance
+func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
+	// The first entry of the instance's node list identifies the target node
+	opts := inst.GetOptions()
+	if opts == nil || len(opts.Nodes) == 0 {
+		http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
+		return
+	}
+	nodeName := opts.Nodes[0]
+
+	// Serve from the cached reverse proxy when this node already has one
+	h.remoteProxiesMu.RLock()
+	proxy, cached := h.remoteProxies[nodeName]
+	h.remoteProxiesMu.RUnlock()
+	if cached {
+		proxy.ServeHTTP(w, r)
+		return
+	}
+
+	// Otherwise resolve the node in the server configuration
+	nodeConfig, known := h.cfg.Nodes[nodeName]
+	if !known {
+		http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
+		return
+	}
+
+	// Point a new reverse proxy at the node address
+	targetURL, parseErr := url.Parse(nodeConfig.Address)
+	if parseErr != nil {
+		http.Error(w, "Failed to parse node address: "+parseErr.Error(), http.StatusInternalServerError)
+		return
+	}
+	proxy = httputil.NewSingleHostReverseProxy(targetURL)
+
+	// Chain onto the default director to attach the node's API key
+	baseDirector := proxy.Director
+	apiKey := nodeConfig.APIKey // captured by the director closure below
+	proxy.Director = func(req *http.Request) {
+		baseDirector(req)
+		if apiKey == "" {
+			return // no key configured, leave the headers as they are
+		}
+		req.Header.Set("Authorization", "Bearer "+apiKey)
+	}
+
+	// Remember the proxy under the node name
+	h.remoteProxiesMu.Lock()
+	h.remoteProxies[nodeName] = proxy
+	h.remoteProxiesMu.Unlock()
+
+	// Hand the request over to the newly built proxy
+	proxy.ServeHTTP(w, r)
+}
diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go
new file mode 100644
index 0000000..e3bb016
--- /dev/null
+++ b/pkg/server/handlers_system.go
@@ -0,0 +1,22 @@
+package server
+
+import (
+	"fmt"
+	"net/http"
+)
+
+// VersionHandler godoc
+// @Summary Get llamactl version
+// @Description Returns the version of the llamactl command
+// @Tags version
+// @Security ApiKeyAuth
+// @Produces text/plain
+// @Success 200 {string} string "Version information"
+// @Failure 500 {string} string "Internal Server Error"
+// @Router /version [get]
+func (h *Handler) VersionHandler() http.HandlerFunc {
+	return http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "text/plain") // plain text, one field per line
+		fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
+	})
+}
diff --git a/pkg/server/routes.go b/pkg/server/routes.go
index 8d5068b..6ced6a7 100644
--- a/pkg/server/routes.go
+++ b/pkg/server/routes.go
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
 			})
 		})
 
+		// Node management endpoints
+		r.Route("/nodes", func(r chi.Router) {
+			r.Get("/", handler.ListNodes()) // List all nodes
+
+			r.Route("/{name}", func(r chi.Router) {
+				r.Get("/", handler.GetNode())
+			})
+		})
+
 		// Instance management endpoints
 		r.Route("/instances", func(r chi.Router) {
 			r.Get("/", handler.ListInstances()) // List all instances
diff --git a/webui/src/components/InstanceDialog.tsx b/webui/src/components/InstanceDialog.tsx
index d9b731c..4a54f7a 100644
--- a/webui/src/components/InstanceDialog.tsx
+++ b/webui/src/components/InstanceDialog.tsx
@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
     // Clean up undefined values to avoid sending empty fields
     const cleanOptions: CreateInstanceOptions = {};
     Object.entries(formData).forEach(([key, value]) => {
-      if (key === 'backend_options' && value && typeof value === 'object') {
+      if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
         // Handle backend_options specially - clean nested object
         const cleanBackendOptions: any = {};
         Object.entries(value).forEach(([backendKey, backendValue]) => {
@@ -118,13 +118,17 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
             cleanBackendOptions[backendKey] = backendValue;
           }
         });
-        
+
         // Only include backend_options if it has content
         if (Object.keys(cleanBackendOptions).length > 0) {
           (cleanOptions as any)[key] = cleanBackendOptions;
         }
-      } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
-        // Handle arrays - don't include empty arrays
+      } else if (value !== undefined && value !== null) {
+        // Skip empty strings
+        if (typeof value === 'string' && value.trim() === "") {
+          return;
+        }
+        // Skip empty arrays
         if (Array.isArray(value) && value.length === 0) {
           return;
         }
diff --git a/webui/src/components/instance/InstanceSettingsCard.tsx b/webui/src/components/instance/InstanceSettingsCard.tsx
index c85eda9..a89ee90 100644
--- a/webui/src/components/instance/InstanceSettingsCard.tsx
+++ b/webui/src/components/instance/InstanceSettingsCard.tsx
@@ -1,4 +1,4 @@
-import React from 'react'
+import React, { useState, useEffect } from 'react'
 import type { CreateInstanceOptions } from '@/types/instance'
 import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
 import { Label } from '@/components/ui/label'
@@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
 import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
+import SelectInput from '@/components/form/SelectInput'
+import { nodesApi, type NodesMap } from '@/lib/api'
 
 interface InstanceSettingsCardProps {
   instanceName: string
@@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
   onNameChange,
   onChange
 }) => {
+  const [nodes, setNodes] = useState<NodesMap>({})
+  const [loadingNodes, setLoadingNodes] = useState(true)
+
+  useEffect(() => {
+    const fetchNodes = async () => {
+      try {
+        const fetchedNodes = await nodesApi.list()
+        setNodes(fetchedNodes)
+
+        // Auto-select first node if none selected
+        const nodeNames = Object.keys(fetchedNodes)
+        if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
+          onChange('nodes', [nodeNames[0]])
+        }
+      } catch (error) {
+        console.error('Failed to fetch nodes:', error)
+      } finally {
+        setLoadingNodes(false)
+      }
+    }
+
+    void fetchNodes()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [])
+
+  const nodeOptions = Object.keys(nodes).map(nodeName => ({
+    value: nodeName,
+    label: nodeName
+  }))
+
+  const handleNodeChange = (value: string | undefined) => {
+    if (value) {
+      onChange('nodes', [value])
+    } else {
+      onChange('nodes', undefined)
+    }
+  }
+
+  const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
+
   return (
     <Card>
       <CardHeader>
@@ -50,6 +92,18 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
           </p>
         </div>
 
+        {/* Node Selection */}
+        {!loadingNodes && Object.keys(nodes).length > 0 && (
+          <SelectInput
+            id="node"
+            label="Node"
+            value={selectedNode}
+            onChange={handleNodeChange}
+            options={nodeOptions}
+            description="Select the node where the instance will run (default: main node)"
+          />
+        )}
+
         {/* Auto Restart Configuration */}
         <AutoRestartConfiguration
           formData={formData}
diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts
index 4e180e0..8629c1f 100644
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -103,6 +103,22 @@ export const backendsApi = {
   },
 };
 
+// Node API types: shapes returned by the /nodes endpoints
+export interface NodeResponse {
+  address: string; // node address as returned by the server
+}
+
+export type NodesMap = Record<string, NodeResponse>; // keyed by node name
+
+// Node API functions
+export const nodesApi = {
+  // GET /nodes - returns map of node name to NodeResponse
+  list: () => apiCall<NodesMap>("/nodes"),
+
+  // GET /nodes/{name} - the name is percent-encoded so reserved characters stay inside the path segment
+  get: (name: string) => apiCall<NodeResponse>(`/nodes/${encodeURIComponent(name)}`),
+};
+
 // Instance API functions
 export const instancesApi = {
   // GET /instances
diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts
index 0af09c1..3cbf523 100644
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({
   // Backend configuration
   backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
+
+  // Node configuration
+  nodes: z.array(z.string()).optional(),
 })
 
 // Re-export types and schemas from backend files