Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
@@ -14,6 +14,7 @@

### 🔗 Universal Compatibility

- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers

### 🌐 User-Friendly Interface

- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)

@@ -32,6 +33,7 @@

# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required

# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')

@@ -112,6 +114,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)

brew install llama.cpp

# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```

**For MLX backend (macOS only):**

@@ -139,9 +142,51 @@ python -m venv vllm-env

source vllm-env/bin/activate
pip install vllm

# For production deployments, consider container-based installation
# Or use Docker - no local installation required
```

## Docker Support

llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for:

- Production deployments without local backend installation
- Isolating backend dependencies
- GPU-accelerated inference using official Docker images

### Docker Configuration

Enable Docker support using the new structured backend configuration:

```yaml
backends:
  llama-cpp:
    command: "llama-server"
    docker:
      enabled: true
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
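
When `enabled: true` is set, llamactl launches the instance through the `docker` CLI instead of the native binary: the configured Docker args come first, then any container environment variables, then the image, then the instance's own flags. A minimal illustrative sketch in Go (the model path and port are hypothetical and not part of the config above):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Pieces taken from the llama-cpp backend config above; instance flags are hypothetical.
	dockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
	image := "ghcr.io/ggml-org/llama.cpp:server"
	instanceArgs := []string{"--model", "/models/example.gguf", "--port", "8081"}

	// Rough shape of the composed invocation: docker <docker args> <image> <instance args>
	argv := append(append(dockerArgs, image), instanceArgs...)
	fmt.Println("docker " + strings.Join(argv, " "))
}
```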

### Key Features

- **Host Networking**: Uses `--network host` for seamless port management
- **GPU Support**: Includes `--gpus all` for GPU acceleration
- **Environment Variables**: Configure container environment as needed
- **Flexible Configuration**: Per-backend Docker settings with sensible defaults

### Requirements

- Docker installed and running
- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
- No local backend installation required when using Docker

## Configuration

llamactl works out of the box with sensible defaults.

@@ -154,9 +199,27 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []

instances:
  port_range: [8000, 9000] # Port range for instances

@@ -20,9 +20,27 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []

instances:
  port_range: [8000, 9000] # Port range for instances

@@ -90,18 +108,40 @@ server:
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

### Backend Configuration

```yaml
backends:
  llama_executable: "llama-server"   # Path to llama-server executable (default: "llama-server")
  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
  vllm_executable: "vllm"            # Path to vllm executable (default: "vllm")
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false # Enable Docker runtime (default: false)
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []
    # MLX does not support Docker
```

**Environment Variables:**

- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable

**Backend Configuration Fields:**

- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)

### Instance Configuration

@@ -88,6 +88,21 @@ Here are basic example configurations for each backend:
}
```

## Docker Support

Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:

```yaml
backends:
  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```

## Using the API

You can also manage instances via the REST API:

@@ -1,6 +1,8 @@
package backends

import (
    "fmt"
    "llamactl/pkg/config"
    "reflect"
    "strconv"
    "strings"
@@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {

    return args
}

// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
    // Start with configured Docker arguments (should include "run", "--rm", etc.)
    dockerArgs := make([]string, len(backendConfig.Docker.Args))
    copy(dockerArgs, backendConfig.Docker.Args)

    // Add environment variables
    for key, value := range backendConfig.Docker.Environment {
        dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
    }

    // Add image name
    dockerArgs = append(dockerArgs, backendConfig.Docker.Image)

    // Add backend args and instance args
    dockerArgs = append(dockerArgs, backendConfig.Args...)
    dockerArgs = append(dockerArgs, instanceArgs...)

    return "docker", dockerArgs, nil
}

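A usage sketch of `BuildDockerCommand` (the settings mirror the defaults introduced elsewhere in this PR; the instance flags are hypothetical). Note that environment variables are injected as `-e KEY=value` pairs before the image name:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/config"
)

func main() {
	settings := &config.BackendSettings{
		Command: "llama-server",
		Args:    []string{},
		Docker: &config.DockerSettings{
			Enabled:     true,
			Image:       "ghcr.io/ggml-org/llama.cpp:server",
			Args:        []string{"run", "--rm", "--network", "host", "--gpus", "all"},
			Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "0"},
		},
	}

	// Instance flags are hypothetical; in llamactl they come from the instance options.
	cmd, args, err := backends.BuildDockerCommand(settings, []string{"--model", "/models/example.gguf", "--port", "8081"})
	if err != nil {
		panic(err)
	}
	// cmd == "docker"; args start with the docker args, then -e CUDA_VISIBLE_DEVICES=0,
	// then the image, then the backend args (empty here) and the instance flags.
	fmt.Println(cmd, args)
}
```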
@@ -7,6 +7,28 @@ import (
    "strconv"
)

// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
    // Parsing keys (with underscores)
    "override_tensor":       true,
    "override_kv":           true,
    "lora":                  true,
    "lora_scaled":           true,
    "control_vector":        true,
    "control_vector_scaled": true,
    "dry_sequence_breaker":  true,
    "logit_bias":            true,
    // Building keys (with dashes)
    "override-tensor":       true,
    "override-kv":           true,
    "lora-scaled":           true,
    "control-vector":        true,
    "control-vector-scaled": true,
    "dry-sequence-breaker":  true,
    "logit-bias":            true,
}

type LlamaServerOptions struct {
    // Common params
    VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    // Llama uses multiple flags for arrays by default (not comma-separated)
    multipleFlags := map[string]bool{
        "override-tensor":       true,
        "override-kv":           true,
        "lora":                  true,
        "lora-scaled":           true,
        "control-vector":        true,
        "control-vector-scaled": true,
        "dry-sequence-breaker":  true,
        "logit-bias":            true,
    }
    return backends.BuildCommandArgs(o, multipleFlags)
    // Use package-level multiValuedFlags variable
    return backends.BuildCommandArgs(o, multiValuedFlags)
}

func (o *LlamaServerOptions) BuildDockerArgs() []string {
    // For llama, Docker args are the same as normal args
    return o.BuildCommandArgs()
}

// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
@@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    executableNames := []string{"llama-server"}
    var subcommandNames []string // Llama has no subcommands
    multiValuedFlags := map[string]bool{
        "override_tensor":       true,
        "override_kv":           true,
        "lora":                  true,
        "lora_scaled":           true,
        "control_vector":        true,
        "control_vector_scaled": true,
        "dry_sequence_breaker":  true,
        "logit_bias":            true,
    }
    // Use package-level multiValuedFlags variable

    var llamaOptions LlamaServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {

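The package-level `multiValuedFlags` map above is what makes `BuildCommandArgs` emit a repeated flag (one `--flag value` pair per element) instead of a comma-joined value. A standalone sketch of that expansion (the helper and adapter paths below are illustrative, not part of this PR):

```go
package main

import "fmt"

// expandRepeated mirrors the repeated-flag behavior applied to keys listed in
// multiValuedFlags: one "--flag value" pair per element.
func expandRepeated(flag string, values []string) []string {
	var args []string
	for _, v := range values {
		args = append(args, "--"+flag, v)
	}
	return args
}

func main() {
	// Hypothetical LoRA adapters; llama-server expects the flag repeated, not comma-joined.
	fmt.Println(expandRepeated("lora", []string{"adapter-a.gguf", "adapter-b.gguf"}))
	// Output: [--lora adapter-a.gguf --lora adapter-b.gguf]
}
```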
@@ -4,6 +4,15 @@ import (
    "llamactl/pkg/backends"
)

// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
    "api-key":         true,
    "allowed-origins": true,
    "allowed-methods": true,
    "allowed-headers": true,
    "middleware":      true,
}

type VllmServerOptions struct {
    // Basic connection options (auto-assigned by llamactl)
    Host string `json:"host,omitempty"`
@@ -131,30 +140,32 @@ type VllmServerOptions struct {
}

// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
    var args []string

    // Add model as positional argument if specified
    // Add model as positional argument if specified (for native execution)
    if o.Model != "" {
        args = append(args, o.Model)
    }

    // Create a copy of the options without the Model field to avoid including it as --model flag
    // Create a copy without Model field to avoid --model flag
    optionsCopy := *o
    optionsCopy.Model = "" // Clear model field so it won't be included as a flag
    optionsCopy.Model = ""

    multipleFlags := map[string]bool{
        "api-key":         true,
        "allowed-origins": true,
        "allowed-methods": true,
        "allowed-headers": true,
        "middleware":      true,
    }
    // Use package-level multipleFlags variable

    // Build the rest of the arguments as flags
    flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
    flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
    args = append(args, flagArgs...)

    return args
}

func (o *VllmServerOptions) BuildDockerArgs() []string {
    var args []string

    // Use package-level multipleFlags variable
    flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
    args = append(args, flagArgs...)

    return args

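For vLLM, the model is emitted as a positional argument while the other fields become flags, and the `serve` subcommand is added at the instance level. A rough usage sketch (the import path, model name, and exact flag set are assumptions, not confirmed by this diff):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm" // assumed package path for VllmServerOptions
)

func main() {
	opts := vllm.VllmServerOptions{
		Model: "Qwen/Qwen2.5-1.5B-Instruct", // hypothetical model
		Host:  "127.0.0.1",
	}

	// Roughly: ["Qwen/Qwen2.5-1.5B-Instruct", "--host", "127.0.0.1", ...]
	fmt.Println(opts.BuildCommandArgs())

	// BuildDockerArgs keeps the same flag expansion but is fed into the docker
	// invocation assembled from the backend's DockerSettings.
	fmt.Println(opts.BuildDockerArgs())
}
```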
@@ -10,16 +10,26 @@ import (
    "gopkg.in/yaml.v3"
)

// BackendSettings contains structured backend configuration
type BackendSettings struct {
    Command string          `yaml:"command"`
    Args    []string        `yaml:"args"`
    Docker  *DockerSettings `yaml:"docker,omitempty"`
}

// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
    Enabled     bool              `yaml:"enabled"`
    Image       string            `yaml:"image"`
    Args        []string          `yaml:"args"`
    Environment map[string]string `yaml:"environment,omitempty"`
}

// BackendConfig contains backend executable configurations
type BackendConfig struct {
    // Path to llama-server executable (llama.cpp backend)
    LlamaExecutable string `yaml:"llama_executable"`

    // Path to mlx_lm executable (MLX-LM backend)
    MLXLMExecutable string `yaml:"mlx_lm_executable"`

    // Path to vllm executable (vLLM backend)
    VllmExecutable string `yaml:"vllm_executable"`
    LlamaCpp BackendSettings `yaml:"llama-cpp"`
    VLLM     BackendSettings `yaml:"vllm"`
    MLX      BackendSettings `yaml:"mlx"`
}

// AppConfig represents the configuration for llamactl

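Since the new types carry plain `yaml` tags, a `backends:` block from the config file maps onto them directly. A minimal sketch (the import path is assumed from this repo's layout; error handling reduced to a panic):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"

	"llamactl/pkg/config"
)

func main() {
	raw := []byte(`
llama-cpp:
  command: "llama-server"
  docker:
    enabled: true
    image: "ghcr.io/ggml-org/llama.cpp:server"
    args: ["run", "--rm", "--network", "host", "--gpus", "all"]
`)

	var bc config.BackendConfig
	if err := yaml.Unmarshal(raw, &bc); err != nil {
		panic(err)
	}
	fmt.Println(bc.LlamaCpp.Command, bc.LlamaCpp.Docker.Enabled) // llama-server true
}
```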
@@ -123,9 +133,36 @@ func LoadConfig(configPath string) (AppConfig, error) {
            EnableSwagger: false,
        },
        Backends: BackendConfig{
            LlamaExecutable: "llama-server",
            MLXLMExecutable: "mlx_lm.server",
            VllmExecutable:  "vllm",
            LlamaCpp: BackendSettings{
                Command: "llama-server",
                Args:    []string{},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "ghcr.io/ggml-org/llama.cpp:server",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all",
                        "-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
                    Environment: map[string]string{},
                },
            },
            VLLM: BackendSettings{
                Command: "vllm",
                Args:    []string{"serve"},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "vllm/vllm-openai:latest",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
                        "-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
                    },
                    Environment: map[string]string{},
                },
            },
            MLX: BackendSettings{
                Command: "mlx_lm.server",
                Args:    []string{},
                // No Docker section for MLX - not supported
            },
        },
        Instances: InstancesConfig{
            PortRange: [2]int{8000, 9000},

@@ -244,15 +281,96 @@ func loadEnvVars(cfg *AppConfig) {
        }
    }
    // Backend config
    if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
        cfg.Backends.LlamaExecutable = llamaExec
    // LlamaCpp backend
    if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
        cfg.Backends.LlamaCpp.Command = llamaCmd
    }
    if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
        cfg.Backends.MLXLMExecutable = mlxLMExec
    if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
        cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
    }
    if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
        cfg.Backends.VllmExecutable = vllmExec
    if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
        if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
            if cfg.Backends.LlamaCpp.Docker == nil {
                cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
            }
            cfg.Backends.LlamaCpp.Docker.Enabled = b
        }
    }
    if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
    }
    if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
    }
    if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        if cfg.Backends.LlamaCpp.Docker.Environment == nil {
            cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
        }
        // Parse env vars in format "KEY1=value1,KEY2=value2"
        for _, envPair := range strings.Split(llamaDockerEnv, ",") {
            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
                cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
            }
        }
    }

    // vLLM backend
    if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
        cfg.Backends.VLLM.Command = vllmCmd
    }
    if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
        if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
            if cfg.Backends.VLLM.Docker == nil {
                cfg.Backends.VLLM.Docker = &DockerSettings{}
            }
            cfg.Backends.VLLM.Docker.Enabled = b
        }
    }
    if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        cfg.Backends.VLLM.Docker.Image = vllmDockerImage
    }
    if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
    }
    if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        if cfg.Backends.VLLM.Docker.Environment == nil {
            cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
        }
        // Parse env vars in format "KEY1=value1,KEY2=value2"
        for _, envPair := range strings.Split(vllmDockerEnv, ",") {
            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
                cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
            }
        }
    }

    // MLX backend
    if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
        cfg.Backends.MLX.Command = mlxCmd
    }
    if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
        cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
    }

    // Instance defaults
    if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
        if b, err := strconv.ParseBool(autoRestart); err == nil {
            cfg.Instances.DefaultAutoRestart = b

@@ -386,3 +504,17 @@ func getDefaultConfigLocations() []string {

    return locations
}

// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
    switch backendType {
    case "llama-cpp":
        return bc.LlamaCpp
    case "vllm":
        return bc.VLLM
    case "mlx":
        return bc.MLX
    default:
        return BackendSettings{}
    }
}

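Callers resolve a backend's settings by its type string and then branch on whether Docker is enabled. A short usage sketch (the helper below is illustrative, not part of this PR):

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
)

// resolveCommand is an illustrative helper showing how GetBackendSettings
// feeds the docker-or-native decision.
func resolveCommand(cfg *config.AppConfig) (string, []string) {
	settings := cfg.Backends.GetBackendSettings("llama-cpp")
	if settings.Docker != nil && settings.Docker.Enabled {
		// Docker path: docker args then image (instance flags would follow).
		return "docker", append(append([]string{}, settings.Docker.Args...), settings.Docker.Image)
	}
	// Native path: configured command plus its default args.
	return settings.Command, settings.Args
}

func main() {
	cfg, err := config.LoadConfig("nonexistent-file.yaml") // falls back to defaults, as in the tests
	if err != nil {
		panic(err)
	}
	cmd, args := resolveCommand(&cfg)
	fmt.Println(cmd, args)
}
```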
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
        "LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
        "LLAMACTL_LOGS_DIR":              "/env/logs",
        "LLAMACTL_MAX_INSTANCES":         "20",
        "LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
        "LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
        "LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
        "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
    if cfg.Instances.MaxInstances != 20 {
        t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
    }
    if cfg.Backends.LlamaExecutable != "/env/llama-server" {
        t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
    if cfg.Backends.LlamaCpp.Command != "llama-server" {
        t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
    }
    if cfg.Instances.DefaultAutoRestart {
        t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
        t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
    }
}

func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
    bc := &config.BackendConfig{
        LlamaCpp: config.BackendSettings{
            Command: "custom-llama",
            Args:    []string{"--verbose"},
            Docker: &config.DockerSettings{
                Enabled:     true,
                Image:       "custom-llama:latest",
                Args:        []string{"--gpus", "all"},
                Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
            },
        },
        VLLM: config.BackendSettings{
            Command: "custom-vllm",
            Args:    []string{"serve", "--debug"},
        },
        MLX: config.BackendSettings{
            Command: "custom-mlx",
            Args:    []string{},
        },
    }

    // Test llama-cpp with Docker
    settings := bc.GetBackendSettings("llama-cpp")
    if settings.Command != "custom-llama" {
        t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
    }
    if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
        t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
    }
    if settings.Docker == nil || !settings.Docker.Enabled {
        t.Error("Expected Docker to be enabled")
    }
    if settings.Docker.Image != "custom-llama:latest" {
        t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
    }

    // Test vLLM without Docker
    settings = bc.GetBackendSettings("vllm")
    if settings.Command != "custom-vllm" {
        t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
    }
    if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
        t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
    }
    if settings.Docker != nil && settings.Docker.Enabled {
        t.Error("Expected Docker to be disabled or nil")
    }

    // Test MLX
    settings = bc.GetBackendSettings("mlx")
    if settings.Command != "custom-mlx" {
        t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
    }
}

func TestGetBackendSettings_EmptyConfig(t *testing.T) {
    bc := &config.BackendConfig{}

    // Test empty llama-cpp
    settings := bc.GetBackendSettings("llama-cpp")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }

    // Test empty vLLM
    settings = bc.GetBackendSettings("vllm")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }

    // Test empty MLX
    settings = bc.GetBackendSettings("mlx")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }
}

func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
    // Test that backend environment variables work correctly
    envVars := map[string]string{
        "LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
        "LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
        "LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
        "LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
        "LLAMACTL_LLAMACPP_DOCKER_ARGS":    "run --rm --network host --gpus all",
        "LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
        "LLAMACTL_VLLM_COMMAND":            "env-vllm",
        "LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
        "LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
        "LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
        "LLAMACTL_MLX_COMMAND":             "env-mlx",
    }

    // Set env vars and ensure cleanup
    for key, value := range envVars {
        os.Setenv(key, value)
        defer os.Unsetenv(key)
    }

    cfg, err := config.LoadConfig("nonexistent-file.yaml")
    if err != nil {
        t.Fatalf("LoadConfig failed: %v", err)
    }

    // Verify llama-cpp environment overrides
    if cfg.Backends.LlamaCpp.Command != "env-llama" {
        t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
    }
    expectedArgs := []string{"--verbose", "--threads", "4"}
    if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
        t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
    }
    if !cfg.Backends.LlamaCpp.Docker.Enabled {
        t.Error("Expected llama Docker to be enabled")
    }
    if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
        t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
    }
    expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
    if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
        t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
    }
    if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
        t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
    }
    if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
        t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
    }

    // Verify vLLM environment overrides
    if cfg.Backends.VLLM.Command != "env-vllm" {
        t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
    }
    if cfg.Backends.VLLM.Docker.Enabled {
        t.Error("Expected vLLM Docker to be disabled")
    }
    if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
        t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
    }

    // Verify MLX environment overrides
    if cfg.Backends.MLX.Command != "env-mlx" {
        t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
    }
}

func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
    bc := &config.BackendConfig{
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
    }

    // Test invalid backend type returns empty settings
    settings := bc.GetBackendSettings("invalid-backend")
    if settings.Command != "" {
        t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
    }
}

@@ -221,14 +221,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
    i.mu.RLock()
    defer i.mu.RUnlock()

    // Determine if docker is enabled for this instance's backend
    var dockerEnabled bool
    if i.options != nil {
        switch i.options.BackendType {
        case backends.BackendTypeLlamaCpp:
            if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
                dockerEnabled = true
            }
        case backends.BackendTypeVllm:
            if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
                dockerEnabled = true
            }
        case backends.BackendTypeMlxLm:
            // MLX does not support docker currently
        }
    }

    // Use anonymous struct to avoid recursion
    type Alias Process
    return json.Marshal(&struct {
        *Alias
        Options       *CreateInstanceOptions `json:"options,omitempty"`
        DockerEnabled bool                   `json:"docker_enabled,omitempty"`
    }{
        Alias:         (*Alias)(i),
        Options:       i.options,
        DockerEnabled: dockerEnabled,
    })
}

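The net effect for API consumers is an extra `docker_enabled` field on the serialized instance, omitted when false. A sketch of the expected shape (the package path and field values are assumptions for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance" // assumed package path for Process
)

// printInstance is an illustrative helper: json.Marshal on a *Process goes
// through the custom MarshalJSON above. With a llama-cpp backend whose Docker
// settings are enabled, the output is roughly:
//   {"name":"llama-main","status":"running","options":{...},"docker_enabled":true}
func printInstance(inst *instance.Process) error {
	data, err := json.Marshal(inst)
	if err != nil {
		return err
	}
	fmt.Println(string(data))
	return nil
}

func main() {}
```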
@@ -12,8 +12,18 @@ import (

func TestNewInstance(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) {

func TestNewInstance_WithRestartOptions(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -112,8 +132,18 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {

func TestSetOptions(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) {

func TestGetProxy(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) {

func TestMarshalJSON(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
    }

    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{

@@ -11,6 +11,7 @@ import (
    "time"

    "llamactl/pkg/backends"
    "llamactl/pkg/config"
)

// Start starts the llama server instance and returns an error if it fails.
@@ -41,24 +42,14 @@ func (i *Process) Start() error {
        return fmt.Errorf("failed to create log files: %w", err)
    }

    args := i.options.BuildCommandArgs()
    i.ctx, i.cancel = context.WithCancel(context.Background())

    var executable string

    // Get executable from global configuration
    switch i.options.BackendType {
    case backends.BackendTypeLlamaCpp:
        executable = i.globalBackendSettings.LlamaExecutable
    case backends.BackendTypeMlxLm:
        executable = i.globalBackendSettings.MLXLMExecutable
    case backends.BackendTypeVllm:
        executable = i.globalBackendSettings.VllmExecutable
    default:
        return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
    // Build command using backend-specific methods
    cmd, cmdErr := i.buildCommand()
    if cmdErr != nil {
        return fmt.Errorf("failed to build command: %w", cmdErr)
    }

    i.cmd = exec.CommandContext(i.ctx, executable, args...)
    i.ctx, i.cancel = context.WithCancel(context.Background())
    i.cmd = cmd

    if runtime.GOOS != "windows" {
        setProcAttrs(i.cmd)
@@ -372,3 +363,39 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i

    return true, maxRestarts, restartDelay
}

// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
    // Get backend configuration
    backendConfig, err := i.getBackendConfig()
    if err != nil {
        return nil, err
    }

    // Get the command to execute
    cmd := i.options.GetCommand(backendConfig)

    // Build command arguments
    args := i.options.BuildCommandArgs(backendConfig)

    return exec.Command(cmd, args...), nil
}

// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
    var backendTypeStr string

    switch i.options.BackendType {
    case backends.BackendTypeLlamaCpp:
        backendTypeStr = "llama-cpp"
    case backends.BackendTypeMlxLm:
        backendTypeStr = "mlx"
    case backends.BackendTypeVllm:
        backendTypeStr = "vllm"
    default:
        return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
    }

    settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
    return &settings, nil
}

@@ -188,24 +188,55 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
    }
}

func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {

    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
        return "docker"
    }

    return backendConfig.Command
}

// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {

    var args []string

    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
        // For Docker, start with Docker args
        args = append(args, backendConfig.Docker.Args...)
        args = append(args, backendConfig.Docker.Image)

        switch c.BackendType {
        case backends.BackendTypeLlamaCpp:
            if c.LlamaServerOptions != nil {
                return c.LlamaServerOptions.BuildCommandArgs()
            }
        case backends.BackendTypeMlxLm:
            if c.MlxServerOptions != nil {
                return c.MlxServerOptions.BuildCommandArgs()
                args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
            }
        case backends.BackendTypeVllm:
            if c.VllmServerOptions != nil {
                args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
            }
        }

    } else {
        // For native execution, start with backend args
        args = append(args, backendConfig.Args...)

        switch c.BackendType {
        case backends.BackendTypeLlamaCpp:
            if c.LlamaServerOptions != nil {
                args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
            }
        case backends.BackendTypeMlxLm:
            if c.MlxServerOptions != nil {
                args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
            }
        case backends.BackendTypeVllm:
            if c.VllmServerOptions != nil {
                // Prepend "serve" as first argument
                args := []string{"serve"}
                args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
            }
        }
    }

    return args
}
    }
    return []string{}
}

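Together, `GetCommand` and `BuildCommandArgs` give the process layer a single pair of calls that hides the Docker/native split (MLX always takes the native path). A usage sketch comparing the two paths (the `instance` package path is assumed; the options literal is deliberately empty, so no instance flags appear):

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
	"llamactl/pkg/instance" // assumed package path for CreateInstanceOptions
)

func main() {
	// Hypothetical options for a llama-cpp instance; the backend-specific
	// option structs are omitted for brevity.
	opts := &instance.CreateInstanceOptions{}

	native := &config.BackendSettings{Command: "llama-server", Args: []string{}}
	dockerized := &config.BackendSettings{
		Command: "llama-server",
		Docker: &config.DockerSettings{
			Enabled: true,
			Image:   "ghcr.io/ggml-org/llama.cpp:server",
			Args:    []string{"run", "--rm", "--network", "host", "--gpus", "all"},
		},
	}

	fmt.Println(opts.GetCommand(native))           // "llama-server"
	fmt.Println(opts.GetCommand(dockerized))       // "docker"
	fmt.Println(opts.BuildCommandArgs(native))     // backend args (+ instance flags when options are set)
	fmt.Println(opts.BuildCommandArgs(dockerized)) // docker args + image (+ instance flags when options are set)
}
```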
@@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) {

func TestUpdateLastRequestTime(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) {

func TestShouldTimeout_NotRunning(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) {

func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {

func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {

func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {

func TestTimeoutConfiguration_Validation(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{

@@ -16,8 +16,12 @@ import (

func TestNewInstanceManager(t *testing.T) {
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{
@@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) {
    tempDir := t.TempDir()

    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{
@@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) {
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{

@@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {

    // Test max instances limit
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }
    cfg := config.InstancesConfig{
        PortRange: [2]int{8000, 9000},

@@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error {
    im.mu.RLock()
    var lruInstance *instance.Process

    for name, _ := range im.runningInstances {
    for name := range im.runningInstances {
        inst := im.instances[name]
        if inst == nil {
            continue

@@ -14,8 +14,8 @@ import (
func TestTimeoutFunctionality(t *testing.T) {
    // Test timeout checker initialization
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{Command: "llama-server"},
        MLX:      config.BackendSettings{Command: "mlx_lm.server"},
    }
    cfg := config.InstancesConfig{
        PortRange: [2]int{8000, 9000},

@@ -1,13 +1,14 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server } from "lucide-react";
import { Server, Package } from "lucide-react";

interface BackendBadgeProps {
  backend?: BackendTypeValue;
  docker?: boolean;
}

const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
  if (!backend) {
    return null;
  }
@@ -39,6 +40,7 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
  };

  return (
    <div className="flex items-center gap-1">
    <Badge
      variant="outline"
      className={`flex items-center gap-1.5 ${getColorClasses()}`}
@@ -46,6 +48,17 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
      <Server className="h-3 w-3" />
      <span className="text-xs">{getText()}</span>
    </Badge>
    {docker && (
      <Badge
        variant="outline"
        className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
        title="Docker enabled"
      >
        <Package className="h-3 w-3" />
        <span className="text-[10px] uppercase tracking-wide">Docker</span>
      </Badge>
    )}
    </div>
  );
};

@@ -66,7 +66,7 @@ function InstanceCard({

  {/* Badges row */}
  <div className="flex items-center gap-2 flex-wrap">
    <BackendBadge backend={instance.options?.backend_type} />
    <BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
    {running && <HealthBadge health={health} />}
  </div>
</div>

@@ -23,4 +23,5 @@ export interface Instance {
  name: string;
  status: InstanceStatus;
  options?: CreateInstanceOptions;
  docker_enabled?: boolean; // indicates backend is running via Docker
}