diff --git a/README.md b/README.md index 99eb77e..4865174 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ ### 🔗 Universal Compatibility - **OpenAI API Compatible**: Drop-in replacement - route requests by instance name - **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM +- **Docker Support**: Run backends in containers ### 🌐 User-Friendly Interface - **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools) @@ -32,6 +33,7 @@ # For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start # For MLX on macOS: pip install mlx-lm # For vLLM: pip install vllm +# Or use Docker - no local installation required # 2. Download and run llamactl LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/') @@ -112,6 +114,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp) brew install llama.cpp # Or build from source - see llama.cpp docs +# Or use Docker - no local installation required ``` **For MLX backend (macOS only):** @@ -139,9 +142,51 @@ python -m venv vllm-env source vllm-env/bin/activate pip install vllm -# For production deployments, consider container-based installation +# Or use Docker - no local installation required ``` +## Docker Support + +llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for: +- Production deployments without local backend installation +- Isolating backend dependencies +- GPU-accelerated inference using official Docker images + +### Docker Configuration + +Enable Docker support using the new structured backend configuration: + +```yaml +backends: + llama-cpp: + command: "llama-server" + docker: + enabled: true + image: "ghcr.io/ggml-org/llama.cpp:server" + args: ["run", "--rm", "--network", "host", "--gpus", "all"] + + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: true + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] +``` + +### Key Features + +- **Host Networking**: Uses `--network host` for seamless port management +- **GPU Support**: Includes `--gpus all` for GPU acceleration +- **Environment Variables**: Configure container environment as needed +- **Flexible Configuration**: Per-backend Docker settings with sensible defaults + +### Requirements + +- Docker installed and running +- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support +- No local backend installation required when using Docker + ## Configuration llamactl works out of the box with sensible defaults. 
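As an aside on the Docker configuration shown above: llamactl assembles the container invocation by taking the configured Docker args, appending `-e KEY=value` pairs for each environment variable, then the image, and finally the backend and instance arguments (see `BuildDockerCommand` in `pkg/backends/builder.go` later in this diff). The sketch below mirrors that assembly order using simplified stand-in types rather than the real `config.BackendSettings`; the model path and port are illustrative only.

```go
package main

import "fmt"

// Simplified stand-ins for the config.BackendSettings / config.DockerSettings
// types introduced in this diff.
type dockerSettings struct {
	Image       string
	Args        []string
	Environment map[string]string
}

type backendSettings struct {
	Command string
	Args    []string
	Docker  *dockerSettings
}

// buildDockerCommand mirrors the assembly order of BuildDockerCommand:
// configured docker args, then -e KEY=value pairs, then the image,
// then backend args, then per-instance args.
func buildDockerCommand(b backendSettings, instanceArgs []string) (string, []string) {
	args := append([]string{}, b.Docker.Args...)
	for k, v := range b.Docker.Environment {
		args = append(args, "-e", fmt.Sprintf("%s=%s", k, v))
	}
	args = append(args, b.Docker.Image)
	args = append(args, b.Args...)
	args = append(args, instanceArgs...)
	return "docker", args
}

func main() {
	llama := backendSettings{
		Command: "llama-server",
		Docker: &dockerSettings{
			Image:       "ghcr.io/ggml-org/llama.cpp:server",
			Args:        []string{"run", "--rm", "--network", "host", "--gpus", "all"},
			Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "0"},
		},
	}
	// Illustrative instance arguments; real ones come from the instance options.
	cmd, args := buildDockerCommand(llama, []string{"--model", "/models/model.gguf", "--port", "8001"})
	fmt.Println(cmd, args)
	// docker [run --rm --network host --gpus all -e CUDA_VISIBLE_DEVICES=0
	//         ghcr.io/ggml-org/llama.cpp:server --model /models/model.gguf --port 8001]
}
```

The net effect is that a Docker-enabled backend still exposes the same port on the host (via `--network host`), so the rest of llamactl's instance and proxy handling is unchanged.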
@@ -154,9 +199,27 @@ server: enable_swagger: false # Enable Swagger UI for API docs backends: - llama_executable: llama-server # Path to llama-server executable - mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable - vllm_executable: vllm # Path to vllm executable + llama-cpp: + command: "llama-server" + args: [] + docker: + enabled: false + image: "ghcr.io/ggml-org/llama.cpp:server" + args: ["run", "--rm", "--network", "host", "--gpus", "all"] + environment: {} + + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: false + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] + environment: {} + + mlx: + command: "mlx_lm.server" + args: [] instances: port_range: [8000, 9000] # Port range for instances diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index 4100492..f014f13 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -20,9 +20,27 @@ server: enable_swagger: false # Enable Swagger UI for API docs backends: - llama_executable: llama-server # Path to llama-server executable - mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable - vllm_executable: vllm # Path to vllm executable + llama-cpp: + command: "llama-server" + args: [] + docker: + enabled: false + image: "ghcr.io/ggml-org/llama.cpp:server" + args: ["run", "--rm", "--network", "host", "--gpus", "all"] + environment: {} + + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: false + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] + environment: {} + + mlx: + command: "mlx_lm.server" + args: [] instances: port_range: [8000, 9000] # Port range for instances @@ -90,18 +108,40 @@ server: - `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false) ### Backend Configuration - ```yaml backends: - llama_executable: "llama-server" # Path to llama-server executable (default: "llama-server") - mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server") - vllm_executable: "vllm" # Path to vllm executable (default: "vllm") + llama-cpp: + command: "llama-server" + args: [] + docker: + enabled: false # Enable Docker runtime (default: false) + image: "ghcr.io/ggml-org/llama.cpp:server" + args: ["run", "--rm", "--network", "host", "--gpus", "all"] + environment: {} + + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: false + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] + environment: {} + + mlx: + command: "mlx_lm.server" + args: [] + # MLX does not support Docker ``` -**Environment Variables:** -- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable -- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable -- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable +**Backend Configuration Fields:** +- `command`: Executable name/path for the backend +- `args`: Default arguments prepended to all instances +- `docker`: Docker-specific configuration (optional) + - `enabled`: Boolean flag to enable Docker runtime + - `image`: Docker image to use + - `args`: Additional arguments passed to `docker run` + - `environment`: Environment variables for the container (optional) ### Instance Configuration diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md index 20d8aa8..b6846e3 100644 --- a/docs/getting-started/quick-start.md 
+++ b/docs/getting-started/quick-start.md @@ -88,6 +88,21 @@ Here are basic example configurations for each backend: } ``` +## Docker Support + +Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below: + +```yaml +backends: + vllm: + command: "vllm" + args: ["serve"] + docker: + enabled: true + image: "vllm/vllm-openai:latest" + args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] +``` + ## Using the API You can also manage instances via the REST API: diff --git a/pkg/backends/builder.go b/pkg/backends/builder.go index 23c3bb1..d5b5c0c 100644 --- a/pkg/backends/builder.go +++ b/pkg/backends/builder.go @@ -1,6 +1,8 @@ package backends import ( + "fmt" + "llamactl/pkg/config" "reflect" "strconv" "strings" @@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string { return args } + +// BuildDockerCommand builds a Docker command with the specified configuration and arguments +func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) { + // Start with configured Docker arguments (should include "run", "--rm", etc.) + dockerArgs := make([]string, len(backendConfig.Docker.Args)) + copy(dockerArgs, backendConfig.Docker.Args) + + // Add environment variables + for key, value := range backendConfig.Docker.Environment { + dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value)) + } + + // Add image name + dockerArgs = append(dockerArgs, backendConfig.Docker.Image) + + // Add backend args and instance args + dockerArgs = append(dockerArgs, backendConfig.Args...) + dockerArgs = append(dockerArgs, instanceArgs...) 
+ + return "docker", dockerArgs, nil +} diff --git a/pkg/backends/llamacpp/llama.go b/pkg/backends/llamacpp/llama.go index f2a7d31..bca29e8 100644 --- a/pkg/backends/llamacpp/llama.go +++ b/pkg/backends/llamacpp/llama.go @@ -7,6 +7,28 @@ import ( "strconv" ) +// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated +// Used for both parsing (with underscores) and building (with dashes) +var multiValuedFlags = map[string]bool{ + // Parsing keys (with underscores) + "override_tensor": true, + "override_kv": true, + "lora": true, + "lora_scaled": true, + "control_vector": true, + "control_vector_scaled": true, + "dry_sequence_breaker": true, + "logit_bias": true, + // Building keys (with dashes) + "override-tensor": true, + "override-kv": true, + "lora-scaled": true, + "control-vector": true, + "control-vector-scaled": true, + "dry-sequence-breaker": true, + "logit-bias": true, +} + type LlamaServerOptions struct { // Common params VerbosePrompt bool `json:"verbose_prompt,omitempty"` @@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error { // BuildCommandArgs converts InstanceOptions to command line arguments func (o *LlamaServerOptions) BuildCommandArgs() []string { // Llama uses multiple flags for arrays by default (not comma-separated) - multipleFlags := map[string]bool{ - "override-tensor": true, - "override-kv": true, - "lora": true, - "lora-scaled": true, - "control-vector": true, - "control-vector-scaled": true, - "dry-sequence-breaker": true, - "logit-bias": true, - } - return backends.BuildCommandArgs(o, multipleFlags) + // Use package-level multiValuedFlags variable + return backends.BuildCommandArgs(o, multiValuedFlags) +} + +func (o *LlamaServerOptions) BuildDockerArgs() []string { + // For llama, Docker args are the same as normal args + return o.BuildCommandArgs() } // ParseLlamaCommand parses a llama-server command string into LlamaServerOptions @@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string { func ParseLlamaCommand(command string) (*LlamaServerOptions, error) { executableNames := []string{"llama-server"} var subcommandNames []string // Llama has no subcommands - multiValuedFlags := map[string]bool{ - "override_tensor": true, - "override_kv": true, - "lora": true, - "lora_scaled": true, - "control_vector": true, - "control_vector_scaled": true, - "dry_sequence_breaker": true, - "logit_bias": true, - } + // Use package-level multiValuedFlags variable var llamaOptions LlamaServerOptions if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil { diff --git a/pkg/backends/vllm/vllm.go b/pkg/backends/vllm/vllm.go index 7811c4c..d4fee25 100644 --- a/pkg/backends/vllm/vllm.go +++ b/pkg/backends/vllm/vllm.go @@ -4,6 +4,15 @@ import ( "llamactl/pkg/backends" ) +// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated +var multiValuedFlags = map[string]bool{ + "api-key": true, + "allowed-origins": true, + "allowed-methods": true, + "allowed-headers": true, + "middleware": true, +} + type VllmServerOptions struct { // Basic connection options (auto-assigned by llamactl) Host string `json:"host,omitempty"` @@ -131,30 +140,32 @@ type VllmServerOptions struct { } // BuildCommandArgs converts VllmServerOptions to command line arguments -// Note: This does NOT include the "serve" subcommand, that's handled at the instance level -// For vLLM, the model parameter is passed as a positional 
argument, not a --model flag +// For vLLM native, model is a positional argument after "serve" func (o *VllmServerOptions) BuildCommandArgs() []string { var args []string - // Add model as positional argument if specified + // Add model as positional argument if specified (for native execution) if o.Model != "" { args = append(args, o.Model) } - // Create a copy of the options without the Model field to avoid including it as --model flag + // Create a copy without Model field to avoid --model flag optionsCopy := *o - optionsCopy.Model = "" // Clear model field so it won't be included as a flag + optionsCopy.Model = "" - multipleFlags := map[string]bool{ - "api-key": true, - "allowed-origins": true, - "allowed-methods": true, - "allowed-headers": true, - "middleware": true, - } + // Use package-level multipleFlags variable - // Build the rest of the arguments as flags - flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags) + flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags) + args = append(args, flagArgs...) + + return args +} + +func (o *VllmServerOptions) BuildDockerArgs() []string { + var args []string + + // Use package-level multipleFlags variable + flagArgs := backends.BuildCommandArgs(o, multiValuedFlags) args = append(args, flagArgs...) return args diff --git a/pkg/config/config.go b/pkg/config/config.go index 504ecc3..cf6f9cf 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -10,16 +10,26 @@ import ( "gopkg.in/yaml.v3" ) +// BackendSettings contains structured backend configuration +type BackendSettings struct { + Command string `yaml:"command"` + Args []string `yaml:"args"` + Docker *DockerSettings `yaml:"docker,omitempty"` +} + +// DockerSettings contains Docker-specific configuration +type DockerSettings struct { + Enabled bool `yaml:"enabled"` + Image string `yaml:"image"` + Args []string `yaml:"args"` + Environment map[string]string `yaml:"environment,omitempty"` +} + // BackendConfig contains backend executable configurations type BackendConfig struct { - // Path to llama-server executable (llama.cpp backend) - LlamaExecutable string `yaml:"llama_executable"` - - // Path to mlx_lm executable (MLX-LM backend) - MLXLMExecutable string `yaml:"mlx_lm_executable"` - - // Path to vllm executable (vLLM backend) - VllmExecutable string `yaml:"vllm_executable"` + LlamaCpp BackendSettings `yaml:"llama-cpp"` + VLLM BackendSettings `yaml:"vllm"` + MLX BackendSettings `yaml:"mlx"` } // AppConfig represents the configuration for llamactl @@ -123,9 +133,36 @@ func LoadConfig(configPath string) (AppConfig, error) { EnableSwagger: false, }, Backends: BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", - VllmExecutable: "vllm", + LlamaCpp: BackendSettings{ + Command: "llama-server", + Args: []string{}, + Docker: &DockerSettings{ + Enabled: false, + Image: "ghcr.io/ggml-org/llama.cpp:server", + Args: []string{ + "run", "--rm", "--network", "host", "--gpus", "all", + "-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"}, + Environment: map[string]string{}, + }, + }, + VLLM: BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + Docker: &DockerSettings{ + Enabled: false, + Image: "vllm/vllm-openai:latest", + Args: []string{ + "run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g", + "-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface", + }, + Environment: map[string]string{}, + }, + }, + MLX: BackendSettings{ + 
Command: "mlx_lm.server", + Args: []string{}, + // No Docker section for MLX - not supported + }, }, Instances: InstancesConfig{ PortRange: [2]int{8000, 9000}, @@ -244,15 +281,96 @@ func loadEnvVars(cfg *AppConfig) { } } // Backend config - if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" { - cfg.Backends.LlamaExecutable = llamaExec + // LlamaCpp backend + if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" { + cfg.Backends.LlamaCpp.Command = llamaCmd } - if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" { - cfg.Backends.MLXLMExecutable = mlxLMExec + if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" { + cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ") } - if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" { - cfg.Backends.VllmExecutable = vllmExec + if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" { + if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil { + if cfg.Backends.LlamaCpp.Docker == nil { + cfg.Backends.LlamaCpp.Docker = &DockerSettings{} + } + cfg.Backends.LlamaCpp.Docker.Enabled = b + } } + if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" { + if cfg.Backends.LlamaCpp.Docker == nil { + cfg.Backends.LlamaCpp.Docker = &DockerSettings{} + } + cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage + } + if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" { + if cfg.Backends.LlamaCpp.Docker == nil { + cfg.Backends.LlamaCpp.Docker = &DockerSettings{} + } + cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ") + } + if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" { + if cfg.Backends.LlamaCpp.Docker == nil { + cfg.Backends.LlamaCpp.Docker = &DockerSettings{} + } + if cfg.Backends.LlamaCpp.Docker.Environment == nil { + cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string) + } + // Parse env vars in format "KEY1=value1,KEY2=value2" + for _, envPair := range strings.Split(llamaDockerEnv, ",") { + if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 { + cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1] + } + } + } + + // vLLM backend + if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" { + cfg.Backends.VLLM.Command = vllmCmd + } + if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" { + if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil { + if cfg.Backends.VLLM.Docker == nil { + cfg.Backends.VLLM.Docker = &DockerSettings{} + } + cfg.Backends.VLLM.Docker.Enabled = b + } + } + if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" { + if cfg.Backends.VLLM.Docker == nil { + cfg.Backends.VLLM.Docker = &DockerSettings{} + } + cfg.Backends.VLLM.Docker.Image = vllmDockerImage + } + if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" { + if cfg.Backends.VLLM.Docker == nil { + cfg.Backends.VLLM.Docker = &DockerSettings{} + } + cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ") + } + if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" { + if cfg.Backends.VLLM.Docker == nil { + cfg.Backends.VLLM.Docker = &DockerSettings{} + } + if cfg.Backends.VLLM.Docker.Environment == nil { + cfg.Backends.VLLM.Docker.Environment = make(map[string]string) + } + // Parse env vars in format 
"KEY1=value1,KEY2=value2" + for _, envPair := range strings.Split(vllmDockerEnv, ",") { + if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 { + cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1] + } + } + } + + // MLX backend + if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" { + cfg.Backends.MLX.Command = mlxCmd + } + if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" { + cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ") + } + + // Instance defaults if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" { if b, err := strconv.ParseBool(autoRestart); err == nil { cfg.Instances.DefaultAutoRestart = b @@ -386,3 +504,17 @@ func getDefaultConfigLocations() []string { return locations } + +// GetBackendSettings resolves backend settings +func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings { + switch backendType { + case "llama-cpp": + return bc.LlamaCpp + case "vllm": + return bc.VLLM + case "mlx": + return bc.MLX + default: + return BackendSettings{} + } +} diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index ed95429..ad800ed 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) { "LLAMACTL_INSTANCE_PORT_RANGE": "5000-6000", "LLAMACTL_LOGS_DIR": "/env/logs", "LLAMACTL_MAX_INSTANCES": "20", - "LLAMACTL_LLAMA_EXECUTABLE": "/env/llama-server", "LLAMACTL_DEFAULT_AUTO_RESTART": "false", "LLAMACTL_DEFAULT_MAX_RESTARTS": "7", "LLAMACTL_DEFAULT_RESTART_DELAY": "15", @@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) { if cfg.Instances.MaxInstances != 20 { t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances) } - if cfg.Backends.LlamaExecutable != "/env/llama-server" { - t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable) + if cfg.Backends.LlamaCpp.Command != "llama-server" { + t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command) } if cfg.Instances.DefaultAutoRestart { t.Error("Expected auto restart to be false") @@ -349,3 +348,165 @@ server: t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances) } } + +func TestGetBackendSettings_NewStructuredConfig(t *testing.T) { + bc := &config.BackendConfig{ + LlamaCpp: config.BackendSettings{ + Command: "custom-llama", + Args: []string{"--verbose"}, + Docker: &config.DockerSettings{ + Enabled: true, + Image: "custom-llama:latest", + Args: []string{"--gpus", "all"}, + Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"}, + }, + }, + VLLM: config.BackendSettings{ + Command: "custom-vllm", + Args: []string{"serve", "--debug"}, + }, + MLX: config.BackendSettings{ + Command: "custom-mlx", + Args: []string{}, + }, + } + + // Test llama-cpp with Docker + settings := bc.GetBackendSettings("llama-cpp") + if settings.Command != "custom-llama" { + t.Errorf("Expected command 'custom-llama', got %q", settings.Command) + } + if len(settings.Args) != 1 || settings.Args[0] != "--verbose" { + t.Errorf("Expected args ['--verbose'], got %v", settings.Args) + } + if settings.Docker == nil || !settings.Docker.Enabled { + t.Error("Expected Docker to be enabled") + } + if settings.Docker.Image != "custom-llama:latest" { + t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image) + } + + // Test vLLM without Docker + settings = bc.GetBackendSettings("vllm") + if settings.Command 
!= "custom-vllm" { + t.Errorf("Expected command 'custom-vllm', got %q", settings.Command) + } + if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" { + t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args) + } + if settings.Docker != nil && settings.Docker.Enabled { + t.Error("Expected Docker to be disabled or nil") + } + + // Test MLX + settings = bc.GetBackendSettings("mlx") + if settings.Command != "custom-mlx" { + t.Errorf("Expected command 'custom-mlx', got %q", settings.Command) + } +} + +func TestGetBackendSettings_EmptyConfig(t *testing.T) { + bc := &config.BackendConfig{} + + // Test empty llama-cpp + settings := bc.GetBackendSettings("llama-cpp") + if settings.Command != "" { + t.Errorf("Expected empty command, got %q", settings.Command) + } + + // Test empty vLLM + settings = bc.GetBackendSettings("vllm") + if settings.Command != "" { + t.Errorf("Expected empty command, got %q", settings.Command) + } + + // Test empty MLX + settings = bc.GetBackendSettings("mlx") + if settings.Command != "" { + t.Errorf("Expected empty command, got %q", settings.Command) + } +} + +func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) { + // Test that backend environment variables work correctly + envVars := map[string]string{ + "LLAMACTL_LLAMACPP_COMMAND": "env-llama", + "LLAMACTL_LLAMACPP_ARGS": "--verbose --threads 4", + "LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true", + "LLAMACTL_LLAMACPP_DOCKER_IMAGE": "env-llama:latest", + "LLAMACTL_LLAMACPP_DOCKER_ARGS": "run --rm --network host --gpus all", + "LLAMACTL_LLAMACPP_DOCKER_ENV": "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4", + "LLAMACTL_VLLM_COMMAND": "env-vllm", + "LLAMACTL_VLLM_DOCKER_ENABLED": "false", + "LLAMACTL_VLLM_DOCKER_IMAGE": "env-vllm:latest", + "LLAMACTL_VLLM_DOCKER_ENV": "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1", + "LLAMACTL_MLX_COMMAND": "env-mlx", + } + + // Set env vars and ensure cleanup + for key, value := range envVars { + os.Setenv(key, value) + defer os.Unsetenv(key) + } + + cfg, err := config.LoadConfig("nonexistent-file.yaml") + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + // Verify llama-cpp environment overrides + if cfg.Backends.LlamaCpp.Command != "env-llama" { + t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command) + } + expectedArgs := []string{"--verbose", "--threads", "4"} + if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) { + t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args) + } + if !cfg.Backends.LlamaCpp.Docker.Enabled { + t.Error("Expected llama Docker to be enabled") + } + if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" { + t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image) + } + expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"} + if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) { + t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args) + } + if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" { + t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"]) + } + if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" { + t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"]) + } + + // Verify vLLM environment overrides + 
if cfg.Backends.VLLM.Command != "env-vllm" { + t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command) + } + if cfg.Backends.VLLM.Docker.Enabled { + t.Error("Expected vLLM Docker to be disabled") + } + if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" { + t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"]) + } + + // Verify MLX environment overrides + if cfg.Backends.MLX.Command != "env-mlx" { + t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command) + } +} + +func TestGetBackendSettings_InvalidBackendType(t *testing.T) { + bc := &config.BackendConfig{ + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + } + + // Test invalid backend type returns empty settings + settings := bc.GetBackendSettings("invalid-backend") + if settings.Command != "" { + t.Errorf("Expected empty command for invalid backend, got %q", settings.Command) + } +} diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index e1509a8..0bea06c 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -221,14 +221,33 @@ func (i *Process) MarshalJSON() ([]byte, error) { i.mu.RLock() defer i.mu.RUnlock() + // Determine if docker is enabled for this instance's backend + var dockerEnabled bool + if i.options != nil { + switch i.options.BackendType { + case backends.BackendTypeLlamaCpp: + if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled { + dockerEnabled = true + } + case backends.BackendTypeVllm: + if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled { + dockerEnabled = true + } + case backends.BackendTypeMlxLm: + // MLX does not support docker currently + } + } + // Use anonymous struct to avoid recursion type Alias Process return json.Marshal(&struct { *Alias - Options *CreateInstanceOptions `json:"options,omitempty"` + Options *CreateInstanceOptions `json:"options,omitempty"` + DockerEnabled bool `json:"docker_enabled,omitempty"` }{ - Alias: (*Alias)(i), - Options: i.options, + Alias: (*Alias)(i), + Options: i.options, + DockerEnabled: dockerEnabled, }) } diff --git a/pkg/instance/instance_test.go b/pkg/instance/instance_test.go index 9ce2d61..fc41a94 100644 --- a/pkg/instance/instance_test.go +++ b/pkg/instance/instance_test.go @@ -12,8 +12,18 @@ import ( func TestNewInstance(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ @@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) { func TestNewInstance_WithRestartOptions(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ @@ -112,8 +132,18 @@ func 
TestNewInstance_WithRestartOptions(t *testing.T) { func TestSetOptions(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ @@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) { func TestGetProxy(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ @@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) { func TestMarshalJSON(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ @@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) { } backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + Args: []string{}, + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + Args: []string{}, + }, + VLLM: config.BackendSettings{ + Command: "vllm", + Args: []string{"serve"}, + }, } globalSettings := &config.InstancesConfig{ diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go index 9eab260..9f7243a 100644 --- a/pkg/instance/lifecycle.go +++ b/pkg/instance/lifecycle.go @@ -11,6 +11,7 @@ import ( "time" "llamactl/pkg/backends" + "llamactl/pkg/config" ) // Start starts the llama server instance and returns an error if it fails. @@ -41,24 +42,14 @@ func (i *Process) Start() error { return fmt.Errorf("failed to create log files: %w", err) } - args := i.options.BuildCommandArgs() - i.ctx, i.cancel = context.WithCancel(context.Background()) - - var executable string - - // Get executable from global configuration - switch i.options.BackendType { - case backends.BackendTypeLlamaCpp: - executable = i.globalBackendSettings.LlamaExecutable - case backends.BackendTypeMlxLm: - executable = i.globalBackendSettings.MLXLMExecutable - case backends.BackendTypeVllm: - executable = i.globalBackendSettings.VllmExecutable - default: - return fmt.Errorf("unsupported backend type: %s", i.options.BackendType) + // Build command using backend-specific methods + cmd, cmdErr := i.buildCommand() + if cmdErr != nil { + return fmt.Errorf("failed to build command: %w", cmdErr) } - i.cmd = exec.CommandContext(i.ctx, executable, args...) 
+ i.ctx, i.cancel = context.WithCancel(context.Background()) + i.cmd = cmd if runtime.GOOS != "windows" { setProcAttrs(i.cmd) @@ -372,3 +363,39 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i return true, maxRestarts, restartDelay } + +// buildCommand builds the command to execute using backend-specific logic +func (i *Process) buildCommand() (*exec.Cmd, error) { + // Get backend configuration + backendConfig, err := i.getBackendConfig() + if err != nil { + return nil, err + } + + // Get the command to execute + cmd := i.options.GetCommand(backendConfig) + + // Build command arguments + args := i.options.BuildCommandArgs(backendConfig) + + return exec.Command(cmd, args...), nil +} + +// getBackendConfig resolves the backend configuration for the current instance +func (i *Process) getBackendConfig() (*config.BackendSettings, error) { + var backendTypeStr string + + switch i.options.BackendType { + case backends.BackendTypeLlamaCpp: + backendTypeStr = "llama-cpp" + case backends.BackendTypeMlxLm: + backendTypeStr = "mlx" + case backends.BackendTypeVllm: + backendTypeStr = "vllm" + default: + return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType) + } + + settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr) + return &settings, nil +} diff --git a/pkg/instance/options.go b/pkg/instance/options.go index 2e0b2fd..e776e05 100644 --- a/pkg/instance/options.go +++ b/pkg/instance/options.go @@ -188,24 +188,55 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett } } +func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string { + + if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm { + return "docker" + } + + return backendConfig.Command +} + // BuildCommandArgs builds command line arguments for the backend -func (c *CreateInstanceOptions) BuildCommandArgs() []string { - switch c.BackendType { - case backends.BackendTypeLlamaCpp: - if c.LlamaServerOptions != nil { - return c.LlamaServerOptions.BuildCommandArgs() +func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string { + + var args []string + + if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm { + // For Docker, start with Docker args + args = append(args, backendConfig.Docker.Args...) + args = append(args, backendConfig.Docker.Image) + + switch c.BackendType { + case backends.BackendTypeLlamaCpp: + if c.LlamaServerOptions != nil { + args = append(args, c.LlamaServerOptions.BuildDockerArgs()...) + } + case backends.BackendTypeVllm: + if c.VllmServerOptions != nil { + args = append(args, c.VllmServerOptions.BuildDockerArgs()...) + } } - case backends.BackendTypeMlxLm: - if c.MlxServerOptions != nil { - return c.MlxServerOptions.BuildCommandArgs() - } - case backends.BackendTypeVllm: - if c.VllmServerOptions != nil { - // Prepend "serve" as first argument - args := []string{"serve"} - args = append(args, c.VllmServerOptions.BuildCommandArgs()...) - return args + + } else { + // For native execution, start with backend args + args = append(args, backendConfig.Args...) + + switch c.BackendType { + case backends.BackendTypeLlamaCpp: + if c.LlamaServerOptions != nil { + args = append(args, c.LlamaServerOptions.BuildCommandArgs()...) + } + case backends.BackendTypeMlxLm: + if c.MlxServerOptions != nil { + args = append(args, c.MlxServerOptions.BuildCommandArgs()...) 
+ } + case backends.BackendTypeVllm: + if c.VllmServerOptions != nil { + args = append(args, c.VllmServerOptions.BuildCommandArgs()...) + } } } - return []string{} + + return args } diff --git a/pkg/instance/timeout_test.go b/pkg/instance/timeout_test.go index 21e3584..1171c6a 100644 --- a/pkg/instance/timeout_test.go +++ b/pkg/instance/timeout_test.go @@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) { func TestUpdateLastRequestTime(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ @@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) { func TestShouldTimeout_NotRunning(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ @@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) { func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ @@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) { func TestShouldTimeout_WithinTimeLimit(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ @@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) { func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ @@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) { func TestTimeoutConfiguration_Validation(t *testing.T) { backendConfig := &config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } globalSettings := &config.InstancesConfig{ diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index e022c5f..c629c63 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -16,8 +16,12 @@ import ( func TestNewInstanceManager(t *testing.T) { backendConfig := config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } cfg := config.InstancesConfig{ @@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) { tempDir := t.TempDir() backendConfig := config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + 
LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } cfg := config.InstancesConfig{ @@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) { // Helper function to create a test manager with standard config func createTestManager() manager.InstanceManager { backendConfig := config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } cfg := config.InstancesConfig{ diff --git a/pkg/manager/operations_test.go b/pkg/manager/operations_test.go index 87c37d4..97358c5 100644 --- a/pkg/manager/operations_test.go +++ b/pkg/manager/operations_test.go @@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) { // Test max instances limit backendConfig := config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{ + Command: "llama-server", + }, + MLX: config.BackendSettings{ + Command: "mlx_lm.server", + }, } cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go index c982f10..0ee9c11 100644 --- a/pkg/manager/timeout.go +++ b/pkg/manager/timeout.go @@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error { im.mu.RLock() var lruInstance *instance.Process - for name, _ := range im.runningInstances { + for name := range im.runningInstances { inst := im.instances[name] if inst == nil { continue diff --git a/pkg/manager/timeout_test.go b/pkg/manager/timeout_test.go index 91b3ad7..08d500c 100644 --- a/pkg/manager/timeout_test.go +++ b/pkg/manager/timeout_test.go @@ -14,8 +14,8 @@ import ( func TestTimeoutFunctionality(t *testing.T) { // Test timeout checker initialization backendConfig := config.BackendConfig{ - LlamaExecutable: "llama-server", - MLXLMExecutable: "mlx_lm.server", + LlamaCpp: config.BackendSettings{Command: "llama-server"}, + MLX: config.BackendSettings{Command: "mlx_lm.server"}, } cfg := config.InstancesConfig{ PortRange: [2]int{8000, 9000}, diff --git a/webui/src/components/BackendBadge.tsx b/webui/src/components/BackendBadge.tsx index 779fc81..88d29d7 100644 --- a/webui/src/components/BackendBadge.tsx +++ b/webui/src/components/BackendBadge.tsx @@ -1,13 +1,14 @@ import React from "react"; import { Badge } from "@/components/ui/badge"; import { BackendType, type BackendTypeValue } from "@/types/instance"; -import { Server } from "lucide-react"; +import { Server, Package } from "lucide-react"; interface BackendBadgeProps { backend?: BackendTypeValue; + docker?: boolean; } -const BackendBadge: React.FC = ({ backend }) => { +const BackendBadge: React.FC = ({ backend, docker }) => { if (!backend) { return null; } @@ -39,13 +40,25 @@ const BackendBadge: React.FC = ({ backend }) => { }; return ( - - - {getText()} - +
+ + + {getText()} + + {docker && ( + + + Docker + + )} +
); }; diff --git a/webui/src/components/InstanceCard.tsx b/webui/src/components/InstanceCard.tsx index b3b3339..a867dd3 100644 --- a/webui/src/components/InstanceCard.tsx +++ b/webui/src/components/InstanceCard.tsx @@ -66,7 +66,7 @@ function InstanceCard({ {/* Badges row */}
- + {running && }
diff --git a/webui/src/types/instance.ts b/webui/src/types/instance.ts index f55600a..074e2f2 100644 --- a/webui/src/types/instance.ts +++ b/webui/src/types/instance.ts @@ -23,4 +23,5 @@ export interface Instance { name: string; status: InstanceStatus; options?: CreateInstanceOptions; + docker_enabled?: boolean; // indicates backend is running via Docker } \ No newline at end of file
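A closing note on the `docker_enabled` field added to the webui `Instance` type above: it is filled in by the `MarshalJSON` change in `pkg/instance/instance.go`, which embeds the struct through a type alias so the extra field can be appended without recursing into the custom marshaller. A minimal, self-contained sketch of that pattern follows; the `Process` shape here is a simplified stand-in, not the real type.

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Simplified stand-in for instance.Process.
type Process struct {
	Name          string `json:"name"`
	dockerEnabled bool   // resolved from the backend config at marshal time in the real code
}

func (p *Process) MarshalJSON() ([]byte, error) {
	// Alias has the same fields as Process but none of its methods, so
	// marshalling it does not re-enter this MarshalJSON (no recursion).
	type Alias Process
	return json.Marshal(&struct {
		*Alias
		DockerEnabled bool `json:"docker_enabled,omitempty"`
	}{
		Alias:         (*Alias)(p),
		DockerEnabled: p.dockerEnabled,
	})
}

func main() {
	out, _ := json.Marshal(&Process{Name: "llama-3", dockerEnabled: true})
	fmt.Println(string(out)) // {"name":"llama-3","docker_enabled":true}
}
```

The webui then reads `docker_enabled` from this payload to decide whether to render the extra Docker badge next to the backend badge in `BackendBadge`/`InstanceCard`.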