diff --git a/pkg/config/config.go b/pkg/config/config.go
index 504ecc3..57d863f 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -10,16 +10,26 @@ import (
     "gopkg.in/yaml.v3"
 )
 
+// BackendSettings contains structured backend configuration
+type BackendSettings struct {
+    Command string          `yaml:"command"`
+    Args    []string        `yaml:"args"`
+    Docker  *DockerSettings `yaml:"docker,omitempty"`
+}
+
+// DockerSettings contains Docker-specific configuration
+type DockerSettings struct {
+    Enabled     bool              `yaml:"enabled"`
+    Image       string            `yaml:"image"`
+    Args        []string          `yaml:"args"`
+    Environment map[string]string `yaml:"environment,omitempty"`
+}
+
 // BackendConfig contains backend executable configurations
 type BackendConfig struct {
-    // Path to llama-server executable (llama.cpp backend)
-    LlamaExecutable string `yaml:"llama_executable"`
-
-    // Path to mlx_lm executable (MLX-LM backend)
-    MLXLMExecutable string `yaml:"mlx_lm_executable"`
-
-    // Path to vllm executable (vLLM backend)
-    VllmExecutable string `yaml:"vllm_executable"`
+    LlamaCpp BackendSettings `yaml:"llama-cpp"`
+    VLLM     BackendSettings `yaml:"vllm"`
+    MLX      BackendSettings `yaml:"mlx"`
 }
 
 // AppConfig represents the configuration for llamactl
@@ -123,9 +133,31 @@ func LoadConfig(configPath string) (AppConfig, error) {
             EnableSwagger: false,
         },
         Backends: BackendConfig{
-            LlamaExecutable: "llama-server",
-            MLXLMExecutable: "mlx_lm.server",
-            VllmExecutable:  "vllm",
+            LlamaCpp: BackendSettings{
+                Command: "llama-server",
+                Args:    []string{},
+                Docker: &DockerSettings{
+                    Enabled:     false,
+                    Image:       "ghcr.io/ggml-org/llama.cpp:server",
+                    Args:        []string{"--network", "host", "--gpus", "all"},
+                    Environment: map[string]string{},
+                },
+            },
+            VLLM: BackendSettings{
+                Command: "vllm",
+                Args:    []string{"serve"},
+                Docker: &DockerSettings{
+                    Enabled:     false,
+                    Image:       "vllm/vllm-openai:latest",
+                    Args:        []string{"--network", "host", "--gpus", "all", "--shm-size", "1g"},
+                    Environment: map[string]string{},
+                },
+            },
+            MLX: BackendSettings{
+                Command: "mlx_lm.server",
+                Args:    []string{},
+                // No Docker section for MLX - not supported
+            },
         },
         Instances: InstancesConfig{
             PortRange: [2]int{8000, 9000},
@@ -244,15 +276,96 @@ func loadEnvVars(cfg *AppConfig) {
         }
     }
     // Backend config
-    if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-        cfg.Backends.LlamaExecutable = llamaExec
+    // LlamaCpp backend
+    if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
+        cfg.Backends.LlamaCpp.Command = llamaCmd
     }
-    if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
-        cfg.Backends.MLXLMExecutable = mlxLMExec
+    if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
+        cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
     }
-    if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
-        cfg.Backends.VllmExecutable = vllmExec
+    if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
+        if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
+            if cfg.Backends.LlamaCpp.Docker == nil {
+                cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+            }
+            cfg.Backends.LlamaCpp.Docker.Enabled = b
+        }
     }
+    if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
+        if cfg.Backends.LlamaCpp.Docker == nil {
+            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+        }
+        cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
+    }
+    if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
+        if cfg.Backends.LlamaCpp.Docker == nil {
+            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+        }
+        cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
+    }
+    if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
+        if cfg.Backends.LlamaCpp.Docker == nil {
+            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+        }
+        if cfg.Backends.LlamaCpp.Docker.Environment == nil {
+            cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
+        }
+        // Parse env vars in format "KEY1=value1,KEY2=value2"
+        for _, envPair := range strings.Split(llamaDockerEnv, ",") {
+            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+                cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
+            }
+        }
+    }
+
+    // vLLM backend
+    if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
+        cfg.Backends.VLLM.Command = vllmCmd
+    }
+    if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
+        if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
+            if cfg.Backends.VLLM.Docker == nil {
+                cfg.Backends.VLLM.Docker = &DockerSettings{}
+            }
+            cfg.Backends.VLLM.Docker.Enabled = b
+        }
+    }
+    if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
+        if cfg.Backends.VLLM.Docker == nil {
+            cfg.Backends.VLLM.Docker = &DockerSettings{}
+        }
+        cfg.Backends.VLLM.Docker.Image = vllmDockerImage
+    }
+    if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
+        if cfg.Backends.VLLM.Docker == nil {
+            cfg.Backends.VLLM.Docker = &DockerSettings{}
+        }
+        cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
+    }
+    if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
+        if cfg.Backends.VLLM.Docker == nil {
+            cfg.Backends.VLLM.Docker = &DockerSettings{}
+        }
+        if cfg.Backends.VLLM.Docker.Environment == nil {
+            cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
+        }
+        // Parse env vars in format "KEY1=value1,KEY2=value2"
+        for _, envPair := range strings.Split(vllmDockerEnv, ",") {
+            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+                cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
+            }
+        }
+    }
+
+    // MLX backend
+    if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
+        cfg.Backends.MLX.Command = mlxCmd
+    }
+    if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
+        cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
+    }
+
+    // Instance defaults
     if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
         if b, err := strconv.ParseBool(autoRestart); err == nil {
             cfg.Instances.DefaultAutoRestart = b
@@ -386,3 +499,17 @@ func getDefaultConfigLocations() []string {
 
     return locations
 }
+
+// GetBackendSettings resolves backend settings
+func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
+    switch backendType {
+    case "llama-cpp":
+        return bc.LlamaCpp
+    case "vllm":
+        return bc.VLLM
+    case "mlx":
+        return bc.MLX
+    default:
+        return BackendSettings{}
+    }
+}
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
index ed95429..c541295 100644
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
         "LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
         "LLAMACTL_LOGS_DIR":              "/env/logs",
         "LLAMACTL_MAX_INSTANCES":         "20",
-        "LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
         "LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
         "LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
         "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
     if cfg.Instances.MaxInstances != 20 {
         t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
     }
-    if cfg.Backends.LlamaExecutable != "/env/llama-server" {
-        t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
+    if cfg.Backends.LlamaCpp.Command != "llama-server" {
+        t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
     }
     if cfg.Instances.DefaultAutoRestart {
         t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
         t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
     }
 }
+
+func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
+    bc := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "custom-llama",
+            Args:    []string{"--verbose"},
+            Docker: &config.DockerSettings{
+                Enabled:     true,
+                Image:       "custom-llama:latest",
+                Args:        []string{"--gpus", "all"},
+                Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
+            },
+        },
+        VLLM: config.BackendSettings{
+            Command: "custom-vllm",
+            Args:    []string{"serve", "--debug"},
+        },
+        MLX: config.BackendSettings{
+            Command: "custom-mlx",
+            Args:    []string{},
+        },
+    }
+
+    // Test llama-cpp with Docker
+    settings := bc.GetBackendSettings("llama-cpp")
+    if settings.Command != "custom-llama" {
+        t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
+    }
+    if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
+        t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
+    }
+    if settings.Docker == nil || !settings.Docker.Enabled {
+        t.Error("Expected Docker to be enabled")
+    }
+    if settings.Docker.Image != "custom-llama:latest" {
+        t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
+    }
+
+    // Test vLLM without Docker
+    settings = bc.GetBackendSettings("vllm")
+    if settings.Command != "custom-vllm" {
+        t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
+    }
+    if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
+        t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
+    }
+    if settings.Docker != nil && settings.Docker.Enabled {
+        t.Error("Expected Docker to be disabled or nil")
+    }
+
+    // Test MLX
+    settings = bc.GetBackendSettings("mlx")
+    if settings.Command != "custom-mlx" {
+        t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
+    }
+}
+
+func TestGetBackendSettings_EmptyConfig(t *testing.T) {
+    bc := &config.BackendConfig{}
+
+    // Test empty llama-cpp
+    settings := bc.GetBackendSettings("llama-cpp")
+    if settings.Command != "" {
+        t.Errorf("Expected empty command, got %q", settings.Command)
+    }
+
+    // Test empty vLLM
+    settings = bc.GetBackendSettings("vllm")
+    if settings.Command != "" {
+        t.Errorf("Expected empty command, got %q", settings.Command)
+    }
+
+    // Test empty MLX
+    settings = bc.GetBackendSettings("mlx")
+    if settings.Command != "" {
+        t.Errorf("Expected empty command, got %q", settings.Command)
+    }
+}
+
+func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
+    // Test that backend environment variables work correctly
+    envVars := map[string]string{
+        "LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
+        "LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
+        "LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
+        "LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
+        "LLAMACTL_LLAMACPP_DOCKER_ARGS":    "--network host --gpus all",
+        "LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
+        "LLAMACTL_VLLM_COMMAND":            "env-vllm",
+        "LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
+        "LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
+        "LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
+        "LLAMACTL_MLX_COMMAND":             "env-mlx",
+    }
+
+    // Set env vars and ensure cleanup
+    for key, value := range envVars {
+        os.Setenv(key, value)
+        defer os.Unsetenv(key)
+    }
+
+    cfg, err := config.LoadConfig("nonexistent-file.yaml")
+    if err != nil {
+        t.Fatalf("LoadConfig failed: %v", err)
+    }
+
+    // Verify llama-cpp environment overrides
+    if cfg.Backends.LlamaCpp.Command != "env-llama" {
+        t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
+    }
+    expectedArgs := []string{"--verbose", "--threads", "4"}
+    if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
+        t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
+    }
+    if !cfg.Backends.LlamaCpp.Docker.Enabled {
+        t.Error("Expected llama Docker to be enabled")
+    }
+    if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
+        t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
+    }
+    expectedDockerArgs := []string{"--network", "host", "--gpus", "all"}
+    if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
+        t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
+    }
+    if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
+        t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
+    }
+    if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
+        t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
+    }
+
+    // Verify vLLM environment overrides
+    if cfg.Backends.VLLM.Command != "env-vllm" {
+        t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
+    }
+    if cfg.Backends.VLLM.Docker.Enabled {
+        t.Error("Expected vLLM Docker to be disabled")
+    }
+    if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
+        t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
+    }
+
+    // Verify MLX environment overrides
+    if cfg.Backends.MLX.Command != "env-mlx" {
+        t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
+    }
+}
+
+func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
+    bc := &config.BackendConfig{
+        LlamaCpp: config.BackendSettings{
+            Command: "llama-server",
+            Args:    []string{},
+        },
+    }
+
+    // Test invalid backend type returns empty settings
+    settings := bc.GetBackendSettings("invalid-backend")
+    if settings.Command != "" {
+        t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
+    }
+}