Refactor backend configuration to use structured settings and update environment variable handling

2025-12-22 17:14:22 +00:00 · 2025-09-24 20:31:20 +02:00
parent 78a483ee4a
commit 9a56660f68
2 changed files with 308 additions and 20 deletions
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -10,16 +10,26 @@ import (
 	"gopkg.in/yaml.v3"
 )

+// BackendSettings holds the launch configuration for one backend: its command, its arguments and optional Docker settings.
+type BackendSettings struct {
+	Command string          `yaml:"command"`          // backend command, e.g. "llama-server"
+	Args    []string        `yaml:"args"`             // arguments for Command; NOTE(review): presumably placed before per-instance flags - confirm
+	Docker  *DockerSettings `yaml:"docker,omitempty"` // optional Docker settings; nil when no docker section is configured
+}
+
+// DockerSettings contains the Docker-specific configuration of a backend.
+type DockerSettings struct {
+	Enabled     bool              `yaml:"enabled"`               // Docker on/off switch; NOTE(review): presumably runs the backend in a container when true - confirm
+	Image       string            `yaml:"image"`                 // container image, e.g. "ghcr.io/ggml-org/llama.cpp:server"
+	Args        []string          `yaml:"args"`                  // Docker arguments, e.g. "--network", "host", "--gpus", "all"
+	Environment map[string]string `yaml:"environment,omitempty"` // environment variables keyed by name; NOTE(review): presumably set inside the container - confirm
+}
+
 // BackendConfig contains backend executable configurations
 type BackendConfig struct {
-	// Path to llama-server executable (llama.cpp backend)
-	LlamaExecutable string `yaml:"llama_executable"`
-
-	// Path to mlx_lm executable (MLX-LM backend)
-	MLXLMExecutable string `yaml:"mlx_lm_executable"`
-
-	// Path to vllm executable (vLLM backend)
-	VllmExecutable string `yaml:"vllm_executable"`
+	LlamaCpp BackendSettings `yaml:"llama-cpp"` // llama.cpp backend settings
+	VLLM     BackendSettings `yaml:"vllm"`      // vLLM backend settings
+	MLX      BackendSettings `yaml:"mlx"`       // MLX-LM backend settings
 }

 // AppConfig represents the configuration for llamactl
@@ -123,9 +133,31 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			EnableSwagger:  false,
 		},
 		Backends: BackendConfig{
-			LlamaExecutable: "llama-server",
-			MLXLMExecutable: "mlx_lm.server",
-			VllmExecutable:  "vllm",
+			LlamaCpp: BackendSettings{
+				Command: "llama-server",
+				Args:    []string{},
+				Docker: &DockerSettings{
+					Enabled:     false,
+					Image:       "ghcr.io/ggml-org/llama.cpp:server",
+					Args:        []string{"--network", "host", "--gpus", "all"},
+					Environment: map[string]string{},
+				},
+			},
+			VLLM: BackendSettings{
+				Command: "vllm",
+				Args:    []string{"serve"},
+				Docker: &DockerSettings{
+					Enabled:     false,
+					Image:       "vllm/vllm-openai:latest",
+					Args:        []string{"--network", "host", "--gpus", "all", "--shm-size", "1g"},
+					Environment: map[string]string{},
+				},
+			},
+			MLX: BackendSettings{
+				Command: "mlx_lm.server",
+				Args:    []string{},
+				// No Docker section for MLX - not supported
+			},
 		},
 		Instances: InstancesConfig{
 			PortRange:            [2]int{8000, 9000},
@@ -244,15 +276,96 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 	}
 	// Backend config
-	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-		cfg.Backends.LlamaExecutable = llamaExec
+	// LlamaCpp backend
+	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
+		cfg.Backends.LlamaCpp.Command = llamaCmd
 	}
-	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
-		cfg.Backends.MLXLMExecutable = mlxLMExec
+	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
+		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
 	}
-	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
-		cfg.Backends.VllmExecutable = vllmExec
+	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
+		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
+			if cfg.Backends.LlamaCpp.Docker == nil {
+				cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+			}
+			cfg.Backends.LlamaCpp.Docker.Enabled = b
+		}
 	}
+	if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
+	}
+	if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
+	}
+	if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
+			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
+		}
+		// Parse env vars in format "KEY1=value1,KEY2=value2"
+		for _, envPair := range strings.Split(llamaDockerEnv, ",") {
+			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+				cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
+			}
+		}
+	}
+
+	// vLLM backend
+	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
+		cfg.Backends.VLLM.Command = vllmCmd
+	}
+	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
+		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
+			if cfg.Backends.VLLM.Docker == nil {
+				cfg.Backends.VLLM.Docker = &DockerSettings{}
+			}
+			cfg.Backends.VLLM.Docker.Enabled = b
+		}
+	}
+	if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Image = vllmDockerImage
+	}
+	if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
+	}
+	if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.VLLM.Docker.Environment == nil {
+			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
+		}
+		// Parse env vars in format "KEY1=value1,KEY2=value2"
+		for _, envPair := range strings.Split(vllmDockerEnv, ",") {
+			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+				cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
+			}
+		}
+	}
+
+	// MLX backend
+	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
+		cfg.Backends.MLX.Command = mlxCmd
+	}
+	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
+		cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
+	}
+
+	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
 			cfg.Instances.DefaultAutoRestart = b
@@ -386,3 +499,17 @@ func getDefaultConfigLocations() []string {

 	return locations
 }
+
+// GetBackendSettings returns the settings for "llama-cpp", "vllm" or "mlx"; any other backendType yields empty settings.
+func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
+	var settings BackendSettings
+	switch backendType {
+	case "llama-cpp":
+		settings = bc.LlamaCpp
+	case "vllm":
+		settings = bc.VLLM
+	case "mlx":
+		settings = bc.MLX
+	}
+	return settings
+}
--- a/pkg/config/config_test.go
+++ b/pkg/config/config_test.go
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 		"LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
 		"LLAMACTL_LOGS_DIR":              "/env/logs",
 		"LLAMACTL_MAX_INSTANCES":         "20",
-		"LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
 		"LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
 		"LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
 		"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 	if cfg.Instances.MaxInstances != 20 {
 		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Backends.LlamaExecutable != "/env/llama-server" {
-		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
+	if cfg.Backends.LlamaCpp.Command != "llama-server" {
+		t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
 	}
 	if cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
 		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
 	}
 }
+
+func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
+	bc := &config.BackendConfig{
+		LlamaCpp: config.BackendSettings{
+			Command: "custom-llama",
+			Args:    []string{"--verbose"},
+			Docker: &config.DockerSettings{
+				Enabled:     true,
+				Image:       "custom-llama:latest",
+				Args:        []string{"--gpus", "all"},
+				Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
+			},
+		},
+		VLLM: config.BackendSettings{
+			Command: "custom-vllm",
+			Args:    []string{"serve", "--debug"},
+		},
+		MLX: config.BackendSettings{
+			Command: "custom-mlx",
+			Args:    []string{},
+		},
+	}
+
+	// Test llama-cpp with Docker; a missing Docker section must abort (Fatal) before Image is read
+	settings := bc.GetBackendSettings("llama-cpp")
+	if settings.Command != "custom-llama" {
+		t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
+	}
+	if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
+		t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
+	}
+	if settings.Docker == nil || !settings.Docker.Enabled {
+		t.Fatal("Expected Docker to be enabled")
+	}
+	if settings.Docker.Image != "custom-llama:latest" {
+		t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
+	}
+
+	// Test vLLM without Docker
+	settings = bc.GetBackendSettings("vllm")
+	if settings.Command != "custom-vllm" {
+		t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
+	}
+	if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
+		t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
+	}
+	if settings.Docker != nil && settings.Docker.Enabled {
+		t.Error("Expected Docker to be disabled or nil")
+	}
+
+	// Test MLX
+	settings = bc.GetBackendSettings("mlx")
+	if settings.Command != "custom-mlx" {
+		t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
+	}
+}
+
+// TestGetBackendSettings_EmptyConfig verifies that a zero-value BackendConfig
+// reports an empty command for every known backend type.
+func TestGetBackendSettings_EmptyConfig(t *testing.T) {
+	emptyConfig := &config.BackendConfig{}
+
+	// Every known backend type, checked in turn.
+	backendTypes := []string{
+		"llama-cpp",
+		"vllm",
+		"mlx",
+	}
+
+	for _, backendType := range backendTypes {
+		command := emptyConfig.GetBackendSettings(backendType).Command
+		if command == "" {
+			continue
+		}
+		// Nothing is configured, so any command is unexpected.
+		t.Errorf("Expected empty command, got %q", command)
+	}
+}
+
+func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
+	// Test that backend environment variables are applied (the config path points at a nonexistent file)
+	envVars := map[string]string{
+		"LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
+		"LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
+		"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
+		"LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
+		"LLAMACTL_LLAMACPP_DOCKER_ARGS":    "--network host --gpus all",
+		"LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
+		"LLAMACTL_VLLM_COMMAND":            "env-vllm",
+		"LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
+		"LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
+		"LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
+		"LLAMACTL_MLX_COMMAND":             "env-mlx",
+	}
+
+	// Set env vars and ensure cleanup
+	for key, value := range envVars {
+		os.Setenv(key, value)
+		defer os.Unsetenv(key)
+	}
+
+	cfg, err := config.LoadConfig("nonexistent-file.yaml")
+	if err != nil {
+		t.Fatalf("LoadConfig failed: %v", err)
+	}
+
+	// Verify llama-cpp environment overrides (args are compared element by element, not just by length)
+	if cfg.Backends.LlamaCpp.Command != "env-llama" {
+		t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
+	}
+	expectedArgs := []string{"--verbose", "--threads", "4"}
+	if got := cfg.Backends.LlamaCpp.Args; len(got) != len(expectedArgs) || got[0] != expectedArgs[0] || got[1] != expectedArgs[1] || got[2] != expectedArgs[2] {
+		t.Errorf("Expected llama args %v, got %v", expectedArgs, got)
+	}
+	if !cfg.Backends.LlamaCpp.Docker.Enabled {
+		t.Error("Expected llama Docker to be enabled")
+	}
+	if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
+		t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
+	}
+	expectedDockerArgs := []string{"--network", "host", "--gpus", "all"}
+	if got := cfg.Backends.LlamaCpp.Docker.Args; len(got) != len(expectedDockerArgs) || got[0] != expectedDockerArgs[0] || got[1] != expectedDockerArgs[1] || got[2] != expectedDockerArgs[2] || got[3] != expectedDockerArgs[3] {
+		t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, got)
+	}
+	if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
+		t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
+	}
+	if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
+		t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
+	}
+
+	// Verify vLLM environment overrides
+	if cfg.Backends.VLLM.Command != "env-vllm" {
+		t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
+	}
+	if cfg.Backends.VLLM.Docker.Enabled {
+		t.Error("Expected vLLM Docker to be disabled")
+	}
+	if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
+		t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
+	}
+
+	// Verify MLX environment overrides
+	if cfg.Backends.MLX.Command != "env-mlx" {
+		t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
+	}
+}
+
+// TestGetBackendSettings_InvalidBackendType verifies that an unrecognized
+// backend type yields an empty command even when llama-cpp is configured.
+func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
+	llamaCpp := config.BackendSettings{Command: "llama-server", Args: []string{}}
+	bc := &config.BackendConfig{LlamaCpp: llamaCpp}
+
+	// "invalid-backend" matches none of the known backend types.
+	command := bc.GetBackendSettings("invalid-backend").Command
+	if command == "" {
+		return
+	}
+
+	t.Errorf("Expected empty command for invalid backend, got %q", command)
+}