Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-06 00:54:23 +00:00)
Refactor backend configuration to use structured settings and update environment variable handling
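For orientation, here is a minimal, hypothetical sketch (not part of the commit) of how the new structured backend settings decode from YAML. The struct definitions mirror the types added below; the top-level keys (llama-cpp, vllm, mlx) come from the struct tags, and decoding straight into BackendConfig is an assumption made only to keep the example self-contained.

package main

import (
    "fmt"

    "gopkg.in/yaml.v3"
)

// Mirrors of the types added in this commit, repeated here so the sketch compiles on its own.
type DockerSettings struct {
    Enabled     bool              `yaml:"enabled"`
    Image       string            `yaml:"image"`
    Args        []string          `yaml:"args"`
    Environment map[string]string `yaml:"environment,omitempty"`
}

type BackendSettings struct {
    Command string          `yaml:"command"`
    Args    []string        `yaml:"args"`
    Docker  *DockerSettings `yaml:"docker,omitempty"`
}

type BackendConfig struct {
    LlamaCpp BackendSettings `yaml:"llama-cpp"`
    VLLM     BackendSettings `yaml:"vllm"`
    MLX      BackendSettings `yaml:"mlx"`
}

const sample = `
llama-cpp:
  command: llama-server
  docker:
    enabled: true
    image: ghcr.io/ggml-org/llama.cpp:server
    args: ["--network", "host", "--gpus", "all"]
    environment:
      CUDA_VISIBLE_DEVICES: "0"
vllm:
  command: vllm
  args: ["serve"]
mlx:
  command: mlx_lm.server
`

func main() {
    var bc BackendConfig
    if err := yaml.Unmarshal([]byte(sample), &bc); err != nil {
        panic(err)
    }
    fmt.Println(bc.LlamaCpp.Docker.Image) // ghcr.io/ggml-org/llama.cpp:server
    fmt.Println(bc.VLLM.Args)             // [serve]
}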
@@ -10,16 +10,26 @@ import (
 	"gopkg.in/yaml.v3"
 )
 
+// BackendSettings contains structured backend configuration
+type BackendSettings struct {
+	Command string          `yaml:"command"`
+	Args    []string        `yaml:"args"`
+	Docker  *DockerSettings `yaml:"docker,omitempty"`
+}
+
+// DockerSettings contains Docker-specific configuration
+type DockerSettings struct {
+	Enabled     bool              `yaml:"enabled"`
+	Image       string            `yaml:"image"`
+	Args        []string          `yaml:"args"`
+	Environment map[string]string `yaml:"environment,omitempty"`
+}
+
 // BackendConfig contains backend executable configurations
 type BackendConfig struct {
-	// Path to llama-server executable (llama.cpp backend)
-	LlamaExecutable string `yaml:"llama_executable"`
-
-	// Path to mlx_lm executable (MLX-LM backend)
-	MLXLMExecutable string `yaml:"mlx_lm_executable"`
-
-	// Path to vllm executable (vLLM backend)
-	VllmExecutable string `yaml:"vllm_executable"`
+	LlamaCpp BackendSettings `yaml:"llama-cpp"`
+	VLLM     BackendSettings `yaml:"vllm"`
+	MLX      BackendSettings `yaml:"mlx"`
 }
 
 // AppConfig represents the configuration for llamactl
@@ -123,9 +133,31 @@ func LoadConfig(configPath string) (AppConfig, error) {
 			EnableSwagger: false,
 		},
 		Backends: BackendConfig{
-			LlamaExecutable: "llama-server",
-			MLXLMExecutable: "mlx_lm.server",
-			VllmExecutable:  "vllm",
+			LlamaCpp: BackendSettings{
+				Command: "llama-server",
+				Args:    []string{},
+				Docker: &DockerSettings{
+					Enabled:     false,
+					Image:       "ghcr.io/ggml-org/llama.cpp:server",
+					Args:        []string{"--network", "host", "--gpus", "all"},
+					Environment: map[string]string{},
+				},
+			},
+			VLLM: BackendSettings{
+				Command: "vllm",
+				Args:    []string{"serve"},
+				Docker: &DockerSettings{
+					Enabled:     false,
+					Image:       "vllm/vllm-openai:latest",
+					Args:        []string{"--network", "host", "--gpus", "all", "--shm-size", "1g"},
+					Environment: map[string]string{},
+				},
+			},
+			MLX: BackendSettings{
+				Command: "mlx_lm.server",
+				Args:    []string{},
+				// No Docker section for MLX - not supported
+			},
 		},
 		Instances: InstancesConfig{
 			PortRange: [2]int{8000, 9000},
@@ -244,15 +276,96 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 	}
 	// Backend config
-	if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
-		cfg.Backends.LlamaExecutable = llamaExec
+	// LlamaCpp backend
+	if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
+		cfg.Backends.LlamaCpp.Command = llamaCmd
 	}
-	if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
-		cfg.Backends.MLXLMExecutable = mlxLMExec
+	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
+		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
 	}
-	if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
-		cfg.Backends.VllmExecutable = vllmExec
+	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
+		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
+			if cfg.Backends.LlamaCpp.Docker == nil {
+				cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+			}
+			cfg.Backends.LlamaCpp.Docker.Enabled = b
+		}
 	}
+	if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
+	}
+	if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
+	}
+	if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
+		if cfg.Backends.LlamaCpp.Docker == nil {
+			cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
+			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
+		}
+		// Parse env vars in format "KEY1=value1,KEY2=value2"
+		for _, envPair := range strings.Split(llamaDockerEnv, ",") {
+			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+				cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
+			}
+		}
+	}
+
+	// vLLM backend
+	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
+		cfg.Backends.VLLM.Command = vllmCmd
+	}
+	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
+		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
+			if cfg.Backends.VLLM.Docker == nil {
+				cfg.Backends.VLLM.Docker = &DockerSettings{}
+			}
+			cfg.Backends.VLLM.Docker.Enabled = b
+		}
+	}
+	if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Image = vllmDockerImage
+	}
+	if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
+	}
+	if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
+		if cfg.Backends.VLLM.Docker == nil {
+			cfg.Backends.VLLM.Docker = &DockerSettings{}
+		}
+		if cfg.Backends.VLLM.Docker.Environment == nil {
+			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
+		}
+		// Parse env vars in format "KEY1=value1,KEY2=value2"
+		for _, envPair := range strings.Split(vllmDockerEnv, ",") {
+			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+				cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
+			}
+		}
+	}
+
+	// MLX backend
+	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
+		cfg.Backends.MLX.Command = mlxCmd
+	}
+	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
+		cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
+	}
 
 	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
 		if b, err := strconv.ParseBool(autoRestart); err == nil {
 			cfg.Instances.DefaultAutoRestart = b
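The LLAMACTL_*_DOCKER_ENV variables above use a "KEY1=value1,KEY2=value2" format. The following standalone sketch repeats the parsing loop from the commit behind a hypothetical parseDockerEnv helper, just to show the expected input-to-map mapping.

package main

import (
    "fmt"
    "strings"
)

// parseDockerEnv is a hypothetical helper; the commit inlines this loop per backend.
// It splits "KEY1=value1,KEY2=value2" into a map, skipping malformed entries.
func parseDockerEnv(raw string) map[string]string {
    env := make(map[string]string)
    for _, pair := range strings.Split(raw, ",") {
        if parts := strings.SplitN(strings.TrimSpace(pair), "=", 2); len(parts) == 2 {
            env[parts[0]] = parts[1]
        }
    }
    return env
}

func main() {
    fmt.Println(parseDockerEnv("CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4"))
    // Output: map[CUDA_VISIBLE_DEVICES:0 OMP_NUM_THREADS:4]
}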
@@ -386,3 +499,17 @@ func getDefaultConfigLocations() []string {
 
 	return locations
 }
+
+// GetBackendSettings resolves backend settings
+func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
+	switch backendType {
+	case "llama-cpp":
+		return bc.LlamaCpp
+	case "vllm":
+		return bc.VLLM
+	case "mlx":
+		return bc.MLX
+	default:
+		return BackendSettings{}
+	}
+}
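As an illustration of what a caller might do with the settings returned by GetBackendSettings, here is a hypothetical sketch that assembles an argv slice, using docker run when Docker is enabled and the plain command otherwise. This is not code from llamactl; the buildArgv name and the docker-run flag layout are assumptions, and the sketch reuses the BackendSettings and DockerSettings types added in this commit (or the mirror definitions in the first sketch above).

// Hypothetical helper, not part of the commit: turn resolved BackendSettings
// into an argv slice. Assumes Docker.Args are `docker run` flags and that
// Environment entries become -e KEY=value pairs; llamactl's real launch logic
// may differ.
func buildArgv(s BackendSettings, extraArgs []string) []string {
    if s.Docker != nil && s.Docker.Enabled {
        argv := []string{"docker", "run", "--rm"}
        argv = append(argv, s.Docker.Args...)
        for k, v := range s.Docker.Environment {
            argv = append(argv, "-e", k+"="+v)
        }
        argv = append(argv, s.Docker.Image)
        argv = append(argv, s.Args...)
        return append(argv, extraArgs...)
    }
    argv := append([]string{s.Command}, s.Args...)
    return append(argv, extraArgs...)
}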
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 		"LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
 		"LLAMACTL_LOGS_DIR":              "/env/logs",
 		"LLAMACTL_MAX_INSTANCES":         "20",
-		"LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
 		"LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
 		"LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
 		"LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
 	if cfg.Instances.MaxInstances != 20 {
 		t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
 	}
-	if cfg.Backends.LlamaExecutable != "/env/llama-server" {
-		t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
+	if cfg.Backends.LlamaCpp.Command != "llama-server" {
+		t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
 	}
 	if cfg.Instances.DefaultAutoRestart {
 		t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
 		t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
 	}
 }
+
+func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
+	bc := &config.BackendConfig{
+		LlamaCpp: config.BackendSettings{
+			Command: "custom-llama",
+			Args:    []string{"--verbose"},
+			Docker: &config.DockerSettings{
+				Enabled:     true,
+				Image:       "custom-llama:latest",
+				Args:        []string{"--gpus", "all"},
+				Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
+			},
+		},
+		VLLM: config.BackendSettings{
+			Command: "custom-vllm",
+			Args:    []string{"serve", "--debug"},
+		},
+		MLX: config.BackendSettings{
+			Command: "custom-mlx",
+			Args:    []string{},
+		},
+	}
+
+	// Test llama-cpp with Docker
+	settings := bc.GetBackendSettings("llama-cpp")
+	if settings.Command != "custom-llama" {
+		t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
+	}
+	if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
+		t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
+	}
+	if settings.Docker == nil || !settings.Docker.Enabled {
+		t.Error("Expected Docker to be enabled")
+	}
+	if settings.Docker.Image != "custom-llama:latest" {
+		t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
+	}
+
+	// Test vLLM without Docker
+	settings = bc.GetBackendSettings("vllm")
+	if settings.Command != "custom-vllm" {
+		t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
+	}
+	if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
+		t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
+	}
+	if settings.Docker != nil && settings.Docker.Enabled {
+		t.Error("Expected Docker to be disabled or nil")
+	}
+
+	// Test MLX
+	settings = bc.GetBackendSettings("mlx")
+	if settings.Command != "custom-mlx" {
+		t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
+	}
+}
+
+func TestGetBackendSettings_EmptyConfig(t *testing.T) {
+	bc := &config.BackendConfig{}
+
+	// Test empty llama-cpp
+	settings := bc.GetBackendSettings("llama-cpp")
+	if settings.Command != "" {
+		t.Errorf("Expected empty command, got %q", settings.Command)
+	}
+
+	// Test empty vLLM
+	settings = bc.GetBackendSettings("vllm")
+	if settings.Command != "" {
+		t.Errorf("Expected empty command, got %q", settings.Command)
+	}
+
+	// Test empty MLX
+	settings = bc.GetBackendSettings("mlx")
+	if settings.Command != "" {
+		t.Errorf("Expected empty command, got %q", settings.Command)
+	}
+}
+
+func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
+	// Test that backend environment variables work correctly
+	envVars := map[string]string{
+		"LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
+		"LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
+		"LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
+		"LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
+		"LLAMACTL_LLAMACPP_DOCKER_ARGS":    "--network host --gpus all",
+		"LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
+		"LLAMACTL_VLLM_COMMAND":            "env-vllm",
+		"LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
+		"LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
+		"LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
+		"LLAMACTL_MLX_COMMAND":             "env-mlx",
+	}
+
+	// Set env vars and ensure cleanup
+	for key, value := range envVars {
+		os.Setenv(key, value)
+		defer os.Unsetenv(key)
+	}
+
+	cfg, err := config.LoadConfig("nonexistent-file.yaml")
+	if err != nil {
+		t.Fatalf("LoadConfig failed: %v", err)
+	}
+
+	// Verify llama-cpp environment overrides
+	if cfg.Backends.LlamaCpp.Command != "env-llama" {
+		t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
+	}
+	expectedArgs := []string{"--verbose", "--threads", "4"}
+	if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
+		t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
+	}
+	if !cfg.Backends.LlamaCpp.Docker.Enabled {
+		t.Error("Expected llama Docker to be enabled")
+	}
+	if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
+		t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
+	}
+	expectedDockerArgs := []string{"--network", "host", "--gpus", "all"}
+	if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
+		t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
+	}
+	if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
+		t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
+	}
+	if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
+		t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
+	}
+
+	// Verify vLLM environment overrides
+	if cfg.Backends.VLLM.Command != "env-vllm" {
+		t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
+	}
+	if cfg.Backends.VLLM.Docker.Enabled {
+		t.Error("Expected vLLM Docker to be disabled")
+	}
+	if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
+		t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
+	}
+
+	// Verify MLX environment overrides
+	if cfg.Backends.MLX.Command != "env-mlx" {
+		t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
+	}
+}
+
+func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
+	bc := &config.BackendConfig{
+		LlamaCpp: config.BackendSettings{
+			Command: "llama-server",
+			Args:    []string{},
+		},
+	}
+
+	// Test invalid backend type returns empty settings
+	settings := bc.GetBackendSettings("invalid-backend")
+	if settings.Command != "" {
+		t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
+	}
+}