Merge pull request #42 from lordmathis/feat/docker-backends
feat: Add support for dockerized backends
@@ -14,6 +14,7 @@

### 🔗 Universal Compatibility

- **OpenAI API Compatible**: Drop-in replacement - route requests by instance name
- **Multi-Backend Support**: Native support for llama.cpp, MLX (Apple Silicon optimized), and vLLM
- **Docker Support**: Run backends in containers

### 🌐 User-Friendly Interface

- **Web Dashboard**: Modern React UI for visual management (unlike CLI-only tools)

@@ -32,6 +33,7 @@

# For llama.cpp: https://github.com/ggml-org/llama.cpp#quick-start
# For MLX on macOS: pip install mlx-lm
# For vLLM: pip install vllm
# Or use Docker - no local installation required

# 2. Download and run llamactl
LATEST_VERSION=$(curl -s https://api.github.com/repos/lordmathis/llamactl/releases/latest | grep '"tag_name":' | sed -E 's/.*"([^"]+)".*/\1/')

@@ -112,6 +114,7 @@ You need `llama-server` from [llama.cpp](https://github.com/ggml-org/llama.cpp)

brew install llama.cpp

# Or build from source - see llama.cpp docs
# Or use Docker - no local installation required
```

**For MLX backend (macOS only):**

@@ -139,9 +142,51 @@ python -m venv vllm-env

source vllm-env/bin/activate
pip install vllm

# For production deployments, consider container-based installation
# Or use Docker - no local installation required
```

## Docker Support

llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for:

- Production deployments without local backend installation
- Isolating backend dependencies
- GPU-accelerated inference using official Docker images

### Docker Configuration

Enable Docker support using the new structured backend configuration:

```yaml
backends:
  llama-cpp:
    command: "llama-server"
    docker:
      enabled: true
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```
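
When `enabled: true` is set, llamactl launches the instance through the `docker` CLI instead of the native binary: the configured Docker args come first, then any container environment variables, then the image, then the instance's own flags. A minimal illustrative sketch in Go (the model path and port are hypothetical and not part of the config above):

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Pieces taken from the llama-cpp backend config above; instance flags are hypothetical.
	dockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
	image := "ghcr.io/ggml-org/llama.cpp:server"
	instanceArgs := []string{"--model", "/models/example.gguf", "--port", "8081"}

	// Rough shape of the composed invocation: docker <docker args> <image> <instance args>
	argv := append(append(dockerArgs, image), instanceArgs...)
	fmt.Println("docker " + strings.Join(argv, " "))
}
```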

### Key Features

- **Host Networking**: Uses `--network host` for seamless port management
- **GPU Support**: Includes `--gpus all` for GPU acceleration
- **Environment Variables**: Configure container environment as needed
- **Flexible Configuration**: Per-backend Docker settings with sensible defaults

### Requirements

- Docker installed and running
- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
- No local backend installation required when using Docker

## Configuration

llamactl works out of the box with sensible defaults.

@@ -154,9 +199,27 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []

instances:
  port_range: [8000, 9000] # Port range for instances

@@ -20,9 +20,27 @@ server:
  enable_swagger: false # Enable Swagger UI for API docs

backends:
  llama_executable: llama-server # Path to llama-server executable
  mlx_lm_executable: mlx_lm.server # Path to mlx_lm.server executable
  vllm_executable: vllm # Path to vllm executable
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []

instances:
  port_range: [8000, 9000] # Port range for instances

@@ -90,18 +108,40 @@ server:
- `LLAMACTL_ENABLE_SWAGGER` - Enable Swagger UI (true/false)

### Backend Configuration

```yaml
backends:
  llama_executable: "llama-server"   # Path to llama-server executable (default: "llama-server")
  mlx_lm_executable: "mlx_lm.server" # Path to mlx_lm.server executable (default: "mlx_lm.server")
  vllm_executable: "vllm"            # Path to vllm executable (default: "vllm")
  llama-cpp:
    command: "llama-server"
    args: []
    docker:
      enabled: false # Enable Docker runtime (default: false)
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}

  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: false
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}

  mlx:
    command: "mlx_lm.server"
    args: []
    # MLX does not support Docker
```

**Environment Variables:**

- `LLAMACTL_LLAMA_EXECUTABLE` - Path to llama-server executable
- `LLAMACTL_MLX_LM_EXECUTABLE` - Path to mlx_lm.server executable
- `LLAMACTL_VLLM_EXECUTABLE` - Path to vllm executable

**Backend Configuration Fields:**

- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)

### Instance Configuration

@@ -88,6 +88,21 @@ Here are basic example configurations for each backend:
}
```

## Docker Support

Llamactl can run backends in Docker containers. To enable Docker for a backend, add a `docker` section to that backend in your YAML configuration file (e.g. `config.yaml`) as shown below:

```yaml
backends:
  vllm:
    command: "vllm"
    args: ["serve"]
    docker:
      enabled: true
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
```

## Using the API

You can also manage instances via the REST API:

@@ -1,6 +1,8 @@
package backends

import (
    "fmt"
    "llamactl/pkg/config"
    "reflect"
    "strconv"
    "strings"
@@ -68,3 +70,24 @@ func BuildCommandArgs(options any, multipleFlags map[string]bool) []string {

    return args
}

// BuildDockerCommand builds a Docker command with the specified configuration and arguments
func BuildDockerCommand(backendConfig *config.BackendSettings, instanceArgs []string) (string, []string, error) {
    // Start with configured Docker arguments (should include "run", "--rm", etc.)
    dockerArgs := make([]string, len(backendConfig.Docker.Args))
    copy(dockerArgs, backendConfig.Docker.Args)

    // Add environment variables
    for key, value := range backendConfig.Docker.Environment {
        dockerArgs = append(dockerArgs, "-e", fmt.Sprintf("%s=%s", key, value))
    }

    // Add image name
    dockerArgs = append(dockerArgs, backendConfig.Docker.Image)

    // Add backend args and instance args
    dockerArgs = append(dockerArgs, backendConfig.Args...)
    dockerArgs = append(dockerArgs, instanceArgs...)

    return "docker", dockerArgs, nil
}

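A usage sketch of `BuildDockerCommand` (the settings mirror the defaults introduced elsewhere in this PR; the instance flags are hypothetical). Note that environment variables are injected as `-e KEY=value` pairs before the image name:

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends"
	"llamactl/pkg/config"
)

func main() {
	settings := &config.BackendSettings{
		Command: "llama-server",
		Args:    []string{},
		Docker: &config.DockerSettings{
			Enabled:     true,
			Image:       "ghcr.io/ggml-org/llama.cpp:server",
			Args:        []string{"run", "--rm", "--network", "host", "--gpus", "all"},
			Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "0"},
		},
	}

	// Instance flags are hypothetical; in llamactl they come from the instance options.
	cmd, args, err := backends.BuildDockerCommand(settings, []string{"--model", "/models/example.gguf", "--port", "8081"})
	if err != nil {
		panic(err)
	}
	// cmd == "docker"; args start with the docker args, then -e CUDA_VISIBLE_DEVICES=0,
	// then the image, then the backend args (empty here) and the instance flags.
	fmt.Println(cmd, args)
}
```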
@@ -7,6 +7,28 @@ import (
    "strconv"
)

// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
// Used for both parsing (with underscores) and building (with dashes)
var multiValuedFlags = map[string]bool{
    // Parsing keys (with underscores)
    "override_tensor":       true,
    "override_kv":           true,
    "lora":                  true,
    "lora_scaled":           true,
    "control_vector":        true,
    "control_vector_scaled": true,
    "dry_sequence_breaker":  true,
    "logit_bias":            true,
    // Building keys (with dashes)
    "override-tensor":       true,
    "override-kv":           true,
    "lora-scaled":           true,
    "control-vector":        true,
    "control-vector-scaled": true,
    "dry-sequence-breaker":  true,
    "logit-bias":            true,
}

type LlamaServerOptions struct {
    // Common params
    VerbosePrompt bool `json:"verbose_prompt,omitempty"`
@@ -316,17 +338,13 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
    // Llama uses multiple flags for arrays by default (not comma-separated)
    multipleFlags := map[string]bool{
        "override-tensor":       true,
        "override-kv":           true,
        "lora":                  true,
        "lora-scaled":           true,
        "control-vector":        true,
        "control-vector-scaled": true,
        "dry-sequence-breaker":  true,
        "logit-bias":            true,
    }
    return backends.BuildCommandArgs(o, multipleFlags)
    // Use package-level multiValuedFlags variable
    return backends.BuildCommandArgs(o, multiValuedFlags)
}

func (o *LlamaServerOptions) BuildDockerArgs() []string {
    // For llama, Docker args are the same as normal args
    return o.BuildCommandArgs()
}

// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
@@ -338,16 +356,7 @@ func (o *LlamaServerOptions) BuildCommandArgs() []string {
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
    executableNames := []string{"llama-server"}
    var subcommandNames []string // Llama has no subcommands
    multiValuedFlags := map[string]bool{
        "override_tensor":       true,
        "override_kv":           true,
        "lora":                  true,
        "lora_scaled":           true,
        "control_vector":        true,
        "control_vector_scaled": true,
        "dry_sequence_breaker":  true,
        "logit_bias":            true,
    }
    // Use package-level multiValuedFlags variable

    var llamaOptions LlamaServerOptions
    if err := backends.ParseCommand(command, executableNames, subcommandNames, multiValuedFlags, &llamaOptions); err != nil {

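The package-level `multiValuedFlags` map above is what makes `BuildCommandArgs` emit a repeated flag (one `--flag value` pair per element) instead of a comma-joined value. A standalone sketch of that expansion (the helper and adapter paths below are illustrative, not part of this PR):

```go
package main

import "fmt"

// expandRepeated mirrors the repeated-flag behavior applied to keys listed in
// multiValuedFlags: one "--flag value" pair per element.
func expandRepeated(flag string, values []string) []string {
	var args []string
	for _, v := range values {
		args = append(args, "--"+flag, v)
	}
	return args
}

func main() {
	// Hypothetical LoRA adapters; llama-server expects the flag repeated, not comma-joined.
	fmt.Println(expandRepeated("lora", []string{"adapter-a.gguf", "adapter-b.gguf"}))
	// Output: [--lora adapter-a.gguf --lora adapter-b.gguf]
}
```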
@@ -4,6 +4,15 @@ import (
    "llamactl/pkg/backends"
)

// multiValuedFlags defines flags that should be repeated for each value rather than comma-separated
var multiValuedFlags = map[string]bool{
    "api-key":         true,
    "allowed-origins": true,
    "allowed-methods": true,
    "allowed-headers": true,
    "middleware":      true,
}

type VllmServerOptions struct {
    // Basic connection options (auto-assigned by llamactl)
    Host string `json:"host,omitempty"`
@@ -131,30 +140,32 @@ type VllmServerOptions struct {
}

// BuildCommandArgs converts VllmServerOptions to command line arguments
// Note: This does NOT include the "serve" subcommand, that's handled at the instance level
// For vLLM, the model parameter is passed as a positional argument, not a --model flag
// For vLLM native, model is a positional argument after "serve"
func (o *VllmServerOptions) BuildCommandArgs() []string {
    var args []string

    // Add model as positional argument if specified
    // Add model as positional argument if specified (for native execution)
    if o.Model != "" {
        args = append(args, o.Model)
    }

    // Create a copy of the options without the Model field to avoid including it as --model flag
    // Create a copy without Model field to avoid --model flag
    optionsCopy := *o
    optionsCopy.Model = "" // Clear model field so it won't be included as a flag
    optionsCopy.Model = ""

    multipleFlags := map[string]bool{
        "api-key":         true,
        "allowed-origins": true,
        "allowed-methods": true,
        "allowed-headers": true,
        "middleware":      true,
    }
    // Use package-level multipleFlags variable

    // Build the rest of the arguments as flags
    flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
    flagArgs := backends.BuildCommandArgs(&optionsCopy, multiValuedFlags)
    args = append(args, flagArgs...)

    return args
}

func (o *VllmServerOptions) BuildDockerArgs() []string {
    var args []string

    // Use package-level multipleFlags variable
    flagArgs := backends.BuildCommandArgs(o, multiValuedFlags)
    args = append(args, flagArgs...)

    return args

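For vLLM, the model is emitted as a positional argument while the other fields become flags, and the `serve` subcommand is added at the instance level. A rough usage sketch (the import path, model name, and exact flag set are assumptions, not confirmed by this diff):

```go
package main

import (
	"fmt"

	"llamactl/pkg/backends/vllm" // assumed package path for VllmServerOptions
)

func main() {
	opts := vllm.VllmServerOptions{
		Model: "Qwen/Qwen2.5-1.5B-Instruct", // hypothetical model
		Host:  "127.0.0.1",
	}

	// Roughly: ["Qwen/Qwen2.5-1.5B-Instruct", "--host", "127.0.0.1", ...]
	fmt.Println(opts.BuildCommandArgs())

	// BuildDockerArgs keeps the same flag expansion but is fed into the docker
	// invocation assembled from the backend's DockerSettings.
	fmt.Println(opts.BuildDockerArgs())
}
```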
@@ -10,16 +10,26 @@ import (
    "gopkg.in/yaml.v3"
)

// BackendSettings contains structured backend configuration
type BackendSettings struct {
    Command string          `yaml:"command"`
    Args    []string        `yaml:"args"`
    Docker  *DockerSettings `yaml:"docker,omitempty"`
}

// DockerSettings contains Docker-specific configuration
type DockerSettings struct {
    Enabled     bool              `yaml:"enabled"`
    Image       string            `yaml:"image"`
    Args        []string          `yaml:"args"`
    Environment map[string]string `yaml:"environment,omitempty"`
}

// BackendConfig contains backend executable configurations
type BackendConfig struct {
    // Path to llama-server executable (llama.cpp backend)
    LlamaExecutable string `yaml:"llama_executable"`

    // Path to mlx_lm executable (MLX-LM backend)
    MLXLMExecutable string `yaml:"mlx_lm_executable"`

    // Path to vllm executable (vLLM backend)
    VllmExecutable string `yaml:"vllm_executable"`
    LlamaCpp BackendSettings `yaml:"llama-cpp"`
    VLLM     BackendSettings `yaml:"vllm"`
    MLX      BackendSettings `yaml:"mlx"`
}

// AppConfig represents the configuration for llamactl

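Since the new types carry plain `yaml` tags, a `backends:` block from the config file maps onto them directly. A minimal sketch (the import path is assumed from this repo's layout; error handling reduced to a panic):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"

	"llamactl/pkg/config"
)

func main() {
	raw := []byte(`
llama-cpp:
  command: "llama-server"
  docker:
    enabled: true
    image: "ghcr.io/ggml-org/llama.cpp:server"
    args: ["run", "--rm", "--network", "host", "--gpus", "all"]
`)

	var bc config.BackendConfig
	if err := yaml.Unmarshal(raw, &bc); err != nil {
		panic(err)
	}
	fmt.Println(bc.LlamaCpp.Command, bc.LlamaCpp.Docker.Enabled) // llama-server true
}
```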
@@ -123,9 +133,36 @@ func LoadConfig(configPath string) (AppConfig, error) {
            EnableSwagger: false,
        },
        Backends: BackendConfig{
            LlamaExecutable: "llama-server",
            MLXLMExecutable: "mlx_lm.server",
            VllmExecutable:  "vllm",
            LlamaCpp: BackendSettings{
                Command: "llama-server",
                Args:    []string{},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "ghcr.io/ggml-org/llama.cpp:server",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all",
                        "-v", filepath.Join(getDefaultDataDirectory(), "llama.cpp") + ":/root/.cache/llama.cpp"},
                    Environment: map[string]string{},
                },
            },
            VLLM: BackendSettings{
                Command: "vllm",
                Args:    []string{"serve"},
                Docker: &DockerSettings{
                    Enabled: false,
                    Image:   "vllm/vllm-openai:latest",
                    Args: []string{
                        "run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g",
                        "-v", filepath.Join(getDefaultDataDirectory(), "huggingface") + ":/root/.cache/huggingface",
                    },
                    Environment: map[string]string{},
                },
            },
            MLX: BackendSettings{
                Command: "mlx_lm.server",
                Args:    []string{},
                // No Docker section for MLX - not supported
            },
        },
        Instances: InstancesConfig{
            PortRange: [2]int{8000, 9000},

@@ -244,15 +281,96 @@ func loadEnvVars(cfg *AppConfig) {
        }
    }
    // Backend config
    if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
        cfg.Backends.LlamaExecutable = llamaExec
    // LlamaCpp backend
    if llamaCmd := os.Getenv("LLAMACTL_LLAMACPP_COMMAND"); llamaCmd != "" {
        cfg.Backends.LlamaCpp.Command = llamaCmd
    }
    if mlxLMExec := os.Getenv("LLAMACTL_MLX_LM_EXECUTABLE"); mlxLMExec != "" {
        cfg.Backends.MLXLMExecutable = mlxLMExec
    if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
        cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
    }
    if vllmExec := os.Getenv("LLAMACTL_VLLM_EXECUTABLE"); vllmExec != "" {
        cfg.Backends.VllmExecutable = vllmExec
    if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
        if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
            if cfg.Backends.LlamaCpp.Docker == nil {
                cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
            }
            cfg.Backends.LlamaCpp.Docker.Enabled = b
        }
    }
    if llamaDockerImage := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_IMAGE"); llamaDockerImage != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        cfg.Backends.LlamaCpp.Docker.Image = llamaDockerImage
    }
    if llamaDockerArgs := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ARGS"); llamaDockerArgs != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        cfg.Backends.LlamaCpp.Docker.Args = strings.Split(llamaDockerArgs, " ")
    }
    if llamaDockerEnv := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENV"); llamaDockerEnv != "" {
        if cfg.Backends.LlamaCpp.Docker == nil {
            cfg.Backends.LlamaCpp.Docker = &DockerSettings{}
        }
        if cfg.Backends.LlamaCpp.Docker.Environment == nil {
            cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
        }
        // Parse env vars in format "KEY1=value1,KEY2=value2"
        for _, envPair := range strings.Split(llamaDockerEnv, ",") {
            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
                cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
            }
        }
    }

    // vLLM backend
    if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
        cfg.Backends.VLLM.Command = vllmCmd
    }
    if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
        if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
            if cfg.Backends.VLLM.Docker == nil {
                cfg.Backends.VLLM.Docker = &DockerSettings{}
            }
            cfg.Backends.VLLM.Docker.Enabled = b
        }
    }
    if vllmDockerImage := os.Getenv("LLAMACTL_VLLM_DOCKER_IMAGE"); vllmDockerImage != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        cfg.Backends.VLLM.Docker.Image = vllmDockerImage
    }
    if vllmDockerArgs := os.Getenv("LLAMACTL_VLLM_DOCKER_ARGS"); vllmDockerArgs != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        cfg.Backends.VLLM.Docker.Args = strings.Split(vllmDockerArgs, " ")
    }
    if vllmDockerEnv := os.Getenv("LLAMACTL_VLLM_DOCKER_ENV"); vllmDockerEnv != "" {
        if cfg.Backends.VLLM.Docker == nil {
            cfg.Backends.VLLM.Docker = &DockerSettings{}
        }
        if cfg.Backends.VLLM.Docker.Environment == nil {
            cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
        }
        // Parse env vars in format "KEY1=value1,KEY2=value2"
        for _, envPair := range strings.Split(vllmDockerEnv, ",") {
            if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
                cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
            }
        }
    }

    // MLX backend
    if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
        cfg.Backends.MLX.Command = mlxCmd
    }
    if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
        cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
    }

    // Instance defaults
    if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
        if b, err := strconv.ParseBool(autoRestart); err == nil {
            cfg.Instances.DefaultAutoRestart = b

@@ -386,3 +504,17 @@ func getDefaultConfigLocations() []string {

    return locations
}

// GetBackendSettings resolves backend settings
func (bc *BackendConfig) GetBackendSettings(backendType string) BackendSettings {
    switch backendType {
    case "llama-cpp":
        return bc.LlamaCpp
    case "vllm":
        return bc.VLLM
    case "mlx":
        return bc.MLX
    default:
        return BackendSettings{}
    }
}

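Callers resolve a backend's settings by its type string and then branch on whether Docker is enabled. A short usage sketch (the helper below is illustrative, not part of this PR):

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
)

// resolveCommand is an illustrative helper showing how GetBackendSettings
// feeds the docker-or-native decision.
func resolveCommand(cfg *config.AppConfig) (string, []string) {
	settings := cfg.Backends.GetBackendSettings("llama-cpp")
	if settings.Docker != nil && settings.Docker.Enabled {
		// Docker path: docker args then image (instance flags would follow).
		return "docker", append(append([]string{}, settings.Docker.Args...), settings.Docker.Image)
	}
	// Native path: configured command plus its default args.
	return settings.Command, settings.Args
}

func main() {
	cfg, err := config.LoadConfig("nonexistent-file.yaml") // falls back to defaults, as in the tests
	if err != nil {
		panic(err)
	}
	cmd, args := resolveCommand(&cfg)
	fmt.Println(cmd, args)
}
```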
@@ -117,7 +117,6 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
        "LLAMACTL_INSTANCE_PORT_RANGE":   "5000-6000",
        "LLAMACTL_LOGS_DIR":              "/env/logs",
        "LLAMACTL_MAX_INSTANCES":         "20",
        "LLAMACTL_LLAMA_EXECUTABLE":      "/env/llama-server",
        "LLAMACTL_DEFAULT_AUTO_RESTART":  "false",
        "LLAMACTL_DEFAULT_MAX_RESTARTS":  "7",
        "LLAMACTL_DEFAULT_RESTART_DELAY": "15",
@@ -150,8 +149,8 @@ func TestLoadConfig_EnvironmentOverrides(t *testing.T) {
    if cfg.Instances.MaxInstances != 20 {
        t.Errorf("Expected max instances 20, got %d", cfg.Instances.MaxInstances)
    }
    if cfg.Backends.LlamaExecutable != "/env/llama-server" {
        t.Errorf("Expected executable '/env/llama-server', got %q", cfg.Backends.LlamaExecutable)
    if cfg.Backends.LlamaCpp.Command != "llama-server" {
        t.Errorf("Expected default llama command 'llama-server', got %q", cfg.Backends.LlamaCpp.Command)
    }
    if cfg.Instances.DefaultAutoRestart {
        t.Error("Expected auto restart to be false")
@@ -349,3 +348,165 @@ server:
        t.Errorf("Expected default max instances -1, got %d", cfg.Instances.MaxInstances)
    }
}

func TestGetBackendSettings_NewStructuredConfig(t *testing.T) {
    bc := &config.BackendConfig{
        LlamaCpp: config.BackendSettings{
            Command: "custom-llama",
            Args:    []string{"--verbose"},
            Docker: &config.DockerSettings{
                Enabled:     true,
                Image:       "custom-llama:latest",
                Args:        []string{"--gpus", "all"},
                Environment: map[string]string{"CUDA_VISIBLE_DEVICES": "1"},
            },
        },
        VLLM: config.BackendSettings{
            Command: "custom-vllm",
            Args:    []string{"serve", "--debug"},
        },
        MLX: config.BackendSettings{
            Command: "custom-mlx",
            Args:    []string{},
        },
    }

    // Test llama-cpp with Docker
    settings := bc.GetBackendSettings("llama-cpp")
    if settings.Command != "custom-llama" {
        t.Errorf("Expected command 'custom-llama', got %q", settings.Command)
    }
    if len(settings.Args) != 1 || settings.Args[0] != "--verbose" {
        t.Errorf("Expected args ['--verbose'], got %v", settings.Args)
    }
    if settings.Docker == nil || !settings.Docker.Enabled {
        t.Error("Expected Docker to be enabled")
    }
    if settings.Docker.Image != "custom-llama:latest" {
        t.Errorf("Expected Docker image 'custom-llama:latest', got %q", settings.Docker.Image)
    }

    // Test vLLM without Docker
    settings = bc.GetBackendSettings("vllm")
    if settings.Command != "custom-vllm" {
        t.Errorf("Expected command 'custom-vllm', got %q", settings.Command)
    }
    if len(settings.Args) != 2 || settings.Args[0] != "serve" || settings.Args[1] != "--debug" {
        t.Errorf("Expected args ['serve', '--debug'], got %v", settings.Args)
    }
    if settings.Docker != nil && settings.Docker.Enabled {
        t.Error("Expected Docker to be disabled or nil")
    }

    // Test MLX
    settings = bc.GetBackendSettings("mlx")
    if settings.Command != "custom-mlx" {
        t.Errorf("Expected command 'custom-mlx', got %q", settings.Command)
    }
}

func TestGetBackendSettings_EmptyConfig(t *testing.T) {
    bc := &config.BackendConfig{}

    // Test empty llama-cpp
    settings := bc.GetBackendSettings("llama-cpp")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }

    // Test empty vLLM
    settings = bc.GetBackendSettings("vllm")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }

    // Test empty MLX
    settings = bc.GetBackendSettings("mlx")
    if settings.Command != "" {
        t.Errorf("Expected empty command, got %q", settings.Command)
    }
}

func TestLoadConfig_BackendEnvironmentVariables(t *testing.T) {
    // Test that backend environment variables work correctly
    envVars := map[string]string{
        "LLAMACTL_LLAMACPP_COMMAND":        "env-llama",
        "LLAMACTL_LLAMACPP_ARGS":           "--verbose --threads 4",
        "LLAMACTL_LLAMACPP_DOCKER_ENABLED": "true",
        "LLAMACTL_LLAMACPP_DOCKER_IMAGE":   "env-llama:latest",
        "LLAMACTL_LLAMACPP_DOCKER_ARGS":    "run --rm --network host --gpus all",
        "LLAMACTL_LLAMACPP_DOCKER_ENV":     "CUDA_VISIBLE_DEVICES=0,OMP_NUM_THREADS=4",
        "LLAMACTL_VLLM_COMMAND":            "env-vllm",
        "LLAMACTL_VLLM_DOCKER_ENABLED":     "false",
        "LLAMACTL_VLLM_DOCKER_IMAGE":       "env-vllm:latest",
        "LLAMACTL_VLLM_DOCKER_ENV":         "PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512,CUDA_VISIBLE_DEVICES=1",
        "LLAMACTL_MLX_COMMAND":             "env-mlx",
    }

    // Set env vars and ensure cleanup
    for key, value := range envVars {
        os.Setenv(key, value)
        defer os.Unsetenv(key)
    }

    cfg, err := config.LoadConfig("nonexistent-file.yaml")
    if err != nil {
        t.Fatalf("LoadConfig failed: %v", err)
    }

    // Verify llama-cpp environment overrides
    if cfg.Backends.LlamaCpp.Command != "env-llama" {
        t.Errorf("Expected llama command 'env-llama', got %q", cfg.Backends.LlamaCpp.Command)
    }
    expectedArgs := []string{"--verbose", "--threads", "4"}
    if len(cfg.Backends.LlamaCpp.Args) != len(expectedArgs) {
        t.Errorf("Expected llama args %v, got %v", expectedArgs, cfg.Backends.LlamaCpp.Args)
    }
    if !cfg.Backends.LlamaCpp.Docker.Enabled {
        t.Error("Expected llama Docker to be enabled")
    }
    if cfg.Backends.LlamaCpp.Docker.Image != "env-llama:latest" {
        t.Errorf("Expected llama Docker image 'env-llama:latest', got %q", cfg.Backends.LlamaCpp.Docker.Image)
    }
    expectedDockerArgs := []string{"run", "--rm", "--network", "host", "--gpus", "all"}
    if len(cfg.Backends.LlamaCpp.Docker.Args) != len(expectedDockerArgs) {
        t.Errorf("Expected llama Docker args %v, got %v", expectedDockerArgs, cfg.Backends.LlamaCpp.Docker.Args)
    }
    if cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"] != "0" {
        t.Errorf("Expected CUDA_VISIBLE_DEVICES=0, got %q", cfg.Backends.LlamaCpp.Docker.Environment["CUDA_VISIBLE_DEVICES"])
    }
    if cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"] != "4" {
        t.Errorf("Expected OMP_NUM_THREADS=4, got %q", cfg.Backends.LlamaCpp.Docker.Environment["OMP_NUM_THREADS"])
    }

    // Verify vLLM environment overrides
    if cfg.Backends.VLLM.Command != "env-vllm" {
        t.Errorf("Expected vLLM command 'env-vllm', got %q", cfg.Backends.VLLM.Command)
    }
    if cfg.Backends.VLLM.Docker.Enabled {
        t.Error("Expected vLLM Docker to be disabled")
    }
    if cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"] != "max_split_size_mb:512" {
        t.Errorf("Expected PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512, got %q", cfg.Backends.VLLM.Docker.Environment["PYTORCH_CUDA_ALLOC_CONF"])
    }

    // Verify MLX environment overrides
    if cfg.Backends.MLX.Command != "env-mlx" {
        t.Errorf("Expected MLX command 'env-mlx', got %q", cfg.Backends.MLX.Command)
    }
}

func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
    bc := &config.BackendConfig{
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
    }

    // Test invalid backend type returns empty settings
    settings := bc.GetBackendSettings("invalid-backend")
    if settings.Command != "" {
        t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
    }
}

@@ -221,14 +221,33 @@ func (i *Process) MarshalJSON() ([]byte, error) {
    i.mu.RLock()
    defer i.mu.RUnlock()

    // Determine if docker is enabled for this instance's backend
    var dockerEnabled bool
    if i.options != nil {
        switch i.options.BackendType {
        case backends.BackendTypeLlamaCpp:
            if i.globalBackendSettings != nil && i.globalBackendSettings.LlamaCpp.Docker != nil && i.globalBackendSettings.LlamaCpp.Docker.Enabled {
                dockerEnabled = true
            }
        case backends.BackendTypeVllm:
            if i.globalBackendSettings != nil && i.globalBackendSettings.VLLM.Docker != nil && i.globalBackendSettings.VLLM.Docker.Enabled {
                dockerEnabled = true
            }
        case backends.BackendTypeMlxLm:
            // MLX does not support docker currently
        }
    }

    // Use anonymous struct to avoid recursion
    type Alias Process
    return json.Marshal(&struct {
        *Alias
        Options       *CreateInstanceOptions `json:"options,omitempty"`
        DockerEnabled bool                   `json:"docker_enabled,omitempty"`
    }{
        Alias:         (*Alias)(i),
        Options:       i.options,
        DockerEnabled: dockerEnabled,
    })
}

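The net effect for API consumers is an extra `docker_enabled` field on the serialized instance, omitted when false. A sketch of the expected shape (the package path and field values are assumptions for illustration):

```go
package main

import (
	"encoding/json"
	"fmt"

	"llamactl/pkg/instance" // assumed package path for Process
)

// printInstance is an illustrative helper: json.Marshal on a *Process goes
// through the custom MarshalJSON above. With a llama-cpp backend whose Docker
// settings are enabled, the output is roughly:
//   {"name":"llama-main","status":"running","options":{...},"docker_enabled":true}
func printInstance(inst *instance.Process) error {
	data, err := json.Marshal(inst)
	if err != nil {
		return err
	}
	fmt.Println(string(data))
	return nil
}

func main() {}
```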
@@ -12,8 +12,18 @@ import (

func TestNewInstance(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -66,8 +76,18 @@ func TestNewInstance(t *testing.T) {

func TestNewInstance_WithRestartOptions(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -112,8 +132,18 @@ func TestNewInstance_WithRestartOptions(t *testing.T) {

func TestSetOptions(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -163,8 +193,18 @@ func TestSetOptions(t *testing.T) {

func TestGetProxy(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -205,8 +245,18 @@ func TestGetProxy(t *testing.T) {

func TestMarshalJSON(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -364,8 +414,18 @@ func TestCreateInstanceOptionsValidation(t *testing.T) {
    }

    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
            Args:    []string{},
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
            Args:    []string{},
        },
        VLLM: config.BackendSettings{
            Command: "vllm",
            Args:    []string{"serve"},
        },
    }

    globalSettings := &config.InstancesConfig{

@@ -11,6 +11,7 @@ import (
    "time"

    "llamactl/pkg/backends"
    "llamactl/pkg/config"
)

// Start starts the llama server instance and returns an error if it fails.
@@ -41,24 +42,14 @@ func (i *Process) Start() error {
        return fmt.Errorf("failed to create log files: %w", err)
    }

    args := i.options.BuildCommandArgs()
    i.ctx, i.cancel = context.WithCancel(context.Background())

    var executable string

    // Get executable from global configuration
    switch i.options.BackendType {
    case backends.BackendTypeLlamaCpp:
        executable = i.globalBackendSettings.LlamaExecutable
    case backends.BackendTypeMlxLm:
        executable = i.globalBackendSettings.MLXLMExecutable
    case backends.BackendTypeVllm:
        executable = i.globalBackendSettings.VllmExecutable
    default:
        return fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
    // Build command using backend-specific methods
    cmd, cmdErr := i.buildCommand()
    if cmdErr != nil {
        return fmt.Errorf("failed to build command: %w", cmdErr)
    }

    i.cmd = exec.CommandContext(i.ctx, executable, args...)
    i.ctx, i.cancel = context.WithCancel(context.Background())
    i.cmd = cmd

    if runtime.GOOS != "windows" {
        setProcAttrs(i.cmd)
@@ -372,3 +363,39 @@ func (i *Process) validateRestartConditions() (shouldRestart bool, maxRestarts i

    return true, maxRestarts, restartDelay
}

// buildCommand builds the command to execute using backend-specific logic
func (i *Process) buildCommand() (*exec.Cmd, error) {
    // Get backend configuration
    backendConfig, err := i.getBackendConfig()
    if err != nil {
        return nil, err
    }

    // Get the command to execute
    cmd := i.options.GetCommand(backendConfig)

    // Build command arguments
    args := i.options.BuildCommandArgs(backendConfig)

    return exec.Command(cmd, args...), nil
}

// getBackendConfig resolves the backend configuration for the current instance
func (i *Process) getBackendConfig() (*config.BackendSettings, error) {
    var backendTypeStr string

    switch i.options.BackendType {
    case backends.BackendTypeLlamaCpp:
        backendTypeStr = "llama-cpp"
    case backends.BackendTypeMlxLm:
        backendTypeStr = "mlx"
    case backends.BackendTypeVllm:
        backendTypeStr = "vllm"
    default:
        return nil, fmt.Errorf("unsupported backend type: %s", i.options.BackendType)
    }

    settings := i.globalBackendSettings.GetBackendSettings(backendTypeStr)
    return &settings, nil
}

@@ -188,24 +188,55 @@ func (c *CreateInstanceOptions) ValidateAndApplyDefaults(name string, globalSett
    }
}

func (c *CreateInstanceOptions) GetCommand(backendConfig *config.BackendSettings) string {

    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
        return "docker"
    }

    return backendConfig.Command
}

// BuildCommandArgs builds command line arguments for the backend
func (c *CreateInstanceOptions) BuildCommandArgs() []string {
func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {

    var args []string

    if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
        // For Docker, start with Docker args
        args = append(args, backendConfig.Docker.Args...)
        args = append(args, backendConfig.Docker.Image)

        switch c.BackendType {
        case backends.BackendTypeLlamaCpp:
            if c.LlamaServerOptions != nil {
                return c.LlamaServerOptions.BuildCommandArgs()
            }
        case backends.BackendTypeMlxLm:
            if c.MlxServerOptions != nil {
                return c.MlxServerOptions.BuildCommandArgs()
                args = append(args, c.LlamaServerOptions.BuildDockerArgs()...)
            }
        case backends.BackendTypeVllm:
            if c.VllmServerOptions != nil {
                args = append(args, c.VllmServerOptions.BuildDockerArgs()...)
            }
        }

    } else {
        // For native execution, start with backend args
        args = append(args, backendConfig.Args...)

        switch c.BackendType {
        case backends.BackendTypeLlamaCpp:
            if c.LlamaServerOptions != nil {
                args = append(args, c.LlamaServerOptions.BuildCommandArgs()...)
            }
        case backends.BackendTypeMlxLm:
            if c.MlxServerOptions != nil {
                args = append(args, c.MlxServerOptions.BuildCommandArgs()...)
            }
        case backends.BackendTypeVllm:
            if c.VllmServerOptions != nil {
                // Prepend "serve" as first argument
                args := []string{"serve"}
                args = append(args, c.VllmServerOptions.BuildCommandArgs()...)
            }
        }
    }

    return args
}
    }
    return []string{}
}

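Together, `GetCommand` and `BuildCommandArgs` give the process layer a single pair of calls that hides the Docker/native split (MLX always takes the native path). A usage sketch comparing the two paths (the `instance` package path is assumed; the options literal is deliberately empty, so no instance flags appear):

```go
package main

import (
	"fmt"

	"llamactl/pkg/config"
	"llamactl/pkg/instance" // assumed package path for CreateInstanceOptions
)

func main() {
	// Hypothetical options for a llama-cpp instance; the backend-specific
	// option structs are omitted for brevity.
	opts := &instance.CreateInstanceOptions{}

	native := &config.BackendSettings{Command: "llama-server", Args: []string{}}
	dockerized := &config.BackendSettings{
		Command: "llama-server",
		Docker: &config.DockerSettings{
			Enabled: true,
			Image:   "ghcr.io/ggml-org/llama.cpp:server",
			Args:    []string{"run", "--rm", "--network", "host", "--gpus", "all"},
		},
	}

	fmt.Println(opts.GetCommand(native))           // "llama-server"
	fmt.Println(opts.GetCommand(dockerized))       // "docker"
	fmt.Println(opts.BuildCommandArgs(native))     // backend args (+ instance flags when options are set)
	fmt.Println(opts.BuildCommandArgs(dockerized)) // docker args + image (+ instance flags when options are set)
}
```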
@@ -34,8 +34,12 @@ func (m *MockTimeProvider) SetTime(t time.Time) {

func TestUpdateLastRequestTime(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -60,8 +64,12 @@ func TestUpdateLastRequestTime(t *testing.T) {

func TestShouldTimeout_NotRunning(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -90,8 +98,12 @@ func TestShouldTimeout_NotRunning(t *testing.T) {

func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -133,8 +145,12 @@ func TestShouldTimeout_NoTimeoutConfigured(t *testing.T) {

func TestShouldTimeout_WithinTimeLimit(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -167,8 +183,12 @@ func TestShouldTimeout_WithinTimeLimit(t *testing.T) {

func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{
@@ -207,8 +227,12 @@ func TestShouldTimeout_ExceedsTimeLimit(t *testing.T) {

func TestTimeoutConfiguration_Validation(t *testing.T) {
    backendConfig := &config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    globalSettings := &config.InstancesConfig{

@@ -16,8 +16,12 @@ import (

func TestNewInstanceManager(t *testing.T) {
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{
@@ -49,8 +53,12 @@ func TestPersistence(t *testing.T) {
    tempDir := t.TempDir()

    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{
@@ -182,8 +190,12 @@ func TestShutdown(t *testing.T) {
// Helper function to create a test manager with standard config
func createTestManager() manager.InstanceManager {
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }

    cfg := config.InstancesConfig{

@@ -63,8 +63,12 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {

    // Test max instances limit
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{
            Command: "llama-server",
        },
        MLX: config.BackendSettings{
            Command: "mlx_lm.server",
        },
    }
    cfg := config.InstancesConfig{
        PortRange: [2]int{8000, 9000},

@@ -34,7 +34,7 @@ func (im *instanceManager) EvictLRUInstance() error {
    im.mu.RLock()
    var lruInstance *instance.Process

    for name, _ := range im.runningInstances {
    for name := range im.runningInstances {
        inst := im.instances[name]
        if inst == nil {
            continue

@@ -14,8 +14,8 @@ import (
func TestTimeoutFunctionality(t *testing.T) {
    // Test timeout checker initialization
    backendConfig := config.BackendConfig{
        LlamaExecutable: "llama-server",
        MLXLMExecutable: "mlx_lm.server",
        LlamaCpp: config.BackendSettings{Command: "llama-server"},
        MLX:      config.BackendSettings{Command: "mlx_lm.server"},
    }
    cfg := config.InstancesConfig{
        PortRange: [2]int{8000, 9000},

@@ -1,13 +1,14 @@
import React from "react";
import { Badge } from "@/components/ui/badge";
import { BackendType, type BackendTypeValue } from "@/types/instance";
import { Server } from "lucide-react";
import { Server, Package } from "lucide-react";

interface BackendBadgeProps {
  backend?: BackendTypeValue;
  docker?: boolean;
}

const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
const BackendBadge: React.FC<BackendBadgeProps> = ({ backend, docker }) => {
  if (!backend) {
    return null;
  }
@@ -39,6 +40,7 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
  };

  return (
    <div className="flex items-center gap-1">
    <Badge
      variant="outline"
      className={`flex items-center gap-1.5 ${getColorClasses()}`}
@@ -46,6 +48,17 @@ const BackendBadge: React.FC<BackendBadgeProps> = ({ backend }) => {
      <Server className="h-3 w-3" />
      <span className="text-xs">{getText()}</span>
    </Badge>
    {docker && (
      <Badge
        variant="outline"
        className="flex items-center gap-1.5 bg-orange-100 text-orange-800 border-orange-200 dark:bg-orange-900 dark:text-orange-200 dark:border-orange-800"
        title="Docker enabled"
      >
        <Package className="h-3 w-3" />
        <span className="text-[10px] uppercase tracking-wide">Docker</span>
      </Badge>
    )}
    </div>
  );
};

@@ -66,7 +66,7 @@ function InstanceCard({

  {/* Badges row */}
  <div className="flex items-center gap-2 flex-wrap">
    <BackendBadge backend={instance.options?.backend_type} />
    <BackendBadge backend={instance.options?.backend_type} docker={instance.docker_enabled} />
    {running && <HealthBadge health={health} />}
  </div>
</div>

@@ -23,4 +23,5 @@ export interface Instance {
  name: string;
  status: InstanceStatus;
  options?: CreateInstanceOptions;
  docker_enabled?: boolean; // indicates backend is running via Docker
}