diff --git a/.gitignore b/.gitignore index 4075d71..d96fc8c 100644 --- a/.gitignore +++ b/.gitignore @@ -42,4 +42,7 @@ site/ llamactl.dev.yaml # Debug files -__debug* \ No newline at end of file +__debug* + +# Binary +llamactl-* \ No newline at end of file diff --git a/docs/docs.go b/docs/docs.go index f46ac36..8d6a8f1 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -256,6 +256,34 @@ const docTemplate = `{ } } }, + "/api/v1/config": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the current server configuration (sanitized)", + "tags": [ + "System" + ], + "summary": "Get server configuration", + "responses": { + "200": { + "description": "Sanitized configuration", + "schema": { + "$ref": "#/definitions/config.AppConfig" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v1/instances": { "get": { "security": [ @@ -1475,6 +1503,247 @@ const docTemplate = `{ } }, "definitions": { + "config.AppConfig": { + "type": "object", + "properties": { + "auth": { + "$ref": "#/definitions/config.AuthConfig" + }, + "backends": { + "$ref": "#/definitions/config.BackendConfig" + }, + "build_time": { + "type": "string" + }, + "commit_hash": { + "type": "string" + }, + "instances": { + "$ref": "#/definitions/config.InstancesConfig" + }, + "local_node": { + "type": "string" + }, + "nodes": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/config.NodeConfig" + } + }, + "server": { + "$ref": "#/definitions/config.ServerConfig" + }, + "version": { + "type": "string" + } + } + }, + "config.AuthConfig": { + "type": "object", + "properties": { + "inference_keys": { + "description": "List of keys for OpenAI compatible inference endpoints", + "type": "array", + "items": { + "type": "string" + } + }, + "management_keys": { + "description": "List of keys for management endpoints", + "type": "array", + "items": { + "type": "string" + } + }, + "require_inference_auth": { + "description": "Require authentication for OpenAI compatible inference endpoints", + "type": "boolean" + }, + "require_management_auth": { + "description": "Require authentication for management endpoints", + "type": "boolean" + } + } + }, + "config.BackendConfig": { + "type": "object", + "properties": { + "llama-cpp": { + "$ref": "#/definitions/config.BackendSettings" + }, + "mlx": { + "$ref": "#/definitions/config.BackendSettings" + }, + "vllm": { + "$ref": "#/definitions/config.BackendSettings" + } + } + }, + "config.BackendSettings": { + "type": "object", + "properties": { + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "command": { + "type": "string" + }, + "docker": { + "$ref": "#/definitions/config.DockerSettings" + }, + "environment": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "response_headers": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, + "config.DockerSettings": { + "type": "object", + "properties": { + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "type": "boolean" + }, + "environment": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "image": { + "type": "string" + } + } + }, + "config.InstancesConfig": { + "type": "object", + "properties": { + "auto_create_dirs": { + "description": "Automatically create the data directory if it doesn't exist", + "type": "boolean" + }, + "configs_dir": { + "description": "Instance config directory override", + "type": "string" + }, + "data_dir": { + "description": "Directory where all llamactl data will be stored (instances.json, logs, etc.)", + "type": "string" + }, + "default_auto_restart": { + "description": "Default auto-restart setting for new instances", + "type": "boolean" + }, + "default_max_restarts": { + "description": "Default max restarts for new instances", + "type": "integer" + }, + "default_on_demand_start": { + "description": "Default on-demand start setting for new instances", + "type": "boolean" + }, + "default_restart_delay": { + "description": "Default restart delay for new instances (in seconds)", + "type": "integer" + }, + "enable_lru_eviction": { + "description": "Enable LRU eviction for instance logs", + "type": "boolean" + }, + "logs_dir": { + "description": "Logs directory override", + "type": "string" + }, + "max_instances": { + "description": "Maximum number of instances that can be created", + "type": "integer" + }, + "max_running_instances": { + "description": "Maximum number of instances that can be running at the same time", + "type": "integer" + }, + "on_demand_start_timeout": { + "description": "How long to wait for an instance to start on demand (in seconds)", + "type": "integer" + }, + "port_range": { + "description": "Port range for instances (e.g., 8000,9000)", + "type": "array", + "items": { + "type": "integer" + } + }, + "timeout_check_interval": { + "description": "Interval for checking instance timeouts (in minutes)", + "type": "integer" + } + } + }, + "config.NodeConfig": { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "api_key": { + "type": "string" + } + } + }, + "config.ServerConfig": { + "type": "object", + "properties": { + "allowed_headers": { + "description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")", + "type": "array", + "items": { + "type": "string" + } + }, + "allowed_origins": { + "description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")", + "type": "array", + "items": { + "type": "string" + } + }, + "enable_swagger": { + "description": "Enable Swagger UI for API documentation", + "type": "boolean" + }, + "host": { + "description": "Server host to bind to", + "type": "string" + }, + "port": { + "description": "Server port to bind to", + "type": "integer" + }, + "response_headers": { + "description": "Response headers to send with responses", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, "instance.Instance": { "type": "object", "properties": { @@ -1494,6 +1763,13 @@ const docTemplate = `{ "description": "Auto restart", "type": "boolean" }, + "command_override": { + "type": "string" + }, + "docker_enabled": { + "description": "Execution context overrides", + "type": "boolean" + }, "environment": { "description": "Environment variables", "type": "object", diff --git a/docs/managing-instances.md b/docs/managing-instances.md index be5e768..7504905 100644 --- a/docs/managing-instances.md +++ b/docs/managing-instances.md @@ -42,33 +42,41 @@ Each instance is displayed as a card showing: ![Create Instance Screenshot](images/create_instance.png) 1. Click the **"Create Instance"** button on the dashboard -2. *Optional*: Click **"Import"** in the dialog header to load a previously exported configuration -2. Enter a unique **Name** for your instance (only required field) -3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown -4. **Choose Backend Type**: - - **llama.cpp**: For GGUF models using llama-server - - **MLX**: For MLX-optimized models (macOS only) +2. *Optional*: Click **"Import"** to load a previously exported configuration + +**Instance Settings:** + +3. Enter a unique **Instance Name** (required) +4. **Select Node**: Choose which node to deploy the instance to +5. Configure **Auto Restart** settings: + - Enable automatic restart on failure + - Set max restarts and delay between attempts +6. Configure basic instance options: + - **Idle Timeout**: Minutes before stopping idle instance + - **On Demand Start**: Start instance only when needed + +**Backend Configuration:** + +7. **Select Backend Type**: + - **Llama Server**: For GGUF models using llama-server + - **MLX LM**: For MLX-optimized models (macOS only) - **vLLM**: For distributed serving and high-throughput inference -5. Configure model source: - - **For llama.cpp**: GGUF model path or HuggingFace repo - - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) - - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`) -6. Configure optional instance management settings: - - **Auto Restart**: Automatically restart instance on failure - - **Max Restarts**: Maximum number of restart attempts - - **Restart Delay**: Delay in seconds between restart attempts - - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint - - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable) - - **Environment Variables**: Set custom environment variables for the instance process -7. Configure backend-specific options: - - **llama.cpp**: Threads, context size, GPU layers, port, etc. - - **MLX**: Temperature, top-p, adapter path, Python environment, etc. - - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc. +8. *Optional*: Click **"Parse Command"** to import settings from an existing backend command +9. Configure **Execution Context**: + - **Enable Docker**: Run backend in Docker container + - **Command Override**: Custom path to backend executable + - **Environment Variables**: Custom environment variables !!! tip "Auto-Assignment" Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. - -8. Click **"Create"** to save the instance + +10. Configure **Basic Backend Options** (varies by backend): + - **llama.cpp**: Model path, threads, context size, GPU layers, etc. + - **MLX**: Model identifier, temperature, max tokens, etc. + - **vLLM**: Model identifier, tensor parallel size, GPU memory utilization, etc. +11. *Optional*: Expand **Advanced Backend Options** for additional settings +12. *Optional*: Add **Extra Args** as key-value pairs for custom command-line arguments +13. Click **"Create"** to save the instance **Via API** @@ -83,11 +91,34 @@ curl -X POST http://localhost:8080/api/v1/instances/my-llama-instance \ "model": "/path/to/model.gguf", "threads": 8, "ctx_size": 4096, - "gpu_layers": 32 + "gpu_layers": 32, + "flash_attn": "on" }, + "auto_restart": true, + "max_restarts": 3, + "docker_enabled": false, + "command_override": "/opt/llama-server-dev", "nodes": ["main"] }' +# Create vLLM instance with environment variables +curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer " \ + -d '{ + "backend_type": "vllm", + "backend_options": { + "model": "microsoft/DialoGPT-medium", + "tensor_parallel_size": 2, + "gpu_memory_utilization": 0.9 + }, + "on_demand_start": true, + "environment": { + "CUDA_VISIBLE_DEVICES": "0,1" + }, + "nodes": ["worker1", "worker2"] + }' + # Create MLX instance (macOS only) curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \ -H "Content-Type: application/json" \ @@ -97,74 +128,10 @@ curl -X POST http://localhost:8080/api/v1/instances/my-mlx-instance \ "backend_options": { "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "temp": 0.7, - "top_p": 0.9, "max_tokens": 2048 }, - "auto_restart": true, - "max_restarts": 3, "nodes": ["main"] }' - -# Create vLLM instance -curl -X POST http://localhost:8080/api/v1/instances/my-vllm-instance \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "backend_type": "vllm", - "backend_options": { - "model": "microsoft/DialoGPT-medium", - "tensor_parallel_size": 2, - "gpu_memory_utilization": 0.9 - }, - "auto_restart": true, - "on_demand_start": true, - "environment": { - "CUDA_VISIBLE_DEVICES": "0,1", - "NCCL_DEBUG": "INFO", - "PYTHONPATH": "/custom/path" - }, - "nodes": ["main"] - }' - -# Create llama.cpp instance with HuggingFace model -curl -X POST http://localhost:8080/api/v1/instances/gemma-3-27b \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "hf_repo": "unsloth/gemma-3-27b-it-GGUF", - "hf_file": "gemma-3-27b-it-GGUF.gguf", - "gpu_layers": 32 - }, - "nodes": ["main"] - }' - -# Create instance on specific remote node -curl -X POST http://localhost:8080/api/v1/instances/remote-llama \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-7b.gguf", - "gpu_layers": 32 - }, - "nodes": ["worker1"] - }' - -# Create instance on multiple nodes for high availability -curl -X POST http://localhost:8080/api/v1/instances/multi-node-llama \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer " \ - -d '{ - "backend_type": "llama_cpp", - "backend_options": { - "model": "/models/llama-7b.gguf", - "gpu_layers": 32 - }, - "nodes": ["worker1", "worker2", "worker3"] - }' ``` ## Start Instance diff --git a/docs/swagger.json b/docs/swagger.json index 26f9662..f79a008 100644 --- a/docs/swagger.json +++ b/docs/swagger.json @@ -249,6 +249,34 @@ } } }, + "/api/v1/config": { + "get": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Returns the current server configuration (sanitized)", + "tags": [ + "System" + ], + "summary": "Get server configuration", + "responses": { + "200": { + "description": "Sanitized configuration", + "schema": { + "$ref": "#/definitions/config.AppConfig" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, "/api/v1/instances": { "get": { "security": [ @@ -1468,6 +1496,247 @@ } }, "definitions": { + "config.AppConfig": { + "type": "object", + "properties": { + "auth": { + "$ref": "#/definitions/config.AuthConfig" + }, + "backends": { + "$ref": "#/definitions/config.BackendConfig" + }, + "build_time": { + "type": "string" + }, + "commit_hash": { + "type": "string" + }, + "instances": { + "$ref": "#/definitions/config.InstancesConfig" + }, + "local_node": { + "type": "string" + }, + "nodes": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/config.NodeConfig" + } + }, + "server": { + "$ref": "#/definitions/config.ServerConfig" + }, + "version": { + "type": "string" + } + } + }, + "config.AuthConfig": { + "type": "object", + "properties": { + "inference_keys": { + "description": "List of keys for OpenAI compatible inference endpoints", + "type": "array", + "items": { + "type": "string" + } + }, + "management_keys": { + "description": "List of keys for management endpoints", + "type": "array", + "items": { + "type": "string" + } + }, + "require_inference_auth": { + "description": "Require authentication for OpenAI compatible inference endpoints", + "type": "boolean" + }, + "require_management_auth": { + "description": "Require authentication for management endpoints", + "type": "boolean" + } + } + }, + "config.BackendConfig": { + "type": "object", + "properties": { + "llama-cpp": { + "$ref": "#/definitions/config.BackendSettings" + }, + "mlx": { + "$ref": "#/definitions/config.BackendSettings" + }, + "vllm": { + "$ref": "#/definitions/config.BackendSettings" + } + } + }, + "config.BackendSettings": { + "type": "object", + "properties": { + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "command": { + "type": "string" + }, + "docker": { + "$ref": "#/definitions/config.DockerSettings" + }, + "environment": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "response_headers": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, + "config.DockerSettings": { + "type": "object", + "properties": { + "args": { + "type": "array", + "items": { + "type": "string" + } + }, + "enabled": { + "type": "boolean" + }, + "environment": { + "type": "object", + "additionalProperties": { + "type": "string" + } + }, + "image": { + "type": "string" + } + } + }, + "config.InstancesConfig": { + "type": "object", + "properties": { + "auto_create_dirs": { + "description": "Automatically create the data directory if it doesn't exist", + "type": "boolean" + }, + "configs_dir": { + "description": "Instance config directory override", + "type": "string" + }, + "data_dir": { + "description": "Directory where all llamactl data will be stored (instances.json, logs, etc.)", + "type": "string" + }, + "default_auto_restart": { + "description": "Default auto-restart setting for new instances", + "type": "boolean" + }, + "default_max_restarts": { + "description": "Default max restarts for new instances", + "type": "integer" + }, + "default_on_demand_start": { + "description": "Default on-demand start setting for new instances", + "type": "boolean" + }, + "default_restart_delay": { + "description": "Default restart delay for new instances (in seconds)", + "type": "integer" + }, + "enable_lru_eviction": { + "description": "Enable LRU eviction for instance logs", + "type": "boolean" + }, + "logs_dir": { + "description": "Logs directory override", + "type": "string" + }, + "max_instances": { + "description": "Maximum number of instances that can be created", + "type": "integer" + }, + "max_running_instances": { + "description": "Maximum number of instances that can be running at the same time", + "type": "integer" + }, + "on_demand_start_timeout": { + "description": "How long to wait for an instance to start on demand (in seconds)", + "type": "integer" + }, + "port_range": { + "description": "Port range for instances (e.g., 8000,9000)", + "type": "array", + "items": { + "type": "integer" + } + }, + "timeout_check_interval": { + "description": "Interval for checking instance timeouts (in minutes)", + "type": "integer" + } + } + }, + "config.NodeConfig": { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "api_key": { + "type": "string" + } + } + }, + "config.ServerConfig": { + "type": "object", + "properties": { + "allowed_headers": { + "description": "Allowed headers for CORS (e.g., \"Accept\", \"Authorization\", \"Content-Type\", \"X-CSRF-Token\")", + "type": "array", + "items": { + "type": "string" + } + }, + "allowed_origins": { + "description": "Allowed origins for CORS (e.g., \"http://localhost:3000\")", + "type": "array", + "items": { + "type": "string" + } + }, + "enable_swagger": { + "description": "Enable Swagger UI for API documentation", + "type": "boolean" + }, + "host": { + "description": "Server host to bind to", + "type": "string" + }, + "port": { + "description": "Server port to bind to", + "type": "integer" + }, + "response_headers": { + "description": "Response headers to send with responses", + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + }, "instance.Instance": { "type": "object", "properties": { @@ -1487,6 +1756,13 @@ "description": "Auto restart", "type": "boolean" }, + "command_override": { + "type": "string" + }, + "docker_enabled": { + "description": "Execution context overrides", + "type": "boolean" + }, "environment": { "description": "Environment variables", "type": "object", diff --git a/docs/swagger.yaml b/docs/swagger.yaml index 7506036..2888ce1 100644 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -1,5 +1,173 @@ basePath: /api/v1 definitions: + config.AppConfig: + properties: + auth: + $ref: '#/definitions/config.AuthConfig' + backends: + $ref: '#/definitions/config.BackendConfig' + build_time: + type: string + commit_hash: + type: string + instances: + $ref: '#/definitions/config.InstancesConfig' + local_node: + type: string + nodes: + additionalProperties: + $ref: '#/definitions/config.NodeConfig' + type: object + server: + $ref: '#/definitions/config.ServerConfig' + version: + type: string + type: object + config.AuthConfig: + properties: + inference_keys: + description: List of keys for OpenAI compatible inference endpoints + items: + type: string + type: array + management_keys: + description: List of keys for management endpoints + items: + type: string + type: array + require_inference_auth: + description: Require authentication for OpenAI compatible inference endpoints + type: boolean + require_management_auth: + description: Require authentication for management endpoints + type: boolean + type: object + config.BackendConfig: + properties: + llama-cpp: + $ref: '#/definitions/config.BackendSettings' + mlx: + $ref: '#/definitions/config.BackendSettings' + vllm: + $ref: '#/definitions/config.BackendSettings' + type: object + config.BackendSettings: + properties: + args: + items: + type: string + type: array + command: + type: string + docker: + $ref: '#/definitions/config.DockerSettings' + environment: + additionalProperties: + type: string + type: object + response_headers: + additionalProperties: + type: string + type: object + type: object + config.DockerSettings: + properties: + args: + items: + type: string + type: array + enabled: + type: boolean + environment: + additionalProperties: + type: string + type: object + image: + type: string + type: object + config.InstancesConfig: + properties: + auto_create_dirs: + description: Automatically create the data directory if it doesn't exist + type: boolean + configs_dir: + description: Instance config directory override + type: string + data_dir: + description: Directory where all llamactl data will be stored (instances.json, + logs, etc.) + type: string + default_auto_restart: + description: Default auto-restart setting for new instances + type: boolean + default_max_restarts: + description: Default max restarts for new instances + type: integer + default_on_demand_start: + description: Default on-demand start setting for new instances + type: boolean + default_restart_delay: + description: Default restart delay for new instances (in seconds) + type: integer + enable_lru_eviction: + description: Enable LRU eviction for instance logs + type: boolean + logs_dir: + description: Logs directory override + type: string + max_instances: + description: Maximum number of instances that can be created + type: integer + max_running_instances: + description: Maximum number of instances that can be running at the same time + type: integer + on_demand_start_timeout: + description: How long to wait for an instance to start on demand (in seconds) + type: integer + port_range: + description: Port range for instances (e.g., 8000,9000) + items: + type: integer + type: array + timeout_check_interval: + description: Interval for checking instance timeouts (in minutes) + type: integer + type: object + config.NodeConfig: + properties: + address: + type: string + api_key: + type: string + type: object + config.ServerConfig: + properties: + allowed_headers: + description: Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", + "X-CSRF-Token") + items: + type: string + type: array + allowed_origins: + description: Allowed origins for CORS (e.g., "http://localhost:3000") + items: + type: string + type: array + enable_swagger: + description: Enable Swagger UI for API documentation + type: boolean + host: + description: Server host to bind to + type: string + port: + description: Server port to bind to + type: integer + response_headers: + additionalProperties: + type: string + description: Response headers to send with responses + type: object + type: object instance.Instance: properties: created: @@ -13,6 +181,11 @@ definitions: auto_restart: description: Auto restart type: boolean + command_override: + type: string + docker_enabled: + description: Execution context overrides + type: boolean environment: additionalProperties: type: string @@ -216,6 +389,23 @@ paths: summary: Parse vllm serve command tags: - Backends + /api/v1/config: + get: + description: Returns the current server configuration (sanitized) + responses: + "200": + description: Sanitized configuration + schema: + $ref: '#/definitions/config.AppConfig' + "500": + description: Internal Server Error + schema: + type: string + security: + - ApiKeyAuth: [] + summary: Get server configuration + tags: + - System /api/v1/instances: get: description: Returns a list of all instances managed by the server diff --git a/pkg/backends/backend.go b/pkg/backends/backend.go index 022e778..ad138a5 100644 --- a/pkg/backends/backend.go +++ b/pkg/backends/backend.go @@ -79,14 +79,8 @@ func (o *Options) UnmarshalJSON(data []byte) error { } func (o *Options) MarshalJSON() ([]byte, error) { - type Alias Options - aux := &struct { - *Alias - }{ - Alias: (*Alias)(o), - } - // Get backend and marshal it + var backendOptions map[string]any backend := o.getBackend() if backend != nil { optionsData, err := json.Marshal(backend) @@ -94,13 +88,19 @@ func (o *Options) MarshalJSON() ([]byte, error) { return nil, fmt.Errorf("failed to marshal backend options: %w", err) } // Create a new map to avoid concurrent map writes - aux.BackendOptions = make(map[string]any) - if err := json.Unmarshal(optionsData, &aux.BackendOptions); err != nil { + backendOptions = make(map[string]any) + if err := json.Unmarshal(optionsData, &backendOptions); err != nil { return nil, fmt.Errorf("failed to unmarshal backend options to map: %w", err) } } - return json.Marshal(aux) + return json.Marshal(&struct { + BackendType BackendType `json:"backend_type"` + BackendOptions map[string]any `json:"backend_options,omitempty"` + }{ + BackendType: o.BackendType, + BackendOptions: backendOptions, + }) } // setBackendOptions stores the backend in the appropriate typed field @@ -142,32 +142,54 @@ func (o *Options) getBackend() backend { } } -func (o *Options) isDockerEnabled(backend *config.BackendSettings) bool { - if backend.Docker != nil && backend.Docker.Enabled && o.BackendType != BackendTypeMlxLm { - return true +// isDockerEnabled checks if Docker is enabled with an optional override +func (o *Options) isDockerEnabled(backend *config.BackendSettings, dockerEnabledOverride *bool) bool { + // Check if backend supports Docker + if backend.Docker == nil { + return false } - return false + + // MLX doesn't support Docker + if o.BackendType == BackendTypeMlxLm { + return false + } + + // Check for instance-level override + if dockerEnabledOverride != nil { + return *dockerEnabledOverride + } + + // Fall back to config value + return backend.Docker.Enabled } -func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig) bool { +func (o *Options) IsDockerEnabled(backendConfig *config.BackendConfig, dockerEnabled *bool) bool { backendSettings := o.getBackendSettings(backendConfig) - return o.isDockerEnabled(backendSettings) + return o.isDockerEnabled(backendSettings, dockerEnabled) } // GetCommand builds the command to run the backend -func (o *Options) GetCommand(backendConfig *config.BackendConfig) string { - +func (o *Options) GetCommand(backendConfig *config.BackendConfig, dockerEnabled *bool, commandOverride string) string { backendSettings := o.getBackendSettings(backendConfig) - if o.isDockerEnabled(backendSettings) { + // Determine if Docker is enabled + useDocker := o.isDockerEnabled(backendSettings, dockerEnabled) + + if useDocker { return "docker" } + // Check for command override (only applies when not in Docker mode) + if commandOverride != "" { + return commandOverride + } + + // Fall back to config command return backendSettings.Command } // buildCommandArgs builds command line arguments for the backend -func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string { +func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig, dockerEnabled *bool) []string { var args []string @@ -177,7 +199,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string return args } - if o.isDockerEnabled(backendSettings) { + if o.isDockerEnabled(backendSettings, dockerEnabled) { // For Docker, start with Docker args args = append(args, backendSettings.Docker.Args...) args = append(args, backendSettings.Docker.Image) @@ -193,7 +215,7 @@ func (o *Options) BuildCommandArgs(backendConfig *config.BackendConfig) []string } // BuildEnvironment builds the environment variables for the backend process -func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environment map[string]string) map[string]string { +func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, dockerEnabled *bool, environment map[string]string) map[string]string { backendSettings := o.getBackendSettings(backendConfig) env := map[string]string{} @@ -202,7 +224,7 @@ func (o *Options) BuildEnvironment(backendConfig *config.BackendConfig, environm maps.Copy(env, backendSettings.Environment) } - if o.isDockerEnabled(backendSettings) { + if o.isDockerEnabled(backendSettings, dockerEnabled) { if backendSettings.Docker.Environment != nil { maps.Copy(env, backendSettings.Docker.Environment) } diff --git a/pkg/backends/llama_test.go b/pkg/backends/llama_test.go index 961967b..4440092 100644 --- a/pkg/backends/llama_test.go +++ b/pkg/backends/llama_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "llamactl/pkg/backends" + "llamactl/pkg/config" "llamactl/pkg/testutil" "reflect" "testing" @@ -549,3 +550,79 @@ func TestParseLlamaCommand_ExtraArgs(t *testing.T) { }) } } +func TestLlamaCppGetCommand_WithOverrides(t *testing.T) { + tests := []struct { + name string + dockerInConfig bool + dockerEnabled *bool + commandOverride string + expected string + }{ + { + name: "no overrides - use config command", + dockerInConfig: false, + dockerEnabled: nil, + commandOverride: "", + expected: "/usr/bin/llama-server", + }, + { + name: "override to enable docker", + dockerInConfig: false, + dockerEnabled: boolPtr(true), + commandOverride: "", + expected: "docker", + }, + { + name: "override to disable docker", + dockerInConfig: true, + dockerEnabled: boolPtr(false), + commandOverride: "", + expected: "/usr/bin/llama-server", + }, + { + name: "command override", + dockerInConfig: false, + dockerEnabled: nil, + commandOverride: "/custom/llama-server", + expected: "/custom/llama-server", + }, + { + name: "docker takes precedence over command override", + dockerInConfig: false, + dockerEnabled: boolPtr(true), + commandOverride: "/custom/llama-server", + expected: "docker", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + backendConfig := &config.BackendConfig{ + LlamaCpp: config.BackendSettings{ + Command: "/usr/bin/llama-server", + Docker: &config.DockerSettings{ + Enabled: tt.dockerInConfig, + Image: "test-image", + }, + }, + } + + opts := backends.Options{ + BackendType: backends.BackendTypeLlamaCpp, + LlamaServerOptions: &backends.LlamaServerOptions{ + Model: "test-model.gguf", + }, + } + + result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride) + if result != tt.expected { + t.Errorf("GetCommand() = %v, want %v", result, tt.expected) + } + }) + } +} + +// Helper function to create bool pointer +func boolPtr(b bool) *bool { + return &b +} diff --git a/pkg/backends/mlx_test.go b/pkg/backends/mlx_test.go index f8a2ee5..f24d1a5 100644 --- a/pkg/backends/mlx_test.go +++ b/pkg/backends/mlx_test.go @@ -2,6 +2,7 @@ package backends_test import ( "llamactl/pkg/backends" + "llamactl/pkg/config" "llamactl/pkg/testutil" "testing" ) @@ -274,3 +275,57 @@ func TestParseMlxCommand_ExtraArgs(t *testing.T) { }) } } +func TestMlxGetCommand_NoDocker(t *testing.T) { + // MLX backend should never use Docker + backendConfig := &config.BackendConfig{ + MLX: config.BackendSettings{ + Command: "/usr/bin/mlx-server", + Docker: &config.DockerSettings{ + Enabled: true, // Even if enabled in config + Image: "test-image", + }, + }, + } + + opts := backends.Options{ + BackendType: backends.BackendTypeMlxLm, + MlxServerOptions: &backends.MlxServerOptions{ + Model: "test-model", + }, + } + + tests := []struct { + name string + dockerEnabled *bool + commandOverride string + expected string + }{ + { + name: "ignores docker in config", + dockerEnabled: nil, + commandOverride: "", + expected: "/usr/bin/mlx-server", + }, + { + name: "ignores docker override", + dockerEnabled: boolPtr(true), + commandOverride: "", + expected: "/usr/bin/mlx-server", + }, + { + name: "respects command override", + dockerEnabled: nil, + commandOverride: "/custom/mlx-server", + expected: "/custom/mlx-server", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := opts.GetCommand(backendConfig, tt.dockerEnabled, tt.commandOverride) + if result != tt.expected { + t.Errorf("GetCommand() = %v, want %v", result, tt.expected) + } + }) + } +} diff --git a/pkg/config/config.go b/pkg/config/config.go index 6df9e42..5f85f20 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,6 +1,7 @@ package config import ( + "encoding/json" "fmt" "log" "os" @@ -14,126 +15,126 @@ import ( // BackendSettings contains structured backend configuration type BackendSettings struct { - Command string `yaml:"command"` - Args []string `yaml:"args"` - Environment map[string]string `yaml:"environment,omitempty"` - Docker *DockerSettings `yaml:"docker,omitempty"` - ResponseHeaders map[string]string `yaml:"response_headers,omitempty"` + Command string `yaml:"command" json:"command"` + Args []string `yaml:"args" json:"args"` + Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"` + Docker *DockerSettings `yaml:"docker,omitempty" json:"docker,omitempty"` + ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"` } // DockerSettings contains Docker-specific configuration type DockerSettings struct { - Enabled bool `yaml:"enabled"` - Image string `yaml:"image"` - Args []string `yaml:"args"` - Environment map[string]string `yaml:"environment,omitempty"` + Enabled bool `yaml:"enabled" json:"enabled"` + Image string `yaml:"image" json:"image"` + Args []string `yaml:"args" json:"args"` + Environment map[string]string `yaml:"environment,omitempty" json:"environment,omitempty"` } // BackendConfig contains backend executable configurations type BackendConfig struct { - LlamaCpp BackendSettings `yaml:"llama-cpp"` - VLLM BackendSettings `yaml:"vllm"` - MLX BackendSettings `yaml:"mlx"` + LlamaCpp BackendSettings `yaml:"llama-cpp" json:"llama-cpp"` + VLLM BackendSettings `yaml:"vllm" json:"vllm"` + MLX BackendSettings `yaml:"mlx" json:"mlx"` } // AppConfig represents the configuration for llamactl type AppConfig struct { - Server ServerConfig `yaml:"server"` - Backends BackendConfig `yaml:"backends"` - Instances InstancesConfig `yaml:"instances"` - Auth AuthConfig `yaml:"auth"` - LocalNode string `yaml:"local_node,omitempty"` - Nodes map[string]NodeConfig `yaml:"nodes,omitempty"` - Version string `yaml:"-"` - CommitHash string `yaml:"-"` - BuildTime string `yaml:"-"` + Server ServerConfig `yaml:"server" json:"server"` + Backends BackendConfig `yaml:"backends" json:"backends"` + Instances InstancesConfig `yaml:"instances" json:"instances"` + Auth AuthConfig `yaml:"auth" json:"auth"` + LocalNode string `yaml:"local_node,omitempty" json:"local_node,omitempty"` + Nodes map[string]NodeConfig `yaml:"nodes,omitempty" json:"nodes,omitempty"` + Version string `yaml:"-" json:"version"` + CommitHash string `yaml:"-" json:"commit_hash"` + BuildTime string `yaml:"-" json:"build_time"` } // ServerConfig contains HTTP server configuration type ServerConfig struct { // Server host to bind to - Host string `yaml:"host"` + Host string `yaml:"host" json:"host"` // Server port to bind to - Port int `yaml:"port"` + Port int `yaml:"port" json:"port"` // Allowed origins for CORS (e.g., "http://localhost:3000") - AllowedOrigins []string `yaml:"allowed_origins"` + AllowedOrigins []string `yaml:"allowed_origins" json:"allowed_origins"` // Allowed headers for CORS (e.g., "Accept", "Authorization", "Content-Type", "X-CSRF-Token") - AllowedHeaders []string `yaml:"allowed_headers"` + AllowedHeaders []string `yaml:"allowed_headers" json:"allowed_headers"` // Enable Swagger UI for API documentation - EnableSwagger bool `yaml:"enable_swagger"` + EnableSwagger bool `yaml:"enable_swagger" json:"enable_swagger"` // Response headers to send with responses - ResponseHeaders map[string]string `yaml:"response_headers,omitempty"` + ResponseHeaders map[string]string `yaml:"response_headers,omitempty" json:"response_headers,omitempty"` } // InstancesConfig contains instance management configuration type InstancesConfig struct { // Port range for instances (e.g., 8000,9000) - PortRange [2]int `yaml:"port_range"` + PortRange [2]int `yaml:"port_range" json:"port_range"` // Directory where all llamactl data will be stored (instances.json, logs, etc.) - DataDir string `yaml:"data_dir"` + DataDir string `yaml:"data_dir" json:"data_dir"` // Instance config directory override - InstancesDir string `yaml:"configs_dir"` + InstancesDir string `yaml:"configs_dir" json:"configs_dir"` // Logs directory override - LogsDir string `yaml:"logs_dir"` + LogsDir string `yaml:"logs_dir" json:"logs_dir"` // Automatically create the data directory if it doesn't exist - AutoCreateDirs bool `yaml:"auto_create_dirs"` + AutoCreateDirs bool `yaml:"auto_create_dirs" json:"auto_create_dirs"` // Maximum number of instances that can be created - MaxInstances int `yaml:"max_instances"` + MaxInstances int `yaml:"max_instances" json:"max_instances"` // Maximum number of instances that can be running at the same time - MaxRunningInstances int `yaml:"max_running_instances,omitempty"` + MaxRunningInstances int `yaml:"max_running_instances,omitempty" json:"max_running_instances,omitempty"` // Enable LRU eviction for instance logs - EnableLRUEviction bool `yaml:"enable_lru_eviction"` + EnableLRUEviction bool `yaml:"enable_lru_eviction" json:"enable_lru_eviction"` // Default auto-restart setting for new instances - DefaultAutoRestart bool `yaml:"default_auto_restart"` + DefaultAutoRestart bool `yaml:"default_auto_restart" json:"default_auto_restart"` // Default max restarts for new instances - DefaultMaxRestarts int `yaml:"default_max_restarts"` + DefaultMaxRestarts int `yaml:"default_max_restarts" json:"default_max_restarts"` // Default restart delay for new instances (in seconds) - DefaultRestartDelay int `yaml:"default_restart_delay"` + DefaultRestartDelay int `yaml:"default_restart_delay" json:"default_restart_delay"` // Default on-demand start setting for new instances - DefaultOnDemandStart bool `yaml:"default_on_demand_start"` + DefaultOnDemandStart bool `yaml:"default_on_demand_start" json:"default_on_demand_start"` // How long to wait for an instance to start on demand (in seconds) - OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty"` + OnDemandStartTimeout int `yaml:"on_demand_start_timeout,omitempty" json:"on_demand_start_timeout,omitempty"` // Interval for checking instance timeouts (in minutes) - TimeoutCheckInterval int `yaml:"timeout_check_interval"` + TimeoutCheckInterval int `yaml:"timeout_check_interval" json:"timeout_check_interval"` } // AuthConfig contains authentication settings type AuthConfig struct { // Require authentication for OpenAI compatible inference endpoints - RequireInferenceAuth bool `yaml:"require_inference_auth"` + RequireInferenceAuth bool `yaml:"require_inference_auth" json:"require_inference_auth"` // List of keys for OpenAI compatible inference endpoints - InferenceKeys []string `yaml:"inference_keys"` + InferenceKeys []string `yaml:"inference_keys" json:"inference_keys"` // Require authentication for management endpoints - RequireManagementAuth bool `yaml:"require_management_auth"` + RequireManagementAuth bool `yaml:"require_management_auth" json:"require_management_auth"` // List of keys for management endpoints - ManagementKeys []string `yaml:"management_keys"` + ManagementKeys []string `yaml:"management_keys" json:"management_keys"` } type NodeConfig struct { - Address string `yaml:"address"` - APIKey string `yaml:"api_key,omitempty"` + Address string `yaml:"address" json:"address"` + APIKey string `yaml:"api_key,omitempty" json:"api_key,omitempty"` } // LoadConfig loads configuration with the following precedence: @@ -610,3 +611,31 @@ func getDefaultConfigLocations() []string { return locations } + +// SanitizedCopy returns a copy of the AppConfig with sensitive information removed +func (cfg *AppConfig) SanitizedCopy() (AppConfig, error) { + // Deep copy via JSON marshal/unmarshal to avoid concurrent map access + data, err := json.Marshal(cfg) + if err != nil { + log.Printf("Failed to marshal config for sanitization: %v", err) + return AppConfig{}, err + } + + var sanitized AppConfig + if err := json.Unmarshal(data, &sanitized); err != nil { + log.Printf("Failed to unmarshal config for sanitization: %v", err) + return AppConfig{}, err + } + + // Clear sensitive information + sanitized.Auth.InferenceKeys = []string{} + sanitized.Auth.ManagementKeys = []string{} + + // Clear API keys from nodes + for nodeName, node := range sanitized.Nodes { + node.APIKey = "" + sanitized.Nodes[nodeName] = node + } + + return sanitized, nil +} diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index 5e5dc27..376cc0c 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -255,7 +255,7 @@ func (i *Instance) getCommand() string { return "" } - return opts.BackendOptions.GetCommand(i.globalBackendSettings) + return opts.BackendOptions.GetCommand(i.globalBackendSettings, opts.DockerEnabled, opts.CommandOverride) } func (i *Instance) buildCommandArgs() []string { @@ -264,7 +264,7 @@ func (i *Instance) buildCommandArgs() []string { return nil } - return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings) + return opts.BackendOptions.BuildCommandArgs(i.globalBackendSettings, opts.DockerEnabled) } func (i *Instance) buildEnvironment() map[string]string { @@ -273,29 +273,21 @@ func (i *Instance) buildEnvironment() map[string]string { return nil } - return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.Environment) + return opts.BackendOptions.BuildEnvironment(i.globalBackendSettings, opts.DockerEnabled, opts.Environment) } // MarshalJSON implements json.Marshaler for Instance func (i *Instance) MarshalJSON() ([]byte, error) { - // Get options - opts := i.GetOptions() - - // Determine if docker is enabled for this instance's backend - dockerEnabled := opts.BackendOptions.IsDockerEnabled(i.globalBackendSettings) - return json.Marshal(&struct { - Name string `json:"name"` - Status *status `json:"status"` - Created int64 `json:"created,omitempty"` - Options *options `json:"options,omitempty"` - DockerEnabled bool `json:"docker_enabled,omitempty"` + Name string `json:"name"` + Status *status `json:"status"` + Created int64 `json:"created,omitempty"` + Options *options `json:"options,omitempty"` }{ - Name: i.Name, - Status: i.status, - Created: i.Created, - Options: i.options, - DockerEnabled: dockerEnabled, + Name: i.Name, + Status: i.status, + Created: i.Created, + Options: i.options, }) } diff --git a/pkg/instance/options.go b/pkg/instance/options.go index 0c4b582..57a3ce9 100644 --- a/pkg/instance/options.go +++ b/pkg/instance/options.go @@ -5,7 +5,9 @@ import ( "fmt" "llamactl/pkg/backends" "llamactl/pkg/config" + "llamactl/pkg/validation" "log" + "maps" "slices" "sync" ) @@ -22,6 +24,11 @@ type Options struct { IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes // Environment variables Environment map[string]string `json:"environment,omitempty"` + + // Execution context overrides + DockerEnabled *bool `json:"docker_enabled,omitempty"` + CommandOverride string `json:"command_override,omitempty"` + // Assigned nodes Nodes map[string]struct{} `json:"-"` // Backend options @@ -138,15 +145,25 @@ func (c *Options) UnmarshalJSON(data []byte) error { // MarshalJSON implements custom JSON marshaling for Options func (c *Options) MarshalJSON() ([]byte, error) { - // Use anonymous struct to avoid recursion type Alias Options - aux := struct { + + // Make a copy of the struct + temp := *c + + // Copy environment map to avoid concurrent access issues + if temp.Environment != nil { + envCopy := make(map[string]string, len(temp.Environment)) + maps.Copy(envCopy, temp.Environment) + temp.Environment = envCopy + } + + aux := &struct { Nodes []string `json:"nodes,omitempty"` // Output as JSON array BackendType backends.BackendType `json:"backend_type"` BackendOptions map[string]any `json:"backend_options,omitempty"` *Alias }{ - Alias: (*Alias)(c), + Alias: (*Alias)(&temp), } // Convert nodes map to array (sorted for consistency) @@ -163,13 +180,12 @@ func (c *Options) MarshalJSON() ([]byte, error) { aux.BackendType = c.BackendOptions.BackendType // Marshal the backends.Options struct to get the properly formatted backend options - // Marshal a pointer to trigger the pointer receiver MarshalJSON method backendData, err := json.Marshal(&c.BackendOptions) if err != nil { return nil, fmt.Errorf("failed to marshal backend options: %w", err) } - // Unmarshal into a temporary struct to extract the backend_options map + // Unmarshal into a new temporary map to extract the backend_options var tempBackend struct { BackendOptions map[string]any `json:"backend_options,omitempty"` } @@ -200,6 +216,28 @@ func (c *Options) validateAndApplyDefaults(name string, globalSettings *config.I *c.IdleTimeout = 0 } + // Validate docker_enabled and command_override relationship + if c.DockerEnabled != nil && *c.DockerEnabled && c.CommandOverride != "" { + log.Printf("Instance %s: command_override cannot be set when docker_enabled is true, ignoring command_override", name) + c.CommandOverride = "" // Clear invalid configuration + } + + // Validate command_override if set + if c.CommandOverride != "" { + if err := validation.ValidateStringForInjection(c.CommandOverride); err != nil { + log.Printf("Instance %s: invalid command_override: %v, clearing value", name, err) + c.CommandOverride = "" // Clear invalid value + } + } + + // Validate docker_enabled for MLX backend + if c.BackendOptions.BackendType == backends.BackendTypeMlxLm { + if c.DockerEnabled != nil && *c.DockerEnabled { + log.Printf("Instance %s: docker_enabled is not supported for MLX backend, ignoring", name) + c.DockerEnabled = nil // Clear invalid configuration + } + } + // Apply defaults from global settings for nil fields if globalSettings != nil { if c.AutoRestart == nil { diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go index 46410f3..0af346c 100644 --- a/pkg/server/handlers_system.go +++ b/pkg/server/handlers_system.go @@ -20,3 +20,23 @@ func (h *Handler) VersionHandler() http.HandlerFunc { writeText(w, http.StatusOK, versionInfo) } } + +// ConfigHandler godoc +// @Summary Get server configuration +// @Description Returns the current server configuration (sanitized) +// @Tags System +// @Security ApiKeyAuth +// @Produces application/json +// @Success 200 {object} config.AppConfig "Sanitized configuration" +// @Failure 500 {string} string "Internal Server Error" +// @Router /api/v1/config [get] +func (h *Handler) ConfigHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + sanitizedConfig, err := h.cfg.SanitizedCopy() + if err != nil { + writeError(w, http.StatusInternalServerError, "sanitized_copy_error", "Failed to get sanitized config") + return + } + writeJSON(w, http.StatusOK, sanitizedConfig) + } +} diff --git a/pkg/server/routes.go b/pkg/server/routes.go index 618dbc0..b159968 100644 --- a/pkg/server/routes.go +++ b/pkg/server/routes.go @@ -42,7 +42,9 @@ func SetupRouter(handler *Handler) *chi.Mux { r.Use(authMiddleware.AuthMiddleware(KeyTypeManagement)) } - r.Get("/version", handler.VersionHandler()) // Get server version + r.Get("/version", handler.VersionHandler()) + + r.Get("/config", handler.ConfigHandler()) // Backend-specific endpoints r.Route("/backends", func(r chi.Router) { diff --git a/webui/src/__tests__/App.test.tsx b/webui/src/__tests__/App.test.tsx index 7497c31..eb212a4 100644 --- a/webui/src/__tests__/App.test.tsx +++ b/webui/src/__tests__/App.test.tsx @@ -4,8 +4,7 @@ import userEvent from '@testing-library/user-event' import App from '@/App' import { InstancesProvider } from '@/contexts/InstancesContext' import { instancesApi } from '@/lib/api' -import type { Instance } from '@/types/instance' -import { BackendType } from '@/types/instance' +import {BackendType, type Instance } from '@/types/instance' import { AuthProvider } from '@/contexts/AuthContext' // Mock the API @@ -49,6 +48,21 @@ vi.mock('@/lib/healthService', () => ({ })), })) +// Mock the ConfigContext helper hooks +vi.mock('@/hooks/useConfig', () => ({ + useInstanceDefaults: () => ({ + autoRestart: true, + maxRestarts: 3, + restartDelay: 5, + onDemandStart: false, + }), + useBackendSettings: () => ({ + command: '/usr/bin/llama-server', + dockerEnabled: false, + dockerImage: '', + }), +})) + function renderApp() { return render( @@ -119,8 +133,12 @@ describe('App Component - Critical Business Logic Only', () => { // Verify correct API call await waitFor(() => { expect(instancesApi.create).toHaveBeenCalledWith('new-test-instance', { - auto_restart: true, // Default value - backend_type: BackendType.LLAMA_CPP + auto_restart: true, // Default value from config + backend_type: BackendType.LLAMA_CPP, + docker_enabled: false, + max_restarts: 3, + on_demand_start: false, + restart_delay: 5 }) }) diff --git a/webui/src/components/InstanceCard.tsx b/webui/src/components/InstanceCard.tsx index 657f1ab..d889655 100644 --- a/webui/src/components/InstanceCard.tsx +++ b/webui/src/components/InstanceCard.tsx @@ -59,12 +59,8 @@ function InstanceCard({ // Fetch the most up-to-date instance data from the backend const instanceData = await instancesApi.get(instance.name); - // Remove docker_enabled as it's a computed field, not persisted to disk - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const { docker_enabled, ...persistedData } = instanceData; - // Convert to JSON string with pretty formatting (matching backend format) - const jsonString = JSON.stringify(persistedData, null, 2); + const jsonString = JSON.stringify(instanceData, null, 2); // Create a blob and download link const blob = new Blob([jsonString], { type: "application/json" }); @@ -101,7 +97,7 @@ function InstanceCard({ {/* Badges row */}
- + {running && }
diff --git a/webui/src/components/InstanceDialog.tsx b/webui/src/components/InstanceDialog.tsx index 25d48a3..638a45a 100644 --- a/webui/src/components/InstanceDialog.tsx +++ b/webui/src/components/InstanceDialog.tsx @@ -14,6 +14,7 @@ import ParseCommandDialog from "@/components/ParseCommandDialog"; import InstanceSettingsCard from "@/components/instance/InstanceSettingsCard"; import BackendConfigurationCard from "@/components/instance/BackendConfigurationCard"; import { Upload } from "lucide-react"; +import { useInstanceDefaults, useBackendSettings } from "@/hooks/useConfig"; interface InstanceDialogProps { open: boolean; @@ -29,6 +30,7 @@ const InstanceDialog: React.FC = ({ instance, }) => { const isEditing = !!instance; + const instanceDefaults = useInstanceDefaults(); const [instanceName, setInstanceName] = useState(""); const [formData, setFormData] = useState({}); @@ -36,6 +38,10 @@ const InstanceDialog: React.FC = ({ const [showParseDialog, setShowParseDialog] = useState(false); const fileInputRef = useRef(null); + // Get backend settings for all backends (we'll use this to update docker_enabled on backend type change) + const llamaCppSettings = useBackendSettings(BackendType.LLAMA_CPP); + const vllmSettings = useBackendSettings(BackendType.VLLM); + const mlxSettings = useBackendSettings(BackendType.MLX_LM); // Reset form when dialog opens/closes or when instance changes useEffect(() => { @@ -45,25 +51,40 @@ const InstanceDialog: React.FC = ({ setInstanceName(instance.name); setFormData(instance.options || {}); } else { - // Reset form for new instance + // Reset form for new instance with defaults from config setInstanceName(""); setFormData({ - auto_restart: true, // Default value + auto_restart: instanceDefaults?.autoRestart ?? true, + max_restarts: instanceDefaults?.maxRestarts, + restart_delay: instanceDefaults?.restartDelay, + on_demand_start: instanceDefaults?.onDemandStart, backend_type: BackendType.LLAMA_CPP, // Default backend type + docker_enabled: llamaCppSettings?.dockerEnabled ?? false, backend_options: {}, }); } setNameError(""); // Reset any name errors } + // eslint-disable-next-line react-hooks/exhaustive-deps }, [open, instance]); const handleFieldChange = (key: keyof CreateInstanceOptions, value: unknown) => { setFormData((prev) => { - // If backend_type is changing, clear backend_options + // If backend_type is changing, update docker_enabled default and clear backend_options if (key === 'backend_type' && prev.backend_type !== value) { + let dockerEnabled = false; + if (value === BackendType.LLAMA_CPP) { + dockerEnabled = llamaCppSettings?.dockerEnabled ?? false; + } else if (value === BackendType.VLLM) { + dockerEnabled = vllmSettings?.dockerEnabled ?? false; + } else if (value === BackendType.MLX_LM) { + dockerEnabled = mlxSettings?.dockerEnabled ?? false; + } + return { ...prev, backend_type: value as CreateInstanceOptions['backend_type'], + docker_enabled: dockerEnabled, backend_options: {}, // Clear backend options when backend type changes }; } @@ -106,6 +127,14 @@ const InstanceDialog: React.FC = ({ return; } + // Validate docker_enabled and command_override relationship + if (formData.backend_type !== BackendType.MLX_LM) { + if (formData.docker_enabled === true && formData.command_override) { + setNameError("Command override cannot be set when Docker is enabled"); + return; + } + } + // Clean up undefined values to avoid sending empty fields const cleanOptions: CreateInstanceOptions = {} as CreateInstanceOptions; Object.entries(formData).forEach(([key, value]) => { diff --git a/webui/src/components/__tests__/InstanceModal.test.tsx b/webui/src/components/__tests__/InstanceModal.test.tsx index a931ae9..3b84d25 100644 --- a/webui/src/components/__tests__/InstanceModal.test.tsx +++ b/webui/src/components/__tests__/InstanceModal.test.tsx @@ -2,8 +2,22 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' import { render, screen, waitFor } from '@testing-library/react' import userEvent from '@testing-library/user-event' import InstanceDialog from '@/components/InstanceDialog' -import type { Instance } from '@/types/instance' -import { BackendType } from '@/types/instance' +import { BackendType, type Instance } from '@/types/instance' + +// Mock the ConfigContext helper hooks +vi.mock('@/hooks/useConfig', () => ({ + useInstanceDefaults: () => ({ + autoRestart: true, + maxRestarts: 3, + restartDelay: 5, + onDemandStart: false, + }), + useBackendSettings: () => ({ + command: '/usr/bin/llama-server', + dockerEnabled: false, + dockerImage: '', + }), +})) describe('InstanceModal - Form Logic and Validation', () => { const mockOnSave = vi.fn() @@ -75,7 +89,7 @@ afterEach(() => { it('submits form with correct data structure', async () => { const user = userEvent.setup() - + render( { // Fill required name await user.type(screen.getByLabelText(/Instance Name/), 'my-instance') - + // Submit form await user.click(screen.getByTestId('dialog-save-button')) expect(mockOnSave).toHaveBeenCalledWith('my-instance', { - auto_restart: true, // Default value - backend_type: BackendType.LLAMA_CPP + auto_restart: true, // Default value from config + backend_type: BackendType.LLAMA_CPP, + docker_enabled: false, + max_restarts: 3, + on_demand_start: false, + restart_delay: 5 }) }) @@ -253,7 +271,7 @@ afterEach(() => { it('includes restart options in form submission when enabled', async () => { const user = userEvent.setup() - + render( { // Fill form await user.type(screen.getByLabelText(/Instance Name/), 'test-instance') - - // Set restart options - await user.type(screen.getByLabelText(/Max Restarts/), '5') - await user.type(screen.getByLabelText(/Restart Delay/), '10') + + // Clear default values and set new restart options + const maxRestartsInput = screen.getByLabelText(/Max Restarts/) + const restartDelayInput = screen.getByLabelText(/Restart Delay/) + await user.clear(maxRestartsInput) + await user.type(maxRestartsInput, '5') + await user.clear(restartDelayInput) + await user.type(restartDelayInput, '10') await user.click(screen.getByTestId('dialog-save-button')) expect(mockOnSave).toHaveBeenCalledWith('test-instance', { auto_restart: true, backend_type: BackendType.LLAMA_CPP, + docker_enabled: false, max_restarts: 5, + on_demand_start: false, restart_delay: 10 }) }) @@ -284,7 +308,7 @@ afterEach(() => { describe('Form Data Handling', () => { it('cleans up undefined values before submission', async () => { const user = userEvent.setup() - + render( { await user.click(screen.getByTestId('dialog-save-button')) - // Should only include non-empty values + // Should include default values from config expect(mockOnSave).toHaveBeenCalledWith('clean-instance', { - auto_restart: true, // Only this default value should be included - backend_type: BackendType.LLAMA_CPP + auto_restart: true, + backend_type: BackendType.LLAMA_CPP, + docker_enabled: false, + max_restarts: 3, + on_demand_start: false, + restart_delay: 5 }) }) it('handles numeric fields correctly', async () => { const user = userEvent.setup() - + render( { ) await user.type(screen.getByLabelText(/Instance Name/), 'numeric-test') - + // Test GPU layers field (numeric) const gpuLayersInput = screen.getByLabelText(/GPU Layers/) await user.type(gpuLayersInput, '15') @@ -328,6 +356,10 @@ afterEach(() => { auto_restart: true, backend_type: BackendType.LLAMA_CPP, backend_options: { gpu_layers: 15 }, // Should be number, not string + docker_enabled: false, + max_restarts: 3, + on_demand_start: false, + restart_delay: 5 }) }) }) diff --git a/webui/src/components/form/EnvVarsInput.tsx b/webui/src/components/form/EnvVarsInput.tsx index 476a98a..5c77433 100644 --- a/webui/src/components/form/EnvVarsInput.tsx +++ b/webui/src/components/form/EnvVarsInput.tsx @@ -18,7 +18,6 @@ const EnvVarsInput: React.FC = (props) => { keyPlaceholder="Variable name" valuePlaceholder="Variable value" addButtonText="Add Variable" - helperText="Environment variables that will be passed to the backend process" allowEmptyValues={false} /> ) diff --git a/webui/src/components/instance/BackendConfigurationCard.tsx b/webui/src/components/instance/BackendConfigurationCard.tsx index 799ea2b..00fa371 100644 --- a/webui/src/components/instance/BackendConfigurationCard.tsx +++ b/webui/src/components/instance/BackendConfigurationCard.tsx @@ -6,6 +6,7 @@ import { Terminal, ChevronDown, ChevronRight } from 'lucide-react' import { getBasicBackendFields, getAdvancedBackendFields } from '@/lib/zodFormUtils' import BackendFormField from '@/components/BackendFormField' import SelectInput from '@/components/form/SelectInput' +import ExecutionContextSection from '@/components/instance/ExecutionContextSection' interface BackendConfigurationCardProps { formData: CreateInstanceOptions @@ -59,6 +60,12 @@ const BackendConfigurationCard: React.FC = ({

+ {/* Execution Context Section */} + + {/* Basic Backend Options */} {basicBackendFields.length > 0 && (
diff --git a/webui/src/components/instance/ExecutionContextSection.tsx b/webui/src/components/instance/ExecutionContextSection.tsx new file mode 100644 index 0000000..cdd73cd --- /dev/null +++ b/webui/src/components/instance/ExecutionContextSection.tsx @@ -0,0 +1,76 @@ +import React from 'react' +import { BackendType, type CreateInstanceOptions } from '@/types/instance' +import CheckboxInput from '@/components/form/CheckboxInput' +import TextInput from '@/components/form/TextInput' +import EnvVarsInput from '@/components/form/EnvVarsInput' +import { useBackendSettings } from '@/hooks/useConfig' + +interface ExecutionContextSectionProps { + formData: CreateInstanceOptions + onChange: (key: keyof CreateInstanceOptions, value: unknown) => void +} + +const ExecutionContextSection: React.FC = ({ + formData, + onChange +}) => { + const backendSettings = useBackendSettings(formData.backend_type) + + // Get placeholder for command override based on backend type and config + const getCommandPlaceholder = () => { + if (backendSettings?.command) { + return backendSettings.command + } + + // Fallback placeholders if config is not loaded + switch (formData.backend_type) { + case BackendType.LLAMA_CPP: + return "llama-server" + case BackendType.VLLM: + return "vllm" + case BackendType.MLX_LM: + return "mlx_lm.server" + default: + return "" + } + } + + return ( +
+

Execution Context

+ + {/* Docker Mode Toggle - only for backends that support Docker */} + {formData.backend_type !== BackendType.MLX_LM && ( + onChange('docker_enabled', value)} + description="Run backend in Docker container" + /> + )} + + {/* Command Override - only shown when Docker is disabled or backend is MLX */} + {(formData.backend_type === BackendType.MLX_LM || formData.docker_enabled !== true) && ( + onChange('command_override', value)} + placeholder={getCommandPlaceholder()} + description="Custom path to backend executable (leave empty to use config default)" + /> + )} + + onChange('environment', value)} + description="Custom environment variables for the instance" + /> +
+ ) +} + +export default ExecutionContextSection diff --git a/webui/src/components/instance/InstanceSettingsCard.tsx b/webui/src/components/instance/InstanceSettingsCard.tsx index 7b853cb..b5f945c 100644 --- a/webui/src/components/instance/InstanceSettingsCard.tsx +++ b/webui/src/components/instance/InstanceSettingsCard.tsx @@ -1,12 +1,11 @@ import React, { useState, useEffect } from 'react' -import type { CreateInstanceOptions } from '@/types/instance' +import { type CreateInstanceOptions } from '@/types/instance' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Label } from '@/components/ui/label' import { Input } from '@/components/ui/input' import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration' import NumberInput from '@/components/form/NumberInput' import CheckboxInput from '@/components/form/CheckboxInput' -import EnvVarsInput from '@/components/form/EnvVarsInput' import SelectInput from '@/components/form/SelectInput' import { nodesApi, type NodesMap } from '@/lib/api' @@ -131,14 +130,6 @@ const InstanceSettingsCard: React.FC = ({ onChange={(value) => onChange('on_demand_start', value)} description="Start instance only when needed" /> - - onChange('environment', value)} - description="Custom environment variables for the instance" - />
diff --git a/webui/src/contexts/AuthContext.tsx b/webui/src/contexts/AuthContext.tsx index d3b8a12..edeb501 100644 --- a/webui/src/contexts/AuthContext.tsx +++ b/webui/src/contexts/AuthContext.tsx @@ -147,16 +147,3 @@ export const useAuth = (): AuthContextType => { } return context } - -// Helper hook for getting auth headers -export const useAuthHeaders = (): HeadersInit => { - const { apiKey, isAuthenticated } = useAuth() - - if (!isAuthenticated || !apiKey) { - return {} - } - - return { - 'Authorization': `Bearer ${apiKey}` - } -} diff --git a/webui/src/contexts/ConfigContext.tsx b/webui/src/contexts/ConfigContext.tsx new file mode 100644 index 0000000..a42e9b1 --- /dev/null +++ b/webui/src/contexts/ConfigContext.tsx @@ -0,0 +1,62 @@ +import { type ReactNode, createContext, useContext, useEffect, useState, useRef } from 'react' +import { serverApi } from '@/lib/api' +import type { AppConfig } from '@/types/config' +import { useAuth } from './AuthContext' + +interface ConfigContextType { + config: AppConfig | null + isLoading: boolean + error: string | null +} + +const ConfigContext = createContext(undefined) + +interface ConfigProviderProps { + children: ReactNode +} + +export const ConfigProvider = ({ children }: ConfigProviderProps) => { + const { isAuthenticated } = useAuth() + const [config, setConfig] = useState(null) + const [isLoading, setIsLoading] = useState(true) + const [error, setError] = useState(null) + const loadedRef = useRef(false) + + useEffect(() => { + if (!isAuthenticated || loadedRef.current) { + setIsLoading(false) + return + } + + loadedRef.current = true + + const loadConfig = async () => { + try { + const data = await serverApi.getConfig() + setConfig(data) + } catch (err) { + const errorMessage = err instanceof Error ? err.message : 'Failed to load configuration' + setError(errorMessage) + console.error('Error loading config:', err) + } finally { + setIsLoading(false) + } + } + + void loadConfig() + }, [isAuthenticated]) + + return ( + + {children} + + ) +} + +export const useConfig = (): ConfigContextType => { + const context = useContext(ConfigContext) + if (context === undefined) { + throw new Error('useConfig must be used within a ConfigProvider') + } + return context +} diff --git a/webui/src/hooks/useConfig.ts b/webui/src/hooks/useConfig.ts new file mode 100644 index 0000000..4615be5 --- /dev/null +++ b/webui/src/hooks/useConfig.ts @@ -0,0 +1,51 @@ +import { useConfig } from '@/contexts/ConfigContext' + +// Helper hook to get instance default values from config +export const useInstanceDefaults = () => { + const { config } = useConfig() + + if (!config || !config.instances) { + return null + } + + return { + autoRestart: config.instances.default_auto_restart, + maxRestarts: config.instances.default_max_restarts, + restartDelay: config.instances.default_restart_delay, + onDemandStart: config.instances.default_on_demand_start, + } +} + +// Helper hook to get specific backend settings by backend type +export const useBackendSettings = (backendType: string | undefined) => { + const { config } = useConfig() + + if (!config || !config.backends || !backendType) { + return null + } + + // Map backend type to config key + const backendKey = backendType === 'llama_cpp' + ? 'llama-cpp' + : backendType === 'mlx_lm' + ? 'mlx' + : backendType === 'vllm' + ? 'vllm' + : null + + if (!backendKey) { + return null + } + + const backendConfig = config.backends[backendKey as keyof typeof config.backends] + + if (!backendConfig) { + return null + } + + return { + command: backendConfig.command || '', + dockerEnabled: backendConfig.docker?.enabled ?? false, + dockerImage: backendConfig.docker?.image || '', + } +} diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts index ef03408..2ac679c 100644 --- a/webui/src/lib/api.ts +++ b/webui/src/lib/api.ts @@ -1,4 +1,5 @@ import type { CreateInstanceOptions, Instance } from "@/types/instance"; +import type { AppConfig } from "@/types/config"; import { handleApiError } from "./errorUtils"; // Adding baseURI as a prefix to support being served behind a subpath @@ -73,6 +74,9 @@ export const serverApi = { // GET /backends/llama-cpp/devices getDevices: () => apiCall("/backends/llama-cpp/devices", {}, "text"), + + // GET /config + getConfig: () => apiCall("/config"), }; // Backend API functions diff --git a/webui/src/main.tsx b/webui/src/main.tsx index ab046c2..6418a1e 100644 --- a/webui/src/main.tsx +++ b/webui/src/main.tsx @@ -4,13 +4,16 @@ import App from './App' import { InstancesProvider } from './contexts/InstancesContext' import './index.css' import { AuthProvider } from './contexts/AuthContext' +import { ConfigProvider } from './contexts/ConfigContext' ReactDOM.createRoot(document.getElementById('root')!).render( - - - + + + + + , ) \ No newline at end of file diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts index 3cbf523..b7530e1 100644 --- a/webui/src/schemas/instanceOptions.ts +++ b/webui/src/schemas/instanceOptions.ts @@ -36,6 +36,10 @@ export const CreateInstanceOptionsSchema = z.object({ // Environment variables environment: z.record(z.string(), z.string()).optional(), + // Execution context overrides + docker_enabled: z.boolean().optional(), + command_override: z.string().optional(), + // Backend configuration backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(), backend_options: BackendOptionsSchema.optional(), diff --git a/webui/src/types/config.ts b/webui/src/types/config.ts new file mode 100644 index 0000000..21f15fa --- /dev/null +++ b/webui/src/types/config.ts @@ -0,0 +1,70 @@ +export interface BackendSettings { + command: string + args: string[] + environment?: Record + docker?: DockerSettings + response_headers?: Record +} + +export interface DockerSettings { + enabled: boolean + image: string + args: string[] + environment?: Record +} + +export interface BackendConfig { + 'llama-cpp': BackendSettings + vllm: BackendSettings + mlx: BackendSettings +} + +export interface ServerConfig { + host: string + port: number + allowed_origins: string[] + allowed_headers: string[] + enable_swagger: boolean + response_headers?: Record +} + +export interface InstancesConfig { + port_range: [number, number] + data_dir: string + configs_dir: string + logs_dir: string + auto_create_dirs: boolean + max_instances: number + max_running_instances: number + enable_lru_eviction: boolean + default_auto_restart: boolean + default_max_restarts: number + default_restart_delay: number + default_on_demand_start: boolean + on_demand_start_timeout: number + timeout_check_interval: number +} + +export interface AuthConfig { + require_inference_auth: boolean + inference_keys: string[] // Will be empty in sanitized response + require_management_auth: boolean + management_keys: string[] // Will be empty in sanitized response +} + +export interface NodeConfig { + address: string + api_key: string // Will be empty in sanitized response +} + +export interface AppConfig { + server: ServerConfig + backends: BackendConfig + instances: InstancesConfig + auth: AuthConfig + local_node: string + nodes: Record + version?: string + commit_hash?: string + build_time?: string +} diff --git a/webui/src/types/instance.ts b/webui/src/types/instance.ts index e243b72..0977233 100644 --- a/webui/src/types/instance.ts +++ b/webui/src/types/instance.ts @@ -27,5 +27,4 @@ export interface Instance { name: string; status: InstanceStatus; options?: CreateInstanceOptions; - docker_enabled?: boolean; // indicates backend is running via Docker } \ No newline at end of file