From 50e135520582822ded0a43ebba389dced08e3a2c Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 13:31:26 +0200
Subject: [PATCH 1/8] Add environment field to BackendSettings for improved configuration

---
 pkg/config/config.go | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index fac8222..8fd032e 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -13,9 +13,10 @@ import (
 
 // BackendSettings contains structured backend configuration
 type BackendSettings struct {
-	Command string          `yaml:"command"`
-	Args    []string        `yaml:"args"`
-	Docker  *DockerSettings `yaml:"docker,omitempty"`
+	Command     string            `yaml:"command"`
+	Args        []string          `yaml:"args"`
+	Environment map[string]string `yaml:"environment,omitempty"`
+	Docker      *DockerSettings   `yaml:"docker,omitempty"`
 }
 
 // DockerSettings contains Docker-specific configuration
@@ -135,8 +136,9 @@ func LoadConfig(configPath string) (AppConfig, error) {
 		},
 		Backends: BackendConfig{
 			LlamaCpp: BackendSettings{
-				Command: "llama-server",
-				Args:    []string{},
+				Command:     "llama-server",
+				Args:        []string{},
+				Environment: map[string]string{},
 				Docker: &DockerSettings{
 					Enabled: false,
 					Image:   "ghcr.io/ggml-org/llama.cpp:server",

From c984d95723d57a470fa4890d84c19fb33df9ad01 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 13:31:41 +0200
Subject: [PATCH 2/8] Add environment variable support to instance options and command building

---
 pkg/instance/lifecycle.go | 14 ++++++++++++--
 pkg/instance/options.go   | 23 +++++++++++++++++++++++
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/pkg/instance/lifecycle.go b/pkg/instance/lifecycle.go
index 9f7243a..eec9689 100644
--- a/pkg/instance/lifecycle.go
+++ b/pkg/instance/lifecycle.go
@@ -372,13 +372,23 @@ func (i *Process) buildCommand() (*exec.Cmd, error) {
 		return nil, err
 	}
 
+	// Build the environment variables
+	env := i.options.BuildEnvironment(backendConfig)
+
 	// Get the command to execute
-	cmd := i.options.GetCommand(backendConfig)
+	command := i.options.GetCommand(backendConfig)
 
 	// Build command arguments
 	args := i.options.BuildCommandArgs(backendConfig)
 
-	return exec.Command(cmd, args...), nil
+	// Create the exec.Cmd
+	cmd := exec.CommandContext(i.ctx, command, args...)
+	cmd.Env = []string{}
+	for k, v := range env {
+		cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
+	}
+
+	return cmd, nil
 }
 
 // getBackendConfig resolves the backend configuration for the current instance
diff --git a/pkg/instance/options.go b/pkg/instance/options.go
index e776e05..62181dd 100644
--- a/pkg/instance/options.go
+++ b/pkg/instance/options.go
@@ -9,6 +9,7 @@ import (
 	"llamactl/pkg/backends/vllm"
 	"llamactl/pkg/config"
 	"log"
+	"maps"
 )
 
 type CreateInstanceOptions struct {
@@ -20,6 +21,8 @@ type CreateInstanceOptions struct {
 	OnDemandStart *bool `json:"on_demand_start,omitempty"`
 	// Idle timeout
 	IdleTimeout *int `json:"idle_timeout,omitempty"` // minutes
+	// Environment variables
+	Environment map[string]string `json:"environment,omitempty"`
 
 	BackendType    backends.BackendType `json:"backend_type"`
 	BackendOptions map[string]any       `json:"backend_options,omitempty"`
@@ -240,3 +243,23 @@ func (c *CreateInstanceOptions) BuildCommandArgs(backendConfig *config.BackendSettings) []string {
 
 	return args
 }
+
+func (c *CreateInstanceOptions) BuildEnvironment(backendConfig *config.BackendSettings) map[string]string {
+	env := map[string]string{}
+
+	if backendConfig.Environment != nil {
+		maps.Copy(env, backendConfig.Environment)
+	}
+
+	if backendConfig.Docker != nil && backendConfig.Docker.Enabled && c.BackendType != backends.BackendTypeMlxLm {
+		if backendConfig.Docker.Environment != nil {
+			maps.Copy(env, backendConfig.Docker.Environment)
+		}
+	}
+
+	if c.Environment != nil {
+		maps.Copy(env, c.Environment)
+	}
+
+	return env
+}
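A note on the merge semantics implemented by `BuildEnvironment` and consumed in `buildCommand` above: backend-level `environment` is applied first, then the Docker container environment (only when Docker is enabled and the backend is not MLX), and finally the per-instance `environment`, so per-instance values win on conflict. Note also that `cmd.Env` is built up from an empty slice, so the spawned process receives only the merged variables rather than inheriting llamactl's own environment. A minimal standalone sketch of the precedence (plain Go, independent of the llamactl packages; the variable names are illustrative):

```go
package main

import (
	"fmt"
	"maps"
)

func main() {
	// Three layers, mirroring BuildEnvironment's merge order
	backendEnv := map[string]string{"OMP_NUM_THREADS": "4", "NCCL_DEBUG": "WARN"}
	dockerEnv := map[string]string{"NCCL_DEBUG": "INFO"}     // applied only when Docker is enabled
	instanceEnv := map[string]string{"OMP_NUM_THREADS": "8"} // per-instance overrides

	env := map[string]string{}
	maps.Copy(env, backendEnv)  // lowest precedence
	maps.Copy(env, dockerEnv)   // overrides backend-level values
	maps.Copy(env, instanceEnv) // highest precedence

	fmt.Println(env) // map[NCCL_DEBUG:INFO OMP_NUM_THREADS:8]
}
```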
From 1fbf809a2db58e7946600ae42f1bbfc4e036b1ba Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 14:40:04 +0200
Subject: [PATCH 3/8] Add EnvironmentVariablesInput component and integrate into InstanceSettingsCard

---
 .../form/EnvironmentVariablesInput.tsx        | 144 ++++++++++++++++++
 .../instance/InstanceSettingsCard.tsx         |   9 ++
 webui/src/lib/zodFormUtils.ts                 |  51 -------
 webui/src/schemas/instanceOptions.ts          |   4 +
 4 files changed, 157 insertions(+), 51 deletions(-)
 create mode 100644 webui/src/components/form/EnvironmentVariablesInput.tsx

diff --git a/webui/src/components/form/EnvironmentVariablesInput.tsx b/webui/src/components/form/EnvironmentVariablesInput.tsx
new file mode 100644
index 0000000..47739f0
--- /dev/null
+++ b/webui/src/components/form/EnvironmentVariablesInput.tsx
@@ -0,0 +1,144 @@
+import React, { useState } from 'react'
+import { Input } from '@/components/ui/input'
+import { Label } from '@/components/ui/label'
+import { Button } from '@/components/ui/button'
+import { X, Plus } from 'lucide-react'
+
+interface EnvironmentVariablesInputProps {
+  id: string
+  label: string
+  value: Record<string, string> | undefined
+  onChange: (value: Record<string, string> | undefined) => void
+  description?: string
+  disabled?: boolean
+  className?: string
+}
+
+interface EnvVar {
+  key: string
+  value: string
+}
+
+const EnvironmentVariablesInput: React.FC<EnvironmentVariablesInputProps> = ({
+  id,
+  label,
+  value,
+  onChange,
+  description,
+  disabled = false,
+  className
+}) => {
+  // Convert the value object to an array of key-value pairs for editing
+  const envVarsFromValue = value
+    ? Object.entries(value).map(([key, val]) => ({ key, value: val }))
+    : []
+
+  const [envVars, setEnvVars] = useState<EnvVar[]>(
+    envVarsFromValue.length > 0 ? envVarsFromValue : [{ key: '', value: '' }]
+  )
+
+  // Update parent component when env vars change
+  const updateParent = (newEnvVars: EnvVar[]) => {
+    // Filter out empty entries
+    const validVars = newEnvVars.filter(env => env.key.trim() !== '' && env.value.trim() !== '')
+
+    if (validVars.length === 0) {
+      onChange(undefined)
+    } else {
+      const envObject = validVars.reduce((acc, env) => {
+        acc[env.key.trim()] = env.value.trim()
+        return acc
+      }, {} as Record<string, string>)
+      onChange(envObject)
+    }
+  }
+
+  const handleKeyChange = (index: number, newKey: string) => {
+    const newEnvVars = [...envVars]
+    newEnvVars[index].key = newKey
+    setEnvVars(newEnvVars)
+    updateParent(newEnvVars)
+  }
+
+  const handleValueChange = (index: number, newValue: string) => {
+    const newEnvVars = [...envVars]
+    newEnvVars[index].value = newValue
+    setEnvVars(newEnvVars)
+    updateParent(newEnvVars)
+  }
+
+  const addEnvVar = () => {
+    const newEnvVars = [...envVars, { key: '', value: '' }]
+    setEnvVars(newEnvVars)
+  }
+
+  const removeEnvVar = (index: number) => {
+    if (envVars.length === 1) {
+      // Reset to empty if it's the last one
+      const newEnvVars = [{ key: '', value: '' }]
+      setEnvVars(newEnvVars)
+      updateParent(newEnvVars)
+    } else {
+      const newEnvVars = envVars.filter((_, i) => i !== index)
+      setEnvVars(newEnvVars)
+      updateParent(newEnvVars)
+    }
+  }
+
+  return (
+    <div className={className}>
+      <Label htmlFor={id}>{label}</Label>
+      <div className="space-y-2">
+        {envVars.map((envVar, index) => (
+          <div key={index} className="flex gap-2 items-center">
+            <Input
+              type="text"
+              placeholder="KEY"
+              value={envVar.key}
+              onChange={(e) => handleKeyChange(index, e.target.value)}
+              disabled={disabled}
+              className="flex-1"
+            />
+            <Input
+              type="text"
+              placeholder="value"
+              value={envVar.value}
+              onChange={(e) => handleValueChange(index, e.target.value)}
+              disabled={disabled}
+              className="flex-1"
+            />
+            <Button
+              type="button"
+              variant="ghost"
+              size="sm"
+              onClick={() => removeEnvVar(index)}
+              disabled={disabled}
+            >
+              <X className="h-4 w-4" />
+            </Button>
+          </div>
+        ))}
+        <Button
+          type="button"
+          variant="outline"
+          size="sm"
+          onClick={addEnvVar}
+          disabled={disabled}
+        >
+          <Plus className="h-4 w-4 mr-2" />
+          Add Variable
+        </Button>
+      </div>
+      {description && (
+        <p className="text-sm text-muted-foreground">{description}</p>
+      )}
+      <p className="text-xs text-muted-foreground">
+        Environment variables that will be passed to the backend process
+      </p>
+    </div>
+  )
+}
+
+export default EnvironmentVariablesInput
\ No newline at end of file
diff --git a/webui/src/components/instance/InstanceSettingsCard.tsx b/webui/src/components/instance/InstanceSettingsCard.tsx
index d997a8c..c85eda9 100644
--- a/webui/src/components/instance/InstanceSettingsCard.tsx
+++ b/webui/src/components/instance/InstanceSettingsCard.tsx
@@ -6,6 +6,7 @@ import { Input } from '@/components/ui/input'
 import AutoRestartConfiguration from '@/components/instance/AutoRestartConfiguration'
 import NumberInput from '@/components/form/NumberInput'
 import CheckboxInput from '@/components/form/CheckboxInput'
+import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
 
 interface InstanceSettingsCardProps {
   instanceName: string
@@ -75,6 +76,14 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
         onChange={(value) => onChange('on_demand_start', value)}
         description="Start instance only when needed"
       />
+
+      <EnvironmentVariablesInput
+        id="environment"
+        label="Environment Variables"
+        value={formData.environment}
+        onChange={(value) => onChange('environment', value)}
+        description="Custom environment variables for the instance"
+      />
diff --git a/webui/src/lib/zodFormUtils.ts b/webui/src/lib/zodFormUtils.ts
index 88294c4..6d959d6 100644
--- a/webui/src/lib/zodFormUtils.ts
+++ b/webui/src/lib/zodFormUtils.ts
@@ -1,12 +1,10 @@
 import {
-  type CreateInstanceOptions,
   type LlamaCppBackendOptions,
   type MlxBackendOptions,
   type VllmBackendOptions,
   LlamaCppBackendOptionsSchema,
   MlxBackendOptionsSchema,
   VllmBackendOptionsSchema,
-  getAllFieldKeys,
   getAllLlamaCppFieldKeys,
   getAllMlxFieldKeys,
   getAllVllmFieldKeys,
@@ -15,41 +13,6 @@ import {
   getVllmFieldType
 } from '@/schemas/instanceOptions'
 
-// Instance-level basic fields (not backend-specific)
-export const basicFieldsConfig: Record<string, { label: string; placeholder?: string; description?: string }> = {
-  auto_restart: {
-    label: 'Auto Restart',
-    description: 'Automatically restart the instance on failure'
-  },
-  max_restarts: {
-    label: 'Max Restarts',
-    placeholder: '3',
-    description: 'Maximum number of restart attempts (0 = unlimited)'
-  },
-  restart_delay: {
-    label: 'Restart Delay (seconds)',
-    placeholder: '5',
-    description: 'Delay in seconds before attempting restart'
-  },
-  idle_timeout: {
-    label: 'Idle Timeout (minutes)',
-    placeholder: '60',
-    description: 'Time in minutes before instance is considered idle and stopped'
-  },
-  on_demand_start: {
-    label: 'On-Demand Start',
-    description: 'Start instance upon receiving OpenAI-compatible API request'
-  },
-  backend_type: {
-    label: 'Backend Type',
-    description: 'Type of backend to use for this instance'
-  }
-}
-
 // LlamaCpp backend-specific basic fields
 const basicLlamaCppFieldsConfig: Record<string, { label: string; placeholder?: string; description?: string }> = {
@@ ... @@
-export function getBasicFields(): (keyof CreateInstanceOptions)[] {
-  return getAllFieldKeys().filter(key => !isBasicField(key))
-}
-
 export function getBasicBackendFields(backendType?: string): string[] {
   const normalizedType = (backendType || 'llama_cpp') as keyof typeof backendFieldConfigs
   const config = backendFieldConfigs[normalizedType] || basicLlamaCppFieldsConfig
@@ -222,5 +173,3 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
 
   return 'text'
 }
-
-// Re-export the Zod-based functions
-export { getFieldType } from '@/schemas/instanceOptions'
\ No newline at end of file
diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts
index 3d2df94..0af09c1 100644
--- a/webui/src/schemas/instanceOptions.ts
+++ b/webui/src/schemas/instanceOptions.ts
@@ -33,6 +33,9 @@ export const CreateInstanceOptionsSchema = z.object({
   idle_timeout: z.number().optional(),
   on_demand_start: z.boolean().optional(),
 
+  // Environment variables
+  environment: z.record(z.string(), z.string()).optional(),
+
   // Backend configuration
   backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
   backend_options: BackendOptionsSchema.optional(),
@@ -75,5 +78,6 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
   if (innerSchema instanceof z.ZodNumber) return 'number'
   if (innerSchema instanceof z.ZodArray) return 'array'
   if (innerSchema instanceof z.ZodObject) return 'object'
+  if (innerSchema instanceof z.ZodRecord) return 'object' // Handle ZodRecord as object
   return 'text' // ZodString and others default to text
 }
\ No newline at end of file
From ffa0a0c161dee137fc3980ff95f605d6cafc3417 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 14:40:16 +0200
Subject: [PATCH 4/8] Remove ZodFormField and BasicInstanceFields components

---
 webui/src/components/ZodFormField.tsx         | 144 ------------------
 .../instance/BasicInstanceFields.tsx          |  99 ------------
 2 files changed, 243 deletions(-)
 delete mode 100644 webui/src/components/ZodFormField.tsx
 delete mode 100644 webui/src/components/instance/BasicInstanceFields.tsx

diff --git a/webui/src/components/ZodFormField.tsx b/webui/src/components/ZodFormField.tsx
deleted file mode 100644
index 594d907..0000000
--- a/webui/src/components/ZodFormField.tsx
+++ /dev/null
@@ -1,144 +0,0 @@
-import React from 'react'
-import { Input } from '@/components/ui/input'
-import { Label } from '@/components/ui/label'
-import { Checkbox } from '@/components/ui/checkbox'
-import { BackendType, type CreateInstanceOptions } from '@/types/instance'
-import { getFieldType, basicFieldsConfig } from '@/lib/zodFormUtils'
-
-interface ZodFormFieldProps {
-  fieldKey: keyof CreateInstanceOptions
-  value: string | number | boolean | string[] | undefined
-  onChange: (key: keyof CreateInstanceOptions, value: string | number | boolean | string[] | undefined) => void
-}
-
-const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }) => {
-  // Get configuration for basic fields, or use field name for advanced fields
-  const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
-
-  // Get type from Zod schema
-  const fieldType = getFieldType(fieldKey)
-
-  const handleChange = (newValue: string | number | boolean | string[] | undefined) => {
-    onChange(fieldKey, newValue)
-  }
-
-  const renderField = () => {
-    // Special handling for backend_type field - render as dropdown
-    if (fieldKey === 'backend_type') {
-      return (
-        <div className="grid gap-2">
-          <Label htmlFor={fieldKey}>{config.label}</Label>
-          <select
-            id={fieldKey}
-            value={typeof value === 'string' ? value : BackendType.LLAMA_CPP}
-            onChange={(e) => handleChange(e.target.value)}
-          >
-            <option value={BackendType.LLAMA_CPP}>Llama Server</option>
-            <option value={BackendType.MLX_LM}>MLX LM</option>
-            <option value={BackendType.VLLM}>vLLM</option>
-          </select>
-          {config.description && (
-            <p className="text-sm text-muted-foreground">{config.description}</p>
-          )}
-        </div>
-      )
-    }
-
-    switch (fieldType) {
-      case 'boolean':
-        return (
-          <div className="flex items-center space-x-2">
-            <Checkbox
-              id={fieldKey}
-              checked={typeof value === 'boolean' ? value : false}
-              onCheckedChange={(checked) => handleChange(checked)}
-            />
-            <Label htmlFor={fieldKey}>{config.label}</Label>
-          </div>
-        )
-
-      case 'number':
-        return (
-          <div className="grid gap-2">
-            <Label htmlFor={fieldKey}>{config.label}</Label>
-            <Input
-              id={fieldKey}
-              type="number"
-              value={typeof value === 'number' ? value : ''}
-              onChange={(e) => {
-                const numValue = e.target.value ? parseFloat(e.target.value) : undefined
-                // Only update if the parsed value is valid or the input is empty
-                if (e.target.value === '' || (numValue !== undefined && !isNaN(numValue))) {
-                  handleChange(numValue)
-                }
-              }}
-              placeholder={config.placeholder}
-            />
-            {config.description && (
-              <p className="text-sm text-muted-foreground">{config.description}</p>
-            )}
-          </div>
-        )
-
-      case 'array':
-        return (
-          <div className="grid gap-2">
-            <Label htmlFor={fieldKey}>{config.label}</Label>
-            <Input
-              id={fieldKey}
-              type="text"
-              value={Array.isArray(value) ? value.join(', ') : ''}
-              onChange={(e) => {
-                const arrayValue = e.target.value
-                  ? e.target.value.split(',').map(s => s.trim()).filter(Boolean)
-                  : undefined
-                handleChange(arrayValue)
-              }}
-              placeholder="item1, item2, item3"
-            />
-            {config.description && (
-              <p className="text-sm text-muted-foreground">{config.description}</p>
-            )}
-            <p className="text-xs text-muted-foreground">Separate multiple values with commas</p>
-          </div>
-        )
-
-      case 'text':
-      default:
-        return (
-          <div className="grid gap-2">
-            <Label htmlFor={fieldKey}>{config.label}</Label>
-            <Input
-              id={fieldKey}
-              type="text"
-              value={typeof value === 'string' ? value : ''}
-              onChange={(e) => handleChange(e.target.value || undefined)}
-              placeholder={config.placeholder}
-            />
-            {config.description && (
-              <p className="text-sm text-muted-foreground">{config.description}</p>
-            )}
-          </div>
-        )
-    }
-  }
-
-  return <div className="grid gap-2">{renderField()}</div>
-}
-
-export default ZodFormField
\ No newline at end of file
diff --git a/webui/src/components/instance/BasicInstanceFields.tsx b/webui/src/components/instance/BasicInstanceFields.tsx
deleted file mode 100644
index 9dce284..0000000
--- a/webui/src/components/instance/BasicInstanceFields.tsx
+++ /dev/null
@@ -1,99 +0,0 @@
-import React from 'react'
-import { BackendType, type CreateInstanceOptions } from '@/types/instance'
-import { getBasicFields, basicFieldsConfig } from '@/lib/zodFormUtils'
-import { getFieldType } from '@/schemas/instanceOptions'
-import TextInput from '@/components/form/TextInput'
-import NumberInput from '@/components/form/NumberInput'
-import CheckboxInput from '@/components/form/CheckboxInput'
-import SelectInput from '@/components/form/SelectInput'
-
-interface BasicInstanceFieldsProps {
-  formData: CreateInstanceOptions
-  onChange: (key: keyof CreateInstanceOptions, value: any) => void
-}
-
-const BasicInstanceFields: React.FC<BasicInstanceFieldsProps> = ({
-  formData,
-  onChange
-}) => {
-  const basicFields = getBasicFields()
-
-  const renderField = (fieldKey: keyof CreateInstanceOptions) => {
-    const config = basicFieldsConfig[fieldKey as string] || { label: fieldKey }
-    const fieldType = getFieldType(fieldKey)
-
-    // Special handling for backend_type field
-    if (fieldKey === 'backend_type') {
-      return (
-        <SelectInput
-          key={fieldKey}
-          id={fieldKey}
-          label={config.label}
-          value={(formData[fieldKey] as string) || BackendType.LLAMA_CPP}
-          onChange={(value) => onChange(fieldKey, value)}
-          options={[
-            { value: BackendType.LLAMA_CPP, label: 'Llama Server' },
-            { value: BackendType.MLX_LM, label: 'MLX LM' },
-            { value: BackendType.VLLM, label: 'vLLM' }
-          ]}
-          description={config.description}
-        />
-      )
-    }
-
-    // Render based on field type
-    switch (fieldType) {
-      case 'boolean':
-        return (
-          <CheckboxInput
-            key={fieldKey}
-            id={fieldKey}
-            label={config.label}
-            value={(formData[fieldKey] as boolean) ?? false}
-            onChange={(value) => onChange(fieldKey, value)}
-            description={config.description}
-          />
-        )
-
-      case 'number':
-        return (
-          <NumberInput
-            key={fieldKey}
-            id={fieldKey}
-            label={config.label}
-            value={formData[fieldKey] as number | undefined}
-            onChange={(value) => onChange(fieldKey, value)}
-            placeholder={config.placeholder}
-            description={config.description}
-          />
-        )
-
-      default:
-        return (
-          <TextInput
-            key={fieldKey}
-            id={fieldKey}
-            label={config.label}
-            value={formData[fieldKey] as string | undefined}
-            onChange={(value) => onChange(fieldKey, value)}
-            placeholder={config.placeholder}
-            description={config.description}
-          />
-        )
-    }
-  }
-
-  // Filter out auto restart fields and backend_options (handled separately)
-  const fieldsToRender = basicFields.filter(
-    fieldKey => !['auto_restart', 'max_restarts', 'restart_delay', 'backend_options'].includes(fieldKey as string)
-  )
-
-  return (
-    <div className="space-y-4">
-      <h3 className="text-lg font-medium">Basic Configuration</h3>
-      {fieldsToRender.map(renderField)}
-    </div>
-  )
-}
-
-export default BasicInstanceFields
\ No newline at end of file
From d092518114ca95fffcb1aad254e1d944ac55bb86 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 15:10:35 +0200
Subject: [PATCH 5/8] Update documentation

---
 README.md                             | 10 ++++++----
 docs/user-guide/api-reference.md      | 23 +++++++++++++++++++++--
 docs/user-guide/managing-instances.md |  8 +++++++-
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 4865174..7f547cc 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,8 @@
 
 ### ⚡ Smart Operations
 - **Instance Monitoring**: Health checks, auto-restart, log management
-- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
+- **Environment Variables**: Set custom environment variables per instance for advanced configuration
 
 ![Dashboard Screenshot](docs/images/dashboard.png)
 
@@ -52,7 +53,8 @@ llamactl
 2. Click "Create Instance"
 3. Choose backend type (llama.cpp, MLX, or vLLM)
 4. Set model path and backend-specific options
-5. Start or stop the instance
+5. Configure environment variables if needed (optional)
+6. Start or stop the instance
 
 ### Or use the REST API:
 ```bash
@@ -66,10 +68,10 @@ curl -X POST localhost:8080/api/v1/instances/my-mlx-model \
   -H "Authorization: Bearer your-key" \
   -d '{"backend_type": "mlx_lm", "backend_options": {"model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit"}}'
 
-# Create vLLM instance
+# Create vLLM instance with environment variables
 curl -X POST localhost:8080/api/v1/instances/my-vllm-model \
   -H "Authorization: Bearer your-key" \
-  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}}'
+  -d '{"backend_type": "vllm", "backend_options": {"model": "microsoft/DialoGPT-medium", "tensor_parallel_size": 2}, "environment": {"CUDA_VISIBLE_DEVICES": "0,1", "NCCL_DEBUG": "INFO"}}'
 
 # Use with OpenAI SDK
 curl -X POST localhost:8080/v1/chat/completions \
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
index 348c1c0..26e01e4 100644
--- a/docs/user-guide/api-reference.md
+++ b/docs/user-guide/api-reference.md
@@ -116,7 +116,18 @@ Create and start a new instance.
 POST /api/v1/instances/{name}
 ```
 
-**Request Body:** JSON object with instance configuration. See [Managing Instances](managing-instances.md) for available configuration options.
+**Request Body:** JSON object with instance configuration. Common fields include:
+
+- `backend_type`: Backend type (`llama_cpp`, `mlx_lm`, or `vllm`)
+- `backend_options`: Backend-specific configuration
+- `auto_restart`: Enable automatic restart on failure
+- `max_restarts`: Maximum restart attempts
+- `restart_delay`: Delay between restarts in seconds
+- `on_demand_start`: Start instance when receiving requests
+- `idle_timeout`: Idle timeout in minutes
+- `environment`: Environment variables as key-value pairs
+
+See [Managing Instances](managing-instances.md) for complete configuration options.
 
 **Response:**
 ```json
@@ -354,7 +365,15 @@ curl -X POST http://localhost:8080/api/v1/instances/my-model \
   -H "Content-Type: application/json" \
   -H "Authorization: Bearer your-api-key" \
   -d '{
-    "model": "/models/llama-2-7b.gguf"
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-2-7b.gguf",
+      "gpu_layers": 32
+    },
+    "environment": {
+      "CUDA_VISIBLE_DEVICES": "0",
+      "OMP_NUM_THREADS": "8"
+    }
   }'
 
 # Check instance status
diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md
index e094d42..824c4fe 100644
--- a/docs/user-guide/managing-instances.md
+++ b/docs/user-guide/managing-instances.md
@@ -53,6 +53,7 @@ Each instance is displayed as a card showing:
    - **Restart Delay**: Delay in seconds between restart attempts
    - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
    - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
+   - **Environment Variables**: Set custom environment variables for the instance process
 6. Configure backend-specific options:
    - **llama.cpp**: Threads, context size, GPU layers, port, etc.
    - **MLX**: Temperature, top-p, adapter path, Python environment, etc.
@@ -101,7 +102,12 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
     "gpu_memory_utilization": 0.9
   },
   "auto_restart": true,
-  "on_demand_start": true
+  "on_demand_start": true,
+  "environment": {
+    "CUDA_VISIBLE_DEVICES": "0,1",
+    "NCCL_DEBUG": "INFO",
+    "PYTHONPATH": "/custom/path"
+  }
 }'
 
 # Create llama.cpp instance with HuggingFace model

From fa9335663a3090a89ec224dd2ffc7e4838597bd8 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 15:22:01 +0200
Subject: [PATCH 6/8] Parse backend environment variables from LLAMACTL_*_ENV

---
 pkg/config/config.go | 48 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/pkg/config/config.go b/pkg/config/config.go
index 8fd032e..59b9ce9 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -302,6 +302,12 @@ func loadEnvVars(cfg *AppConfig) {
 	if llamaArgs := os.Getenv("LLAMACTL_LLAMACPP_ARGS"); llamaArgs != "" {
 		cfg.Backends.LlamaCpp.Args = strings.Split(llamaArgs, " ")
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_ENV"); llamaEnv != "" {
+		if cfg.Backends.LlamaCpp.Environment == nil {
+			cfg.Backends.LlamaCpp.Environment = make(map[string]string)
+		}
+		parseEnvVars(llamaEnv, cfg.Backends.LlamaCpp.Environment)
+	}
 	if llamaDockerEnabled := os.Getenv("LLAMACTL_LLAMACPP_DOCKER_ENABLED"); llamaDockerEnabled != "" {
 		if b, err := strconv.ParseBool(llamaDockerEnabled); err == nil {
 			if cfg.Backends.LlamaCpp.Docker == nil {
@@ -329,18 +335,22 @@ func loadEnvVars(cfg *AppConfig) {
 		if cfg.Backends.LlamaCpp.Docker.Environment == nil {
 			cfg.Backends.LlamaCpp.Docker.Environment = make(map[string]string)
 		}
-		// Parse env vars in format "KEY1=value1,KEY2=value2"
-		for _, envPair := range strings.Split(llamaDockerEnv, ",") {
-			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
-				cfg.Backends.LlamaCpp.Docker.Environment[parts[0]] = parts[1]
-			}
-		}
+		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
 	}
 
 	// vLLM backend
 	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
 		cfg.Backends.VLLM.Command = vllmCmd
 	}
+	if vllmArgs := os.Getenv("LLAMACTL_VLLM_ARGS"); vllmArgs != "" {
+		cfg.Backends.VLLM.Args = strings.Split(vllmArgs, " ")
+	}
+	if vllmEnv := os.Getenv("LLAMACTL_VLLM_ENV"); vllmEnv != "" {
+		if cfg.Backends.VLLM.Environment == nil {
+			cfg.Backends.VLLM.Environment = make(map[string]string)
+		}
+		parseEnvVars(vllmEnv, cfg.Backends.VLLM.Environment)
+	}
 	if vllmDockerEnabled := os.Getenv("LLAMACTL_VLLM_DOCKER_ENABLED"); vllmDockerEnabled != "" {
 		if b, err := strconv.ParseBool(vllmDockerEnabled); err == nil {
 			if cfg.Backends.VLLM.Docker == nil {
@@ -368,12 +378,7 @@ func loadEnvVars(cfg *AppConfig) {
 		if cfg.Backends.VLLM.Docker.Environment == nil {
 			cfg.Backends.VLLM.Docker.Environment = make(map[string]string)
 		}
-		// Parse env vars in format "KEY1=value1,KEY2=value2"
-		for _, envPair := range strings.Split(vllmDockerEnv, ",") {
-			if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
-				cfg.Backends.VLLM.Docker.Environment[parts[0]] = parts[1]
-			}
-		}
+		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
 	}
 
 	// MLX backend
@@ -383,6 +388,12 @@ func loadEnvVars(cfg *AppConfig) {
 	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
 		cfg.Backends.MLX.Command = mlxCmd
 	}
 	if mlxArgs := os.Getenv("LLAMACTL_MLX_ARGS"); mlxArgs != "" {
 		cfg.Backends.MLX.Args = strings.Split(mlxArgs, " ")
 	}
+	if mlxEnv := os.Getenv("LLAMACTL_MLX_ENV"); mlxEnv != "" {
+		if cfg.Backends.MLX.Environment == nil {
+			cfg.Backends.MLX.Environment = make(map[string]string)
+		}
+		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
+	}
 
 	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -457,6 +468,19 @@ func ParsePortRange(s string) [2]int {
 	return [2]int{0, 0} // Invalid format
 }
 
+// parseEnvVars parses environment variables in format "KEY1=value1,KEY2=value2"
+// and populates the provided environment map
+func parseEnvVars(envString string, envMap map[string]string) {
+	if envString == "" {
+		return
+	}
+	for _, envPair := range strings.Split(envString, ",") {
+		if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+			envMap[parts[0]] = parts[1]
+		}
+	}
+}
+
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
 	switch runtime.GOOS {
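The `KEY1=value1,KEY2=value2` format is split on commas first, then each pair on the first `=` only. A standalone sketch mirroring the unexported `parseEnvVars` helper above, including one caveat worth knowing (values that themselves contain commas cannot be expressed in this format):

```go
package main

import (
	"fmt"
	"strings"
)

// parseEnvVars mirrors the helper added in pkg/config: it splits
// "KEY1=value1,KEY2=value2" on commas, then on the first '=' only,
// and silently skips fragments without an '='.
func parseEnvVars(envString string, envMap map[string]string) {
	if envString == "" {
		return
	}
	for _, envPair := range strings.Split(envString, ",") {
		if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
			envMap[parts[0]] = parts[1]
		}
	}
}

func main() {
	env := map[string]string{}
	parseEnvVars("NCCL_DEBUG=INFO, OMP_NUM_THREADS=8", env)
	fmt.Println(env) // map[NCCL_DEBUG:INFO OMP_NUM_THREADS:8]

	// Caveat: a comma inside a value splits it into two fragments;
	// "CUDA_VISIBLE_DEVICES=0,1" becomes the pair "...=0" plus a bare
	// "1", which has no '=' and is dropped.
	parseEnvVars("CUDA_VISIBLE_DEVICES=0,1", env)
	fmt.Println(env["CUDA_VISIBLE_DEVICES"]) // "0"
}
```

In practice these strings arrive via variables such as `LLAMACTL_VLLM_ENV="NCCL_DEBUG=INFO,KEY2=value2"` set before starting llamactl; use the YAML `environment` map or the per-instance `environment` field when a value needs to contain a comma.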
From 97a7c9a4e3eecbaedf601f1a996de6a204edacc8 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 15:29:43 +0200
Subject: [PATCH 7/8] Detail environment variable support in docs

---
 README.md                             | 11 +++++++--
 docs/getting-started/configuration.md | 32 +++++++++++++++++++++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 7f547cc..7d65702 100644
--- a/README.md
+++ b/README.md
@@ -162,18 +162,22 @@ Enable Docker support using the new structured backend configuration:
 backends:
   llama-cpp:
     command: "llama-server"
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: true
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
+      environment: {}  # Environment variables for the container
 
   vllm:
     command: "vllm"
     args: ["serve"]
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: true
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
+      environment: {}  # Environment variables for the container
 ```
 
 ### Key Features
@@ -204,24 +208,27 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
+    environment: {}  # Environment variables for the backend process
    docker:
       enabled: false
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
-      environment: {}
+      environment: {}  # Environment variables for the container
 
   vllm:
     command: "vllm"
     args: ["serve"]
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
-      environment: {}
+      environment: {}  # Environment variables for the container
 
   mlx:
     command: "mlx_lm.server"
     args: []
+    environment: {}  # Environment variables for the backend process
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
index f014f13..1d50126 100644
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -23,6 +23,7 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "ghcr.io/ggml-org/llama.cpp:server"
@@ -32,6 +33,7 @@ backends:
   vllm:
     command: "vllm"
     args: ["serve"]
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "vllm/vllm-openai:latest"
@@ -41,6 +43,7 @@ backends:
   mlx:
     command: "mlx_lm.server"
     args: []
+    environment: {}  # Environment variables for the backend process
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
@@ -113,6 +116,7 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false  # Enable Docker runtime (default: false)
       image: "ghcr.io/ggml-org/llama.cpp:server"
@@ -122,6 +126,7 @@ backends:
   vllm:
     command: "vllm"
     args: ["serve"]
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "vllm/vllm-openai:latest"
@@ -131,18 +136,45 @@ backends:
   mlx:
     command: "mlx_lm.server"
     args: []
+    environment: {}  # Environment variables for the backend process
     # MLX does not support Docker
 ```
 
 **Backend Configuration Fields:**
 - `command`: Executable name/path for the backend
 - `args`: Default arguments prepended to all instances
+- `environment`: Environment variables for the backend process (optional)
 - `docker`: Docker-specific configuration (optional)
   - `enabled`: Boolean flag to enable Docker runtime
   - `image`: Docker image to use
   - `args`: Additional arguments passed to `docker run`
   - `environment`: Environment variables for the container (optional)
 
+**Environment Variables:**
+
+**LlamaCpp Backend:**
+- `LLAMACTL_LLAMACPP_COMMAND` - LlamaCpp executable command
+- `LLAMACTL_LLAMACPP_ARGS` - Space-separated default arguments
+- `LLAMACTL_LLAMACPP_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_LLAMACPP_DOCKER_ENABLED` - Enable Docker runtime (true/false)
+- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
+- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
+- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+
+**VLLM Backend:**
+- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
+- `LLAMACTL_VLLM_ARGS` - Space-separated default arguments
+- `LLAMACTL_VLLM_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_VLLM_DOCKER_ENABLED` - Enable Docker runtime (true/false)
+- `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
+- `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
+- `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+
+**MLX Backend:**
+- `LLAMACTL_MLX_COMMAND` - MLX executable command
+- `LLAMACTL_MLX_ARGS` - Space-separated default arguments
+- `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+
 ### Instance Configuration
 
 ```yaml

From 9a7255a52d184219c6e9f2592a819ad2ef3c36f1 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sun, 28 Sep 2025 15:31:50 +0200
Subject: [PATCH 8/8] Refactor Docker support section in README for clarity and conciseness

---
 README.md | 34 +++------------------------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/README.md b/README.md
index 7d65702..dc68e4f 100644
--- a/README.md
+++ b/README.md
@@ -149,49 +149,21 @@ pip install vllm
 
 ## Docker Support
 
-llamactl supports running backends in Docker containers with identical behavior to native execution. This is particularly useful for:
-- Production deployments without local backend installation
-- Isolating backend dependencies
-- GPU-accelerated inference using official Docker images
-
-### Docker Configuration
-
-Enable Docker support using the new structured backend configuration:
+llamactl supports running backends in Docker containers - perfect for production deployments without local backend installation. Simply enable Docker in your configuration:
 
 ```yaml
 backends:
   llama-cpp:
-    command: "llama-server"
-    environment: {}  # Environment variables for the backend process
     docker:
       enabled: true
-      image: "ghcr.io/ggml-org/llama.cpp:server"
-      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
-      environment: {}  # Environment variables for the container
-
   vllm:
-    command: "vllm"
-    args: ["serve"]
-    environment: {}  # Environment variables for the backend process
     docker:
       enabled: true
-      image: "vllm/vllm-openai:latest"
-      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
-      environment: {}  # Environment variables for the container
 ```
 
-### Key Features
-
-- **Host Networking**: Uses `--network host` for seamless port management
-- **GPU Support**: Includes `--gpus all` for GPU acceleration
-- **Environment Variables**: Configure container environment as needed
-- **Flexible Configuration**: Per-backend Docker settings with sensible defaults
-
-### Requirements
-
-- Docker installed and running
-- For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support
-- No local backend installation required when using Docker
+**Requirements:** Docker installed and running. For GPU support: nvidia-docker2 (Linux) or Docker Desktop with GPU support.
+
+For detailed Docker configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
 
 ## Configuration