diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
index 1d50126..1ed750e 100644
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -23,27 +23,30 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
     docker:
       enabled: false
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
       environment: {}
+    response_headers: {} # Additional response headers to send with responses
 
   vllm:
     command: "vllm"
     args: ["serve"]
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
     docker:
       enabled: false
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
       environment: {}
+    response_headers: {} # Additional response headers to send with responses
 
   mlx:
     command: "mlx_lm.server"
     args: []
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
+    response_headers: {} # Additional response headers to send with responses
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
@@ -116,40 +119,46 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
     docker:
-      enabled: false  # Enable Docker runtime (default: false)
+      enabled: false # Enable Docker runtime (default: false)
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
       environment: {}
+    response_headers: {} # Additional response headers to send with responses
 
   vllm:
     command: "vllm"
     args: ["serve"]
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
     docker:
-      enabled: false
+      enabled: false # Enable Docker runtime (default: false)
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
       environment: {}
+    response_headers: {} # Additional response headers to send with responses
 
   mlx:
     command: "mlx_lm.server"
     args: []
-    environment: {}  # Environment variables for the backend process
+    environment: {} # Environment variables for the backend process
     # MLX does not support Docker
+    response_headers: {} # Additional response headers to send with responses
 
 ```
 
 **Backend Configuration Fields:**
 
 - `command`: Executable name/path for the backend
 - `args`: Default arguments prepended to all instances
 - `environment`: Environment variables for the backend process (optional)
+- `response_headers`: Additional response headers to send with responses (optional)
 - `docker`: Docker-specific configuration (optional)
   - `enabled`: Boolean flag to enable Docker runtime
   - `image`: Docker image to use
   - `args`: Additional arguments passed to `docker run`
   - `environment`: Environment variables for the container (optional)
+> If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required for NGINX to stream responses without buffering.
+
 
 **Environment Variables:**
 
 **LlamaCpp Backend:**
@@ -160,6 +169,7 @@
 - `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 **VLLM Backend:**
 - `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -169,11 +179,13 @@
 - `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 **MLX Backend:**
 - `LLAMACTL_MLX_COMMAND` - MLX executable command
 - `LLAMACTL_MLX_ARGS` - Space-separated default arguments
 - `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 ### Instance Configuration
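The docs above introduce the `response_headers` key; a minimal YAML sketch of how it could be used is shown below (see the config keys documented in the hunk). The `X-Accel-Buffering: no` value is taken from the NGINX note and is illustrative, not a required setting; the value is quoted so YAML does not parse `no` as a boolean:

```yaml
backends:
  llama-cpp:
    command: "llama-server"
    args: []
    # Assumed example: disable NGINX proxy buffering so streamed tokens are flushed immediately
    response_headers:
      X-Accel-Buffering: "no"
```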
diff --git a/pkg/config/config.go b/pkg/config/config.go
index 59b9ce9..3701643 100644
--- a/pkg/config/config.go
+++ b/pkg/config/config.go
@@ -13,10 +13,11 @@ import (
 
 // BackendSettings contains structured backend configuration
 type BackendSettings struct {
-    Command     string            `yaml:"command"`
-    Args        []string          `yaml:"args"`
-    Environment map[string]string `yaml:"environment,omitempty"`
-    Docker      *DockerSettings   `yaml:"docker,omitempty"`
+    Command         string            `yaml:"command"`
+    Args            []string          `yaml:"args"`
+    Environment     map[string]string `yaml:"environment,omitempty"`
+    Docker          *DockerSettings   `yaml:"docker,omitempty"`
+    ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 
 // DockerSettings contains Docker-specific configuration
@@ -58,6 +59,9 @@ type ServerConfig struct {
 
     // Enable Swagger UI for API documentation
     EnableSwagger bool `yaml:"enable_swagger"`
+
+    // Response headers to send with responses
+    ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 
 // InstancesConfig contains instance management configuration
@@ -337,6 +341,12 @@ func loadEnvVars(cfg *AppConfig) {
         }
         parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
     }
+    if llamaHeadersEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaHeadersEnv != "" {
+        if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
+            cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
+        }
+        parseHeaders(llamaHeadersEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
+    }
 
     // vLLM backend
     if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +390,12 @@ func loadEnvVars(cfg *AppConfig) {
         }
         parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
     }
+    if vllmHeadersEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); vllmHeadersEnv != "" {
+        if cfg.Backends.VLLM.ResponseHeaders == nil {
+            cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
+        }
+        parseHeaders(vllmHeadersEnv, cfg.Backends.VLLM.ResponseHeaders)
+    }
 
     // MLX backend
     if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +410,12 @@ func loadEnvVars(cfg *AppConfig) {
         }
         parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
     }
+    if mlxHeadersEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); mlxHeadersEnv != "" {
+        if cfg.Backends.MLX.ResponseHeaders == nil {
+            cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
+        }
+        parseHeaders(mlxHeadersEnv, cfg.Backends.MLX.ResponseHeaders)
+    }
 
     // Instance defaults
     if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -481,6 +503,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
     }
 }
 
+// parseHeaders parses HTTP response headers in the format "KEY1=value1;KEY2=value2"
+// and populates the provided headers map
+func parseHeaders(headerString string, headerMap map[string]string) {
+    if headerString == "" {
+        return
+    }
+    for _, headerPair := range strings.Split(headerString, ";") {
+        if parts := strings.SplitN(strings.TrimSpace(headerPair), "=", 2); len(parts) == 2 {
+            headerMap[parts[0]] = parts[1]
+        }
+    }
+}
+
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
     switch runtime.GOOS {
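To make the new environment-variable format concrete, here is a small standalone Go sketch that mirrors the `parseHeaders` logic added above (the real function is unexported in `pkg/config`); the variable name and header values are assumptions for illustration only:

```go
// Standalone sketch mirroring the parseHeaders logic above. It shows how the
// ";"-separated RESPONSE_HEADERS format maps onto a headers map; note that the
// *_ENV variables use "," as a separator instead.
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Example value for LLAMACTL_LLAMACPP_RESPONSE_HEADERS (illustrative only).
	raw := "X-Accel-Buffering=no;Cache-Control=no-cache"

	headers := make(map[string]string)
	for _, pair := range strings.Split(raw, ";") {
		if parts := strings.SplitN(strings.TrimSpace(pair), "=", 2); len(parts) == 2 {
			headers[parts[0]] = parts[1]
		}
	}

	fmt.Println(headers) // map[Cache-Control:no-cache X-Accel-Buffering:no]
}
```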
diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go
index 0bea06c..228f382 100644
--- a/pkg/instance/instance.go
+++ b/pkg/instance/instance.go
@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 
     proxy := httputil.NewSingleHostReverseProxy(targetURL)
 
+    var responseHeaders map[string]string
+    switch i.options.BackendType {
+    case backends.BackendTypeLlamaCpp:
+        responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
+    case backends.BackendTypeVllm:
+        responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
+    case backends.BackendTypeMlxLm:
+        responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
+    }
     proxy.ModifyResponse = func(resp *http.Response) error {
         // Remove CORS headers from llama-server response to avoid conflicts
         // llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
         resp.Header.Del("Access-Control-Allow-Credentials")
         resp.Header.Del("Access-Control-Max-Age")
         resp.Header.Del("Access-Control-Expose-Headers")
+
+        for key, value := range responseHeaders {
+            resp.Header.Set(key, value)
+        }
         return nil
     }
 
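The proxy change above is small but easy to misread, so the following hypothetical, self-contained Go sketch shows the same `ModifyResponse` mechanism stamping a configured header onto proxied responses. The backend stand-in, header name, and value are assumptions, not values from this diff:

```go
// Hypothetical verification sketch (not part of this diff): a reverse proxy with a
// ModifyResponse hook, wired up the same way GetProxy does above, sets the configured
// headers on every proxied response.
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Stand-in for a backend instance (e.g. a llama-server process).
	backend := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Fprint(w, "ok")
	}))
	defer backend.Close()

	target, _ := url.Parse(backend.URL)
	proxy := httputil.NewSingleHostReverseProxy(target)

	// Assumed response_headers configuration for this backend type.
	responseHeaders := map[string]string{"X-Accel-Buffering": "no"}
	proxy.ModifyResponse = func(resp *http.Response) error {
		for key, value := range responseHeaders {
			resp.Header.Set(key, value)
		}
		return nil
	}

	front := httptest.NewServer(proxy)
	defer front.Close()

	resp, _ := http.Get(front.URL)
	fmt.Println(resp.Header.Get("X-Accel-Buffering")) // prints "no"
}
```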
diff --git a/webui/src/contexts/AuthContext.tsx b/webui/src/contexts/AuthContext.tsx
index 3d2e7ff..d3b8a12 100644
--- a/webui/src/contexts/AuthContext.tsx
+++ b/webui/src/contexts/AuthContext.tsx
@@ -1,4 +1,4 @@
-import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
+import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
 
 interface AuthContextState {
   isAuthenticated: boolean
@@ -62,7 +62,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
   // Validate API key by making a test request
   const validateApiKey = async (key: string): Promise<boolean> => {
     try {
-      const response = await fetch('/api/v1/instances', {
+      const response = await fetch(document.baseURI + 'api/v1/instances', {
         headers: {
           'Authorization': `Bearer ${key}`,
           'Content-Type': 'application/json'
diff --git a/webui/src/lib/__tests__/api.test.ts b/webui/src/lib/__tests__/api.test.ts
index d7881bc..87e8ac7 100644
--- a/webui/src/lib/__tests__/api.test.ts
+++ b/webui/src/lib/__tests__/api.test.ts
@@ -1,5 +1,5 @@
-import { describe, it, expect, vi, beforeEach } from 'vitest'
 import { instancesApi } from '@/lib/api'
+import { beforeEach, describe, expect, it, vi } from 'vitest'
 
 // Mock fetch globally
 const mockFetch = vi.fn()
@@ -53,7 +53,9 @@ describe('API Error Handling', () => {
     await instancesApi.getLogs('test-instance', 100)
 
     expect(mockFetch).toHaveBeenCalledWith(
-      '/api/v1/instances/test-instance/logs?lines=100',
+      expect.stringMatching(
+        /^https?:\/\/[^/]+\/api\/v1\/instances\/test-instance\/logs\?lines=100$/
+      ),
      expect.any(Object)
    )
  })
diff --git a/webui/src/lib/api.ts b/webui/src/lib/api.ts
index 5bd7991..f7ecdf2 100644
--- a/webui/src/lib/api.ts
+++ b/webui/src/lib/api.ts
@@ -1,7 +1,10 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
 import { handleApiError } from "./errorUtils";
 
-const API_BASE = "/api/v1";
+// Adding baseURI as a prefix to support being served behind a subpath,
+// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
+// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/`
+export const API_BASE = document.baseURI + "api/v1";
 
 // Base API call function with error handling
 async function apiCall<T>(
diff --git a/webui/vite.config.ts b/webui/vite.config.ts
index 2d06ca4..3df9fac 100644
--- a/webui/vite.config.ts
+++ b/webui/vite.config.ts
@@ -21,4 +21,6 @@
     setupFiles: ['./src/test/setup.ts'],
     css: true,
   },
+  // ensure relative asset paths so the app can be served behind a subpath
+  base: "./"
 })
\ No newline at end of file
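For context on the webui changes, a small hypothetical TypeScript illustration of how the `document.baseURI` prefix resolves; the host names and the `/proxy/` prefix are assumptions, not values from this diff:

```ts
// Hypothetical illustration (not part of the diff) of how API_BASE resolves.
//
// In the test environment the page lives at a root URL (e.g. "http://localhost:3000/"),
// so the updated api.test.ts matcher sees an absolute root-level URL:
//   document.baseURI === "http://localhost:3000/"
//   API_BASE         === "http://localhost:3000/api/v1"
//
// Behind a reverse proxy that serves the UI from a subpath ending in a slash:
//   document.baseURI === "https://example.com/proxy/"
//   API_BASE         === "https://example.com/proxy/api/v1"
const apiBase = document.baseURI + "api/v1";
const logsUrl = `${apiBase}/instances/test-instance/logs?lines=100`;
console.log(logsUrl); // e.g. "https://example.com/proxy/api/v1/instances/test-instance/logs?lines=100"
```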