Added support for serving behind a reverse proxy

- Added support for specifying response headers for each backend
  - Allows users to set `X-Accel-Buffering: no` to disable buffering of streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified the Vite config to build with `base: "./"`, so assets are referenced via relative paths
- Updated `API_BASE` to use `document.baseURI`, so API calls are made relative to the base path
Author: Anuruth Lertpiya
Date: 2025-09-29 12:37:00 +00:00
parent 92cb57e816
commit 5906d89f8d
6 changed files with 68 additions and 7 deletions

View File

@@ -122,6 +122,7 @@ backends:
image: "ghcr.io/ggml-org/llama.cpp:server" image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"] args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
vllm: vllm:
command: "vllm" command: "vllm"
@@ -132,24 +133,29 @@ backends:
image: "vllm/vllm-openai:latest" image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
mlx: mlx:
command: "mlx_lm.server" command: "mlx_lm.server"
args: [] args: []
environment: {} # Environment variables for the backend process environment: {} # Environment variables for the backend process
# MLX does not support Docker # MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
``` ```
**Backend Configuration Fields:** **Backend Configuration Fields:**
- `command`: Executable name/path for the backend - `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances - `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional) - `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional) - `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)
> If llamactl is behind an nginx proxy, `X-Accel-Buffering: no` may be required for nginx to properly stream the responses without buffering.
**Environment Variables:**
**LlamaCpp Backend:**
@@ -160,6 +166,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1,KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
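As an aside, a minimal sketch (not from the commit; field values are illustrative) of how a `response_headers` mapping like the one documented above could be written in backend config YAML and decoded in Go, assuming `gopkg.in/yaml.v3` and a trimmed struct that only mirrors the relevant fields of `BackendSettings` from the Go diff below:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Trimmed, illustrative mirror of llamactl's BackendSettings; only the fields
// needed to demonstrate response_headers are included here.
type backendSettings struct {
	Command         string            `yaml:"command"`
	Args            []string          `yaml:"args"`
	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}

func main() {
	// Example backend config carrying the header suggested for nginx streaming.
	raw := `
command: "llama-server"
args: []
response_headers:
  X-Accel-Buffering: "no"
`
	var cfg backendSettings
	if err := yaml.Unmarshal([]byte(raw), &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.ResponseHeaders["X-Accel-Buffering"]) // prints: no
}
```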

View File

@@ -17,6 +17,7 @@ type BackendSettings struct {
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
@@ -58,6 +59,9 @@ type ServerConfig struct {
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -337,6 +341,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +390,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +410,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -481,6 +503,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
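As an aside, a small standalone sketch (not from the commit) of what the `parseHeaders` function above does with a value like the one `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` would carry; note the parser splits header pairs on `;` and each pair on the first `=`:

```go
package main

import (
	"fmt"
	"strings"
)

// Mirrors the parseHeaders function added in this commit: split on ';',
// then split each pair on the first '=' and populate the map.
func parseHeaders(envString string, headerMap map[string]string) {
	if envString == "" {
		return
	}
	for _, pair := range strings.Split(envString, ";") {
		if parts := strings.SplitN(strings.TrimSpace(pair), "=", 2); len(parts) == 2 {
			headerMap[parts[0]] = parts[1]
		}
	}
}

func main() {
	// Illustrative value, e.g. LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no;Cache-Control=no-cache"
	headers := make(map[string]string)
	parseHeaders("X-Accel-Buffering=no;Cache-Control=no-cache", headers)
	fmt.Println(headers) // map[Cache-Control:no-cache X-Accel-Buffering:no]
}
```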

View File

@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials") resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age") resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers") resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
return nil
}
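As an aside, a minimal self-contained sketch (not from the commit) of the same `ModifyResponse` pattern: a reverse proxy that stamps a configured header map, such as `X-Accel-Buffering: no`, onto every proxied response. The target URL and listen address are hypothetical.

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Hypothetical upstream; in llamactl this would be the backend instance's URL.
	target, err := url.Parse("http://127.0.0.1:8081")
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(target)

	// Headers taken from the backend's response_headers configuration.
	responseHeaders := map[string]string{"X-Accel-Buffering": "no"}

	// Same pattern as above: inject the configured headers into every response.
	proxy.ModifyResponse = func(resp *http.Response) error {
		for key, value := range responseHeaders {
			resp.Header.Set(key, value)
		}
		return nil
	}

	log.Fatal(http.ListenAndServe(":8080", proxy))
}
```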

View File

@@ -1,4 +1,5 @@
import { API_BASE } from '@/lib/api'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +63,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch(API_BASE + '/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
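As an aside, a small sketch (not from the commit; URLs are hypothetical) of why prefixing with `document.baseURI` works behind a subpath: a relative reference like `api/v1/instances` resolves against whatever base the page was served from, shown here with Go's `net/url`:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// llamactl served at the root vs. behind a /proxy/ subpath;
	// document.baseURI in the browser would be one of these.
	for _, base := range []string{"http://example.com/", "http://example.com/proxy/"} {
		baseURL, err := url.Parse(base)
		if err != nil {
			panic(err)
		}
		// Resolving a relative reference against the base mirrors
		// document.baseURI + "api/v1" + "/instances" in the frontend.
		apiURL := baseURL.ResolveReference(&url.URL{Path: "api/v1/instances"})
		fmt.Println(apiURL)
	}
	// Output:
	// http://example.com/api/v1/instances
	// http://example.com/proxy/api/v1/instances
}
```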

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'],
css: true,
},
// ensures relative asset paths to support being served behind a subpath
base: "./"
})