Added support for serving behind a reverse proxy

- Added support for specifying response headers for each backend
  - Allows users to set `X-Accel-Buffering: no` to disable buffering of streaming responses in nginx
  - Updated `configuration.md` to document the new configuration options
- Modified the Vite config to build with `base: "./"`, so assets are referenced via relative paths
- Updated `API_BASE` to use `document.baseURI`, so API calls are made relative to the base path
Author: Anuruth Lertpiya
Date: 2025-09-29 12:37:00 +00:00
parent 92cb57e816
commit 5906d89f8d
6 changed files with 68 additions and 7 deletions

View File

@@ -122,6 +122,7 @@ backends:
image: "ghcr.io/ggml-org/llama.cpp:server" image: "ghcr.io/ggml-org/llama.cpp:server"
args: ["run", "--rm", "--network", "host", "--gpus", "all"] args: ["run", "--rm", "--network", "host", "--gpus", "all"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
vllm: vllm:
command: "vllm" command: "vllm"
@@ -132,24 +133,29 @@ backends:
image: "vllm/vllm-openai:latest" image: "vllm/vllm-openai:latest"
args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"] args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
environment: {} environment: {}
response_headers: {} # Additional response headers to send with responses
mlx: mlx:
command: "mlx_lm.server" command: "mlx_lm.server"
args: [] args: []
environment: {} # Environment variables for the backend process environment: {} # Environment variables for the backend process
# MLX does not support Docker # MLX does not support Docker
response_headers: {} # Additional response headers to send with responses
``` ```
**Backend Configuration Fields:** **Backend Configuration Fields:**
- `command`: Executable name/path for the backend - `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances - `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional) - `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional) - `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)
> If llamactl is behind an nginx proxy, `X-Accel-Buffering: no` may be required for nginx to properly stream the responses without buffering.
**Environment Variables:**
**LlamaCpp Backend:**
@@ -160,6 +166,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1,KEY2=value2"
**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command
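As an aside, a minimal sketch (not from the commit; field values are illustrative) of how a `response_headers` mapping like the one documented above could be written in backend config YAML and decoded in Go, assuming `gopkg.in/yaml.v3` and a trimmed struct that only mirrors the relevant fields of `BackendSettings` from the Go diff below:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Trimmed, illustrative mirror of llamactl's BackendSettings; only the fields
// needed to demonstrate response_headers are included here.
type backendSettings struct {
	Command         string            `yaml:"command"`
	Args            []string          `yaml:"args"`
	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}

func main() {
	// Example backend config carrying the header suggested for nginx streaming.
	raw := `
command: "llama-server"
args: []
response_headers:
  X-Accel-Buffering: "no"
`
	var cfg backendSettings
	if err := yaml.Unmarshal([]byte(raw), &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.ResponseHeaders["X-Accel-Buffering"]) // prints: no
}
```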

View File

@@ -17,6 +17,7 @@ type BackendSettings struct {
Args []string `yaml:"args"`
Environment map[string]string `yaml:"environment,omitempty"`
Docker *DockerSettings `yaml:"docker,omitempty"`
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// DockerSettings contains Docker-specific configuration
@@ -58,6 +59,9 @@ type ServerConfig struct {
// Enable Swagger UI for API documentation
EnableSwagger bool `yaml:"enable_swagger"`
// Response headers to send with responses
ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}
// InstancesConfig contains instance management configuration
@@ -337,6 +341,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
}
// vLLM backend
if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +390,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.VLLM.ResponseHeaders == nil {
cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
}
// MLX backend
if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +410,12 @@ func loadEnvVars(cfg *AppConfig) {
}
parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
}
if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
if cfg.Backends.MLX.ResponseHeaders == nil {
cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
}
parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
}
// Instance defaults
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -481,6 +503,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
}
}
// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
// and populates the provided environment map
func parseHeaders(envString string, envMap map[string]string) {
if envString == "" {
return
}
for _, envPair := range strings.Split(envString, ";") {
if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
envMap[parts[0]] = parts[1]
}
}
}
// getDefaultDataDirectory returns platform-specific default data directory
func getDefaultDataDirectory() string {
switch runtime.GOOS {
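As an aside, a small standalone sketch (not from the commit) of what the `parseHeaders` function above does with a value like the one `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` would carry; note the parser splits header pairs on `;` and each pair on the first `=`:

```go
package main

import (
	"fmt"
	"strings"
)

// Mirrors the parseHeaders function added in this commit: split on ';',
// then split each pair on the first '=' and populate the map.
func parseHeaders(envString string, headerMap map[string]string) {
	if envString == "" {
		return
	}
	for _, pair := range strings.Split(envString, ";") {
		if parts := strings.SplitN(strings.TrimSpace(pair), "=", 2); len(parts) == 2 {
			headerMap[parts[0]] = parts[1]
		}
	}
}

func main() {
	// Illustrative value, e.g. LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no;Cache-Control=no-cache"
	headers := make(map[string]string)
	parseHeaders("X-Accel-Buffering=no;Cache-Control=no-cache", headers)
	fmt.Println(headers) // map[Cache-Control:no-cache X-Accel-Buffering:no]
}
```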

View File

@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
proxy := httputil.NewSingleHostReverseProxy(targetURL)
var responseHeaders map[string]string
switch i.options.BackendType {
case backends.BackendTypeLlamaCpp:
responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
case backends.BackendTypeVllm:
responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
case backends.BackendTypeMlxLm:
responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
}
proxy.ModifyResponse = func(resp *http.Response) error {
// Remove CORS headers from llama-server response to avoid conflicts
// llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
resp.Header.Del("Access-Control-Allow-Credentials") resp.Header.Del("Access-Control-Allow-Credentials")
resp.Header.Del("Access-Control-Max-Age") resp.Header.Del("Access-Control-Max-Age")
resp.Header.Del("Access-Control-Expose-Headers") resp.Header.Del("Access-Control-Expose-Headers")
for key, value := range responseHeaders {
resp.Header.Set(key, value)
}
return nil
}
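As an aside, a minimal self-contained sketch (not from the commit) of the same `ModifyResponse` pattern: a reverse proxy that stamps a configured header map, such as `X-Accel-Buffering: no`, onto every proxied response. The target URL and listen address are hypothetical.

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Hypothetical upstream; in llamactl this would be the backend instance's URL.
	target, err := url.Parse("http://127.0.0.1:8081")
	if err != nil {
		log.Fatal(err)
	}
	proxy := httputil.NewSingleHostReverseProxy(target)

	// Headers taken from the backend's response_headers configuration.
	responseHeaders := map[string]string{"X-Accel-Buffering": "no"}

	// Same pattern as above: inject the configured headers into every response.
	proxy.ModifyResponse = func(resp *http.Response) error {
		for key, value := range responseHeaders {
			resp.Header.Set(key, value)
		}
		return nil
	}

	log.Fatal(http.ListenAndServe(":8080", proxy))
}
```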

View File

@@ -1,4 +1,5 @@
import { API_BASE } from '@/lib/api'
import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
interface AuthContextState {
isAuthenticated: boolean
@@ -62,7 +63,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
// Validate API key by making a test request
const validateApiKey = async (key: string): Promise<boolean> => {
try {
const response = await fetch(API_BASE + '/instances', {
headers: {
'Authorization': `Bearer ${key}`,
'Content-Type': 'application/json'

View File

@@ -1,7 +1,10 @@
import type { CreateInstanceOptions, Instance } from "@/types/instance";
import { handleApiError } from "./errorUtils";
// Adding baseURI as a prefix to support being served behind a subpath
// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
export const API_BASE = document.baseURI + "api/v1";
// Base API call function with error handling
async function apiCall<T>(
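As an aside, a small sketch (not from the commit; URLs are hypothetical) of why prefixing with `document.baseURI` works behind a subpath: a relative reference like `api/v1/instances` resolves against whatever base the page was served from, shown here with Go's `net/url`:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// llamactl served at the root vs. behind a /proxy/ subpath;
	// document.baseURI in the browser would be one of these.
	for _, base := range []string{"http://example.com/", "http://example.com/proxy/"} {
		baseURL, err := url.Parse(base)
		if err != nil {
			panic(err)
		}
		// Resolving a relative reference against the base mirrors
		// document.baseURI + "api/v1" + "/instances" in the frontend.
		apiURL := baseURL.ResolveReference(&url.URL{Path: "api/v1/instances"})
		fmt.Println(apiURL)
	}
	// Output:
	// http://example.com/api/v1/instances
	// http://example.com/proxy/api/v1/instances
}
```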

View File

@@ -21,4 +21,6 @@ export default defineConfig({
setupFiles: ['./src/test/setup.ts'],
css: true,
},
// ensures relative asset paths to support being served behind a subpath
base: "./"
})