Mirror of https://github.com/lordmathis/llamactl.git (synced 2025-11-06 17:14:28 +00:00)
Added support for serving behind a reverse proxy
- Added support for specifying response headers for each backend, allowing users to set `X-Accel-Buffering: no` to disable buffering for streaming responses in nginx
- Updated `configuration.md` to document the new configuration options
- Modified the Vite config to build with `base: "./"`, so assets are accessed via relative paths
- Updated `API_BASE` to use `document.baseURI`, allowing API calls to be made relative to the base path
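
The core of the change is response-header injection in the reverse proxy (see the `GetProxy` diff below). A minimal standalone sketch of that mechanism, using only the Go standard library; the upstream address and the header map are hypothetical stand-ins for a configured backend:

```go
package main

import (
	"log"
	"net/http"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Hypothetical upstream backend (e.g. a llama-server instance).
	target, err := url.Parse("http://127.0.0.1:8081")
	if err != nil {
		log.Fatal(err)
	}

	// Stand-in for the backend's response_headers config.
	responseHeaders := map[string]string{
		"X-Accel-Buffering": "no", // ask nginx not to buffer streaming responses
	}

	proxy := httputil.NewSingleHostReverseProxy(target)
	proxy.ModifyResponse = func(resp *http.Response) error {
		// Copy the configured headers onto every proxied response.
		for key, value := range responseHeaders {
			resp.Header.Set(key, value)
		}
		return nil
	}

	log.Fatal(http.ListenAndServe(":8080", proxy))
}
```
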
@@ -122,6 +122,7 @@ backends:
      image: "ghcr.io/ggml-org/llama.cpp:server"
      args: ["run", "--rm", "--network", "host", "--gpus", "all"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses

  vllm:
    command: "vllm"
@@ -132,24 +133,29 @@ backends:
      image: "vllm/vllm-openai:latest"
      args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
      environment: {}
    response_headers: {} # Additional response headers to send with responses

  mlx:
    command: "mlx_lm.server"
    args: []
    environment: {} # Environment variables for the backend process
    # MLX does not support Docker
    response_headers: {} # Additional response headers to send with responses
```

**Backend Configuration Fields:**
- `command`: Executable name/path for the backend
- `args`: Default arguments prepended to all instances
- `environment`: Environment variables for the backend process (optional)
- `response_headers`: Additional response headers to send with responses (optional)
- `docker`: Docker-specific configuration (optional)
  - `enabled`: Boolean flag to enable Docker runtime
  - `image`: Docker image to use
  - `args`: Additional arguments passed to `docker run`
  - `environment`: Environment variables for the container (optional)

> If llamactl is behind an nginx proxy, `X-Accel-Buffering: no` may be required for nginx to properly stream the responses without buffering.

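As an illustration of how a configured header reaches the `ResponseHeaders` field of `BackendSettings` (introduced in the Go diff below), a small sketch assuming `gopkg.in/yaml.v3` as the decoder, which may not be the exact library llamactl uses:

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// Mirrors the relevant fields of BackendSettings from the Go diff below.
type backendSettings struct {
	Command         string            `yaml:"command"`
	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
}

func main() {
	// Hypothetical llama-cpp backend config with nginx buffering disabled.
	raw := []byte(`
command: "llama-server"
response_headers:
  X-Accel-Buffering: "no"
`)

	var cfg backendSettings
	if err := yaml.Unmarshal(raw, &cfg); err != nil {
		panic(err)
	}
	fmt.Println(cfg.ResponseHeaders["X-Accel-Buffering"]) // prints: no
}
```
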
**Environment Variables:**

**LlamaCpp Backend:**

@@ -160,6 +166,7 @@ backends:
- `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
- `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
- `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"

**VLLM Backend:**
- `LLAMACTL_VLLM_COMMAND` - VLLM executable command

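The `*_RESPONSE_HEADERS` variables use a semicolon-separated `KEY=value` list, matching the `parseHeaders` helper added in the Go diff below. A minimal sketch of that parsing (the function name here is illustrative, not llamactl's API):

```go
package main

import (
	"fmt"
	"strings"
)

// parseHeaderList splits "KEY1=value1;KEY2=value2" into a map,
// mirroring the parseHeaders helper added in this commit.
func parseHeaderList(s string) map[string]string {
	headers := make(map[string]string)
	for _, pair := range strings.Split(s, ";") {
		if parts := strings.SplitN(strings.TrimSpace(pair), "=", 2); len(parts) == 2 {
			headers[parts[0]] = parts[1]
		}
	}
	return headers
}

func main() {
	// e.g. LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no;Cache-Control=no-cache"
	fmt.Println(parseHeaderList("X-Accel-Buffering=no;Cache-Control=no-cache"))
	// Output: map[Cache-Control:no-cache X-Accel-Buffering:no]
}
```
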
@@ -13,10 +13,11 @@ import (
 
 // BackendSettings contains structured backend configuration
 type BackendSettings struct {
-	Command     string            `yaml:"command"`
-	Args        []string          `yaml:"args"`
-	Environment map[string]string `yaml:"environment,omitempty"`
-	Docker      *DockerSettings   `yaml:"docker,omitempty"`
+	Command         string            `yaml:"command"`
+	Args            []string          `yaml:"args"`
+	Environment     map[string]string `yaml:"environment,omitempty"`
+	Docker          *DockerSettings   `yaml:"docker,omitempty"`
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 
 // DockerSettings contains Docker-specific configuration
@@ -58,6 +59,9 @@ type ServerConfig struct {
 
 	// Enable Swagger UI for API documentation
 	EnableSwagger bool `yaml:"enable_swagger"`
+
+	// Response headers to send with responses
+	ResponseHeaders map[string]string `yaml:"response_headers,omitempty"`
 }
 
 // InstancesConfig contains instance management configuration
@@ -337,6 +341,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(llamaDockerEnv, cfg.Backends.LlamaCpp.Docker.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_LLAMACPP_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.LlamaCpp.ResponseHeaders == nil {
+			cfg.Backends.LlamaCpp.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.LlamaCpp.ResponseHeaders)
+	}
 
 	// vLLM backend
 	if vllmCmd := os.Getenv("LLAMACTL_VLLM_COMMAND"); vllmCmd != "" {
@@ -380,6 +390,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(vllmDockerEnv, cfg.Backends.VLLM.Docker.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_VLLM_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.VLLM.ResponseHeaders == nil {
+			cfg.Backends.VLLM.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.VLLM.ResponseHeaders)
+	}
 
 	// MLX backend
 	if mlxCmd := os.Getenv("LLAMACTL_MLX_COMMAND"); mlxCmd != "" {
@@ -394,6 +410,12 @@ func loadEnvVars(cfg *AppConfig) {
 		}
 		parseEnvVars(mlxEnv, cfg.Backends.MLX.Environment)
 	}
+	if llamaEnv := os.Getenv("LLAMACTL_MLX_RESPONSE_HEADERS"); llamaEnv != "" {
+		if cfg.Backends.MLX.ResponseHeaders == nil {
+			cfg.Backends.MLX.ResponseHeaders = make(map[string]string)
+		}
+		parseHeaders(llamaEnv, cfg.Backends.MLX.ResponseHeaders)
+	}
 
 	// Instance defaults
 	if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
@@ -481,6 +503,19 @@ func parseEnvVars(envString string, envMap map[string]string) {
 	}
 }
+
+// parseHeaders parses HTTP headers in format "KEY1=value1;KEY2=value2"
+// and populates the provided headers map
+func parseHeaders(envString string, envMap map[string]string) {
+	if envString == "" {
+		return
+	}
+	for _, envPair := range strings.Split(envString, ";") {
+		if parts := strings.SplitN(strings.TrimSpace(envPair), "=", 2); len(parts) == 2 {
+			envMap[parts[0]] = parts[1]
+		}
+	}
+}
 
 // getDefaultDataDirectory returns platform-specific default data directory
 func getDefaultDataDirectory() string {
 	switch runtime.GOOS {

@@ -198,6 +198,15 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 
 	proxy := httputil.NewSingleHostReverseProxy(targetURL)
 
+	var responseHeaders map[string]string
+	switch i.options.BackendType {
+	case backends.BackendTypeLlamaCpp:
+		responseHeaders = i.globalBackendSettings.LlamaCpp.ResponseHeaders
+	case backends.BackendTypeVllm:
+		responseHeaders = i.globalBackendSettings.VLLM.ResponseHeaders
+	case backends.BackendTypeMlxLm:
+		responseHeaders = i.globalBackendSettings.MLX.ResponseHeaders
+	}
 	proxy.ModifyResponse = func(resp *http.Response) error {
 		// Remove CORS headers from llama-server response to avoid conflicts
 		// llamactl will add its own CORS headers
@@ -207,6 +216,10 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
 		resp.Header.Del("Access-Control-Allow-Credentials")
 		resp.Header.Del("Access-Control-Max-Age")
 		resp.Header.Del("Access-Control-Expose-Headers")
+
+		for key, value := range responseHeaders {
+			resp.Header.Set(key, value)
+		}
 		return nil
 	}
 
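To see the effect of `ModifyResponse` end to end, here is a rough, self-contained sketch (not part of this commit) using `httptest`: the upstream stands in for a backend server, and the proxy strips its CORS header and injects a configured response header:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
	"net/http/httputil"
	"net/url"
)

func main() {
	// Hypothetical upstream standing in for a backend that sets its own CORS header.
	upstream := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Access-Control-Allow-Origin", "*")
		fmt.Fprintln(w, "ok")
	}))
	defer upstream.Close()

	target, _ := url.Parse(upstream.URL)
	proxy := httputil.NewSingleHostReverseProxy(target)
	proxy.ModifyResponse = func(resp *http.Response) error {
		// Mirror the idea in the diff: strip upstream CORS headers, then add configured ones.
		resp.Header.Del("Access-Control-Allow-Origin")
		resp.Header.Set("X-Accel-Buffering", "no")
		return nil
	}

	front := httptest.NewServer(proxy)
	defer front.Close()

	resp, err := http.Get(front.URL)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Header.Get("X-Accel-Buffering"))            // no
	fmt.Println(resp.Header.Get("Access-Control-Allow-Origin"))  // empty: stripped by the proxy
}
```
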
@@ -1,4 +1,5 @@
-import { type ReactNode, createContext, useContext, useState, useEffect, useCallback } from 'react'
+import { API_BASE } from '@/lib/api'
+import { type ReactNode, createContext, useCallback, useContext, useEffect, useState } from 'react'
 
 interface AuthContextState {
   isAuthenticated: boolean
@@ -62,7 +63,7 @@ export const AuthProvider = ({ children }: AuthProviderProps) => {
   // Validate API key by making a test request
   const validateApiKey = async (key: string): Promise<boolean> => {
     try {
-      const response = await fetch('/api/v1/instances', {
+      const response = await fetch(API_BASE + '/instances', {
         headers: {
           'Authorization': `Bearer ${key}`,
           'Content-Type': 'application/json'

@@ -1,7 +1,10 @@
 import type { CreateInstanceOptions, Instance } from "@/types/instance";
 import { handleApiError } from "./errorUtils";
 
-const API_BASE = "/api/v1";
+// Adding baseURI as a prefix to support being served behind a subpath
+// e.g. when llamactl's `/` is served behind a reverse proxy at `/proxy/...`
+// the baseURI will be `/proxy/` and the API calls will be made to `/proxy/api/v1/<endpoint>`
+export const API_BASE = document.baseURI + "api/v1";
 
 // Base API call function with error handling
 async function apiCall<T>(

@@ -21,4 +21,6 @@ export default defineConfig({
     setupFiles: ['./src/test/setup.ts'],
     css: true,
   },
+  // ensures relative asset paths to support being served behind a subpath
+  base: "./"
 })