From 03a7a5d139ee72b269414d968767497b5d8ec964 Mon Sep 17 00:00:00 2001
From: Anuruth Lertpiya
Date: Mon, 29 Sep 2025 13:54:15 +0000
Subject: [PATCH] Update configuration.md with reverse proxy related information

---
 docs/getting-started/configuration.md | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
index c938f38..1ed750e 100644
--- a/docs/getting-started/configuration.md
+++ b/docs/getting-started/configuration.md
@@ -23,27 +23,30 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
       environment: {}
+    response_headers: {}  # Additional response headers to send with responses
 
   vllm:
     command: "vllm"
     args: ["serve"]
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
     docker:
       enabled: false
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
       environment: {}
+    response_headers: {}  # Additional response headers to send with responses
 
   mlx:
     command: "mlx_lm.server"
     args: []
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
+    response_headers: {}  # Additional response headers to send with responses
 
 instances:
   port_range: [8000, 9000]  # Port range for instances
@@ -116,31 +119,31 @@ backends:
   llama-cpp:
     command: "llama-server"
     args: []
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
     docker:
-      enabled: false # Enable Docker runtime (default: false)
+      enabled: false  # Enable Docker runtime (default: false)
       image: "ghcr.io/ggml-org/llama.cpp:server"
       args: ["run", "--rm", "--network", "host", "--gpus", "all"]
       environment: {}
-    response_headers: {} # Additional response headers to send with responses
+    response_headers: {}  # Additional response headers to send with responses
 
   vllm:
     command: "vllm"
     args: ["serve"]
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
     docker:
-      enabled: false
+      enabled: false  # Enable Docker runtime (default: false)
       image: "vllm/vllm-openai:latest"
       args: ["run", "--rm", "--network", "host", "--gpus", "all", "--shm-size", "1g"]
       environment: {}
-    response_headers: {} # Additional response headers to send with responses
+    response_headers: {}  # Additional response headers to send with responses
 
   mlx:
     command: "mlx_lm.server"
     args: []
-    environment: {} # Environment variables for the backend process
+    environment: {}  # Environment variables for the backend process
     # MLX does not support Docker
-    response_headers: {} # Additional response headers to send with responses
+    response_headers: {}  # Additional response headers to send with responses
 ```
 
 **Backend Configuration Fields:**
@@ -154,7 +157,7 @@ backends:
 - `args`: Additional arguments passed to `docker run`
 - `environment`: Environment variables for the container (optional)
 
-> If llamactl is behind an nginx proxy, `X-Accel-Buffering: no` may be required for nginx to properly stream the responses without buffering.
+> If llamactl is behind an NGINX proxy, the `X-Accel-Buffering: no` response header may be required for NGINX to stream responses without buffering.
 
 **Environment Variables:**
 
@@ -166,7 +169,7 @@ backends:
 - `LLAMACTL_LLAMACPP_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_LLAMACPP_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_LLAMACPP_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
-- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_LLAMACPP_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 **VLLM Backend:**
 - `LLAMACTL_VLLM_COMMAND` - VLLM executable command
@@ -176,11 +179,13 @@ backends:
 - `LLAMACTL_VLLM_DOCKER_IMAGE` - Docker image to use
 - `LLAMACTL_VLLM_DOCKER_ARGS` - Space-separated Docker arguments
 - `LLAMACTL_VLLM_DOCKER_ENV` - Docker environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_VLLM_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 **MLX Backend:**
 - `LLAMACTL_MLX_COMMAND` - MLX executable command
 - `LLAMACTL_MLX_ARGS` - Space-separated default arguments
 - `LLAMACTL_MLX_ENV` - Environment variables in format "KEY1=value1,KEY2=value2"
+- `LLAMACTL_MLX_RESPONSE_HEADERS` - Response headers in format "KEY1=value1;KEY2=value2"
 
 ### Instance Configuration
 
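For context on the NGINX note above, here is a minimal sketch of how the `response_headers` field documented in this patch could be used to avoid proxy buffering. It assumes `response_headers` accepts a map of header names to values, which the `{}` default and the `KEY1=value1;KEY2=value2` environment-variable format suggest but this patch does not confirm:

```yaml
# Sketch: send "X-Accel-Buffering: no" with responses from llama-cpp instances
# so an NGINX reverse proxy streams them instead of buffering the full response.
backends:
  llama-cpp:
    command: "llama-server"
    args: []
    response_headers:
      X-Accel-Buffering: "no"   # assumed map form of the response_headers field
```

The same header should be settable per backend through the documented environment variables, e.g. `LLAMACTL_LLAMACPP_RESPONSE_HEADERS="X-Accel-Buffering=no"`, with multiple headers separated by `;`.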