mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-05 16:44:22 +00:00
Deployed 4fa75d9 to dev with MkDocs 1.5.3 and mike 2.0.0
This commit is contained in:
Binary file not shown.
@@ -848,58 +848,59 @@
|
||||
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span><span class="nt">host</span><span class="p">:</span><span class="w"> </span><span class="s">"0.0.0.0"</span><span class="w"> </span><span class="c1"># Server host to bind to</span>
|
||||
<a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1"># Server port to bind to</span>
|
||||
<a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="w"> </span><span class="nt">allowed_origins</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># Allowed CORS origins (default: all)</span>
|
||||
<a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI for API docs</span>
|
||||
<a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a>
|
||||
<a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="nt">backends</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="w"> </span><span class="nt">llama-cpp</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"llama-server"</span>
|
||||
<a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span>
|
||||
<a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a><span class="w"> </span><span class="nt">docker</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a><span class="w"> </span><span class="nt">enabled</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
<a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="s">"ghcr.io/ggml-org/llama.cpp:server"</span>
|
||||
<a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"run"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--rm"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--network"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"host"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--gpus"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"all"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span>
|
||||
<a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a>
|
||||
<a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a><span class="w"> </span><span class="nt">vllm</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-20" name="__codelineno-1-20" href="#__codelineno-1-20"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"vllm"</span>
|
||||
<a id="__codelineno-1-21" name="__codelineno-1-21" href="#__codelineno-1-21"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"serve"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-22" name="__codelineno-1-22" href="#__codelineno-1-22"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-23" name="__codelineno-1-23" href="#__codelineno-1-23"></a><span class="w"> </span><span class="nt">docker</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-24" name="__codelineno-1-24" href="#__codelineno-1-24"></a><span class="w"> </span><span class="nt">enabled</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
<a id="__codelineno-1-25" name="__codelineno-1-25" href="#__codelineno-1-25"></a><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="s">"vllm/vllm-openai:latest"</span>
|
||||
<a id="__codelineno-1-26" name="__codelineno-1-26" href="#__codelineno-1-26"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"run"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--rm"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--network"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"host"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--gpus"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"all"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--shm-size"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"1g"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-27" name="__codelineno-1-27" href="#__codelineno-1-27"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span>
|
||||
<a id="__codelineno-1-28" name="__codelineno-1-28" href="#__codelineno-1-28"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-29" name="__codelineno-1-29" href="#__codelineno-1-29"></a>
|
||||
<a id="__codelineno-1-30" name="__codelineno-1-30" href="#__codelineno-1-30"></a><span class="w"> </span><span class="nt">mlx</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-31" name="__codelineno-1-31" href="#__codelineno-1-31"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"mlx_lm.server"</span>
|
||||
<a id="__codelineno-1-32" name="__codelineno-1-32" href="#__codelineno-1-32"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span>
|
||||
<a id="__codelineno-1-33" name="__codelineno-1-33" href="#__codelineno-1-33"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-34" name="__codelineno-1-34" href="#__codelineno-1-34"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-35" name="__codelineno-1-35" href="#__codelineno-1-35"></a>
|
||||
<a id="__codelineno-1-36" name="__codelineno-1-36" href="#__codelineno-1-36"></a><span class="nt">instances</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-37" name="__codelineno-1-37" href="#__codelineno-1-37"></a><span class="w"> </span><span class="nt">port_range</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">8000</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">9000</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># Port range for instances</span>
|
||||
<a id="__codelineno-1-38" name="__codelineno-1-38" href="#__codelineno-1-38"></a><span class="w"> </span><span class="nt">data_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl</span><span class="w"> </span><span class="c1"># Data directory (platform-specific, see below)</span>
|
||||
<a id="__codelineno-1-39" name="__codelineno-1-39" href="#__codelineno-1-39"></a><span class="w"> </span><span class="nt">configs_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl/instances</span><span class="w"> </span><span class="c1"># Instance configs directory</span>
|
||||
<a id="__codelineno-1-40" name="__codelineno-1-40" href="#__codelineno-1-40"></a><span class="w"> </span><span class="nt">logs_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl/logs</span><span class="w"> </span><span class="c1"># Logs directory</span>
|
||||
<a id="__codelineno-1-41" name="__codelineno-1-41" href="#__codelineno-1-41"></a><span class="w"> </span><span class="nt">auto_create_dirs</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Auto-create data/config/logs dirs if missing</span>
|
||||
<a id="__codelineno-1-42" name="__codelineno-1-42" href="#__codelineno-1-42"></a><span class="w"> </span><span class="nt">max_instances</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">-1</span><span class="w"> </span><span class="c1"># Max instances (-1 = unlimited)</span>
|
||||
<a id="__codelineno-1-43" name="__codelineno-1-43" href="#__codelineno-1-43"></a><span class="w"> </span><span class="nt">max_running_instances</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">-1</span><span class="w"> </span><span class="c1"># Max running instances (-1 = unlimited)</span>
|
||||
<a id="__codelineno-1-44" name="__codelineno-1-44" href="#__codelineno-1-44"></a><span class="w"> </span><span class="nt">enable_lru_eviction</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Enable LRU eviction for idle instances</span>
|
||||
<a id="__codelineno-1-45" name="__codelineno-1-45" href="#__codelineno-1-45"></a><span class="w"> </span><span class="nt">default_auto_restart</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Auto-restart new instances by default</span>
|
||||
<a id="__codelineno-1-46" name="__codelineno-1-46" href="#__codelineno-1-46"></a><span class="w"> </span><span class="nt">default_max_restarts</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">3</span><span class="w"> </span><span class="c1"># Max restarts for new instances</span>
|
||||
<a id="__codelineno-1-47" name="__codelineno-1-47" href="#__codelineno-1-47"></a><span class="w"> </span><span class="nt">default_restart_delay</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span><span class="w"> </span><span class="c1"># Restart delay (seconds) for new instances</span>
|
||||
<a id="__codelineno-1-48" name="__codelineno-1-48" href="#__codelineno-1-48"></a><span class="w"> </span><span class="nt">default_on_demand_start</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Default on-demand start setting</span>
|
||||
<a id="__codelineno-1-49" name="__codelineno-1-49" href="#__codelineno-1-49"></a><span class="w"> </span><span class="nt">on_demand_start_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">120</span><span class="w"> </span><span class="c1"># Default on-demand start timeout in seconds</span>
|
||||
<a id="__codelineno-1-50" name="__codelineno-1-50" href="#__codelineno-1-50"></a><span class="w"> </span><span class="nt">timeout_check_interval</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span><span class="w"> </span><span class="c1"># Idle instance timeout check in minutes</span>
|
||||
<a id="__codelineno-1-51" name="__codelineno-1-51" href="#__codelineno-1-51"></a>
|
||||
<a id="__codelineno-1-52" name="__codelineno-1-52" href="#__codelineno-1-52"></a><span class="nt">auth</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-53" name="__codelineno-1-53" href="#__codelineno-1-53"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for inference endpoints</span>
|
||||
<a id="__codelineno-1-54" name="__codelineno-1-54" href="#__codelineno-1-54"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for inference endpoints</span>
|
||||
<a id="__codelineno-1-55" name="__codelineno-1-55" href="#__codelineno-1-55"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for management endpoints</span>
|
||||
<a id="__codelineno-1-56" name="__codelineno-1-56" href="#__codelineno-1-56"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for management endpoints</span>
|
||||
<a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a><span class="w"> </span><span class="nt">allowed_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># Allowed CORS headers (default: all)</span>
|
||||
<a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI for API docs</span>
|
||||
<a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a>
|
||||
<a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="nt">backends</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="w"> </span><span class="nt">llama-cpp</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"llama-server"</span>
|
||||
<a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span>
|
||||
<a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a><span class="w"> </span><span class="nt">docker</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a><span class="w"> </span><span class="nt">enabled</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
<a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="s">"ghcr.io/ggml-org/llama.cpp:server"</span>
|
||||
<a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"run"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--rm"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--network"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"host"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--gpus"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"all"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span>
|
||||
<a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a>
|
||||
<a id="__codelineno-1-20" name="__codelineno-1-20" href="#__codelineno-1-20"></a><span class="w"> </span><span class="nt">vllm</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-21" name="__codelineno-1-21" href="#__codelineno-1-21"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"vllm"</span>
|
||||
<a id="__codelineno-1-22" name="__codelineno-1-22" href="#__codelineno-1-22"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"serve"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-23" name="__codelineno-1-23" href="#__codelineno-1-23"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-24" name="__codelineno-1-24" href="#__codelineno-1-24"></a><span class="w"> </span><span class="nt">docker</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-25" name="__codelineno-1-25" href="#__codelineno-1-25"></a><span class="w"> </span><span class="nt">enabled</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||
<a id="__codelineno-1-26" name="__codelineno-1-26" href="#__codelineno-1-26"></a><span class="w"> </span><span class="nt">image</span><span class="p">:</span><span class="w"> </span><span class="s">"vllm/vllm-openai:latest"</span>
|
||||
<a id="__codelineno-1-27" name="__codelineno-1-27" href="#__codelineno-1-27"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"run"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--rm"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--network"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"host"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--gpus"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"all"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"--shm-size"</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="s">"1g"</span><span class="p p-Indicator">]</span>
|
||||
<a id="__codelineno-1-28" name="__codelineno-1-28" href="#__codelineno-1-28"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span>
|
||||
<a id="__codelineno-1-29" name="__codelineno-1-29" href="#__codelineno-1-29"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-30" name="__codelineno-1-30" href="#__codelineno-1-30"></a>
|
||||
<a id="__codelineno-1-31" name="__codelineno-1-31" href="#__codelineno-1-31"></a><span class="w"> </span><span class="nt">mlx</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-32" name="__codelineno-1-32" href="#__codelineno-1-32"></a><span class="w"> </span><span class="nt">command</span><span class="p">:</span><span class="w"> </span><span class="s">"mlx_lm.server"</span>
|
||||
<a id="__codelineno-1-33" name="__codelineno-1-33" href="#__codelineno-1-33"></a><span class="w"> </span><span class="nt">args</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span>
|
||||
<a id="__codelineno-1-34" name="__codelineno-1-34" href="#__codelineno-1-34"></a><span class="w"> </span><span class="nt">environment</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Environment variables for the backend process</span>
|
||||
<a id="__codelineno-1-35" name="__codelineno-1-35" href="#__codelineno-1-35"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||
<a id="__codelineno-1-36" name="__codelineno-1-36" href="#__codelineno-1-36"></a>
|
||||
<a id="__codelineno-1-37" name="__codelineno-1-37" href="#__codelineno-1-37"></a><span class="nt">instances</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-38" name="__codelineno-1-38" href="#__codelineno-1-38"></a><span class="w"> </span><span class="nt">port_range</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">8000</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">9000</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># Port range for instances</span>
|
||||
<a id="__codelineno-1-39" name="__codelineno-1-39" href="#__codelineno-1-39"></a><span class="w"> </span><span class="nt">data_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl</span><span class="w"> </span><span class="c1"># Data directory (platform-specific, see below)</span>
|
||||
<a id="__codelineno-1-40" name="__codelineno-1-40" href="#__codelineno-1-40"></a><span class="w"> </span><span class="nt">configs_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl/instances</span><span class="w"> </span><span class="c1"># Instance configs directory</span>
|
||||
<a id="__codelineno-1-41" name="__codelineno-1-41" href="#__codelineno-1-41"></a><span class="w"> </span><span class="nt">logs_dir</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">~/.local/share/llamactl/logs</span><span class="w"> </span><span class="c1"># Logs directory</span>
|
||||
<a id="__codelineno-1-42" name="__codelineno-1-42" href="#__codelineno-1-42"></a><span class="w"> </span><span class="nt">auto_create_dirs</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Auto-create data/config/logs dirs if missing</span>
|
||||
<a id="__codelineno-1-43" name="__codelineno-1-43" href="#__codelineno-1-43"></a><span class="w"> </span><span class="nt">max_instances</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">-1</span><span class="w"> </span><span class="c1"># Max instances (-1 = unlimited)</span>
|
||||
<a id="__codelineno-1-44" name="__codelineno-1-44" href="#__codelineno-1-44"></a><span class="w"> </span><span class="nt">max_running_instances</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">-1</span><span class="w"> </span><span class="c1"># Max running instances (-1 = unlimited)</span>
|
||||
<a id="__codelineno-1-45" name="__codelineno-1-45" href="#__codelineno-1-45"></a><span class="w"> </span><span class="nt">enable_lru_eviction</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Enable LRU eviction for idle instances</span>
|
||||
<a id="__codelineno-1-46" name="__codelineno-1-46" href="#__codelineno-1-46"></a><span class="w"> </span><span class="nt">default_auto_restart</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Auto-restart new instances by default</span>
|
||||
<a id="__codelineno-1-47" name="__codelineno-1-47" href="#__codelineno-1-47"></a><span class="w"> </span><span class="nt">default_max_restarts</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">3</span><span class="w"> </span><span class="c1"># Max restarts for new instances</span>
|
||||
<a id="__codelineno-1-48" name="__codelineno-1-48" href="#__codelineno-1-48"></a><span class="w"> </span><span class="nt">default_restart_delay</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span><span class="w"> </span><span class="c1"># Restart delay (seconds) for new instances</span>
|
||||
<a id="__codelineno-1-49" name="__codelineno-1-49" href="#__codelineno-1-49"></a><span class="w"> </span><span class="nt">default_on_demand_start</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Default on-demand start setting</span>
|
||||
<a id="__codelineno-1-50" name="__codelineno-1-50" href="#__codelineno-1-50"></a><span class="w"> </span><span class="nt">on_demand_start_timeout</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">120</span><span class="w"> </span><span class="c1"># Default on-demand start timeout in seconds</span>
|
||||
<a id="__codelineno-1-51" name="__codelineno-1-51" href="#__codelineno-1-51"></a><span class="w"> </span><span class="nt">timeout_check_interval</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">5</span><span class="w"> </span><span class="c1"># Idle instance timeout check in minutes</span>
|
||||
<a id="__codelineno-1-52" name="__codelineno-1-52" href="#__codelineno-1-52"></a>
|
||||
<a id="__codelineno-1-53" name="__codelineno-1-53" href="#__codelineno-1-53"></a><span class="nt">auth</span><span class="p">:</span>
|
||||
<a id="__codelineno-1-54" name="__codelineno-1-54" href="#__codelineno-1-54"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for inference endpoints</span>
|
||||
<a id="__codelineno-1-55" name="__codelineno-1-55" href="#__codelineno-1-55"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for inference endpoints</span>
|
||||
<a id="__codelineno-1-56" name="__codelineno-1-56" href="#__codelineno-1-56"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for management endpoints</span>
|
||||
<a id="__codelineno-1-57" name="__codelineno-1-57" href="#__codelineno-1-57"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for management endpoints</span>
|
||||
</code></pre></div>
|
||||
<h2 id="configuration-files">Configuration Files<a class="headerlink" href="#configuration-files" title="Permanent link">¶</a></h2>
|
||||
<h3 id="configuration-file-locations">Configuration File Locations<a class="headerlink" href="#configuration-file-locations" title="Permanent link">¶</a></h3>
|
||||
@@ -924,7 +925,8 @@
|
||||
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span><span class="nt">host</span><span class="p">:</span><span class="w"> </span><span class="s">"0.0.0.0"</span><span class="w"> </span><span class="c1"># Server host to bind to (default: "0.0.0.0")</span>
|
||||
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span><span class="w"> </span><span class="c1"># Server port to bind to (default: 8080)</span>
|
||||
<a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a><span class="w"> </span><span class="nt">allowed_origins</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># CORS allowed origins (default: ["*"])</span>
|
||||
<a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI (default: false)</span>
|
||||
<a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="nt">allowed_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># CORS allowed headers (default: ["*"])</span>
|
||||
<a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI (default: false)</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Environment Variables:</strong>
|
||||
- <code>LLAMACTL_HOST</code> - Server host
|
||||
@@ -1068,7 +1070,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 29, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 4, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -2,37 +2,37 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/configuration/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/installation/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/quick-start/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/api-reference/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/managing-instances/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/troubleshooting/</loc>
|
||||
<lastmod>2025-09-29</lastmod>
|
||||
<lastmod>2025-10-04</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
</urlset>
|
||||
Binary file not shown.
Reference in New Issue
Block a user