mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 17:14:28 +00:00
Deployed cf20f30 to dev with MkDocs 1.5.3 and mike 2.0.0
This commit is contained in:
BIN
dev/__pycache__/fix_line_endings.cpython-311.pyc
Normal file
BIN
dev/__pycache__/fix_line_endings.cpython-311.pyc
Normal file
Binary file not shown.
Binary file not shown.
60
dev/fix_line_endings.py
Normal file
60
dev/fix_line_endings.py
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
"""
|
||||||
|
MkDocs hook to fix line endings for proper rendering.
|
||||||
|
Automatically adds two spaces at the end of lines that need line breaks.
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def on_page_markdown(markdown, page, config, **kwargs):
    """
    Fix line endings in markdown content for proper MkDocs rendering.

    Appends two trailing spaces (a Markdown hard line break) to every
    line of ordinary text so that consecutive source lines render on
    separate lines instead of being merged into one paragraph line.

    Lines left untouched:
      - fence lines (starting with ```) and everything between them
      - blank lines
      - headers (#), blockquotes (>), lines starting with '---' or '<'
      - any line containing '|' (treated as a table row)
      - lines ending with '>' (treated as HTML)
      - lines that already end with two spaces
      - lines consisting of a single punctuation character

    Args:
        markdown: Markdown source of the page, as a string.
        page: Unused; accepted to match the hook signature.
        config: Unused; accepted to match the hook signature.
        **kwargs: Ignored.

    Returns:
        The processed Markdown source as a single string.
    """
    # Two trailing spaces are what Markdown renders as <br />;
    # a single trailing space has no effect.
    hard_break = '  '
    skip_prefixes = ('#', '>', '---', '<')
    standalone_punctuation = ('.', '!', '?', ':', ';', ',')

    processed_lines = []
    in_code_block = False

    for line in markdown.split('\n'):
        stripped = line.strip()

        # A fence line toggles code-block state and is itself never modified.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            processed_lines.append(line)
            continue

        # Note: the '|' test matches a pipe anywhere in the line, so inline
        # text containing a pipe is skipped as if it were a table row.
        keep_as_is = (
            in_code_block
            or not stripped
            or stripped.startswith(skip_prefixes)
            or '|' in stripped
            or line.endswith(hard_break)
            or stripped.endswith('>')
            or stripped in standalone_punctuation
        )
        if keep_as_is:
            processed_lines.append(line)
            continue

        # Normalise whatever trailing whitespace exists to exactly two spaces.
        processed_lines.append(line.rstrip() + hard_break)

    return '\n'.join(processed_lines)
|
||||||
@@ -932,10 +932,10 @@
|
|||||||
<a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="nt">allowed_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># CORS allowed headers (default: ["*"])</span>
|
<a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a><span class="w"> </span><span class="nt">allowed_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="s">"*"</span><span class="p p-Indicator">]</span><span class="w"> </span><span class="c1"># CORS allowed headers (default: ["*"])</span>
|
||||||
<a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI (default: false)</span>
|
<a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a><span class="w"> </span><span class="nt">enable_swagger</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span><span class="w"> </span><span class="c1"># Enable Swagger UI (default: false)</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Environment Variables:</strong>
|
<p><strong>Environment Variables:</strong><br />
|
||||||
- <code>LLAMACTL_HOST</code> - Server host
|
- <code>LLAMACTL_HOST</code> - Server host<br />
|
||||||
- <code>LLAMACTL_PORT</code> - Server port
|
- <code>LLAMACTL_PORT</code> - Server port<br />
|
||||||
- <code>LLAMACTL_ALLOWED_ORIGINS</code> - Comma-separated CORS origins
|
- <code>LLAMACTL_ALLOWED_ORIGINS</code> - Comma-separated CORS origins<br />
|
||||||
- <code>LLAMACTL_ENABLE_SWAGGER</code> - Enable Swagger UI (true/false) </p>
|
- <code>LLAMACTL_ENABLE_SWAGGER</code> - Enable Swagger UI (true/false) </p>
|
||||||
<h3 id="backend-configuration">Backend Configuration<a class="headerlink" href="#backend-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="backend-configuration">Backend Configuration<a class="headerlink" href="#backend-configuration" title="Permanent link">¶</a></h3>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="nt">backends</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="nt">backends</span><span class="p">:</span>
|
||||||
@@ -968,42 +968,42 @@
|
|||||||
<a id="__codelineno-3-28" name="__codelineno-3-28" href="#__codelineno-3-28"></a><span class="w"> </span><span class="c1"># MLX does not support Docker</span>
|
<a id="__codelineno-3-28" name="__codelineno-3-28" href="#__codelineno-3-28"></a><span class="w"> </span><span class="c1"># MLX does not support Docker</span>
|
||||||
<a id="__codelineno-3-29" name="__codelineno-3-29" href="#__codelineno-3-29"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
<a id="__codelineno-3-29" name="__codelineno-3-29" href="#__codelineno-3-29"></a><span class="w"> </span><span class="nt">response_headers</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">{}</span><span class="w"> </span><span class="c1"># Additional response headers to send with responses</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Backend Configuration Fields:</strong>
|
<p><strong>Backend Configuration Fields:</strong><br />
|
||||||
- <code>command</code>: Executable name/path for the backend
|
- <code>command</code>: Executable name/path for the backend<br />
|
||||||
- <code>args</code>: Default arguments prepended to all instances
|
- <code>args</code>: Default arguments prepended to all instances<br />
|
||||||
- <code>environment</code>: Environment variables for the backend process (optional)
|
- <code>environment</code>: Environment variables for the backend process (optional)<br />
|
||||||
- <code>response_headers</code>: Additional response headers to send with responses (optional)
|
- <code>response_headers</code>: Additional response headers to send with responses (optional)<br />
|
||||||
- <code>docker</code>: Docker-specific configuration (optional)
|
- <code>docker</code>: Docker-specific configuration (optional)<br />
|
||||||
- <code>enabled</code>: Boolean flag to enable Docker runtime
|
- <code>enabled</code>: Boolean flag to enable Docker runtime<br />
|
||||||
- <code>image</code>: Docker image to use
|
- <code>image</code>: Docker image to use<br />
|
||||||
- <code>args</code>: Additional arguments passed to <code>docker run</code>
|
- <code>args</code>: Additional arguments passed to <code>docker run</code><br />
|
||||||
- <code>environment</code>: Environment variables for the container (optional) </p>
|
- <code>environment</code>: Environment variables for the container (optional) </p>
|
||||||
<blockquote>
|
<blockquote>
|
||||||
<p>If llamactl is behind an NGINX proxy, <code>X-Accel-Buffering: no</code> response header may be required for NGINX to properly stream the responses without buffering.</p>
|
<p>If llamactl is behind an NGINX proxy, <code>X-Accel-Buffering: no</code> response header may be required for NGINX to properly stream the responses without buffering.</p>
|
||||||
</blockquote>
|
</blockquote>
|
||||||
<p><strong>Environment Variables:</strong> </p>
|
<p><strong>Environment Variables:</strong> </p>
|
||||||
<p><strong>LlamaCpp Backend:</strong>
|
<p><strong>LlamaCpp Backend:</strong><br />
|
||||||
- <code>LLAMACTL_LLAMACPP_COMMAND</code> - LlamaCpp executable command
|
- <code>LLAMACTL_LLAMACPP_COMMAND</code> - LlamaCpp executable command<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_ARGS</code> - Space-separated default arguments
|
- <code>LLAMACTL_LLAMACPP_ARGS</code> - Space-separated default arguments<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"
|
- <code>LLAMACTL_LLAMACPP_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_DOCKER_ENABLED</code> - Enable Docker runtime (true/false)
|
- <code>LLAMACTL_LLAMACPP_DOCKER_ENABLED</code> - Enable Docker runtime (true/false)<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_DOCKER_IMAGE</code> - Docker image to use
|
- <code>LLAMACTL_LLAMACPP_DOCKER_IMAGE</code> - Docker image to use<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_DOCKER_ARGS</code> - Space-separated Docker arguments
|
- <code>LLAMACTL_LLAMACPP_DOCKER_ARGS</code> - Space-separated Docker arguments<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_DOCKER_ENV</code> - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
- <code>LLAMACTL_LLAMACPP_DOCKER_ENV</code> - Docker environment variables in format "KEY1=value1,KEY2=value2"<br />
|
||||||
- <code>LLAMACTL_LLAMACPP_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
- <code>LLAMACTL_LLAMACPP_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
||||||
<p><strong>VLLM Backend:</strong>
|
<p><strong>VLLM Backend:</strong><br />
|
||||||
- <code>LLAMACTL_VLLM_COMMAND</code> - VLLM executable command
|
- <code>LLAMACTL_VLLM_COMMAND</code> - VLLM executable command<br />
|
||||||
- <code>LLAMACTL_VLLM_ARGS</code> - Space-separated default arguments
|
- <code>LLAMACTL_VLLM_ARGS</code> - Space-separated default arguments<br />
|
||||||
- <code>LLAMACTL_VLLM_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"
|
- <code>LLAMACTL_VLLM_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"<br />
|
||||||
- <code>LLAMACTL_VLLM_DOCKER_ENABLED</code> - Enable Docker runtime (true/false)
|
- <code>LLAMACTL_VLLM_DOCKER_ENABLED</code> - Enable Docker runtime (true/false)<br />
|
||||||
- <code>LLAMACTL_VLLM_DOCKER_IMAGE</code> - Docker image to use
|
- <code>LLAMACTL_VLLM_DOCKER_IMAGE</code> - Docker image to use<br />
|
||||||
- <code>LLAMACTL_VLLM_DOCKER_ARGS</code> - Space-separated Docker arguments
|
- <code>LLAMACTL_VLLM_DOCKER_ARGS</code> - Space-separated Docker arguments<br />
|
||||||
- <code>LLAMACTL_VLLM_DOCKER_ENV</code> - Docker environment variables in format "KEY1=value1,KEY2=value2"
|
- <code>LLAMACTL_VLLM_DOCKER_ENV</code> - Docker environment variables in format "KEY1=value1,KEY2=value2"<br />
|
||||||
- <code>LLAMACTL_VLLM_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
- <code>LLAMACTL_VLLM_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
||||||
<p><strong>MLX Backend:</strong>
|
<p><strong>MLX Backend:</strong><br />
|
||||||
- <code>LLAMACTL_MLX_COMMAND</code> - MLX executable command
|
- <code>LLAMACTL_MLX_COMMAND</code> - MLX executable command<br />
|
||||||
- <code>LLAMACTL_MLX_ARGS</code> - Space-separated default arguments
|
- <code>LLAMACTL_MLX_ARGS</code> - Space-separated default arguments<br />
|
||||||
- <code>LLAMACTL_MLX_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"
|
- <code>LLAMACTL_MLX_ENV</code> - Environment variables in format "KEY1=value1,KEY2=value2"<br />
|
||||||
- <code>LLAMACTL_MLX_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
- <code>LLAMACTL_MLX_RESPONSE_HEADERS</code> - Response headers in format "KEY1=value1;KEY2=value2" </p>
|
||||||
<h3 id="instance-configuration">Instance Configuration<a class="headerlink" href="#instance-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="instance-configuration">Instance Configuration<a class="headerlink" href="#instance-configuration" title="Permanent link">¶</a></h3>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="nt">instances</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="nt">instances</span><span class="p">:</span>
|
||||||
@@ -1029,8 +1029,8 @@
|
|||||||
- <code>LLAMACTL_LOGS_DIR</code> - Log directory path<br />
|
- <code>LLAMACTL_LOGS_DIR</code> - Log directory path<br />
|
||||||
- <code>LLAMACTL_AUTO_CREATE_DATA_DIR</code> - Auto-create data/config/logs directories (true/false)<br />
|
- <code>LLAMACTL_AUTO_CREATE_DATA_DIR</code> - Auto-create data/config/logs directories (true/false)<br />
|
||||||
- <code>LLAMACTL_MAX_INSTANCES</code> - Maximum number of instances<br />
|
- <code>LLAMACTL_MAX_INSTANCES</code> - Maximum number of instances<br />
|
||||||
- <code>LLAMACTL_MAX_RUNNING_INSTANCES</code> - Maximum number of running instances
|
- <code>LLAMACTL_MAX_RUNNING_INSTANCES</code> - Maximum number of running instances<br />
|
||||||
- <code>LLAMACTL_ENABLE_LRU_EVICTION</code> - Enable LRU eviction for idle instances
|
- <code>LLAMACTL_ENABLE_LRU_EVICTION</code> - Enable LRU eviction for idle instances<br />
|
||||||
- <code>LLAMACTL_DEFAULT_AUTO_RESTART</code> - Default auto-restart setting (true/false)<br />
|
- <code>LLAMACTL_DEFAULT_AUTO_RESTART</code> - Default auto-restart setting (true/false)<br />
|
||||||
- <code>LLAMACTL_DEFAULT_MAX_RESTARTS</code> - Default maximum restarts<br />
|
- <code>LLAMACTL_DEFAULT_MAX_RESTARTS</code> - Default maximum restarts<br />
|
||||||
- <code>LLAMACTL_DEFAULT_RESTART_DELAY</code> - Default restart delay in seconds<br />
|
- <code>LLAMACTL_DEFAULT_RESTART_DELAY</code> - Default restart delay in seconds<br />
|
||||||
@@ -1044,10 +1044,10 @@
|
|||||||
<a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for management endpoints (default: true)</span>
|
<a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for management endpoints (default: true)</span>
|
||||||
<a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid management API keys</span>
|
<a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid management API keys</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Environment Variables:</strong>
|
<p><strong>Environment Variables:</strong><br />
|
||||||
- <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)
|
- <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)<br />
|
||||||
- <code>LLAMACTL_INFERENCE_KEYS</code> - Comma-separated inference API keys
|
- <code>LLAMACTL_INFERENCE_KEYS</code> - Comma-separated inference API keys<br />
|
||||||
- <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)
|
- <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)<br />
|
||||||
- <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys </p>
|
- <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys </p>
|
||||||
<h3 id="remote-node-configuration">Remote Node Configuration<a class="headerlink" href="#remote-node-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="remote-node-configuration">Remote Node Configuration<a class="headerlink" href="#remote-node-configuration" title="Permanent link">¶</a></h3>
|
||||||
<p>llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally. </p>
|
<p>llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally. </p>
|
||||||
@@ -1060,12 +1060,12 @@
|
|||||||
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">"http://192.168.1.10:8080"</span>
|
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">"http://192.168.1.10:8080"</span>
|
||||||
<a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"worker1-api-key"</span><span class="w"> </span><span class="c1"># Management API key for authentication</span>
|
<a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"worker1-api-key"</span><span class="w"> </span><span class="c1"># Management API key for authentication</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Node Configuration Fields:</strong>
|
<p><strong>Node Configuration Fields:</strong><br />
|
||||||
- <code>local_node</code>: Specifies which node in the <code>nodes</code> map represents the local node
|
- <code>local_node</code>: Specifies which node in the <code>nodes</code> map represents the local node<br />
|
||||||
- <code>nodes</code>: Map of node configurations
|
- <code>nodes</code>: Map of node configurations<br />
|
||||||
- <code>address</code>: HTTP/HTTPS URL of the remote node (empty for local node)
|
- <code>address</code>: HTTP/HTTPS URL of the remote node (empty for local node)<br />
|
||||||
- <code>api_key</code>: Management API key for authenticating with the remote node </p>
|
- <code>api_key</code>: Management API key for authenticating with the remote node </p>
|
||||||
<p><strong>Environment Variables:</strong>
|
<p><strong>Environment Variables:</strong><br />
|
||||||
- <code>LLAMACTL_LOCAL_NODE</code> - Name of the local node </p>
|
- <code>LLAMACTL_LOCAL_NODE</code> - Name of the local node </p>
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -936,9 +936,9 @@
|
|||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<h3 id="option-2-docker">Option 2: Docker<a class="headerlink" href="#option-2-docker" title="Permanent link">¶</a></h3>
|
<h3 id="option-2-docker">Option 2: Docker<a class="headerlink" href="#option-2-docker" title="Permanent link">¶</a></h3>
|
||||||
<p>llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend. </p>
|
<p>llamactl provides Dockerfiles for creating Docker images with backends pre-installed. The resulting images include the latest llamactl release with the respective backend. </p>
|
||||||
<p><strong>Available Dockerfiles (CUDA):</strong>
|
<p><strong>Available Dockerfiles (CUDA):</strong><br />
|
||||||
- <strong>llamactl with llama.cpp CUDA</strong>: <code>docker/Dockerfile.llamacpp</code> (based on <code>ghcr.io/ggml-org/llama.cpp:server-cuda</code>)
|
- <strong>llamactl with llama.cpp CUDA</strong>: <code>docker/Dockerfile.llamacpp</code> (based on <code>ghcr.io/ggml-org/llama.cpp:server-cuda</code>)<br />
|
||||||
- <strong>llamactl with vLLM CUDA</strong>: <code>docker/Dockerfile.vllm</code> (based on <code>vllm/vllm-openai:latest</code>)
|
- <strong>llamactl with vLLM CUDA</strong>: <code>docker/Dockerfile.vllm</code> (based on <code>vllm/vllm-openai:latest</code>)<br />
|
||||||
- <strong>llamactl built from source</strong>: <code>docker/Dockerfile.source</code> (multi-stage build with webui) </p>
|
- <strong>llamactl built from source</strong>: <code>docker/Dockerfile.source</code> (multi-stage build with webui) </p>
|
||||||
<p><strong>Note:</strong> These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at <a href="https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md">llama.cpp Docker docs</a>. For vLLM, check <a href="https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html">vLLM docs</a>. </p>
|
<p><strong>Note:</strong> These Dockerfiles are configured for CUDA. For other platforms (CPU, ROCm, Vulkan, etc.), adapt the base image. For llama.cpp, see available tags at <a href="https://github.com/ggml-org/llama.cpp/blob/master/docs/docker.md">llama.cpp Docker docs</a>. For vLLM, check <a href="https://docs.vllm.ai/en/v0.6.5/serving/deploying_with_docker.html">vLLM docs</a>. </p>
|
||||||
<h4 id="using-docker-compose">Using Docker Compose<a class="headerlink" href="#using-docker-compose" title="Permanent link">¶</a></h4>
|
<h4 id="using-docker-compose">Using Docker Compose<a class="headerlink" href="#using-docker-compose" title="Permanent link">¶</a></h4>
|
||||||
@@ -955,11 +955,11 @@
|
|||||||
<a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a><span class="c1"># Or start llamactl with vLLM backend</span>
|
<a id="__codelineno-4-11" name="__codelineno-4-11" href="#__codelineno-4-11"></a><span class="c1"># Or start llamactl with vLLM backend</span>
|
||||||
<a id="__codelineno-4-12" name="__codelineno-4-12" href="#__codelineno-4-12"></a>docker-compose<span class="w"> </span>-f<span class="w"> </span>docker/docker-compose.yml<span class="w"> </span>up<span class="w"> </span>llamactl-vllm<span class="w"> </span>-d
|
<a id="__codelineno-4-12" name="__codelineno-4-12" href="#__codelineno-4-12"></a>docker-compose<span class="w"> </span>-f<span class="w"> </span>docker/docker-compose.yml<span class="w"> </span>up<span class="w"> </span>llamactl-vllm<span class="w"> </span>-d
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p>Access the dashboard at:
|
<p>Access the dashboard at:<br />
|
||||||
- llamactl with llama.cpp: http://localhost:8080
|
- llamactl with llama.cpp: http://localhost:8080<br />
|
||||||
- llamactl with vLLM: http://localhost:8081 </p>
|
- llamactl with vLLM: http://localhost:8081 </p>
|
||||||
<h4 id="using-docker-build-and-run">Using Docker Build and Run<a class="headerlink" href="#using-docker-build-and-run" title="Permanent link">¶</a></h4>
|
<h4 id="using-docker-build-and-run">Using Docker Build and Run<a class="headerlink" href="#using-docker-build-and-run" title="Permanent link">¶</a></h4>
|
||||||
<p><strong>llamactl with llama.cpp CUDA:</strong>
|
<p><strong>llamactl with llama.cpp CUDA:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.llamacpp<span class="w"> </span>-t<span class="w"> </span>llamactl:llamacpp-cuda<span class="w"> </span>.
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.llamacpp<span class="w"> </span>-t<span class="w"> </span>llamactl:llamacpp-cuda<span class="w"> </span>.
|
||||||
<a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl-llamacpp<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl-llamacpp<span class="w"> </span><span class="se">\</span>
|
||||||
@@ -968,7 +968,7 @@
|
|||||||
<a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a><span class="w"> </span>-v<span class="w"> </span>~/.cache/llama.cpp:/root/.cache/llama.cpp<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-5-6" name="__codelineno-5-6" href="#__codelineno-5-6"></a><span class="w"> </span>-v<span class="w"> </span>~/.cache/llama.cpp:/root/.cache/llama.cpp<span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a><span class="w"> </span>llamactl:llamacpp-cuda
|
<a id="__codelineno-5-7" name="__codelineno-5-7" href="#__codelineno-5-7"></a><span class="w"> </span>llamactl:llamacpp-cuda
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>llamactl with vLLM CUDA:</strong>
|
<p><strong>llamactl with vLLM CUDA:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.vllm<span class="w"> </span>-t<span class="w"> </span>llamactl:vllm-cuda<span class="w"> </span>.
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.vllm<span class="w"> </span>-t<span class="w"> </span>llamactl:vllm-cuda<span class="w"> </span>.
|
||||||
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl-vllm<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl-vllm<span class="w"> </span><span class="se">\</span>
|
||||||
@@ -977,7 +977,7 @@
|
|||||||
<a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="w"> </span>-v<span class="w"> </span>~/.cache/huggingface:/root/.cache/huggingface<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="w"> </span>-v<span class="w"> </span>~/.cache/huggingface:/root/.cache/huggingface<span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="w"> </span>llamactl:vllm-cuda
|
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="w"> </span>llamactl:vllm-cuda
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>llamactl built from source:</strong>
|
<p><strong>llamactl built from source:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.source<span class="w"> </span>-t<span class="w"> </span>llamactl:source<span class="w"> </span>.
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>docker<span class="w"> </span>build<span class="w"> </span>-f<span class="w"> </span>docker/Dockerfile.source<span class="w"> </span>-t<span class="w"> </span>llamactl:source<span class="w"> </span>.
|
||||||
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a>docker<span class="w"> </span>run<span class="w"> </span>-d<span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl<span class="w"> </span><span class="se">\</span>
|
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span>--name<span class="w"> </span>llamactl<span class="w"> </span><span class="se">\</span>
|
||||||
@@ -985,9 +985,9 @@
|
|||||||
<a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a><span class="w"> </span>llamactl:source
|
<a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a><span class="w"> </span>llamactl:source
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<h3 id="option-3-build-from-source">Option 3: Build from Source<a class="headerlink" href="#option-3-build-from-source" title="Permanent link">¶</a></h3>
|
<h3 id="option-3-build-from-source">Option 3: Build from Source<a class="headerlink" href="#option-3-build-from-source" title="Permanent link">¶</a></h3>
|
||||||
<p>Requirements:
|
<p>Requirements:<br />
|
||||||
- Go 1.24 or later
|
- Go 1.24 or later<br />
|
||||||
- Node.js 22 or later
|
- Node.js 22 or later<br />
|
||||||
- Git </p>
|
- Git </p>
|
||||||
<p>If you prefer to build from source: </p>
|
<p>If you prefer to build from source: </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="c1"># Clone the repository</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="c1"># Clone the repository</span>
|
||||||
@@ -1001,8 +1001,8 @@
|
|||||||
<a id="__codelineno-8-9" name="__codelineno-8-9" href="#__codelineno-8-9"></a>go<span class="w"> </span>build<span class="w"> </span>-o<span class="w"> </span>llamactl<span class="w"> </span>./cmd/server
|
<a id="__codelineno-8-9" name="__codelineno-8-9" href="#__codelineno-8-9"></a>go<span class="w"> </span>build<span class="w"> </span>-o<span class="w"> </span>llamactl<span class="w"> </span>./cmd/server
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<h2 id="remote-node-installation">Remote Node Installation<a class="headerlink" href="#remote-node-installation" title="Permanent link">¶</a></h2>
|
<h2 id="remote-node-installation">Remote Node Installation<a class="headerlink" href="#remote-node-installation" title="Permanent link">¶</a></h2>
|
||||||
<p>For deployments with remote nodes:
|
<p>For deployments with remote nodes:<br />
|
||||||
- Install llamactl on each node using any of the methods above
|
- Install llamactl on each node using any of the methods above<br />
|
||||||
- Configure API keys for authentication between nodes </p>
|
- Configure API keys for authentication between nodes </p>
|
||||||
<h2 id="verification">Verification<a class="headerlink" href="#verification" title="Permanent link">¶</a></h2>
|
<h2 id="verification">Verification<a class="headerlink" href="#verification" title="Permanent link">¶</a></h2>
|
||||||
<p>Verify your installation by checking the version: </p>
|
<p>Verify your installation by checking the version: </p>
|
||||||
|
|||||||
@@ -916,7 +916,7 @@
|
|||||||
</ul>
|
</ul>
|
||||||
<h2 id="example-configurations">Example Configurations<a class="headerlink" href="#example-configurations" title="Permanent link">¶</a></h2>
|
<h2 id="example-configurations">Example Configurations<a class="headerlink" href="#example-configurations" title="Permanent link">¶</a></h2>
|
||||||
<p>Here are basic example configurations for each backend: </p>
|
<p>Here are basic example configurations for each backend: </p>
|
||||||
<p><strong>llama.cpp backend:</strong>
|
<p><strong>llama.cpp backend:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||||
@@ -928,7 +928,7 @@
|
|||||||
<a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a><span class="w"> </span><span class="p">}</span>
|
<a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a><span class="w"> </span><span class="p">}</span>
|
||||||
<a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a><span class="p">}</span>
|
<a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>MLX backend (macOS only):</strong>
|
<p><strong>MLX backend (macOS only):</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mistral-mlx"</span><span class="p">,</span>
|
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mistral-mlx"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||||
@@ -939,7 +939,7 @@
|
|||||||
<a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a><span class="w"> </span><span class="p">}</span>
|
<a id="__codelineno-3-8" name="__codelineno-3-8" href="#__codelineno-3-8"></a><span class="w"> </span><span class="p">}</span>
|
||||||
<a id="__codelineno-3-9" name="__codelineno-3-9" href="#__codelineno-3-9"></a><span class="p">}</span>
|
<a id="__codelineno-3-9" name="__codelineno-3-9" href="#__codelineno-3-9"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>vLLM backend:</strong>
|
<p><strong>vLLM backend:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"dialogpt-vllm"</span><span class="p">,</span>
|
<a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"dialogpt-vllm"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
<a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||||
|
|||||||
Binary file not shown.
@@ -1406,15 +1406,15 @@
|
|||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer <your-api-key>"</span><span class="w"> </span><span class="se">\</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer <your-api-key>"</span><span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances
|
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p>The server supports two types of API keys:
|
<p>The server supports two types of API keys:<br />
|
||||||
- <strong>Management API Keys</strong>: Required for instance management operations (CRUD operations on instances)
|
- <strong>Management API Keys</strong>: Required for instance management operations (CRUD operations on instances)<br />
|
||||||
- <strong>Inference API Keys</strong>: Required for OpenAI-compatible inference endpoints </p>
|
- <strong>Inference API Keys</strong>: Required for OpenAI-compatible inference endpoints </p>
|
||||||
<h2 id="system-endpoints">System Endpoints<a class="headerlink" href="#system-endpoints" title="Permanent link">¶</a></h2>
|
<h2 id="system-endpoints">System Endpoints<a class="headerlink" href="#system-endpoints" title="Permanent link">¶</a></h2>
|
||||||
<h3 id="get-llamactl-version">Get Llamactl Version<a class="headerlink" href="#get-llamactl-version" title="Permanent link">¶</a></h3>
|
<h3 id="get-llamactl-version">Get Llamactl Version<a class="headerlink" href="#get-llamactl-version" title="Permanent link">¶</a></h3>
|
||||||
<p>Get the version information of the llamactl server. </p>
|
<p>Get the version information of the llamactl server. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="err">GET /api/v1/version</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="err">GET /api/v1/version</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>Version: 1.0.0
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a>Version: 1.0.0
|
||||||
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a>Commit: abc123
|
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a>Commit: abc123
|
||||||
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a>Build Time: 2024-01-15T10:00:00Z
|
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a>Build Time: 2024-01-15T10:00:00Z
|
||||||
@@ -1439,7 +1439,7 @@
|
|||||||
<p>Get a list of all instances. </p>
|
<p>Get a list of all instances. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="err">GET /api/v1/instances</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="err">GET /api/v1/instances</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="p">[</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="p">[</span>
|
||||||
<a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="w"> </span><span class="p">{</span>
|
<a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="w"> </span><span class="p">{</span>
|
||||||
<a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
@@ -1452,7 +1452,7 @@
|
|||||||
<p>Get detailed information about a specific instance. </p>
|
<p>Get detailed information about a specific instance. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="err">GET /api/v1/instances/{name}</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="err">GET /api/v1/instances/{name}</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
<a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
||||||
@@ -1476,7 +1476,7 @@
|
|||||||
<li><code>nodes</code>: Array with single node name to deploy the instance to (for remote deployments) </li>
|
<li><code>nodes</code>: Array with single node name to deploy the instance to (for remote deployments) </li>
|
||||||
</ul>
|
</ul>
|
||||||
<p>See <a href="../managing-instances/">Managing Instances</a> for complete configuration options. </p>
|
<p>See <a href="../managing-instances/">Managing Instances</a> for complete configuration options. </p>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
<a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
||||||
@@ -1488,7 +1488,7 @@
|
|||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="err">PUT /api/v1/instances/{name}</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="err">PUT /api/v1/instances/{name}</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Request Body:</strong> JSON object with configuration fields to update. </p>
|
<p><strong>Request Body:</strong> JSON object with configuration fields to update. </p>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-14-2" name="__codelineno-14-2" href="#__codelineno-14-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-14-2" name="__codelineno-14-2" href="#__codelineno-14-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-14-3" name="__codelineno-14-3" href="#__codelineno-14-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
<a id="__codelineno-14-3" name="__codelineno-14-3" href="#__codelineno-14-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
||||||
@@ -1505,21 +1505,21 @@
|
|||||||
<p>Start a stopped instance. </p>
|
<p>Start a stopped instance. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-16-1" name="__codelineno-16-1" href="#__codelineno-16-1"></a><span class="err">POST /api/v1/instances/{name}/start</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-16-1" name="__codelineno-16-1" href="#__codelineno-16-1"></a><span class="err">POST /api/v1/instances/{name}/start</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-17-1" name="__codelineno-17-1" href="#__codelineno-17-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-17-1" name="__codelineno-17-1" href="#__codelineno-17-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-17-2" name="__codelineno-17-2" href="#__codelineno-17-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-17-2" name="__codelineno-17-2" href="#__codelineno-17-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-17-3" name="__codelineno-17-3" href="#__codelineno-17-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
<a id="__codelineno-17-3" name="__codelineno-17-3" href="#__codelineno-17-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-17-4" name="__codelineno-17-4" href="#__codelineno-17-4"></a><span class="w"> </span><span class="nt">"created"</span><span class="p">:</span><span class="w"> </span><span class="mi">1705312200</span>
|
<a id="__codelineno-17-4" name="__codelineno-17-4" href="#__codelineno-17-4"></a><span class="w"> </span><span class="nt">"created"</span><span class="p">:</span><span class="w"> </span><span class="mi">1705312200</span>
|
||||||
<a id="__codelineno-17-5" name="__codelineno-17-5" href="#__codelineno-17-5"></a><span class="p">}</span>
|
<a id="__codelineno-17-5" name="__codelineno-17-5" href="#__codelineno-17-5"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>Error Responses:</strong>
|
<p><strong>Error Responses:</strong><br />
|
||||||
- <code>409 Conflict</code>: Maximum number of running instances reached
|
- <code>409 Conflict</code>: Maximum number of running instances reached<br />
|
||||||
- <code>500 Internal Server Error</code>: Failed to start instance </p>
|
- <code>500 Internal Server Error</code>: Failed to start instance </p>
|
||||||
<h3 id="stop-instance">Stop Instance<a class="headerlink" href="#stop-instance" title="Permanent link">¶</a></h3>
|
<h3 id="stop-instance">Stop Instance<a class="headerlink" href="#stop-instance" title="Permanent link">¶</a></h3>
|
||||||
<p>Stop a running instance. </p>
|
<p>Stop a running instance. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-18-1" name="__codelineno-18-1" href="#__codelineno-18-1"></a><span class="err">POST /api/v1/instances/{name}/stop</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-18-1" name="__codelineno-18-1" href="#__codelineno-18-1"></a><span class="err">POST /api/v1/instances/{name}/stop</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-19-1" name="__codelineno-19-1" href="#__codelineno-19-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-19-1" name="__codelineno-19-1" href="#__codelineno-19-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-19-2" name="__codelineno-19-2" href="#__codelineno-19-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-19-2" name="__codelineno-19-2" href="#__codelineno-19-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-19-3" name="__codelineno-19-3" href="#__codelineno-19-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"stopped"</span><span class="p">,</span>
|
<a id="__codelineno-19-3" name="__codelineno-19-3" href="#__codelineno-19-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"stopped"</span><span class="p">,</span>
|
||||||
@@ -1530,7 +1530,7 @@
|
|||||||
<p>Restart an instance (stop then start). </p>
|
<p>Restart an instance (stop then start). </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-20-1" name="__codelineno-20-1" href="#__codelineno-20-1"></a><span class="err">POST /api/v1/instances/{name}/restart</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-20-1" name="__codelineno-20-1" href="#__codelineno-20-1"></a><span class="err">POST /api/v1/instances/{name}/restart</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-21-1" name="__codelineno-21-1" href="#__codelineno-21-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-21-1" name="__codelineno-21-1" href="#__codelineno-21-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-21-2" name="__codelineno-21-2" href="#__codelineno-21-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-21-2" name="__codelineno-21-2" href="#__codelineno-21-2"></a><span class="w"> </span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-21-3" name="__codelineno-21-3" href="#__codelineno-21-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
<a id="__codelineno-21-3" name="__codelineno-21-3" href="#__codelineno-21-3"></a><span class="w"> </span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"running"</span><span class="p">,</span>
|
||||||
@@ -1541,10 +1541,10 @@
|
|||||||
<p>Retrieve instance logs. </p>
|
<p>Retrieve instance logs. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-22-1" name="__codelineno-22-1" href="#__codelineno-22-1"></a><span class="err">GET /api/v1/instances/{name}/logs</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-22-1" name="__codelineno-22-1" href="#__codelineno-22-1"></a><span class="err">GET /api/v1/instances/{name}/logs</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Query Parameters:</strong>
|
<p><strong>Query Parameters:</strong><br />
|
||||||
- <code>lines</code>: Number of lines to return (default: all lines, use -1 for all) </p>
|
- <code>lines</code>: Number of lines to return (default: all lines, use -1 for all) </p>
|
||||||
<p><strong>Response:</strong> Plain text log output </p>
|
<p><strong>Response:</strong> Plain text log output </p>
|
||||||
<p><strong>Example:</strong>
|
<p><strong>Example:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-23-1" name="__codelineno-23-1" href="#__codelineno-23-1"></a>curl<span class="w"> </span><span class="s2">"http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-23-1" name="__codelineno-23-1" href="#__codelineno-23-1"></a>curl<span class="w"> </span><span class="s2">"http://localhost:8080/api/v1/instances/my-instance/logs?lines=100"</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<h3 id="proxy-to-instance">Proxy to Instance<a class="headerlink" href="#proxy-to-instance" title="Permanent link">¶</a></h3>
|
<h3 id="proxy-to-instance">Proxy to Instance<a class="headerlink" href="#proxy-to-instance" title="Permanent link">¶</a></h3>
|
||||||
@@ -1553,12 +1553,12 @@
|
|||||||
<a id="__codelineno-24-2" name="__codelineno-24-2" href="#__codelineno-24-2"></a><span class="err">POST /api/v1/instances/{name}/proxy/*</span>
|
<a id="__codelineno-24-2" name="__codelineno-24-2" href="#__codelineno-24-2"></a><span class="err">POST /api/v1/instances/{name}/proxy/*</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p>This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the <code>/api/v1/instances/{name}/proxy</code> prefix and forwards the remaining path to the instance. </p>
|
<p>This endpoint forwards all requests to the underlying llama-server instance running on its configured port. The proxy strips the <code>/api/v1/instances/{name}/proxy</code> prefix and forwards the remaining path to the instance. </p>
|
||||||
<p><strong>Example - Check Instance Health:</strong>
|
<p><strong>Example - Check Instance Health:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-25-1" name="__codelineno-25-1" href="#__codelineno-25-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-25-1" name="__codelineno-25-1" href="#__codelineno-25-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-25-2" name="__codelineno-25-2" href="#__codelineno-25-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/health
|
<a id="__codelineno-25-2" name="__codelineno-25-2" href="#__codelineno-25-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/health
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p>This forwards the request to <code>http://instance-host:instance-port/health</code> on the actual llama-server instance. </p>
|
<p>This forwards the request to <code>http://instance-host:instance-port/health</code> on the actual llama-server instance. </p>
|
||||||
<p><strong>Error Responses:</strong>
|
<p><strong>Error Responses:</strong><br />
|
||||||
- <code>503 Service Unavailable</code>: Instance is not running </p>
|
- <code>503 Service Unavailable</code>: Instance is not running </p>
|
||||||
<h2 id="openai-compatible-api">OpenAI-Compatible API<a class="headerlink" href="#openai-compatible-api" title="Permanent link">¶</a></h2>
|
<h2 id="openai-compatible-api">OpenAI-Compatible API<a class="headerlink" href="#openai-compatible-api" title="Permanent link">¶</a></h2>
|
||||||
<p>Llamactl provides OpenAI-compatible endpoints for inference operations. </p>
|
<p>Llamactl provides OpenAI-compatible endpoints for inference operations. </p>
|
||||||
@@ -1566,7 +1566,7 @@
|
|||||||
<p>List all instances in OpenAI-compatible format. </p>
|
<p>List all instances in OpenAI-compatible format. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-26-1" name="__codelineno-26-1" href="#__codelineno-26-1"></a><span class="err">GET /v1/models</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-26-1" name="__codelineno-26-1" href="#__codelineno-26-1"></a><span class="err">GET /v1/models</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-27-1" name="__codelineno-27-1" href="#__codelineno-27-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-27-1" name="__codelineno-27-1" href="#__codelineno-27-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-27-2" name="__codelineno-27-2" href="#__codelineno-27-2"></a><span class="w"> </span><span class="nt">"object"</span><span class="p">:</span><span class="w"> </span><span class="s2">"list"</span><span class="p">,</span>
|
<a id="__codelineno-27-2" name="__codelineno-27-2" href="#__codelineno-27-2"></a><span class="w"> </span><span class="nt">"object"</span><span class="p">:</span><span class="w"> </span><span class="s2">"list"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-27-3" name="__codelineno-27-3" href="#__codelineno-27-3"></a><span class="w"> </span><span class="nt">"data"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
<a id="__codelineno-27-3" name="__codelineno-27-3" href="#__codelineno-27-3"></a><span class="w"> </span><span class="nt">"data"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||||||
@@ -1588,7 +1588,7 @@
|
|||||||
<a id="__codelineno-28-5" name="__codelineno-28-5" href="#__codelineno-28-5"></a><span class="err">POST /v1/reranking</span>
|
<a id="__codelineno-28-5" name="__codelineno-28-5" href="#__codelineno-28-5"></a><span class="err">POST /v1/reranking</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Request Body:</strong> Standard OpenAI format with <code>model</code> field specifying the instance name </p>
|
<p><strong>Request Body:</strong> Standard OpenAI format with <code>model</code> field specifying the instance name </p>
|
||||||
<p><strong>Example:</strong>
|
<p><strong>Example:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-29-1" name="__codelineno-29-1" href="#__codelineno-29-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-29-1" name="__codelineno-29-1" href="#__codelineno-29-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-29-2" name="__codelineno-29-2" href="#__codelineno-29-2"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
<a id="__codelineno-29-2" name="__codelineno-29-2" href="#__codelineno-29-2"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama2-7b"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-29-3" name="__codelineno-29-3" href="#__codelineno-29-3"></a><span class="w"> </span><span class="nt">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
<a id="__codelineno-29-3" name="__codelineno-29-3" href="#__codelineno-29-3"></a><span class="w"> </span><span class="nt">"messages"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span>
|
||||||
@@ -1600,14 +1600,14 @@
|
|||||||
<a id="__codelineno-29-9" name="__codelineno-29-9" href="#__codelineno-29-9"></a><span class="p">}</span>
|
<a id="__codelineno-29-9" name="__codelineno-29-9" href="#__codelineno-29-9"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p>The server routes requests to the appropriate instance based on the <code>model</code> field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see <a href="../managing-instances/">Managing Instances</a>. </p>
|
<p>The server routes requests to the appropriate instance based on the <code>model</code> field in the request body. Instances with on-demand starting enabled will be automatically started if not running. For configuration details, see <a href="../managing-instances/">Managing Instances</a>. </p>
|
||||||
<p><strong>Error Responses:</strong>
|
<p><strong>Error Responses:</strong><br />
|
||||||
- <code>400 Bad Request</code>: Invalid request body or missing instance name
|
- <code>400 Bad Request</code>: Invalid request body or missing instance name<br />
|
||||||
- <code>503 Service Unavailable</code>: Instance is not running and on-demand start is disabled
|
- <code>503 Service Unavailable</code>: Instance is not running and on-demand start is disabled<br />
|
||||||
- <code>409 Conflict</code>: Cannot start instance due to maximum instances limit </p>
|
- <code>409 Conflict</code>: Cannot start instance due to maximum instances limit </p>
|
||||||
<h2 id="instance-status-values">Instance Status Values<a class="headerlink" href="#instance-status-values" title="Permanent link">¶</a></h2>
|
<h2 id="instance-status-values">Instance Status Values<a class="headerlink" href="#instance-status-values" title="Permanent link">¶</a></h2>
|
||||||
<p>Instances can have the following status values:
|
<p>Instances can have the following status values:<br />
|
||||||
- <code>stopped</code>: Instance is not running
|
- <code>stopped</code>: Instance is not running<br />
|
||||||
- <code>running</code>: Instance is running and ready to accept requests
|
- <code>running</code>: Instance is running and ready to accept requests<br />
|
||||||
- <code>failed</code>: Instance failed to start or crashed </p>
|
- <code>failed</code>: Instance failed to start or crashed </p>
|
||||||
<h2 id="error-responses">Error Responses<a class="headerlink" href="#error-responses" title="Permanent link">¶</a></h2>
|
<h2 id="error-responses">Error Responses<a class="headerlink" href="#error-responses" title="Permanent link">¶</a></h2>
|
||||||
<p>All endpoints may return error responses in the following format: </p>
|
<p>All endpoints may return error responses in the following format: </p>
|
||||||
@@ -1721,12 +1721,12 @@
|
|||||||
<p>Parse a llama-server command string into instance options. </p>
|
<p>Parse a llama-server command string into instance options. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Request Body:</strong>
|
<p><strong>Request Body:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
||||||
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="p">}</span>
|
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-36-2" name="__codelineno-36-2" href="#__codelineno-36-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
<a id="__codelineno-36-2" name="__codelineno-36-2" href="#__codelineno-36-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-36-3" name="__codelineno-36-3" href="#__codelineno-36-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
<a id="__codelineno-36-3" name="__codelineno-36-3" href="#__codelineno-36-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
@@ -1740,12 +1740,12 @@
|
|||||||
<p>Parse an MLX-LM server command string into instance options. </p>
|
<p>Parse an MLX-LM server command string into instance options. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Request Body:</strong>
|
<p><strong>Request Body:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
||||||
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="p">}</span>
|
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-39-2" name="__codelineno-39-2" href="#__codelineno-39-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
<a id="__codelineno-39-2" name="__codelineno-39-2" href="#__codelineno-39-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-39-3" name="__codelineno-39-3" href="#__codelineno-39-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
<a id="__codelineno-39-3" name="__codelineno-39-3" href="#__codelineno-39-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
@@ -1758,12 +1758,12 @@
|
|||||||
<p>Parse a vLLM serve command string into instance options. </p>
|
<p>Parse a vLLM serve command string into instance options. </p>
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p><strong>Request Body:</strong>
|
<p><strong>Request Body:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
||||||
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="p">}</span>
|
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>Response:</strong>
|
<p><strong>Response:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-42-2" name="__codelineno-42-2" href="#__codelineno-42-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
<a id="__codelineno-42-2" name="__codelineno-42-2" href="#__codelineno-42-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||||
<a id="__codelineno-42-3" name="__codelineno-42-3" href="#__codelineno-42-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
<a id="__codelineno-42-3" name="__codelineno-42-3" href="#__codelineno-42-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||||
@@ -1772,8 +1772,8 @@
|
|||||||
<a id="__codelineno-42-6" name="__codelineno-42-6" href="#__codelineno-42-6"></a><span class="w"> </span><span class="p">}</span>
|
<a id="__codelineno-42-6" name="__codelineno-42-6" href="#__codelineno-42-6"></a><span class="w"> </span><span class="p">}</span>
|
||||||
<a id="__codelineno-42-7" name="__codelineno-42-7" href="#__codelineno-42-7"></a><span class="p">}</span>
|
<a id="__codelineno-42-7" name="__codelineno-42-7" href="#__codelineno-42-7"></a><span class="p">}</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<p><strong>Error Responses for Parse Commands:</strong>
|
<p><strong>Error Responses for Parse Commands:</strong><br />
|
||||||
- <code>400 Bad Request</code>: Invalid request body, empty command, or parse error
|
- <code>400 Bad Request</code>: Invalid request body, empty command, or parse error<br />
|
||||||
- <code>500 Internal Server Error</code>: Encoding error </p>
|
- <code>500 Internal Server Error</code>: Encoding error </p>
|
||||||
<h2 id="auto-generated-documentation">Auto-Generated Documentation<a class="headerlink" href="#auto-generated-documentation" title="Permanent link">¶</a></h2>
|
<h2 id="auto-generated-documentation">Auto-Generated Documentation<a class="headerlink" href="#auto-generated-documentation" title="Permanent link">¶</a></h2>
|
||||||
<p>The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation: </p>
|
<p>The API documentation is automatically generated from code annotations using Swagger/OpenAPI. To regenerate the documentation: </p>
|
||||||
|
|||||||
@@ -1237,9 +1237,9 @@
|
|||||||
</ul>
|
</ul>
|
||||||
<p><img alt="Dashboard Screenshot" src="../../images/dashboard.png" /> </p>
|
<p><img alt="Dashboard Screenshot" src="../../images/dashboard.png" /> </p>
|
||||||
<h3 id="authentication">Authentication<a class="headerlink" href="#authentication" title="Permanent link">¶</a></h3>
|
<h3 id="authentication">Authentication<a class="headerlink" href="#authentication" title="Permanent link">¶</a></h3>
|
||||||
<p>If authentication is enabled:
|
<p>If authentication is enabled:<br />
|
||||||
1. Navigate to the web UI
|
1. Navigate to the web UI<br />
|
||||||
2. Enter your credentials
|
2. Enter your credentials<br />
|
||||||
3. Bearer token is stored for the session </p>
|
3. Bearer token is stored for the session </p>
|
||||||
<h3 id="theme-support">Theme Support<a class="headerlink" href="#theme-support" title="Permanent link">¶</a></h3>
|
<h3 id="theme-support">Theme Support<a class="headerlink" href="#theme-support" title="Permanent link">¶</a></h3>
|
||||||
<ul>
|
<ul>
|
||||||
@@ -1430,9 +1430,9 @@
|
|||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="c1"># Get instance details</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="c1"># Get instance details</span>
|
||||||
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>curl<span class="w"> </span>http://localhost:8080/api/instances/<span class="o">{</span>name<span class="o">}</span>/proxy/
|
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a>curl<span class="w"> </span>http://localhost:8080/api/instances/<span class="o">{</span>name<span class="o">}</span>/proxy/
|
||||||
</code></pre></div>
|
</code></pre></div>
|
||||||
<p>All backends provide OpenAI-compatible endpoints. Check the respective documentation:
|
<p>All backends provide OpenAI-compatible endpoints. Check the respective documentation:<br />
|
||||||
- <a href="https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md">llama-server docs</a>
|
- <a href="https://github.com/ggml-org/llama.cpp/blob/master/tools/server/README.md">llama-server docs</a><br />
|
||||||
- <a href="https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md">MLX-LM docs</a>
|
- <a href="https://github.com/ml-explore/mlx-lm/blob/main/mlx_lm/SERVER.md">MLX-LM docs</a><br />
|
||||||
- <a href="https://docs.vllm.ai/en/latest/">vLLM docs</a> </p>
|
- <a href="https://docs.vllm.ai/en/latest/">vLLM docs</a> </p>
|
||||||
<h3 id="instance-health">Instance Health<a class="headerlink" href="#instance-health" title="Permanent link">¶</a></h3>
|
<h3 id="instance-health">Instance Health<a class="headerlink" href="#instance-health" title="Permanent link">¶</a></h3>
|
||||||
<h4 id="via-web-ui_6">Via Web UI<a class="headerlink" href="#via-web-ui_6" title="Permanent link">¶</a></h4>
|
<h4 id="via-web-ui_6">Via Web UI<a class="headerlink" href="#via-web-ui_6" title="Permanent link">¶</a></h4>
|
||||||
|
|||||||
@@ -1002,8 +1002,8 @@
|
|||||||
<h2 id="configuration-issues">Configuration Issues<a class="headerlink" href="#configuration-issues" title="Permanent link">¶</a></h2>
|
<h2 id="configuration-issues">Configuration Issues<a class="headerlink" href="#configuration-issues" title="Permanent link">¶</a></h2>
|
||||||
<h3 id="invalid-configuration">Invalid Configuration<a class="headerlink" href="#invalid-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="invalid-configuration">Invalid Configuration<a class="headerlink" href="#invalid-configuration" title="Permanent link">¶</a></h3>
|
||||||
<p><strong>Problem:</strong> Invalid configuration preventing startup </p>
|
<p><strong>Problem:</strong> Invalid configuration preventing startup </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. Use minimal configuration:
|
1. Use minimal configuration:<br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nt">server</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nt">server</span><span class="p">:</span>
|
||||||
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="w"> </span><span class="nt">host</span><span class="p">:</span><span class="w"> </span><span class="s">"0.0.0.0"</span>
|
<a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="w"> </span><span class="nt">host</span><span class="p">:</span><span class="w"> </span><span class="s">"0.0.0.0"</span>
|
||||||
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span>
|
<a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="w"> </span><span class="nt">port</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">8080</span>
|
||||||
@@ -1011,7 +1011,7 @@
|
|||||||
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="w"> </span><span class="nt">port_range</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">8000</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">9000</span><span class="p p-Indicator">]</span>
|
<a id="__codelineno-0-5" name="__codelineno-0-5" href="#__codelineno-0-5"></a><span class="w"> </span><span class="nt">port_range</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[</span><span class="nv">8000</span><span class="p p-Indicator">,</span><span class="w"> </span><span class="nv">9000</span><span class="p p-Indicator">]</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<ol>
|
<ol>
|
||||||
<li>Check data directory permissions:
|
<li>Check data directory permissions:<br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="c1"># Ensure data directory is writable (default: ~/.local/share/llamactl)</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="c1"># Ensure data directory is writable (default: ~/.local/share/llamactl)</span>
|
||||||
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>mkdir<span class="w"> </span>-p<span class="w"> </span>~/.local/share/llamactl/<span class="o">{</span>instances,logs<span class="o">}</span>
|
<a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a>mkdir<span class="w"> </span>-p<span class="w"> </span>~/.local/share/llamactl/<span class="o">{</span>instances,logs<span class="o">}</span>
|
||||||
</code></pre></div></li>
|
</code></pre></div></li>
|
||||||
@@ -1026,8 +1026,8 @@
|
|||||||
- <strong>Path issues:</strong> Use absolute paths to model files </p>
|
- <strong>Path issues:</strong> Use absolute paths to model files </p>
|
||||||
<h3 id="memory-issues">Memory Issues<a class="headerlink" href="#memory-issues" title="Permanent link">¶</a></h3>
|
<h3 id="memory-issues">Memory Issues<a class="headerlink" href="#memory-issues" title="Permanent link">¶</a></h3>
|
||||||
<p><strong>Problem:</strong> Out of memory errors or system becomes unresponsive </p>
|
<p><strong>Problem:</strong> Out of memory errors or system becomes unresponsive </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. <strong>Reduce context size:</strong>
|
1. <strong>Reduce context size:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span><span class="nt">"n_ctx"</span><span class="p">:</span><span class="w"> </span><span class="mi">1024</span>
|
<a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a><span class="w"> </span><span class="nt">"n_ctx"</span><span class="p">:</span><span class="w"> </span><span class="mi">1024</span>
|
||||||
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="p">}</span>
|
<a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="p">}</span>
|
||||||
@@ -1039,8 +1039,8 @@
|
|||||||
</ol>
|
</ol>
|
||||||
<h3 id="gpu-configuration">GPU Configuration<a class="headerlink" href="#gpu-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="gpu-configuration">GPU Configuration<a class="headerlink" href="#gpu-configuration" title="Permanent link">¶</a></h3>
|
||||||
<p><strong>Problem:</strong> GPU not being used effectively </p>
|
<p><strong>Problem:</strong> GPU not being used effectively </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. <strong>Configure GPU layers:</strong>
|
1. <strong>Configure GPU layers:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="p">{</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="p">{</span>
|
||||||
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span><span class="nt">"n_gpu_layers"</span><span class="p">:</span><span class="w"> </span><span class="mi">35</span>
|
<a id="__codelineno-3-2" name="__codelineno-3-2" href="#__codelineno-3-2"></a><span class="w"> </span><span class="nt">"n_gpu_layers"</span><span class="p">:</span><span class="w"> </span><span class="mi">35</span>
|
||||||
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="p">}</span>
|
<a id="__codelineno-3-3" name="__codelineno-3-3" href="#__codelineno-3-3"></a><span class="p">}</span>
|
||||||
@@ -1060,8 +1060,8 @@
|
|||||||
<h2 id="api-and-network-issues">API and Network Issues<a class="headerlink" href="#api-and-network-issues" title="Permanent link">¶</a></h2>
|
<h2 id="api-and-network-issues">API and Network Issues<a class="headerlink" href="#api-and-network-issues" title="Permanent link">¶</a></h2>
|
||||||
<h3 id="cors-errors">CORS Errors<a class="headerlink" href="#cors-errors" title="Permanent link">¶</a></h3>
|
<h3 id="cors-errors">CORS Errors<a class="headerlink" href="#cors-errors" title="Permanent link">¶</a></h3>
|
||||||
<p><strong>Problem:</strong> Web UI shows CORS errors in browser console </p>
|
<p><strong>Problem:</strong> Web UI shows CORS errors in browser console </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. <strong>Configure allowed origins:</strong>
|
1. <strong>Configure allowed origins:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="nt">server</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="nt">server</span><span class="p">:</span>
|
||||||
<a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="w"> </span><span class="nt">allowed_origins</span><span class="p">:</span>
|
<a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="w"> </span><span class="nt">allowed_origins</span><span class="p">:</span>
|
||||||
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://localhost:3000"</span>
|
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"http://localhost:3000"</span>
|
||||||
@@ -1069,15 +1069,15 @@
|
|||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<h2 id="authentication-issues">Authentication Issues<a class="headerlink" href="#authentication-issues" title="Permanent link">¶</a></h2>
|
<h2 id="authentication-issues">Authentication Issues<a class="headerlink" href="#authentication-issues" title="Permanent link">¶</a></h2>
|
||||||
<p><strong>Problem:</strong> API requests failing with authentication errors </p>
|
<p><strong>Problem:</strong> API requests failing with authentication errors </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. <strong>Disable authentication temporarily:</strong>
|
1. <strong>Disable authentication temporarily:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="nt">auth</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="nt">auth</span><span class="p">:</span>
|
||||||
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||||
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<ol>
|
<ol>
|
||||||
<li>
|
<li>
|
||||||
<p><strong>Configure API keys:</strong>
|
<p><strong>Configure API keys:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="nt">auth</span><span class="p">:</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="nt">auth</span><span class="p">:</span>
|
||||||
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span>
|
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span>
|
||||||
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"your-management-key"</span>
|
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">"your-management-key"</span>
|
||||||
@@ -1086,7 +1086,7 @@
|
|||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
<p><strong>Use correct Authorization header:</strong>
|
<p><strong>Use correct Authorization header:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances
|
<a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
@@ -1095,8 +1095,8 @@
|
|||||||
<h2 id="remote-node-issues">Remote Node Issues<a class="headerlink" href="#remote-node-issues" title="Permanent link">¶</a></h2>
|
<h2 id="remote-node-issues">Remote Node Issues<a class="headerlink" href="#remote-node-issues" title="Permanent link">¶</a></h2>
|
||||||
<h3 id="node-configuration">Node Configuration<a class="headerlink" href="#node-configuration" title="Permanent link">¶</a></h3>
|
<h3 id="node-configuration">Node Configuration<a class="headerlink" href="#node-configuration" title="Permanent link">¶</a></h3>
|
||||||
<p><strong>Problem:</strong> Remote instances not appearing or cannot be managed </p>
|
<p><strong>Problem:</strong> Remote instances not appearing or cannot be managed </p>
|
||||||
<p><strong>Solutions:</strong>
|
<p><strong>Solutions:</strong><br />
|
||||||
1. <strong>Verify node configuration:</strong>
|
1. <strong>Verify node configuration:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">"main"</span><span class="w"> </span><span class="c1"># Must match a key in nodes map</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">"main"</span><span class="w"> </span><span class="c1"># Must match a key in nodes map</span>
|
||||||
<a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a><span class="nt">nodes</span><span class="p">:</span>
|
<a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a><span class="nt">nodes</span><span class="p">:</span>
|
||||||
<a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span>
|
<a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span>
|
||||||
@@ -1106,7 +1106,7 @@
|
|||||||
<a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"secure-key"</span><span class="w"> </span><span class="c1"># Must match worker1's management key</span>
|
<a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"secure-key"</span><span class="w"> </span><span class="c1"># Must match worker1's management key</span>
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
<ol>
|
<ol>
|
||||||
<li><strong>Test remote node connectivity:</strong>
|
<li><strong>Test remote node connectivity:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer remote-node-key"</span><span class="w"> </span><span class="se">\</span>
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer remote-node-key"</span><span class="w"> </span><span class="se">\</span>
|
||||||
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a><span class="w"> </span>http://remote-node:8080/api/v1/instances
|
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a><span class="w"> </span>http://remote-node:8080/api/v1/instances
|
||||||
</code></pre></div></li>
|
</code></pre></div></li>
|
||||||
@@ -1127,7 +1127,7 @@
|
|||||||
<p>When reporting issues, include: </p>
|
<p>When reporting issues, include: </p>
|
||||||
<ol>
|
<ol>
|
||||||
<li>
|
<li>
|
||||||
<p><strong>System information:</strong>
|
<p><strong>System information:</strong><br />
|
||||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a>llamactl<span class="w"> </span>--version
|
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a>llamactl<span class="w"> </span>--version
|
||||||
</code></pre></div></p>
|
</code></pre></div></p>
|
||||||
</li>
|
</li>
|
||||||
|
|||||||
Reference in New Issue
Block a user