mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 17:14:28 +00:00
Deployed a31af94 to dev with MkDocs 1.5.3 and mike 2.0.0
This commit is contained in:
@@ -842,6 +842,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-instance-example" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Instance Example
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -1268,6 +1277,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-instance-example" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Instance Example
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -1455,6 +1473,7 @@
|
||||
<li><code>on_demand_start</code>: Start instance when receiving requests</li>
|
||||
<li><code>idle_timeout</code>: Idle timeout in minutes</li>
|
||||
<li><code>environment</code>: Environment variables as key-value pairs</li>
|
||||
<li><code>nodes</code>: Array with single node name to deploy the instance to (for remote deployments)</li>
|
||||
</ul>
|
||||
<p>See <a href="../managing-instances/">Managing Instances</a> for complete configuration options.</p>
|
||||
<p><strong>Response:</strong>
|
||||
@@ -1655,74 +1674,103 @@
|
||||
<a id="__codelineno-31-42" name="__codelineno-31-42" href="#__codelineno-31-42"></a>curl<span class="w"> </span>-X<span class="w"> </span>DELETE<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-31-43" name="__codelineno-31-43" href="#__codelineno-31-43"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
|
||||
</code></pre></div>
|
||||
<h3 id="using-the-proxy-endpoint">Using the Proxy Endpoint<a class="headerlink" href="#using-the-proxy-endpoint" title="Permanent link">¶</a></h3>
|
||||
<p>You can also directly proxy requests to the llama-server instance:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-32-1" name="__codelineno-32-1" href="#__codelineno-32-1"></a><span class="c1"># Direct proxy to instance (bypasses OpenAI compatibility layer)</span>
|
||||
<a id="__codelineno-32-2" name="__codelineno-32-2" href="#__codelineno-32-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/completion<span class="w"> </span><span class="se">\</span>
|
||||
<h3 id="remote-node-instance-example">Remote Node Instance Example<a class="headerlink" href="#remote-node-instance-example" title="Permanent link">¶</a></h3>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-32-1" name="__codelineno-32-1" href="#__codelineno-32-1"></a><span class="c1"># Create instance on specific remote node</span>
|
||||
<a id="__codelineno-32-2" name="__codelineno-32-2" href="#__codelineno-32-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/remote-model<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-3" name="__codelineno-32-3" href="#__codelineno-32-3"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-4" name="__codelineno-32-4" href="#__codelineno-32-4"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-5" name="__codelineno-32-5" href="#__codelineno-32-5"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-32-6" name="__codelineno-32-6" href="#__codelineno-32-6"></a><span class="s1"> "prompt": "Hello, world!",</span>
|
||||
<a id="__codelineno-32-7" name="__codelineno-32-7" href="#__codelineno-32-7"></a><span class="s1"> "n_predict": 50</span>
|
||||
<a id="__codelineno-32-8" name="__codelineno-32-8" href="#__codelineno-32-8"></a><span class="s1"> }'</span>
|
||||
<a id="__codelineno-32-6" name="__codelineno-32-6" href="#__codelineno-32-6"></a><span class="s1"> "backend_type": "llama_cpp",</span>
|
||||
<a id="__codelineno-32-7" name="__codelineno-32-7" href="#__codelineno-32-7"></a><span class="s1"> "backend_options": {</span>
|
||||
<a id="__codelineno-32-8" name="__codelineno-32-8" href="#__codelineno-32-8"></a><span class="s1"> "model": "/models/llama-2-7b.gguf",</span>
|
||||
<a id="__codelineno-32-9" name="__codelineno-32-9" href="#__codelineno-32-9"></a><span class="s1"> "gpu_layers": 32</span>
|
||||
<a id="__codelineno-32-10" name="__codelineno-32-10" href="#__codelineno-32-10"></a><span class="s1"> },</span>
|
||||
<a id="__codelineno-32-11" name="__codelineno-32-11" href="#__codelineno-32-11"></a><span class="s1"> "nodes": ["worker1"]</span>
|
||||
<a id="__codelineno-32-12" name="__codelineno-32-12" href="#__codelineno-32-12"></a><span class="s1"> }'</span>
|
||||
<a id="__codelineno-32-13" name="__codelineno-32-13" href="#__codelineno-32-13"></a>
|
||||
<a id="__codelineno-32-14" name="__codelineno-32-14" href="#__codelineno-32-14"></a><span class="c1"># Check status of remote instance</span>
|
||||
<a id="__codelineno-32-15" name="__codelineno-32-15" href="#__codelineno-32-15"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-16" name="__codelineno-32-16" href="#__codelineno-32-16"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/remote-model
|
||||
<a id="__codelineno-32-17" name="__codelineno-32-17" href="#__codelineno-32-17"></a>
|
||||
<a id="__codelineno-32-18" name="__codelineno-32-18" href="#__codelineno-32-18"></a><span class="c1"># Use remote instance with OpenAI-compatible API</span>
|
||||
<a id="__codelineno-32-19" name="__codelineno-32-19" href="#__codelineno-32-19"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-20" name="__codelineno-32-20" href="#__codelineno-32-20"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-21" name="__codelineno-32-21" href="#__codelineno-32-21"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-inference-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-22" name="__codelineno-32-22" href="#__codelineno-32-22"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-32-23" name="__codelineno-32-23" href="#__codelineno-32-23"></a><span class="s1"> "model": "remote-model",</span>
|
||||
<a id="__codelineno-32-24" name="__codelineno-32-24" href="#__codelineno-32-24"></a><span class="s1"> "messages": [</span>
|
||||
<a id="__codelineno-32-25" name="__codelineno-32-25" href="#__codelineno-32-25"></a><span class="s1"> {"role": "user", "content": "Hello from remote node!"}</span>
|
||||
<a id="__codelineno-32-26" name="__codelineno-32-26" href="#__codelineno-32-26"></a><span class="s1"> ]</span>
|
||||
<a id="__codelineno-32-27" name="__codelineno-32-27" href="#__codelineno-32-27"></a><span class="s1"> }'</span>
|
||||
</code></pre></div>
|
||||
<h3 id="using-the-proxy-endpoint">Using the Proxy Endpoint<a class="headerlink" href="#using-the-proxy-endpoint" title="Permanent link">¶</a></h3>
|
||||
<p>You can also directly proxy requests to the llama-server instance:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-33-1" name="__codelineno-33-1" href="#__codelineno-33-1"></a><span class="c1"># Direct proxy to instance (bypasses OpenAI compatibility layer)</span>
|
||||
<a id="__codelineno-33-2" name="__codelineno-33-2" href="#__codelineno-33-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/completion<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-3" name="__codelineno-33-3" href="#__codelineno-33-3"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-4" name="__codelineno-33-4" href="#__codelineno-33-4"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-5" name="__codelineno-33-5" href="#__codelineno-33-5"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-33-6" name="__codelineno-33-6" href="#__codelineno-33-6"></a><span class="s1"> "prompt": "Hello, world!",</span>
|
||||
<a id="__codelineno-33-7" name="__codelineno-33-7" href="#__codelineno-33-7"></a><span class="s1"> "n_predict": 50</span>
|
||||
<a id="__codelineno-33-8" name="__codelineno-33-8" href="#__codelineno-33-8"></a><span class="s1"> }'</span>
|
||||
</code></pre></div>
|
||||
<h2 id="backend-specific-endpoints">Backend-Specific Endpoints<a class="headerlink" href="#backend-specific-endpoints" title="Permanent link">¶</a></h2>
|
||||
<h3 id="parse-commands">Parse Commands<a class="headerlink" href="#parse-commands" title="Permanent link">¶</a></h3>
|
||||
<p>Llamactl provides endpoints to parse command strings from different backends into instance configuration options.</p>
|
||||
<h4 id="parse-llamacpp-command">Parse Llama.cpp Command<a class="headerlink" href="#parse-llamacpp-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse a llama-server command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-33-1" name="__codelineno-33-1" href="#__codelineno-33-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-34-2" name="__codelineno-34-2" href="#__codelineno-34-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
||||
<a id="__codelineno-34-3" name="__codelineno-34-3" href="#__codelineno-34-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
||||
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-35-4" name="__codelineno-35-4" href="#__codelineno-35-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model.gguf"</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-5" name="__codelineno-35-5" href="#__codelineno-35-5"></a><span class="w"> </span><span class="nt">"ctx_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">2048</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-6" name="__codelineno-35-6" href="#__codelineno-35-6"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-35-7" name="__codelineno-35-7" href="#__codelineno-35-7"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-35-8" name="__codelineno-35-8" href="#__codelineno-35-8"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-36-2" name="__codelineno-36-2" href="#__codelineno-36-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-3" name="__codelineno-36-3" href="#__codelineno-36-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-36-4" name="__codelineno-36-4" href="#__codelineno-36-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model.gguf"</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-5" name="__codelineno-36-5" href="#__codelineno-36-5"></a><span class="w"> </span><span class="nt">"ctx_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">2048</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-6" name="__codelineno-36-6" href="#__codelineno-36-6"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-36-7" name="__codelineno-36-7" href="#__codelineno-36-7"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-36-8" name="__codelineno-36-8" href="#__codelineno-36-8"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<h4 id="parse-mlx-lm-command">Parse MLX-LM Command<a class="headerlink" href="#parse-mlx-lm-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse an MLX-LM server command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-37-2" name="__codelineno-37-2" href="#__codelineno-37-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-37-3" name="__codelineno-37-3" href="#__codelineno-37-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-38-4" name="__codelineno-38-4" href="#__codelineno-38-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-38-5" name="__codelineno-38-5" href="#__codelineno-38-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-38-6" name="__codelineno-38-6" href="#__codelineno-38-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-38-7" name="__codelineno-38-7" href="#__codelineno-38-7"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-39-2" name="__codelineno-39-2" href="#__codelineno-39-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-39-3" name="__codelineno-39-3" href="#__codelineno-39-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-39-4" name="__codelineno-39-4" href="#__codelineno-39-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-39-5" name="__codelineno-39-5" href="#__codelineno-39-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-39-6" name="__codelineno-39-6" href="#__codelineno-39-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-39-7" name="__codelineno-39-7" href="#__codelineno-39-7"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<h4 id="parse-vllm-command">Parse vLLM Command<a class="headerlink" href="#parse-vllm-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse a vLLM serve command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-40-2" name="__codelineno-40-2" href="#__codelineno-40-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-40-3" name="__codelineno-40-3" href="#__codelineno-40-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-41-4" name="__codelineno-41-4" href="#__codelineno-41-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-41-5" name="__codelineno-41-5" href="#__codelineno-41-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-41-6" name="__codelineno-41-6" href="#__codelineno-41-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-41-7" name="__codelineno-41-7" href="#__codelineno-41-7"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-42-2" name="__codelineno-42-2" href="#__codelineno-42-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-42-3" name="__codelineno-42-3" href="#__codelineno-42-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-42-4" name="__codelineno-42-4" href="#__codelineno-42-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-42-5" name="__codelineno-42-5" href="#__codelineno-42-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-42-6" name="__codelineno-42-6" href="#__codelineno-42-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-42-7" name="__codelineno-42-7" href="#__codelineno-42-7"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Error Responses for Parse Commands:</strong>
|
||||
- <code>400 Bad Request</code>: Invalid request body, empty command, or parse error
|
||||
@@ -1735,7 +1783,7 @@
|
||||
</ol>
|
||||
<h2 id="swagger-documentation">Swagger Documentation<a class="headerlink" href="#swagger-documentation" title="Permanent link">¶</a></h2>
|
||||
<p>If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a>http://localhost:8080/swagger/
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-43-1" name="__codelineno-43-1" href="#__codelineno-43-1"></a>http://localhost:8080/swagger/
|
||||
</code></pre></div>
|
||||
<p>This provides a complete interactive interface for testing all API endpoints.</p>
|
||||
|
||||
@@ -1758,7 +1806,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 28, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user