Deployed 12bbf34 to dev with MkDocs 1.5.3 and mike 2.0.0

This commit is contained in:
lordmathis
2025-09-28 13:42:22 +00:00
parent 66f9ee7c18
commit e9503ca768
8 changed files with 168 additions and 114 deletions

View File

@@ -1445,7 +1445,18 @@
<p>Create and start a new instance.</p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a><span class="err">POST /api/v1/instances/{name}</span>
</code></pre></div>
<p><strong>Request Body:</strong> JSON object with instance configuration. See <a href="../managing-instances/">Managing Instances</a> for available configuration options.</p>
<p><strong>Request Body:</strong> JSON object with instance configuration. Common fields include:</p>
<ul>
<li><code>backend_type</code>: Backend type (<code>llama_cpp</code>, <code>mlx_lm</code>, or <code>vllm</code>)</li>
<li><code>backend_options</code>: Backend-specific configuration</li>
<li><code>auto_restart</code>: Enable automatic restart on failure</li>
<li><code>max_restarts</code>: Maximum restart attempts</li>
<li><code>restart_delay</code>: Delay between restarts in seconds</li>
<li><code>on_demand_start</code>: Start the instance automatically when it receives a request</li>
<li><code>idle_timeout</code>: Idle timeout in minutes</li>
<li><code>environment</code>: Environment variables as key-value pairs</li>
</ul>
<p>See <a href="../managing-instances/">Managing Instances</a> for complete configuration options.</p>
<p><strong>Response:</strong></p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="p">{</span>
<a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a><span class="w"> </span><span class="nt">&quot;name&quot;</span><span class="p">:</span><span class="w"> </span><span class="s2">&quot;llama2-7b&quot;</span><span class="p">,</span>
@@ -1605,36 +1616,44 @@
<a id="__codelineno-31-3" name="__codelineno-31-3" href="#__codelineno-31-3"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Content-Type: application/json&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-4" name="__codelineno-31-4" href="#__codelineno-31-4"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-5" name="__codelineno-31-5" href="#__codelineno-31-5"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">&#39;{</span>
<a id="__codelineno-31-6" name="__codelineno-31-6" href="#__codelineno-31-6"></a><span class="s1"> &quot;model&quot;: &quot;/models/llama-2-7b.gguf&quot;</span>
<a id="__codelineno-31-7" name="__codelineno-31-7" href="#__codelineno-31-7"></a><span class="s1"> }&#39;</span>
<a id="__codelineno-31-8" name="__codelineno-31-8" href="#__codelineno-31-8"></a>
<a id="__codelineno-31-9" name="__codelineno-31-9" href="#__codelineno-31-9"></a><span class="c1"># Check instance status</span>
<a id="__codelineno-31-10" name="__codelineno-31-10" href="#__codelineno-31-10"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-11" name="__codelineno-31-11" href="#__codelineno-31-11"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
<a id="__codelineno-31-12" name="__codelineno-31-12" href="#__codelineno-31-12"></a>
<a id="__codelineno-31-13" name="__codelineno-31-13" href="#__codelineno-31-13"></a><span class="c1"># Get instance logs</span>
<a id="__codelineno-31-14" name="__codelineno-31-14" href="#__codelineno-31-14"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-15" name="__codelineno-31-15" href="#__codelineno-31-15"></a><span class="w"> </span><span class="s2">&quot;http://localhost:8080/api/v1/instances/my-model/logs?lines=50&quot;</span>
<a id="__codelineno-31-6" name="__codelineno-31-6" href="#__codelineno-31-6"></a><span class="s1"> &quot;backend_type&quot;: &quot;llama_cpp&quot;,</span>
<a id="__codelineno-31-7" name="__codelineno-31-7" href="#__codelineno-31-7"></a><span class="s1"> &quot;backend_options&quot;: {</span>
<a id="__codelineno-31-8" name="__codelineno-31-8" href="#__codelineno-31-8"></a><span class="s1"> &quot;model&quot;: &quot;/models/llama-2-7b.gguf&quot;,</span>
<a id="__codelineno-31-9" name="__codelineno-31-9" href="#__codelineno-31-9"></a><span class="s1"> &quot;gpu_layers&quot;: 32</span>
<a id="__codelineno-31-10" name="__codelineno-31-10" href="#__codelineno-31-10"></a><span class="s1"> },</span>
<a id="__codelineno-31-11" name="__codelineno-31-11" href="#__codelineno-31-11"></a><span class="s1"> &quot;environment&quot;: {</span>
<a id="__codelineno-31-12" name="__codelineno-31-12" href="#__codelineno-31-12"></a><span class="s1"> &quot;CUDA_VISIBLE_DEVICES&quot;: &quot;0&quot;,</span>
<a id="__codelineno-31-13" name="__codelineno-31-13" href="#__codelineno-31-13"></a><span class="s1"> &quot;OMP_NUM_THREADS&quot;: &quot;8&quot;</span>
<a id="__codelineno-31-14" name="__codelineno-31-14" href="#__codelineno-31-14"></a><span class="s1"> }</span>
<a id="__codelineno-31-15" name="__codelineno-31-15" href="#__codelineno-31-15"></a><span class="s1"> }&#39;</span>
<a id="__codelineno-31-16" name="__codelineno-31-16" href="#__codelineno-31-16"></a>
<a id="__codelineno-31-17" name="__codelineno-31-17" href="#__codelineno-31-17"></a><span class="c1"># Use OpenAI-compatible chat completions</span>
<a id="__codelineno-31-18" name="__codelineno-31-18" href="#__codelineno-31-18"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/v1/chat/completions<span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-19" name="__codelineno-31-19" href="#__codelineno-31-19"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Content-Type: application/json&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-20" name="__codelineno-31-20" href="#__codelineno-31-20"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-inference-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-21" name="__codelineno-31-21" href="#__codelineno-31-21"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">&#39;{</span>
<a id="__codelineno-31-22" name="__codelineno-31-22" href="#__codelineno-31-22"></a><span class="s1"> &quot;model&quot;: &quot;my-model&quot;,</span>
<a id="__codelineno-31-23" name="__codelineno-31-23" href="#__codelineno-31-23"></a><span class="s1"> &quot;messages&quot;: [</span>
<a id="__codelineno-31-24" name="__codelineno-31-24" href="#__codelineno-31-24"></a><span class="s1"> {&quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Hello!&quot;}</span>
<a id="__codelineno-31-25" name="__codelineno-31-25" href="#__codelineno-31-25"></a><span class="s1"> ],</span>
<a id="__codelineno-31-26" name="__codelineno-31-26" href="#__codelineno-31-26"></a><span class="s1"> &quot;max_tokens&quot;: 100</span>
<a id="__codelineno-31-27" name="__codelineno-31-27" href="#__codelineno-31-27"></a><span class="s1"> }&#39;</span>
<a id="__codelineno-31-28" name="__codelineno-31-28" href="#__codelineno-31-28"></a>
<a id="__codelineno-31-29" name="__codelineno-31-29" href="#__codelineno-31-29"></a><span class="c1"># Stop instance</span>
<a id="__codelineno-31-30" name="__codelineno-31-30" href="#__codelineno-31-30"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-31" name="__codelineno-31-31" href="#__codelineno-31-31"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/stop
<a id="__codelineno-31-32" name="__codelineno-31-32" href="#__codelineno-31-32"></a>
<a id="__codelineno-31-33" name="__codelineno-31-33" href="#__codelineno-31-33"></a><span class="c1"># Delete instance</span>
<a id="__codelineno-31-34" name="__codelineno-31-34" href="#__codelineno-31-34"></a>curl<span class="w"> </span>-X<span class="w"> </span>DELETE<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-35" name="__codelineno-31-35" href="#__codelineno-31-35"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
<a id="__codelineno-31-17" name="__codelineno-31-17" href="#__codelineno-31-17"></a><span class="c1"># Check instance status</span>
<a id="__codelineno-31-18" name="__codelineno-31-18" href="#__codelineno-31-18"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-19" name="__codelineno-31-19" href="#__codelineno-31-19"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
<a id="__codelineno-31-20" name="__codelineno-31-20" href="#__codelineno-31-20"></a>
<a id="__codelineno-31-21" name="__codelineno-31-21" href="#__codelineno-31-21"></a><span class="c1"># Get instance logs</span>
<a id="__codelineno-31-22" name="__codelineno-31-22" href="#__codelineno-31-22"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-23" name="__codelineno-31-23" href="#__codelineno-31-23"></a><span class="w"> </span><span class="s2">&quot;http://localhost:8080/api/v1/instances/my-model/logs?lines=50&quot;</span>
<a id="__codelineno-31-24" name="__codelineno-31-24" href="#__codelineno-31-24"></a>
<a id="__codelineno-31-25" name="__codelineno-31-25" href="#__codelineno-31-25"></a><span class="c1"># Use OpenAI-compatible chat completions</span>
<a id="__codelineno-31-26" name="__codelineno-31-26" href="#__codelineno-31-26"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/v1/chat/completions<span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-27" name="__codelineno-31-27" href="#__codelineno-31-27"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Content-Type: application/json&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-28" name="__codelineno-31-28" href="#__codelineno-31-28"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-inference-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-29" name="__codelineno-31-29" href="#__codelineno-31-29"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">&#39;{</span>
<a id="__codelineno-31-30" name="__codelineno-31-30" href="#__codelineno-31-30"></a><span class="s1"> &quot;model&quot;: &quot;my-model&quot;,</span>
<a id="__codelineno-31-31" name="__codelineno-31-31" href="#__codelineno-31-31"></a><span class="s1"> &quot;messages&quot;: [</span>
<a id="__codelineno-31-32" name="__codelineno-31-32" href="#__codelineno-31-32"></a><span class="s1"> {&quot;role&quot;: &quot;user&quot;, &quot;content&quot;: &quot;Hello!&quot;}</span>
<a id="__codelineno-31-33" name="__codelineno-31-33" href="#__codelineno-31-33"></a><span class="s1"> ],</span>
<a id="__codelineno-31-34" name="__codelineno-31-34" href="#__codelineno-31-34"></a><span class="s1"> &quot;max_tokens&quot;: 100</span>
<a id="__codelineno-31-35" name="__codelineno-31-35" href="#__codelineno-31-35"></a><span class="s1"> }&#39;</span>
<a id="__codelineno-31-36" name="__codelineno-31-36" href="#__codelineno-31-36"></a>
<a id="__codelineno-31-37" name="__codelineno-31-37" href="#__codelineno-31-37"></a><span class="c1"># Stop instance</span>
<a id="__codelineno-31-38" name="__codelineno-31-38" href="#__codelineno-31-38"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-39" name="__codelineno-31-39" href="#__codelineno-31-39"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/stop
<a id="__codelineno-31-40" name="__codelineno-31-40" href="#__codelineno-31-40"></a>
<a id="__codelineno-31-41" name="__codelineno-31-41" href="#__codelineno-31-41"></a><span class="c1"># Delete instance</span>
<a id="__codelineno-31-42" name="__codelineno-31-42" href="#__codelineno-31-42"></a>curl<span class="w"> </span>-X<span class="w"> </span>DELETE<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-31-43" name="__codelineno-31-43" href="#__codelineno-31-43"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
</code></pre></div>
<h3 id="using-the-proxy-endpoint">Using the Proxy Endpoint<a class="headerlink" href="#using-the-proxy-endpoint" title="Permanent link">&para;</a></h3>
<p>You can also directly proxy requests to the llama-server instance:</p>
@@ -1739,7 +1758,7 @@
<span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
</span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 22, 2025</span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 28, 2025</span>
</span>