mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-05 16:44:22 +00:00
Deployed a31af94 to dev with MkDocs 1.5.3 and mike 2.0.0
This commit is contained in:
Binary file not shown.
@@ -558,18 +558,18 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#command-line-options" class="md-nav__link">
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-configuration" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Command Line Options
|
||||
Remote Node Configuration
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -805,18 +805,18 @@
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#command-line-options" class="md-nav__link">
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-configuration" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Command Line Options
|
||||
Remote Node Configuration
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -901,6 +901,10 @@
|
||||
<a id="__codelineno-1-55" name="__codelineno-1-55" href="#__codelineno-1-55"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for inference endpoints</span>
|
||||
<a id="__codelineno-1-56" name="__codelineno-1-56" href="#__codelineno-1-56"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for management endpoints</span>
|
||||
<a id="__codelineno-1-57" name="__codelineno-1-57" href="#__codelineno-1-57"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for management endpoints</span>
|
||||
<a id="__codelineno-1-58" name="__codelineno-1-58" href="#__codelineno-1-58"></a>
|
||||
<a id="__codelineno-1-59" name="__codelineno-1-59" href="#__codelineno-1-59"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">"main"</span><span class="w"> </span><span class="c1"># Name of the local node (default: "main")</span>
|
||||
<a id="__codelineno-1-60" name="__codelineno-1-60" href="#__codelineno-1-60"></a><span class="nt">nodes</span><span class="p">:</span><span class="w"> </span><span class="c1"># Node configuration for multi-node deployment</span>
|
||||
<a id="__codelineno-1-61" name="__codelineno-1-61" href="#__codelineno-1-61"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span><span class="w"> </span><span class="c1"># Default local node (empty config)</span>
|
||||
</code></pre></div>
|
||||
<h2 id="configuration-files">Configuration Files<a class="headerlink" href="#configuration-files" title="Permanent link">¶</a></h2>
|
||||
<h3 id="configuration-file-locations">Configuration File Locations<a class="headerlink" href="#configuration-file-locations" title="Permanent link">¶</a></h3>
|
||||
@@ -1040,16 +1044,29 @@
|
||||
<a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for management endpoints (default: true)</span>
|
||||
<a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid management API keys</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Environment Variables:</strong><br />
|
||||
- <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)<br />
|
||||
- <code>LLAMACTL_INFERENCE_KEYS</code> - Comma-separated inference API keys<br />
|
||||
- <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)<br />
|
||||
- <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys </p>
|
||||
<h2 id="command-line-options">Command Line Options<a class="headerlink" href="#command-line-options" title="Permanent link">¶</a></h2>
|
||||
<p>View all available command line options:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a>llamactl<span class="w"> </span>--help
|
||||
<p><strong>Environment Variables:</strong>
|
||||
- <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)
|
||||
- <code>LLAMACTL_INFERENCE_KEYS</code> - Comma-separated inference API keys
|
||||
- <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)
|
||||
- <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys</p>
|
||||
<h3 id="remote-node-configuration">Remote Node Configuration<a class="headerlink" href="#remote-node-configuration" title="Permanent link">¶</a></h3>
|
||||
<p>llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">"main"</span><span class="w"> </span><span class="c1"># Name of the local node (default: "main")</span>
|
||||
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="nt">nodes</span><span class="p">:</span><span class="w"> </span><span class="c1"># Node configuration map</span>
|
||||
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span><span class="w"> </span><span class="c1"># Local node (empty address means local)</span>
|
||||
<a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Not used for local node</span>
|
||||
<a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Not used for local node</span>
|
||||
<a id="__codelineno-6-6" name="__codelineno-6-6" href="#__codelineno-6-6"></a><span class="w"> </span><span class="nt">worker1</span><span class="p">:</span><span class="w"> </span><span class="c1"># Remote worker node</span>
|
||||
<a id="__codelineno-6-7" name="__codelineno-6-7" href="#__codelineno-6-7"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">"http://192.168.1.10:8080"</span>
|
||||
<a id="__codelineno-6-8" name="__codelineno-6-8" href="#__codelineno-6-8"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"worker1-api-key"</span><span class="w"> </span><span class="c1"># Management API key for authentication</span>
|
||||
</code></pre></div>
|
||||
<p>You can also override configuration using command line flags when starting llamactl.</p>
|
||||
<p><strong>Node Configuration Fields:</strong>
|
||||
- <code>local_node</code>: Specifies which node in the <code>nodes</code> map represents the local node
|
||||
- <code>nodes</code>: Map of node configurations
|
||||
- <code>address</code>: HTTP/HTTPS URL of the remote node (empty for local node)
|
||||
- <code>api_key</code>: Management API key for authenticating with the remote node</p>
|
||||
<p><strong>Environment Variables:</strong>
|
||||
- <code>LLAMACTL_LOCAL_NODE</code> - Name of the local node</p>
|
||||
|
||||
|
||||
|
||||
@@ -1070,7 +1087,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 4, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
@@ -525,6 +525,15 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-installation" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Installation
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -829,6 +838,15 @@
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-installation" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Installation
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -982,12 +1000,17 @@
|
||||
<a id="__codelineno-8-8" name="__codelineno-8-8" href="#__codelineno-8-8"></a><span class="c1"># Build the application</span>
|
||||
<a id="__codelineno-8-9" name="__codelineno-8-9" href="#__codelineno-8-9"></a>go<span class="w"> </span>build<span class="w"> </span>-o<span class="w"> </span>llamactl<span class="w"> </span>./cmd/server
|
||||
</code></pre></div>
|
||||
<h2 id="remote-node-installation">Remote Node Installation<a class="headerlink" href="#remote-node-installation" title="Permanent link">¶</a></h2>
|
||||
<p>For deployments with remote nodes:
|
||||
- Install llamactl on each node using any of the methods above
|
||||
- Configure API keys for authentication between nodes</p>
|
||||
<h2 id="verification">Verification<a class="headerlink" href="#verification" title="Permanent link">¶</a></h2>
|
||||
<p>Verify your installation by checking the version:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a>llamactl<span class="w"> </span>--version
|
||||
</code></pre></div>
|
||||
<h2 id="next-steps">Next Steps<a class="headerlink" href="#next-steps" title="Permanent link">¶</a></h2>
|
||||
<p>Now that Llamactl is installed, continue to the <a href="../quick-start/">Quick Start</a> guide to get your first instance running!</p>
|
||||
<p>For remote node deployments, see the <a href="../configuration/">Configuration Guide</a> for node setup instructions.</p>
|
||||
|
||||
|
||||
|
||||
@@ -1008,7 +1031,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 29, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
@@ -426,6 +426,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-instance-deployment" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
🔗 Remote Instance Deployment
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -771,6 +780,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-instance-deployment" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
🔗 Remote Instance Deployment
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
@@ -852,6 +870,12 @@
|
||||
<li><strong>Smart Resource Management</strong>: Idle timeout, LRU eviction, and configurable instance limits </li>
|
||||
<li><strong>Environment Variables</strong>: Set custom environment variables per instance for advanced configuration </li>
|
||||
</ul>
|
||||
<h3 id="remote-instance-deployment">🔗 Remote Instance Deployment<a class="headerlink" href="#remote-instance-deployment" title="Permanent link">¶</a></h3>
|
||||
<ul>
|
||||
<li><strong>Remote Node Support</strong>: Deploy instances on remote hosts </li>
|
||||
<li><strong>Central Management</strong>: Manage remote instances from a single dashboard </li>
|
||||
<li><strong>Seamless Routing</strong>: Automatic request routing to remote instances </li>
|
||||
</ul>
|
||||
<p><img alt="Dashboard Screenshot" src="images/dashboard.png" /> </p>
|
||||
<h2 id="quick-links">Quick Links<a class="headerlink" href="#quick-links" title="Permanent link">¶</a></h2>
|
||||
<ul>
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -2,37 +2,37 @@
|
||||
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/configuration/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/installation/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/getting-started/quick-start/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/api-reference/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/managing-instances/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
<url>
|
||||
<loc>https://llamactl.org/dev/user-guide/troubleshooting/</loc>
|
||||
<lastmod>2025-10-06</lastmod>
|
||||
<lastmod>2025-10-09</lastmod>
|
||||
<changefreq>daily</changefreq>
|
||||
</url>
|
||||
</urlset>
|
||||
Binary file not shown.
@@ -842,6 +842,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-instance-example" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Instance Example
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -1268,6 +1277,15 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-instance-example" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Instance Example
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -1455,6 +1473,7 @@
|
||||
<li><code>on_demand_start</code>: Start instance when receiving requests</li>
|
||||
<li><code>idle_timeout</code>: Idle timeout in minutes</li>
|
||||
<li><code>environment</code>: Environment variables as key-value pairs</li>
|
||||
<li><code>nodes</code>: Array with single node name to deploy the instance to (for remote deployments)</li>
|
||||
</ul>
|
||||
<p>See <a href="../managing-instances/">Managing Instances</a> for complete configuration options.</p>
|
||||
<p><strong>Response:</strong>
|
||||
@@ -1655,74 +1674,103 @@
|
||||
<a id="__codelineno-31-42" name="__codelineno-31-42" href="#__codelineno-31-42"></a>curl<span class="w"> </span>-X<span class="w"> </span>DELETE<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-31-43" name="__codelineno-31-43" href="#__codelineno-31-43"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/my-model
|
||||
</code></pre></div>
|
||||
<h3 id="using-the-proxy-endpoint">Using the Proxy Endpoint<a class="headerlink" href="#using-the-proxy-endpoint" title="Permanent link">¶</a></h3>
|
||||
<p>You can also directly proxy requests to the llama-server instance:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-32-1" name="__codelineno-32-1" href="#__codelineno-32-1"></a><span class="c1"># Direct proxy to instance (bypasses OpenAI compatibility layer)</span>
|
||||
<a id="__codelineno-32-2" name="__codelineno-32-2" href="#__codelineno-32-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/completion<span class="w"> </span><span class="se">\</span>
|
||||
<h3 id="remote-node-instance-example">Remote Node Instance Example<a class="headerlink" href="#remote-node-instance-example" title="Permanent link">¶</a></h3>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-32-1" name="__codelineno-32-1" href="#__codelineno-32-1"></a><span class="c1"># Create instance on specific remote node</span>
|
||||
<a id="__codelineno-32-2" name="__codelineno-32-2" href="#__codelineno-32-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/remote-model<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-3" name="__codelineno-32-3" href="#__codelineno-32-3"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-4" name="__codelineno-32-4" href="#__codelineno-32-4"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-5" name="__codelineno-32-5" href="#__codelineno-32-5"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-32-6" name="__codelineno-32-6" href="#__codelineno-32-6"></a><span class="s1"> "prompt": "Hello, world!",</span>
|
||||
<a id="__codelineno-32-7" name="__codelineno-32-7" href="#__codelineno-32-7"></a><span class="s1"> "n_predict": 50</span>
|
||||
<a id="__codelineno-32-8" name="__codelineno-32-8" href="#__codelineno-32-8"></a><span class="s1"> }'</span>
|
||||
<a id="__codelineno-32-6" name="__codelineno-32-6" href="#__codelineno-32-6"></a><span class="s1"> "backend_type": "llama_cpp",</span>
|
||||
<a id="__codelineno-32-7" name="__codelineno-32-7" href="#__codelineno-32-7"></a><span class="s1"> "backend_options": {</span>
|
||||
<a id="__codelineno-32-8" name="__codelineno-32-8" href="#__codelineno-32-8"></a><span class="s1"> "model": "/models/llama-2-7b.gguf",</span>
|
||||
<a id="__codelineno-32-9" name="__codelineno-32-9" href="#__codelineno-32-9"></a><span class="s1"> "gpu_layers": 32</span>
|
||||
<a id="__codelineno-32-10" name="__codelineno-32-10" href="#__codelineno-32-10"></a><span class="s1"> },</span>
|
||||
<a id="__codelineno-32-11" name="__codelineno-32-11" href="#__codelineno-32-11"></a><span class="s1"> "nodes": ["worker1"]</span>
|
||||
<a id="__codelineno-32-12" name="__codelineno-32-12" href="#__codelineno-32-12"></a><span class="s1"> }'</span>
|
||||
<a id="__codelineno-32-13" name="__codelineno-32-13" href="#__codelineno-32-13"></a>
|
||||
<a id="__codelineno-32-14" name="__codelineno-32-14" href="#__codelineno-32-14"></a><span class="c1"># Check status of remote instance</span>
|
||||
<a id="__codelineno-32-15" name="__codelineno-32-15" href="#__codelineno-32-15"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-16" name="__codelineno-32-16" href="#__codelineno-32-16"></a><span class="w"> </span>http://localhost:8080/api/v1/instances/remote-model
|
||||
<a id="__codelineno-32-17" name="__codelineno-32-17" href="#__codelineno-32-17"></a>
|
||||
<a id="__codelineno-32-18" name="__codelineno-32-18" href="#__codelineno-32-18"></a><span class="c1"># Use remote instance with OpenAI-compatible API</span>
|
||||
<a id="__codelineno-32-19" name="__codelineno-32-19" href="#__codelineno-32-19"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/v1/chat/completions<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-20" name="__codelineno-32-20" href="#__codelineno-32-20"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-21" name="__codelineno-32-21" href="#__codelineno-32-21"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-inference-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-32-22" name="__codelineno-32-22" href="#__codelineno-32-22"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-32-23" name="__codelineno-32-23" href="#__codelineno-32-23"></a><span class="s1"> "model": "remote-model",</span>
|
||||
<a id="__codelineno-32-24" name="__codelineno-32-24" href="#__codelineno-32-24"></a><span class="s1"> "messages": [</span>
|
||||
<a id="__codelineno-32-25" name="__codelineno-32-25" href="#__codelineno-32-25"></a><span class="s1"> {"role": "user", "content": "Hello from remote node!"}</span>
|
||||
<a id="__codelineno-32-26" name="__codelineno-32-26" href="#__codelineno-32-26"></a><span class="s1"> ]</span>
|
||||
<a id="__codelineno-32-27" name="__codelineno-32-27" href="#__codelineno-32-27"></a><span class="s1"> }'</span>
|
||||
</code></pre></div>
|
||||
<h3 id="using-the-proxy-endpoint">Using the Proxy Endpoint<a class="headerlink" href="#using-the-proxy-endpoint" title="Permanent link">¶</a></h3>
|
||||
<p>You can also directly proxy requests to the llama-server instance:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-33-1" name="__codelineno-33-1" href="#__codelineno-33-1"></a><span class="c1"># Direct proxy to instance (bypasses OpenAI compatibility layer)</span>
|
||||
<a id="__codelineno-33-2" name="__codelineno-33-2" href="#__codelineno-33-2"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/v1/instances/my-model/proxy/completion<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-3" name="__codelineno-33-3" href="#__codelineno-33-3"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-4" name="__codelineno-33-4" href="#__codelineno-33-4"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer your-api-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-33-5" name="__codelineno-33-5" href="#__codelineno-33-5"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-33-6" name="__codelineno-33-6" href="#__codelineno-33-6"></a><span class="s1"> "prompt": "Hello, world!",</span>
|
||||
<a id="__codelineno-33-7" name="__codelineno-33-7" href="#__codelineno-33-7"></a><span class="s1"> "n_predict": 50</span>
|
||||
<a id="__codelineno-33-8" name="__codelineno-33-8" href="#__codelineno-33-8"></a><span class="s1"> }'</span>
|
||||
</code></pre></div>
|
||||
<h2 id="backend-specific-endpoints">Backend-Specific Endpoints<a class="headerlink" href="#backend-specific-endpoints" title="Permanent link">¶</a></h2>
|
||||
<h3 id="parse-commands">Parse Commands<a class="headerlink" href="#parse-commands" title="Permanent link">¶</a></h3>
|
||||
<p>Llamactl provides endpoints to parse command strings from different backends into instance configuration options.</p>
|
||||
<h4 id="parse-llamacpp-command">Parse Llama.cpp Command<a class="headerlink" href="#parse-llamacpp-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse a llama-server command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-33-1" name="__codelineno-33-1" href="#__codelineno-33-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="err">POST /api/v1/backends/llama-cpp/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-34-2" name="__codelineno-34-2" href="#__codelineno-34-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
||||
<a id="__codelineno-34-3" name="__codelineno-34-3" href="#__codelineno-34-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama-server -m /path/to/model.gguf -c 2048 --port 8080"</span>
|
||||
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-35-4" name="__codelineno-35-4" href="#__codelineno-35-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model.gguf"</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-5" name="__codelineno-35-5" href="#__codelineno-35-5"></a><span class="w"> </span><span class="nt">"ctx_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">2048</span><span class="p">,</span>
|
||||
<a id="__codelineno-35-6" name="__codelineno-35-6" href="#__codelineno-35-6"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-35-7" name="__codelineno-35-7" href="#__codelineno-35-7"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-35-8" name="__codelineno-35-8" href="#__codelineno-35-8"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-36-2" name="__codelineno-36-2" href="#__codelineno-36-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"llama_cpp"</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-3" name="__codelineno-36-3" href="#__codelineno-36-3"></a><span class="w"> </span><span class="nt">"llama_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-36-4" name="__codelineno-36-4" href="#__codelineno-36-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model.gguf"</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-5" name="__codelineno-36-5" href="#__codelineno-36-5"></a><span class="w"> </span><span class="nt">"ctx_size"</span><span class="p">:</span><span class="w"> </span><span class="mi">2048</span><span class="p">,</span>
|
||||
<a id="__codelineno-36-6" name="__codelineno-36-6" href="#__codelineno-36-6"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-36-7" name="__codelineno-36-7" href="#__codelineno-36-7"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-36-8" name="__codelineno-36-8" href="#__codelineno-36-8"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<h4 id="parse-mlx-lm-command">Parse MLX-LM Command<a class="headerlink" href="#parse-mlx-lm-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse an MLX-LM server command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="err">POST /api/v1/backends/mlx/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-37-2" name="__codelineno-37-2" href="#__codelineno-37-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-37-3" name="__codelineno-37-3" href="#__codelineno-37-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm.server --model /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-38-2" name="__codelineno-38-2" href="#__codelineno-38-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-38-3" name="__codelineno-38-3" href="#__codelineno-38-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-38-4" name="__codelineno-38-4" href="#__codelineno-38-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-38-5" name="__codelineno-38-5" href="#__codelineno-38-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-38-6" name="__codelineno-38-6" href="#__codelineno-38-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-38-7" name="__codelineno-38-7" href="#__codelineno-38-7"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-39-2" name="__codelineno-39-2" href="#__codelineno-39-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"mlx_lm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-39-3" name="__codelineno-39-3" href="#__codelineno-39-3"></a><span class="w"> </span><span class="nt">"mlx_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-39-4" name="__codelineno-39-4" href="#__codelineno-39-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-39-5" name="__codelineno-39-5" href="#__codelineno-39-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-39-6" name="__codelineno-39-6" href="#__codelineno-39-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-39-7" name="__codelineno-39-7" href="#__codelineno-39-7"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<h4 id="parse-vllm-command">Parse vLLM Command<a class="headerlink" href="#parse-vllm-command" title="Permanent link">¶</a></h4>
|
||||
<p>Parse a vLLM serve command string into instance options.</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="err">POST /api/v1/backends/vllm/parse-command</span>
|
||||
</code></pre></div>
|
||||
<p><strong>Request Body:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-40-2" name="__codelineno-40-2" href="#__codelineno-40-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-40-3" name="__codelineno-40-3" href="#__codelineno-40-3"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"command"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm serve /path/to/model --port 8080"</span>
|
||||
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Response:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-41-4" name="__codelineno-41-4" href="#__codelineno-41-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-41-5" name="__codelineno-41-5" href="#__codelineno-41-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-41-6" name="__codelineno-41-6" href="#__codelineno-41-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-41-7" name="__codelineno-41-7" href="#__codelineno-41-7"></a><span class="p">}</span>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a><span class="p">{</span>
|
||||
<a id="__codelineno-42-2" name="__codelineno-42-2" href="#__codelineno-42-2"></a><span class="w"> </span><span class="nt">"backend_type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"vllm"</span><span class="p">,</span>
|
||||
<a id="__codelineno-42-3" name="__codelineno-42-3" href="#__codelineno-42-3"></a><span class="w"> </span><span class="nt">"vllm_server_options"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span>
|
||||
<a id="__codelineno-42-4" name="__codelineno-42-4" href="#__codelineno-42-4"></a><span class="w"> </span><span class="nt">"model"</span><span class="p">:</span><span class="w"> </span><span class="s2">"/path/to/model"</span><span class="p">,</span>
|
||||
<a id="__codelineno-42-5" name="__codelineno-42-5" href="#__codelineno-42-5"></a><span class="w"> </span><span class="nt">"port"</span><span class="p">:</span><span class="w"> </span><span class="mi">8080</span>
|
||||
<a id="__codelineno-42-6" name="__codelineno-42-6" href="#__codelineno-42-6"></a><span class="w"> </span><span class="p">}</span>
|
||||
<a id="__codelineno-42-7" name="__codelineno-42-7" href="#__codelineno-42-7"></a><span class="p">}</span>
|
||||
</code></pre></div></p>
|
||||
<p><strong>Error Responses for Parse Commands:</strong>
|
||||
- <code>400 Bad Request</code>: Invalid request body, empty command, or parse error
|
||||
@@ -1735,7 +1783,7 @@
|
||||
</ol>
|
||||
<h2 id="swagger-documentation">Swagger Documentation<a class="headerlink" href="#swagger-documentation" title="Permanent link">¶</a></h2>
|
||||
<p>If swagger documentation is enabled in the server configuration, you can access the interactive API documentation at:</p>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a>http://localhost:8080/swagger/
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-43-1" name="__codelineno-43-1" href="#__codelineno-43-1"></a>http://localhost:8080/swagger/
|
||||
</code></pre></div>
|
||||
<p>This provides a complete interactive interface for testing all API endpoints.</p>
|
||||
|
||||
@@ -1758,7 +1806,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 28, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
@@ -1259,6 +1259,7 @@
|
||||
<ol>
|
||||
<li>Click the <strong>"Create Instance"</strong> button on the dashboard</li>
|
||||
<li>Enter a unique <strong>Name</strong> for your instance (only required field)</li>
|
||||
<li><strong>Select Target Node</strong>: Choose which node to deploy the instance to from the dropdown</li>
|
||||
<li><strong>Choose Backend Type</strong>:<ul>
|
||||
<li><strong>llama.cpp</strong>: For GGUF models using llama-server</li>
|
||||
<li><strong>MLX</strong>: For MLX-optimized models (macOS only)</li>
|
||||
@@ -1347,6 +1348,18 @@
|
||||
<a id="__codelineno-0-56" name="__codelineno-0-56" href="#__codelineno-0-56"></a><span class="s1"> "gpu_layers": 32</span>
|
||||
<a id="__codelineno-0-57" name="__codelineno-0-57" href="#__codelineno-0-57"></a><span class="s1"> }</span>
|
||||
<a id="__codelineno-0-58" name="__codelineno-0-58" href="#__codelineno-0-58"></a><span class="s1"> }'</span>
|
||||
<a id="__codelineno-0-59" name="__codelineno-0-59" href="#__codelineno-0-59"></a>
|
||||
<a id="__codelineno-0-60" name="__codelineno-0-60" href="#__codelineno-0-60"></a><span class="c1"># Create instance on specific remote node</span>
|
||||
<a id="__codelineno-0-61" name="__codelineno-0-61" href="#__codelineno-0-61"></a>curl<span class="w"> </span>-X<span class="w"> </span>POST<span class="w"> </span>http://localhost:8080/api/instances/remote-llama<span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-0-62" name="__codelineno-0-62" href="#__codelineno-0-62"></a><span class="w"> </span>-H<span class="w"> </span><span class="s2">"Content-Type: application/json"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-0-63" name="__codelineno-0-63" href="#__codelineno-0-63"></a><span class="w"> </span>-d<span class="w"> </span><span class="s1">'{</span>
|
||||
<a id="__codelineno-0-64" name="__codelineno-0-64" href="#__codelineno-0-64"></a><span class="s1"> "backend_type": "llama_cpp",</span>
|
||||
<a id="__codelineno-0-65" name="__codelineno-0-65" href="#__codelineno-0-65"></a><span class="s1"> "backend_options": {</span>
|
||||
<a id="__codelineno-0-66" name="__codelineno-0-66" href="#__codelineno-0-66"></a><span class="s1"> "model": "/models/llama-7b.gguf",</span>
|
||||
<a id="__codelineno-0-67" name="__codelineno-0-67" href="#__codelineno-0-67"></a><span class="s1"> "gpu_layers": 32</span>
|
||||
<a id="__codelineno-0-68" name="__codelineno-0-68" href="#__codelineno-0-68"></a><span class="s1"> },</span>
|
||||
<a id="__codelineno-0-69" name="__codelineno-0-69" href="#__codelineno-0-69"></a><span class="s1"> "nodes": ["worker1"]</span>
|
||||
<a id="__codelineno-0-70" name="__codelineno-0-70" href="#__codelineno-0-70"></a><span class="s1"> }'</span>
|
||||
</code></pre></div>
|
||||
<h2 id="start-instance">Start Instance<a class="headerlink" href="#start-instance" title="Permanent link">¶</a></h2>
|
||||
<h3 id="via-web-ui_1">Via Web UI<a class="headerlink" href="#via-web-ui_1" title="Permanent link">¶</a></h3>
|
||||
@@ -1450,7 +1463,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 28, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
@@ -695,6 +695,30 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-issues" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Issues
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Remote Node Issues">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#node-configuration" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Node Configuration
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -887,6 +911,30 @@
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#remote-node-issues" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Remote Node Issues
|
||||
</span>
|
||||
</a>
|
||||
|
||||
<nav class="md-nav" aria-label="Remote Node Issues">
|
||||
<ul class="md-nav__list">
|
||||
|
||||
<li class="md-nav__item">
|
||||
<a href="#node-configuration" class="md-nav__link">
|
||||
<span class="md-ellipsis">
|
||||
Node Configuration
|
||||
</span>
|
||||
</a>
|
||||
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
</nav>
|
||||
|
||||
</li>
|
||||
|
||||
<li class="md-nav__item">
|
||||
@@ -1044,24 +1092,43 @@
|
||||
</code></pre></div></p>
|
||||
</li>
|
||||
</ol>
|
||||
<h2 id="remote-node-issues">Remote Node Issues<a class="headerlink" href="#remote-node-issues" title="Permanent link">¶</a></h2>
|
||||
<h3 id="node-configuration">Node Configuration<a class="headerlink" href="#node-configuration" title="Permanent link">¶</a></h3>
|
||||
<p><strong>Problem:</strong> Remote instances not appearing or cannot be managed</p>
|
||||
<p><strong>Solutions:</strong>
|
||||
1. <strong>Verify node configuration:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">"main"</span><span class="w"> </span><span class="c1"># Must match a key in nodes map</span>
|
||||
<a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a><span class="nt">nodes</span><span class="p">:</span>
|
||||
<a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span>
|
||||
<a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">""</span><span class="w"> </span><span class="c1"># Empty for local node</span>
|
||||
<a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a><span class="w"> </span><span class="nt">worker1</span><span class="p">:</span>
|
||||
<a id="__codelineno-9-6" name="__codelineno-9-6" href="#__codelineno-9-6"></a><span class="w"> </span><span class="nt">address</span><span class="p">:</span><span class="w"> </span><span class="s">"http://worker1.internal:8080"</span>
|
||||
<a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a><span class="w"> </span><span class="nt">api_key</span><span class="p">:</span><span class="w"> </span><span class="s">"secure-key"</span><span class="w"> </span><span class="c1"># Must match worker1's management key</span>
|
||||
</code></pre></div></p>
|
||||
<ol>
|
||||
<li><strong>Test remote node connectivity:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">"Authorization: Bearer remote-node-key"</span><span class="w"> </span><span class="se">\</span>
|
||||
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a><span class="w"> </span>http://remote-node:8080/api/v1/instances
|
||||
</code></pre></div></li>
|
||||
</ol>
|
||||
<h2 id="debugging-and-logs">Debugging and Logs<a class="headerlink" href="#debugging-and-logs" title="Permanent link">¶</a></h2>
|
||||
<h3 id="viewing-instance-logs">Viewing Instance Logs<a class="headerlink" href="#viewing-instance-logs" title="Permanent link">¶</a></h3>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="c1"># Get instance logs via API</span>
|
||||
<a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a>curl<span class="w"> </span>http://localhost:8080/api/v1/instances/<span class="o">{</span>name<span class="o">}</span>/logs
|
||||
<a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a>
|
||||
<a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a><span class="c1"># Or check log files directly</span>
|
||||
<a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a>tail<span class="w"> </span>-f<span class="w"> </span>~/.local/share/llamactl/logs/<span class="o">{</span>instance-name<span class="o">}</span>.log
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a><span class="c1"># Get instance logs via API</span>
|
||||
<a id="__codelineno-11-2" name="__codelineno-11-2" href="#__codelineno-11-2"></a>curl<span class="w"> </span>http://localhost:8080/api/v1/instances/<span class="o">{</span>name<span class="o">}</span>/logs
|
||||
<a id="__codelineno-11-3" name="__codelineno-11-3" href="#__codelineno-11-3"></a>
|
||||
<a id="__codelineno-11-4" name="__codelineno-11-4" href="#__codelineno-11-4"></a><span class="c1"># Or check log files directly</span>
|
||||
<a id="__codelineno-11-5" name="__codelineno-11-5" href="#__codelineno-11-5"></a>tail<span class="w"> </span>-f<span class="w"> </span>~/.local/share/llamactl/logs/<span class="o">{</span>instance-name<span class="o">}</span>.log
|
||||
</code></pre></div>
|
||||
<h3 id="enable-debug-logging">Enable Debug Logging<a class="headerlink" href="#enable-debug-logging" title="Permanent link">¶</a></h3>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="nb">export</span><span class="w"> </span><span class="nv">LLAMACTL_LOG_LEVEL</span><span class="o">=</span>debug
|
||||
<a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a>llamactl
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="nb">export</span><span class="w"> </span><span class="nv">LLAMACTL_LOG_LEVEL</span><span class="o">=</span>debug
|
||||
<a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a>llamactl
|
||||
</code></pre></div>
|
||||
<h2 id="getting-help">Getting Help<a class="headerlink" href="#getting-help" title="Permanent link">¶</a></h2>
|
||||
<p>When reporting issues, include:</p>
|
||||
<ol>
|
||||
<li>
|
||||
<p><strong>System information:</strong>
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a>llamactl<span class="w"> </span>--version
|
||||
<div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a>llamactl<span class="w"> </span>--version
|
||||
</code></pre></div></p>
|
||||
</li>
|
||||
<li>
|
||||
@@ -1094,7 +1161,7 @@
|
||||
<span class="md-icon" title="Last update">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
|
||||
</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 3, 2025</span>
|
||||
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">October 9, 2025</span>
|
||||
</span>
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user