Deployed c776785 to dev with MkDocs 1.6.1 and mike 2.1.3

This commit is contained in:
lordmathis
2025-12-08 18:23:59 +00:00
parent 85205fc5d3
commit 8e8fb83fb3
9 changed files with 86 additions and 130 deletions

View File

@@ -806,13 +806,12 @@
<a id="__codelineno-1-59" name="__codelineno-1-59" href="#__codelineno-1-59"></a> <a id="__codelineno-1-59" name="__codelineno-1-59" href="#__codelineno-1-59"></a>
<a id="__codelineno-1-60" name="__codelineno-1-60" href="#__codelineno-1-60"></a><span class="nt">auth</span><span class="p">:</span> <a id="__codelineno-1-60" name="__codelineno-1-60" href="#__codelineno-1-60"></a><span class="nt">auth</span><span class="p">:</span>
<a id="__codelineno-1-61" name="__codelineno-1-61" href="#__codelineno-1-61"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for inference endpoints</span> <a id="__codelineno-1-61" name="__codelineno-1-61" href="#__codelineno-1-61"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for inference endpoints</span>
<a id="__codelineno-1-62" name="__codelineno-1-62" href="#__codelineno-1-62"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for inference endpoints</span> <a id="__codelineno-1-62" name="__codelineno-1-62" href="#__codelineno-1-62"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for management endpoints</span>
<a id="__codelineno-1-63" name="__codelineno-1-63" href="#__codelineno-1-63"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require auth for management endpoints</span> <a id="__codelineno-1-63" name="__codelineno-1-63" href="#__codelineno-1-63"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for management endpoints</span>
<a id="__codelineno-1-64" name="__codelineno-1-64" href="#__codelineno-1-64"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># Keys for management endpoints</span> <a id="__codelineno-1-64" name="__codelineno-1-64" href="#__codelineno-1-64"></a>
<a id="__codelineno-1-65" name="__codelineno-1-65" href="#__codelineno-1-65"></a> <a id="__codelineno-1-65" name="__codelineno-1-65" href="#__codelineno-1-65"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;main&quot;</span><span class="w"> </span><span class="c1"># Name of the local node (default: &quot;main&quot;)</span>
<a id="__codelineno-1-66" name="__codelineno-1-66" href="#__codelineno-1-66"></a><span class="nt">local_node</span><span class="p">:</span><span class="w"> </span><span class="s">&quot;main&quot;</span><span class="w"> </span><span class="c1"># Name of the local node (default: &quot;main&quot;)</span> <a id="__codelineno-1-66" name="__codelineno-1-66" href="#__codelineno-1-66"></a><span class="nt">nodes</span><span class="p">:</span><span class="w"> </span><span class="c1"># Node configuration for multi-node deployment</span>
<a id="__codelineno-1-67" name="__codelineno-1-67" href="#__codelineno-1-67"></a><span class="nt">nodes</span><span class="p">:</span><span class="w"> </span><span class="c1"># Node configuration for multi-node deployment</span> <a id="__codelineno-1-67" name="__codelineno-1-67" href="#__codelineno-1-67"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span><span class="w"> </span><span class="c1"># Default local node (empty config)</span>
<a id="__codelineno-1-68" name="__codelineno-1-68" href="#__codelineno-1-68"></a><span class="w"> </span><span class="nt">main</span><span class="p">:</span><span class="w"> </span><span class="c1"># Default local node (empty config)</span>
</code></pre></div> </code></pre></div>
<h2 id="configuration-files">Configuration Files<a class="headerlink" href="#configuration-files" title="Permanent link">&para;</a></h2> <h2 id="configuration-files">Configuration Files<a class="headerlink" href="#configuration-files" title="Permanent link">&para;</a></h2>
<h3 id="configuration-file-locations">Configuration File Locations<a class="headerlink" href="#configuration-file-locations" title="Permanent link">&para;</a></h3> <h3 id="configuration-file-locations">Configuration File Locations<a class="headerlink" href="#configuration-file-locations" title="Permanent link">&para;</a></h3>
@@ -965,15 +964,30 @@
- <code>LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS</code> - Maximum idle database connections<br /> - <code>LLAMACTL_DATABASE_MAX_IDLE_CONNECTIONS</code> - Maximum idle database connections<br />
- <code>LLAMACTL_DATABASE_CONN_MAX_LIFETIME</code> - Connection max lifetime (e.g., "5m", "1h") </p> - <code>LLAMACTL_DATABASE_CONN_MAX_LIFETIME</code> - Connection max lifetime (e.g., "5m", "1h") </p>
<h3 id="authentication-configuration">Authentication Configuration<a class="headerlink" href="#authentication-configuration" title="Permanent link">&para;</a></h3> <h3 id="authentication-configuration">Authentication Configuration<a class="headerlink" href="#authentication-configuration" title="Permanent link">&para;</a></h3>
<p>llamactl supports two types of authentication: </p>
<ul>
<li><strong>Management API Keys</strong>: For accessing the web UI and management API (creating/managing instances). These can be configured in the config file or via environment variables. </li>
<li><strong>Inference API Keys</strong>: For accessing the OpenAI-compatible inference endpoints. These are managed via the web UI (Settings → API Keys) and stored in the database. </li>
</ul>
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="nt">auth</span><span class="p">:</span> <div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="nt">auth</span><span class="p">:</span>
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for OpenAI endpoints (default: true)</span> <a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for OpenAI endpoints (default: true)</span>
<a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid inference API keys</span> <a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for management endpoints (default: true)</span>
<a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a><span class="w"> </span><span class="nt">require_management_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">true</span><span class="w"> </span><span class="c1"># Require API key for management endpoints (default: true)</span> <a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid management API keys</span>
<a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span><span class="w"> </span><span class="p p-Indicator">[]</span><span class="w"> </span><span class="c1"># List of valid management API keys</span>
</code></pre></div> </code></pre></div>
<p><strong>Managing Inference API Keys:</strong> </p>
<p>Inference API keys are managed through the web UI or management API and stored in the database. To create and manage inference keys: </p>
<ol>
<li>Open the web UI and log in with a management API key </li>
<li>Navigate to <strong>Settings → API Keys</strong> </li>
<li>Click <strong>Create API Key</strong> </li>
<li>Configure the key:
<ul>
<li><strong>Name</strong>: A descriptive name for the key </li>
<li><strong>Expiration</strong>: Optional expiration date </li>
<li><strong>Permissions</strong>: Grant access to all instances or specific instances only </li>
</ul>
</li>
<li>Copy the generated key - it won't be shown again </li>
</ol>
<p><strong>Environment Variables:</strong><br /> <p><strong>Environment Variables:</strong><br />
- <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)<br /> - <code>LLAMACTL_REQUIRE_INFERENCE_AUTH</code> - Require auth for OpenAI endpoints (true/false)<br />
- <code>LLAMACTL_INFERENCE_KEYS</code> - Comma-separated inference API keys<br />
- <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)<br /> - <code>LLAMACTL_REQUIRE_MANAGEMENT_AUTH</code> - Require auth for management endpoints (true/false)<br />
- <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys </p> - <code>LLAMACTL_MANAGEMENT_KEYS</code> - Comma-separated management API keys </p>
<h3 id="remote-node-configuration">Remote Node Configuration<a class="headerlink" href="#remote-node-configuration" title="Permanent link">&para;</a></h3> <h3 id="remote-node-configuration">Remote Node Configuration<a class="headerlink" href="#remote-node-configuration" title="Permanent link">&para;</a></h3>
@@ -1014,7 +1028,7 @@
<span class="md-icon" title="Last update"> <span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg>
</span> </span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="December 2, 2025 19:07:39 UTC">December 2, 2025</span> <span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="December 8, 2025 18:15:42 UTC">December 8, 2025</span>
</span> </span>

View File

@@ -2063,20 +2063,19 @@ const docTemplate = `{
"server.CreateKeyRequest": { "server.CreateKeyRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"expiresAt": { "expires_at": {
"type": "integer", "type": "integer"
"format": "int64"
}, },
"instancePermissions": { "instance_ids": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/definitions/server.InstancePermission" "type": "integer"
} }
}, },
"name": { "name": {
"type": "string" "type": "string"
}, },
"permissionMode": { "permission_mode": {
"$ref": "#/definitions/auth.PermissionMode" "$ref": "#/definitions/auth.PermissionMode"
} }
} }
@@ -2087,9 +2086,6 @@ const docTemplate = `{
"created_at": { "created_at": {
"type": "integer" "type": "integer"
}, },
"enabled": {
"type": "boolean"
},
"expires_at": { "expires_at": {
"type": "integer" "type": "integer"
}, },
@@ -2116,29 +2112,9 @@ const docTemplate = `{
} }
} }
}, },
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": { "server.KeyPermissionResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": { "instance_id": {
"type": "integer" "type": "integer"
}, },
@@ -2153,9 +2129,6 @@ const docTemplate = `{
"created_at": { "created_at": {
"type": "integer" "type": "integer"
}, },
"enabled": {
"type": "boolean"
},
"expires_at": { "expires_at": {
"type": "integer" "type": "integer"
}, },

View File

@@ -564,6 +564,15 @@
</span> </span>
</a> </a>
</li>
<li class="md-nav__item">
<a href="#create-an-inference-api-key" class="md-nav__link">
<span class="md-ellipsis">
Create an Inference API Key
</span>
</a>
</li> </li>
<li class="md-nav__item"> <li class="md-nav__item">
@@ -773,10 +782,10 @@
<h2 id="authentication">Authentication<a class="headerlink" href="#authentication" title="Permanent link">&para;</a></h2> <h2 id="authentication">Authentication<a class="headerlink" href="#authentication" title="Permanent link">&para;</a></h2>
<p>Llamactl uses two types of API keys: </p> <p>Llamactl uses two types of API keys: </p>
<ul> <ul>
<li><strong>Management API Key</strong>: Used to authenticate with the Llamactl management API (creating, starting, stopping instances). </li> <li><strong>Management API Key</strong>: Used to authenticate with the Llamactl management API and web UI. If not configured, one is auto-generated at startup and printed to the terminal. </li>
<li><strong>Inference API Key</strong>: Used to authenticate requests to the OpenAI-compatible endpoints (<code>/v1/chat/completions</code>, <code>/v1/completions</code>, etc.). </li> <li><strong>Inference API Key</strong>: Used to authenticate requests to the OpenAI-compatible endpoints (<code>/v1/chat/completions</code>, <code>/v1/completions</code>, etc.). These are created and managed via the web UI. </li>
</ul> </ul>
<p>By default, authentication is required. If you don't configure these keys in your configuration file, llamactl will auto-generate them and print them to the terminal on startup. You can also configure custom keys or disable authentication entirely in the <a href="../configuration/">Configuration</a> guide. </p> <p>By default, authentication is required for both management and inference endpoints. You can configure custom management keys or disable authentication in the <a href="../configuration/">Configuration</a> guide. </p>
<h2 id="start-llamactl">Start Llamactl<a class="headerlink" href="#start-llamactl" title="Permanent link">&para;</a></h2> <h2 id="start-llamactl">Start Llamactl<a class="headerlink" href="#start-llamactl" title="Permanent link">&para;</a></h2>
<p>Start the Llamactl server: </p> <p>Start the Llamactl server: </p>
<div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>llamactl <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a>llamactl
@@ -789,22 +798,15 @@
<a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a> sk-management-... <a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a> sk-management-...
<a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a> <a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a>
<a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ <a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
<a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a>⚠️ INFERENCE AUTHENTICATION REQUIRED <a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a>⚠️ IMPORTANT
<a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ <a id="__codelineno-1-10" name="__codelineno-1-10" href="#__codelineno-1-10"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
<a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a>🔑 Generated Inference API Key: <a id="__codelineno-1-11" name="__codelineno-1-11" href="#__codelineno-1-11"></a>• This key is auto-generated and will change on restart
<a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a> <a id="__codelineno-1-12" name="__codelineno-1-12" href="#__codelineno-1-12"></a>• For production, add explicit management_keys to your configuration
<a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a> sk-inference-... <a id="__codelineno-1-13" name="__codelineno-1-13" href="#__codelineno-1-13"></a>• Copy this key before it disappears from the terminal
<a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a> <a id="__codelineno-1-14" name="__codelineno-1-14" href="#__codelineno-1-14"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
<a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ <a id="__codelineno-1-15" name="__codelineno-1-15" href="#__codelineno-1-15"></a>Llamactl server listening on 0.0.0.0:8080
<a id="__codelineno-1-16" name="__codelineno-1-16" href="#__codelineno-1-16"></a>⚠️ IMPORTANT
<a id="__codelineno-1-17" name="__codelineno-1-17" href="#__codelineno-1-17"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
<a id="__codelineno-1-18" name="__codelineno-1-18" href="#__codelineno-1-18"></a>• These keys are auto-generated and will change on restart
<a id="__codelineno-1-19" name="__codelineno-1-19" href="#__codelineno-1-19"></a>• For production, add explicit keys to your configuration
<a id="__codelineno-1-20" name="__codelineno-1-20" href="#__codelineno-1-20"></a>• Copy these keys before they disappear from the terminal
<a id="__codelineno-1-21" name="__codelineno-1-21" href="#__codelineno-1-21"></a>━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
<a id="__codelineno-1-22" name="__codelineno-1-22" href="#__codelineno-1-22"></a>Llamactl server listening on 0.0.0.0:8080
</code></pre></div> </code></pre></div>
<p>Copy the <strong>Management</strong> and <strong>Inference</strong> API Keys from the terminal - you'll need them to access the web UI and make inference requests. </p> <p>Copy the <strong>Management API Key</strong> from the terminal - you'll need it to access the web UI. </p>
<p>By default, Llamactl will start on <code>http://localhost:8080</code>. </p> <p>By default, Llamactl will start on <code>http://localhost:8080</code>. </p>
<h2 id="access-the-web-ui">Access the Web UI<a class="headerlink" href="#access-the-web-ui" title="Permanent link">&para;</a></h2> <h2 id="access-the-web-ui">Access the Web UI<a class="headerlink" href="#access-the-web-ui" title="Permanent link">&para;</a></h2>
<p>Open your web browser and navigate to: </p> <p>Open your web browser and navigate to: </p>
@@ -826,7 +828,7 @@
</ul> </ul>
<div class="admonition tip"> <div class="admonition tip">
<p class="admonition-title">Auto-Assignment</p> <p class="admonition-title">Auto-Assignment</p>
<p>Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values. </p> <p>Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and manages API keys if authentication is enabled. You typically don't need to manually specify these values. </p>
</div> </div>
<div class="admonition note"> <div class="admonition note">
<p class="admonition-title">Remote Node Deployment</p> <p class="admonition-title">Remote Node Deployment</p>
@@ -845,6 +847,21 @@
<li><strong>View logs</strong> by clicking the logs button </li> <li><strong>View logs</strong> by clicking the logs button </li>
<li><strong>Stop</strong> the instance when needed </li> <li><strong>Stop</strong> the instance when needed </li>
</ul> </ul>
<h2 id="create-an-inference-api-key">Create an Inference API Key<a class="headerlink" href="#create-an-inference-api-key" title="Permanent link">&para;</a></h2>
<p>To make inference requests to your instances, you'll need an inference API key: </p>
<ol>
<li>In the web UI, click the <strong>Settings</strong> icon (gear icon in the top-right) </li>
<li>Navigate to the <strong>API Keys</strong> tab </li>
<li>Click <strong>Create API Key</strong> </li>
<li>Configure your key:
<ul>
<li><strong>Name</strong>: Give it a descriptive name (e.g., "Production Key", "Development Key") </li>
<li><strong>Expiration</strong>: Optionally set an expiration date for the key </li>
<li><strong>Permissions</strong>: Choose whether the key can access all instances or only specific ones </li>
</ul>
</li>
<li>Click <strong>Create</strong> </li>
<li><strong>Copy the generated key</strong> - it will only be shown once! </li>
</ol>
<p>The key will look like: <code>llamactl-...</code> </p>
<p>You can create multiple inference keys with different permissions for different use cases (e.g., one for development, one for production, or keys limited to specific instances). </p>
<h2 id="example-configurations">Example Configurations<a class="headerlink" href="#example-configurations" title="Permanent link">&para;</a></h2> <h2 id="example-configurations">Example Configurations<a class="headerlink" href="#example-configurations" title="Permanent link">&para;</a></h2>
<p>Here are basic example configurations for each backend: </p> <p>Here are basic example configurations for each backend: </p>
<p><strong>llama.cpp backend:</strong><br /> <p><strong>llama.cpp backend:</strong><br />
@@ -966,7 +983,7 @@
</code></pre></div> </code></pre></div>
<div class="admonition note"> <div class="admonition note">
<p class="admonition-title">API Key</p> <p class="admonition-title">API Key</p>
<p>If you disabled authentication in your config, you can use any value for <code>api_key</code> (e.g., <code>"not-needed"</code>). Otherwise, use the inference API key shown in the terminal output on startup. </p> <p>If you disabled authentication in your config, you can use any value for <code>api_key</code> (e.g., <code>"not-needed"</code>). Otherwise, use the inference API key you created via the web UI (Settings → API Keys). </p>
</div> </div>
<h3 id="list-available-models">List Available Models<a class="headerlink" href="#list-available-models" title="Permanent link">&para;</a></h3> <h3 id="list-available-models">List Available Models<a class="headerlink" href="#list-available-models" title="Permanent link">&para;</a></h3>
<p>Get a list of running instances (models) in OpenAI-compatible format: </p> <p>Get a list of running instances (models) in OpenAI-compatible format: </p>
@@ -998,7 +1015,7 @@
<span class="md-icon" title="Last update"> <span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg>
</span> </span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="October 26, 2025 16:19:53 UTC">October 26, 2025</span> <span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="December 8, 2025 18:15:42 UTC">December 8, 2025</span>
</span> </span>

File diff suppressed because one or more lines are too long

View File

@@ -2056,20 +2056,19 @@
"server.CreateKeyRequest": { "server.CreateKeyRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"expiresAt": { "expires_at": {
"type": "integer", "type": "integer"
"format": "int64"
}, },
"instancePermissions": { "instance_ids": {
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/definitions/server.InstancePermission" "type": "integer"
} }
}, },
"name": { "name": {
"type": "string" "type": "string"
}, },
"permissionMode": { "permission_mode": {
"$ref": "#/definitions/auth.PermissionMode" "$ref": "#/definitions/auth.PermissionMode"
} }
} }
@@ -2080,9 +2079,6 @@
"created_at": { "created_at": {
"type": "integer" "type": "integer"
}, },
"enabled": {
"type": "boolean"
},
"expires_at": { "expires_at": {
"type": "integer" "type": "integer"
}, },
@@ -2109,29 +2105,9 @@
} }
} }
}, },
"server.InstancePermission": {
"type": "object",
"properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": {
"type": "integer"
}
}
},
"server.KeyPermissionResponse": { "server.KeyPermissionResponse": {
"type": "object", "type": "object",
"properties": { "properties": {
"can_infer": {
"type": "boolean"
},
"can_view_logs": {
"type": "boolean"
},
"instance_id": { "instance_id": {
"type": "integer" "type": "integer"
}, },
@@ -2146,9 +2122,6 @@
"created_at": { "created_at": {
"type": "integer" "type": "integer"
}, },
"enabled": {
"type": "boolean"
},
"expires_at": { "expires_at": {
"type": "integer" "type": "integer"
}, },

View File

@@ -232,24 +232,21 @@ definitions:
type: object type: object
server.CreateKeyRequest: server.CreateKeyRequest:
properties: properties:
expiresAt: expires_at:
format: int64
type: integer type: integer
instancePermissions: instance_ids:
items: items:
$ref: '#/definitions/server.InstancePermission' type: integer
type: array type: array
name: name:
type: string type: string
permissionMode: permission_mode:
$ref: '#/definitions/auth.PermissionMode' $ref: '#/definitions/auth.PermissionMode'
type: object type: object
server.CreateKeyResponse: server.CreateKeyResponse:
properties: properties:
created_at: created_at:
type: integer type: integer
enabled:
type: boolean
expires_at: expires_at:
type: integer type: integer
id: id:
@@ -267,21 +264,8 @@ definitions:
user_id: user_id:
type: string type: string
type: object type: object
server.InstancePermission:
properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id:
type: integer
type: object
server.KeyPermissionResponse: server.KeyPermissionResponse:
properties: properties:
can_infer:
type: boolean
can_view_logs:
type: boolean
instance_id: instance_id:
type: integer type: integer
instance_name: instance_name:
@@ -291,8 +275,6 @@ definitions:
properties: properties:
created_at: created_at:
type: integer type: integer
enabled:
type: boolean
expires_at: expires_at:
type: integer type: integer
id: id:

View File

@@ -880,21 +880,18 @@
<a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span> <a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a><span class="w"> </span><span class="nt">require_inference_auth</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">false</span>
</code></pre></div></p> </code></pre></div></p>
<ol> <ol>
<li> <li><strong>Configure management API keys:</strong><br />
<p><strong>Configure API keys:</strong><br />
<div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="nt">auth</span><span class="p">:</span> <div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="nt">auth</span><span class="p">:</span>
<a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span> <a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="w"> </span><span class="nt">management_keys</span><span class="p">:</span>
<a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;your-management-key&quot;</span> <a id="__codelineno-6-3" name="__codelineno-6-3" href="#__codelineno-6-3"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;your-management-key&quot;</span>
<a id="__codelineno-6-4" name="__codelineno-6-4" href="#__codelineno-6-4"></a><span class="w"> </span><span class="nt">inference_keys</span><span class="p">:</span> </code></pre></div></li>
<a id="__codelineno-6-5" name="__codelineno-6-5" href="#__codelineno-6-5"></a><span class="w"> </span><span class="p p-Indicator">-</span><span class="w"> </span><span class="s">&quot;your-inference-key&quot;</span> </ol>
</code></pre></div></p> <p>For inference API keys, create them via the web UI (Settings → API Keys) after logging in with your management key. </p>
</li> <ol>
<li> <li><strong>Use correct Authorization header:</strong><br />
<p><strong>Use correct Authorization header:</strong><br />
<div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span> <div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a>curl<span class="w"> </span>-H<span class="w"> </span><span class="s2">&quot;Authorization: Bearer your-api-key&quot;</span><span class="w"> </span><span class="se">\</span>
<a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances <a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="w"> </span>http://localhost:8080/api/v1/instances
</code></pre></div></p> </code></pre></div></li>
</li>
</ol> </ol>
<h2 id="remote-node-issues">Remote Node Issues<a class="headerlink" href="#remote-node-issues" title="Permanent link">&para;</a></h2> <h2 id="remote-node-issues">Remote Node Issues<a class="headerlink" href="#remote-node-issues" title="Permanent link">&para;</a></h2>
<h3 id="node-configuration">Node Configuration<a class="headerlink" href="#node-configuration" title="Permanent link">&para;</a></h3> <h3 id="node-configuration">Node Configuration<a class="headerlink" href="#node-configuration" title="Permanent link">&para;</a></h3>
@@ -972,7 +969,7 @@
<span class="md-icon" title="Last update"> <span class="md-icon" title="Last update">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg> <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1zM12.5 7v5.2l4 2.4-1 1L11 13V7zM11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2z"/></svg>
</span> </span>
<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="October 26, 2025 16:19:53 UTC">October 26, 2025</span> <span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date" title="December 8, 2025 18:15:42 UTC">December 8, 2025</span>
</span> </span>