mirror of https://github.com/lordmathis/llamactl.git
synced 2025-11-06 17:14:28 +00:00

Deployed ebc82c3 to dev with MkDocs 1.5.3 and mike 2.0.0
@@ -495,9 +495,9 @@
     </li>

     <li class="md-nav__item">
-      <a href="#example-configuration" class="md-nav__link">
+      <a href="#example-configurations" class="md-nav__link">
         <span class="md-ellipsis">
-          Example Configuration
+          Example Configurations
         </span>
       </a>
@@ -775,9 +775,9 @@
     </li>

     <li class="md-nav__item">
-      <a href="#example-configuration" class="md-nav__link">
+      <a href="#example-configurations" class="md-nav__link">
         <span class="md-ellipsis">
-          Example Configuration
+          Example Configurations
         </span>
       </a>
@@ -879,9 +879,10 @@
 <li>Click the "Add Instance" button</li>
 <li>Fill in the instance configuration:</li>
 <li><strong>Name</strong>: Give your instance a descriptive name</li>
-<li><strong>Model Path</strong>: Path to your Llama.cpp model file</li>
+<li><strong>Backend Type</strong>: Choose from llama.cpp, MLX, or vLLM</li>
+<li><strong>Model</strong>: Model path or identifier for your chosen backend</li>
 <li>
-  <p><strong>Additional Options</strong>: Any extra Llama.cpp parameters</p>
+  <p><strong>Additional Options</strong>: Backend-specific parameters</p>
 </li>
 <li>
   <p>Click "Create Instance"</p>
@@ -895,76 +896,103 @@
 <li><strong>View logs</strong> by clicking the logs button</li>
 <li><strong>Stop</strong> the instance when needed</li>
 </ul>
-<h2 id="example-configuration">Example Configuration<a class="headerlink" href="#example-configuration" title="Permanent link">&para;</a></h2>
-<p>Here's a basic example configuration for a Llama 2 model:</p>
+<h2 id="example-configurations">Example Configurations<a class="headerlink" href="#example-configurations" title="Permanent link">&para;</a></h2>
+<p>Here are basic example configurations for each backend:</p>
+<p><strong>llama.cpp backend:</strong></p>
 {
   "name": "llama2-7b",
-  "model_path": "/path/to/llama-2-7b-chat.gguf",
-  "options": {
-    "threads": 4,
-    "context_size": 2048
+  "backend_type": "llama_cpp",
+  "backend_options": {
+    "model": "/path/to/llama-2-7b-chat.gguf",
+    "threads": 4,
+    "ctx_size": 2048,
+    "gpu_layers": 32
   }
 }
+<p><strong>MLX backend (macOS only):</strong></p>
+{
+  "name": "mistral-mlx",
+  "backend_type": "mlx_lm",
+  "backend_options": {
+    "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
+    "temp": 0.7,
+    "max_tokens": 2048
+  }
+}
+<p><strong>vLLM backend:</strong></p>
+{
+  "name": "dialogpt-vllm",
+  "backend_type": "vllm",
+  "backend_options": {
+    "model": "microsoft/DialoGPT-medium",
+    "tensor_parallel_size": 2,
+    "gpu_memory_utilization": 0.9
+  }
+}
 <h2 id="using-the-api">Using the API<a class="headerlink" href="#using-the-api" title="Permanent link">&para;</a></h2>
 <p>You can also manage instances via the REST API:</p>
 # List all instances
 curl http://localhost:8080/api/instances

-# Create a new instance
-curl -X POST http://localhost:8080/api/instances \
+# Create a new llama.cpp instance
+curl -X POST http://localhost:8080/api/instances/my-model \
   -H "Content-Type: application/json" \
   -d '{
-    "name": "my-model",
-    "model_path": "/path/to/model.gguf",
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/path/to/model.gguf"
+    }
   }'

 # Start an instance
 curl -X POST http://localhost:8080/api/instances/my-model/start
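For reference, a minimal Python sketch of the same flow using requests, assuming only the endpoints demonstrated above (llamactl's response schemas are not shown in this commit, so the first print is illustrative):

import requests

BASE = "http://localhost:8080/api"  # default llamactl address used in the docs

# List all instances
print(requests.get(f"{BASE}/instances").json())

# Create a new llama.cpp instance (same payload as the curl example)
resp = requests.post(
    f"{BASE}/instances/my-model",
    json={"backend_type": "llama_cpp", "backend_options": {"model": "/path/to/model.gguf"}},
)
resp.raise_for_status()

# Start the instance
requests.post(f"{BASE}/instances/my-model/start").raise_for_status()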
 <h2 id="openai-compatible-api">OpenAI Compatible API<a class="headerlink" href="#openai-compatible-api" title="Permanent link">&para;</a></h2>
 <p>Llamactl provides OpenAI-compatible endpoints, making it easy to integrate with existing OpenAI client libraries and tools.</p>
 <h3 id="chat-completions">Chat Completions<a class="headerlink" href="#chat-completions" title="Permanent link">&para;</a></h3>
 <p>Once you have an instance running, you can use it with the OpenAI-compatible chat completions endpoint:</p>
 curl -X POST http://localhost:8080/v1/chat/completions \
   -H "Content-Type: application/json" \
   -d '{
     "model": "my-model",
     "messages": [
       {
         "role": "user",
         "content": "Hello! Can you help me write a Python function?"
       }
     ],
     "max_tokens": 150,
     "temperature": 0.7
   }'
 <h3 id="using-with-python-openai-client">Using with Python OpenAI Client<a class="headerlink" href="#using-with-python-openai-client" title="Permanent link">&para;</a></h3>
 <p>You can also use the official OpenAI Python client:</p>
 from openai import OpenAI

 # Point the client to your Llamactl server
 client = OpenAI(
     base_url="http://localhost:8080/v1",
     api_key="not-needed"  # Llamactl doesn't require API keys by default
 )

 # Create a chat completion
 response = client.chat.completions.create(
     model="my-model",  # Use the name of your instance
     messages=[
         {"role": "user", "content": "Explain quantum computing in simple terms"}
     ],
     max_tokens=200,
     temperature=0.7
 )

 print(response.choices[0].message.content)
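A streaming variant of the same call, as a sketch: the OpenAI client supports stream=True, but whether a given llamactl backend streams responses is an assumption not covered by this commit:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

# stream=True yields incremental chunks instead of one response object.
# Assumption: the instance's backend supports streamed completions.
stream = client.chat.completions.create(
    model="my-model",
    messages=[{"role": "user", "content": "Explain quantum computing in simple terms"}],
    stream=True,
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)
print()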
 <h3 id="list-available-models">List Available Models<a class="headerlink" href="#list-available-models" title="Permanent link">&para;</a></h3>
 <p>Get a list of running instances (models) in OpenAI-compatible format:</p>
 curl http://localhost:8080/v1/models
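The equivalent lookup through the OpenAI Python client, under the same assumptions as the examples above:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

# Each running instance should appear as a model id
for model in client.models.list():
    print(model.id)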
 <h2 id="next-steps">Next Steps<a class="headerlink" href="#next-steps" title="Permanent link">&para;</a></h2>
 <ul>
@@ -992,7 +1020,7 @@
 <span class="md-icon" title="Last update">
   <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M21 13.1c-.1 0-.3.1-.4.2l-1 1 2.1 2.1 1-1c.2-.2.2-.6 0-.8l-1.3-1.3c-.1-.1-.2-.2-.4-.2m-1.9 1.8-6.1 6V23h2.1l6.1-6.1-2.1-2M12.5 7v5.2l4 2.4-1 1L11 13V7h1.5M11 21.9c-5.1-.5-9-4.8-9-9.9C2 6.5 6.5 2 12 2c5.3 0 9.6 4.1 10 9.3-.3-.1-.6-.2-1-.2s-.7.1-1 .2C19.6 7.2 16.2 4 12 4c-4.4 0-8 3.6-8 8 0 4.1 3.1 7.5 7.1 7.9l-.1.2v1.8Z"/></svg>
 </span>
-<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 3, 2025</span>
+<span class="git-revision-date-localized-plugin git-revision-date-localized-plugin-date">September 21, 2025</span>
 </span>