mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-12-23 01:24:24 +00:00
Implement model management for llama.cpp instances
This commit is contained in:
141
pkg/instance/models.go
Normal file
141
pkg/instance/models.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package instance
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"llamactl/pkg/backends"
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Model represents a model available in a llama.cpp instance
|
||||
type Model struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"`
|
||||
OwnedBy string `json:"owned_by"`
|
||||
Created int64 `json:"created"`
|
||||
InCache bool `json:"in_cache"`
|
||||
Path string `json:"path"`
|
||||
Status ModelStatus `json:"status"`
|
||||
}
|
||||
|
||||
// ModelStatus is the "status" object attached to a Model entry and describes
// the state of that model inside an instance.
type ModelStatus struct {
	// Value is the state string: "loaded", "loading" or "unloaded".
	Value string `json:"value"`

	// Args is the string list found under the "args" key.
	// NOTE(review): presumably the arguments the model was started with; confirm.
	Args []string `json:"args"`
}
|
||||
|
||||
// IsLlamaCpp checks if this instance is a llama.cpp instance
|
||||
func (i *Instance) IsLlamaCpp() bool {
|
||||
opts := i.GetOptions()
|
||||
if opts == nil {
|
||||
return false
|
||||
}
|
||||
return opts.BackendOptions.BackendType == backends.BackendTypeLlamaCpp
|
||||
}
|
||||
|
||||
// GetModels fetches the models available in this llama.cpp instance
|
||||
func (i *Instance) GetModels() ([]Model, error) {
|
||||
if !i.IsLlamaCpp() {
|
||||
return nil, fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
|
||||
}
|
||||
|
||||
if !i.IsRunning() {
|
||||
return nil, fmt.Errorf("instance %s is not running", i.Name)
|
||||
}
|
||||
|
||||
var result struct {
|
||||
Data []Model `json:"data"`
|
||||
}
|
||||
if err := i.doRequest("GET", "/models", nil, &result, 10*time.Second); err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch models: %w", err)
|
||||
}
|
||||
|
||||
return result.Data, nil
|
||||
}
|
||||
|
||||
// LoadModel loads a model in this llama.cpp instance
|
||||
func (i *Instance) LoadModel(modelName string) error {
|
||||
if !i.IsLlamaCpp() {
|
||||
return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
|
||||
}
|
||||
|
||||
if !i.IsRunning() {
|
||||
return fmt.Errorf("instance %s is not running", i.Name)
|
||||
}
|
||||
|
||||
// Make the load request
|
||||
reqBody := map[string]string{"model": modelName}
|
||||
if err := i.doRequest("POST", "/models/load", reqBody, nil, 30*time.Second); err != nil {
|
||||
return fmt.Errorf("failed to load model: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnloadModel unloads a model from this llama.cpp instance
|
||||
func (i *Instance) UnloadModel(modelName string) error {
|
||||
if !i.IsLlamaCpp() {
|
||||
return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
|
||||
}
|
||||
|
||||
if !i.IsRunning() {
|
||||
return fmt.Errorf("instance %s is not running", i.Name)
|
||||
}
|
||||
|
||||
// Make the unload request
|
||||
reqBody := map[string]string{"model": modelName}
|
||||
if err := i.doRequest("POST", "/models/unload", reqBody, nil, 30*time.Second); err != nil {
|
||||
return fmt.Errorf("failed to unload model: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// doRequest makes an HTTP request to this instance's backend
|
||||
func (i *Instance) doRequest(method, path string, reqBody, respBody any, timeout time.Duration) error {
|
||||
url := fmt.Sprintf("http://%s:%d%s", i.GetHost(), i.GetPort(), path)
|
||||
|
||||
var bodyReader io.Reader
|
||||
if reqBody != nil {
|
||||
bodyBytes, err := json.Marshal(reqBody)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal request body: %w", err)
|
||||
}
|
||||
bodyReader = bytes.NewReader(bodyBytes)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), timeout)
|
||||
defer cancel()
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, method, url, bodyReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create request: %w", err)
|
||||
}
|
||||
|
||||
if reqBody != nil {
|
||||
req.Header.Set("Content-Type", "application/json")
|
||||
}
|
||||
|
||||
resp, err := http.DefaultClient.Do(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
bodyBytes, _ := io.ReadAll(resp.Body)
|
||||
return fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes))
|
||||
}
|
||||
|
||||
if respBody != nil {
|
||||
if err := json.NewDecoder(resp.Body).Decode(respBody); err != nil {
|
||||
return fmt.Errorf("failed to decode response: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user