package instance

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"llamactl/pkg/backends"
	"net/http"
	"time"
)

// Model represents a model available in a llama.cpp instance
type Model struct {
	ID      string      `json:"id"`
	Object  string      `json:"object"`
	OwnedBy string      `json:"owned_by"`
	Created int64       `json:"created"`
	InCache bool        `json:"in_cache"`
	Path    string      `json:"path"`
	Status  ModelStatus `json:"status"`
}

// ModelStatus represents the status of a model in an instance
type ModelStatus struct {
	Value string   `json:"value"` // "loaded" | "loading" | "unloaded"
	Args  []string `json:"args"`
}

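// For reference, one entry of the /models response decodes into Model along
// these lines (field names follow the struct tags above; the values are
// illustrative only, not taken from a real instance):
//
//	{
//	  "id": "example-model",
//	  "object": "model",
//	  "owned_by": "llamacpp",
//	  "created": 1700000000,
//	  "in_cache": true,
//	  "path": "/models/example-model.gguf",
//	  "status": {"value": "loaded", "args": []}
//	}
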
// IsLlamaCpp checks if this instance is a llama.cpp instance
func (i *Instance) IsLlamaCpp() bool {
	opts := i.GetOptions()
	if opts == nil {
		return false
	}
	return opts.BackendOptions.BackendType == backends.BackendTypeLlamaCpp
}

// GetModels fetches the models available in this llama.cpp instance
func (i *Instance) GetModels() ([]Model, error) {
	if !i.IsLlamaCpp() {
		return nil, fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
	}

	if !i.IsRunning() {
		return nil, fmt.Errorf("instance %s is not running", i.Name)
	}

	var result struct {
		Data []Model `json:"data"`
	}
	if err := i.doRequest("GET", "/models", nil, &result, 10*time.Second); err != nil {
		return nil, fmt.Errorf("failed to fetch models: %w", err)
	}

	return result.Data, nil
}

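// Illustrative use from elsewhere in llamactl (a sketch only; "inst" stands
// in for an *Instance already known to be running):
//
//	models, err := inst.GetModels()
//	if err != nil {
//		log.Printf("listing models failed: %v", err)
//		return
//	}
//	for _, m := range models {
//		log.Printf("%s (%s)", m.ID, m.Status.Value)
//	}
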
// LoadModel loads a model in this llama.cpp instance
func (i *Instance) LoadModel(modelName string) error {
	if !i.IsLlamaCpp() {
		return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
	}

	if !i.IsRunning() {
		return fmt.Errorf("instance %s is not running", i.Name)
	}

	// Make the load request
	reqBody := map[string]string{"model": modelName}
	if err := i.doRequest("POST", "/models/load", reqBody, nil, 30*time.Second); err != nil {
		return fmt.Errorf("failed to load model: %w", err)
	}

	return nil
}

// UnloadModel unloads a model from this llama.cpp instance
func (i *Instance) UnloadModel(modelName string) error {
	if !i.IsLlamaCpp() {
		return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name)
	}

	if !i.IsRunning() {
		return fmt.Errorf("instance %s is not running", i.Name)
	}

	// Make the unload request
	reqBody := map[string]string{"model": modelName}
	if err := i.doRequest("POST", "/models/unload", reqBody, nil, 30*time.Second); err != nil {
		return fmt.Errorf("failed to unload model: %w", err)
	}

	return nil
}

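// LoadModel and UnloadModel pair naturally when swapping models on a running
// instance. A sketch, assuming the names come from GetModels above:
//
//	if err := inst.UnloadModel("old-model"); err != nil {
//		return err
//	}
//	if err := inst.LoadModel("new-model"); err != nil {
//		return err
//	}
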
// doRequest makes an HTTP request to this instance's backend
func (i *Instance) doRequest(method, path string, reqBody, respBody any, timeout time.Duration) error {
	url := fmt.Sprintf("http://%s:%d%s", i.GetHost(), i.GetPort(), path)

	// Marshal the request body, if any, into a JSON reader.
	var bodyReader io.Reader
	if reqBody != nil {
		bodyBytes, err := json.Marshal(reqBody)
		if err != nil {
			return fmt.Errorf("failed to marshal request body: %w", err)
		}
		bodyReader = bytes.NewReader(bodyBytes)
	}

	// Bound the whole request by the caller-supplied timeout.
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()

	req, err := http.NewRequestWithContext(ctx, method, url, bodyReader)
	if err != nil {
		return fmt.Errorf("failed to create request: %w", err)
	}

	if reqBody != nil {
		req.Header.Set("Content-Type", "application/json")
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Surface non-200 responses with the backend's error text.
	if resp.StatusCode != http.StatusOK {
		bodyBytes, _ := io.ReadAll(resp.Body)
		return fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes))
	}

	// Decode the JSON response into respBody when the caller expects one.
	if respBody != nil {
		if err := json.NewDecoder(resp.Body).Decode(respBody); err != nil {
			return fmt.Errorf("failed to decode response: %w", err)
		}
	}

	return nil
}
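
// Further llama.cpp endpoints can be wrapped the same way. A hypothetical
// helper is sketched below; the "/props" path and untyped response are
// assumptions for illustration, not part of llamactl:
//
//	func (i *Instance) getProps() (map[string]any, error) {
//		var props map[string]any
//		if err := i.doRequest("GET", "/props", nil, &props, 10*time.Second); err != nil {
//			return nil, fmt.Errorf("failed to fetch props: %w", err)
//		}
//		return props, nil
//	}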