package instance import ( "bytes" "context" "encoding/json" "fmt" "io" "llamactl/pkg/backends" "net/http" "time" ) // Model represents a model available in a llama.cpp instance type Model struct { ID string `json:"id"` Object string `json:"object"` OwnedBy string `json:"owned_by"` Created int64 `json:"created"` InCache bool `json:"in_cache"` Path string `json:"path"` Status ModelStatus `json:"status"` } // ModelStatus represents the status of a model in an instance type ModelStatus struct { Value string `json:"value"` // "loaded" | "loading" | "unloaded" Args []string `json:"args"` } // IsLlamaCpp checks if this instance is a llama.cpp instance func (i *Instance) IsLlamaCpp() bool { opts := i.GetOptions() if opts == nil { return false } return opts.BackendOptions.BackendType == backends.BackendTypeLlamaCpp } // GetModels fetches the models available in this llama.cpp instance func (i *Instance) GetModels() ([]Model, error) { if !i.IsLlamaCpp() { return nil, fmt.Errorf("instance %s is not a llama.cpp instance", i.Name) } if !i.IsRunning() { return nil, fmt.Errorf("instance %s is not running", i.Name) } var result struct { Data []Model `json:"data"` } if err := i.doRequest("GET", "/models", nil, &result, 10*time.Second); err != nil { return nil, fmt.Errorf("failed to fetch models: %w", err) } return result.Data, nil } // LoadModel loads a model in this llama.cpp instance func (i *Instance) LoadModel(modelName string) error { if !i.IsLlamaCpp() { return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name) } if !i.IsRunning() { return fmt.Errorf("instance %s is not running", i.Name) } // Make the load request reqBody := map[string]string{"model": modelName} if err := i.doRequest("POST", "/models/load", reqBody, nil, 30*time.Second); err != nil { return fmt.Errorf("failed to load model: %w", err) } return nil } // UnloadModel unloads a model from this llama.cpp instance func (i *Instance) UnloadModel(modelName string) error { if !i.IsLlamaCpp() { return fmt.Errorf("instance %s is not a llama.cpp instance", i.Name) } if !i.IsRunning() { return fmt.Errorf("instance %s is not running", i.Name) } // Make the unload request reqBody := map[string]string{"model": modelName} if err := i.doRequest("POST", "/models/unload", reqBody, nil, 30*time.Second); err != nil { return fmt.Errorf("failed to unload model: %w", err) } return nil } // doRequest makes an HTTP request to this instance's backend func (i *Instance) doRequest(method, path string, reqBody, respBody any, timeout time.Duration) error { url := fmt.Sprintf("http://%s:%d%s", i.GetHost(), i.GetPort(), path) var bodyReader io.Reader if reqBody != nil { bodyBytes, err := json.Marshal(reqBody) if err != nil { return fmt.Errorf("failed to marshal request body: %w", err) } bodyReader = bytes.NewReader(bodyBytes) } ctx, cancel := context.WithTimeout(context.Background(), timeout) defer cancel() req, err := http.NewRequestWithContext(ctx, method, url, bodyReader) if err != nil { return fmt.Errorf("failed to create request: %w", err) } if reqBody != nil { req.Header.Set("Content-Type", "application/json") } resp, err := http.DefaultClient.Do(req) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { bodyBytes, _ := io.ReadAll(resp.Body) return fmt.Errorf("status %d: %s", resp.StatusCode, string(bodyBytes)) } if respBody != nil { if err := json.NewDecoder(resp.Body).Decode(respBody); err != nil { return fmt.Errorf("failed to decode response: %w", err) } } return nil }