Refactor project structure

This commit is contained in:
2025-07-26 11:37:28 +02:00
parent 1fb6b7c212
commit f337a3efe2
48 changed files with 0 additions and 0 deletions

239
pkg/config.go Normal file
View File

@@ -0,0 +1,239 @@
package llamactl
import (
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"gopkg.in/yaml.v3"
)
// Config represents the configuration for llamactl
type Config struct {
Server ServerConfig `yaml:"server"`
Instances InstancesConfig `yaml:"instances"`
}
// ServerConfig contains HTTP server configuration
type ServerConfig struct {
// Server host to bind to
Host string `yaml:"host"`
// Server port to bind to
Port int `yaml:"port"`
}
// InstancesConfig contains instance management configuration
type InstancesConfig struct {
// Port range for instances (e.g., 8000,9000)
PortRange [2]int `yaml:"port_range"`
// Directory where instance logs will be stored
LogDirectory string `yaml:"log_directory"`
// Maximum number of instances that can be created
MaxInstances int `yaml:"max_instances"`
// Path to llama-server executable
LlamaExecutable string `yaml:"llama_executable"`
// Default auto-restart setting for new instances
DefaultAutoRestart bool `yaml:"default_auto_restart"`
// Default max restarts for new instances
DefaultMaxRestarts int `yaml:"default_max_restarts"`
// Default restart delay for new instances (in seconds)
DefaultRestartDelay int `yaml:"default_restart_delay"`
}
// LoadConfig loads configuration with the following precedence:
// 1. Hardcoded defaults
// 2. Config file
// 3. Environment variables
func LoadConfig(configPath string) (Config, error) {
// 1. Start with defaults
cfg := Config{
Server: ServerConfig{
Host: "",
Port: 8080,
},
Instances: InstancesConfig{
PortRange: [2]int{8000, 9000},
LogDirectory: "/tmp/llamactl",
MaxInstances: 10,
LlamaExecutable: "llama-server",
DefaultAutoRestart: true,
DefaultMaxRestarts: 3,
DefaultRestartDelay: 5,
},
}
// 2. Load from config file
if err := loadConfigFile(&cfg, configPath); err != nil {
return cfg, err
}
// 3. Override with environment variables
loadEnvVars(&cfg)
return cfg, nil
}
// loadConfigFile attempts to load config from file with fallback locations
func loadConfigFile(cfg *Config, configPath string) error {
var configLocations []string
// If specific config path provided, use only that
if configPath != "" {
configLocations = []string{configPath}
} else {
// Default config file locations (in order of precedence)
configLocations = getDefaultConfigLocations()
}
for _, path := range configLocations {
if data, err := os.ReadFile(path); err == nil {
if err := yaml.Unmarshal(data, cfg); err != nil {
return err
}
return nil
}
}
return nil
}
// loadEnvVars overrides config with environment variables
func loadEnvVars(cfg *Config) {
// Server config
if host := os.Getenv("LLAMACTL_HOST"); host != "" {
cfg.Server.Host = host
}
if port := os.Getenv("LLAMACTL_PORT"); port != "" {
if p, err := strconv.Atoi(port); err == nil {
cfg.Server.Port = p
}
}
// Instance config
if portRange := os.Getenv("LLAMACTL_INSTANCE_PORT_RANGE"); portRange != "" {
if ports := parsePortRange(portRange); ports != [2]int{0, 0} {
cfg.Instances.PortRange = ports
}
}
if logDir := os.Getenv("LLAMACTL_LOG_DIR"); logDir != "" {
cfg.Instances.LogDirectory = logDir
}
if maxInstances := os.Getenv("LLAMACTL_MAX_INSTANCES"); maxInstances != "" {
if m, err := strconv.Atoi(maxInstances); err == nil {
cfg.Instances.MaxInstances = m
}
}
if llamaExec := os.Getenv("LLAMACTL_LLAMA_EXECUTABLE"); llamaExec != "" {
cfg.Instances.LlamaExecutable = llamaExec
}
if autoRestart := os.Getenv("LLAMACTL_DEFAULT_AUTO_RESTART"); autoRestart != "" {
if b, err := strconv.ParseBool(autoRestart); err == nil {
cfg.Instances.DefaultAutoRestart = b
}
}
if maxRestarts := os.Getenv("LLAMACTL_DEFAULT_MAX_RESTARTS"); maxRestarts != "" {
if m, err := strconv.Atoi(maxRestarts); err == nil {
cfg.Instances.DefaultMaxRestarts = m
}
}
if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" {
if seconds, err := strconv.Atoi(restartDelay); err == nil {
cfg.Instances.DefaultRestartDelay = seconds
}
}
}
// parsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
func parsePortRange(s string) [2]int {
var parts []string
// Try both separators
if strings.Contains(s, "-") {
parts = strings.Split(s, "-")
} else if strings.Contains(s, ",") {
parts = strings.Split(s, ",")
}
// Parse the two parts
if len(parts) == 2 {
start, err1 := strconv.Atoi(strings.TrimSpace(parts[0]))
end, err2 := strconv.Atoi(strings.TrimSpace(parts[1]))
if err1 == nil && err2 == nil {
return [2]int{start, end}
}
}
return [2]int{0, 0} // Invalid format
}
// getDefaultConfigLocations returns platform-specific config file locations
func getDefaultConfigLocations() []string {
var locations []string
// Current directory (cross-platform)
locations = append(locations,
"./llamactl.yaml",
"./config.yaml",
)
homeDir, _ := os.UserHomeDir()
switch runtime.GOOS {
case "windows":
// Windows: Use APPDATA and ProgramData
if appData := os.Getenv("APPDATA"); appData != "" {
locations = append(locations, filepath.Join(appData, "llamactl", "config.yaml"))
}
if programData := os.Getenv("PROGRAMDATA"); programData != "" {
locations = append(locations, filepath.Join(programData, "llamactl", "config.yaml"))
}
// Fallback to user home
if homeDir != "" {
locations = append(locations, filepath.Join(homeDir, "llamactl", "config.yaml"))
}
case "darwin":
// macOS: Use proper Application Support directories
if homeDir != "" {
locations = append(locations,
filepath.Join(homeDir, "Library", "Application Support", "llamactl", "config.yaml"),
filepath.Join(homeDir, ".config", "llamactl", "config.yaml"), // XDG fallback
)
}
locations = append(locations, "/Library/Application Support/llamactl/config.yaml")
locations = append(locations, "/etc/llamactl/config.yaml") // Unix fallback
default:
// User config: $XDG_CONFIG_HOME/llamactl/config.yaml or ~/.config/llamactl/config.yaml
configHome := os.Getenv("XDG_CONFIG_HOME")
if configHome == "" && homeDir != "" {
configHome = filepath.Join(homeDir, ".config")
}
if configHome != "" {
locations = append(locations, filepath.Join(configHome, "llamactl", "config.yaml"))
}
// System config: /etc/llamactl/config.yaml
locations = append(locations, "/etc/llamactl/config.yaml")
// Additional system locations
if xdgConfigDirs := os.Getenv("XDG_CONFIG_DIRS"); xdgConfigDirs != "" {
for _, dir := range strings.Split(xdgConfigDirs, ":") {
if dir != "" {
locations = append(locations, filepath.Join(dir, "llamactl", "config.yaml"))
}
}
}
}
return locations
}

492
pkg/handlers.go Normal file
View File

@@ -0,0 +1,492 @@
package llamactl
import (
"encoding/json"
"fmt"
"net/http"
"os/exec"
"strconv"
"strings"
"github.com/go-chi/chi/v5"
)
type Handler struct {
InstanceManager InstanceManager
}
func NewHandler(im InstanceManager) *Handler {
return &Handler{
InstanceManager: im,
}
}
// HelpHandler godoc
// @Summary Get help for llama server
// @Description Returns the help text for the llama server command
// @Tags server
// @Produces text/plain
// @Success 200 {string} string "Help text"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/help [get]
func (h *Handler) HelpHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
helpCmd := exec.Command("llama-server", "--help")
output, err := helpCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// VersionHandler godoc
// @Summary Get version of llama server
// @Description Returns the version of the llama server command
// @Tags server
// @Produces text/plain
// @Success 200 {string} string "Version information"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/version [get]
func (h *Handler) VersionHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
versionCmd := exec.Command("llama-server", "--version")
output, err := versionCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ListDevicesHandler godoc
// @Summary List available devices for llama server
// @Description Returns a list of available devices for the llama server
// @Tags server
// @Produces text/plain
// @Success 200 {string} string "List of devices"
// @Failure 500 {string} string "Internal Server Error"
// @Router /server/devices [get]
func (h *Handler) ListDevicesHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
listCmd := exec.Command("llama-server", "--list-devices")
output, err := listCmd.CombinedOutput()
if err != nil {
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write(output)
}
}
// ListInstances godoc
// @Summary List all instances
// @Description Returns a list of all instances managed by the server
// @Tags instances
// @Produces json
// @Success 200 {array} Instance "List of instances"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances [get]
func (h *Handler) ListInstances() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
instances, err := h.InstanceManager.ListInstances()
if err != nil {
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instances); err != nil {
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// CreateInstance godoc
// @Summary Create and start a new instance
// @Description Creates a new instance with the provided configuration options
// @Tags instances
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 201 {object} Instance "Created instance details"
// @Failure 400 {string} string "Invalid request body"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [post]
func (h *Handler) CreateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.CreateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// GetInstance godoc
// @Summary Get details of a specific instance
// @Description Returns the details of a specific instance by name
// @Tags instances
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [get]
func (h *Handler) GetInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// UpdateInstance godoc
// @Summary Update an instance's configuration
// @Description Updates the configuration of a specific instance by name
// @Tags instances
// @Accept json
// @Produces json
// @Param name path string true "Instance Name"
// @Param options body CreateInstanceOptions true "Instance configuration options"
// @Success 200 {object} Instance "Updated instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [put]
func (h *Handler) UpdateInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
var options CreateInstanceOptions
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.UpdateInstance(name, &options)
if err != nil {
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StartInstance godoc
// @Summary Start a stopped instance
// @Description Starts a specific instance by name
// @Tags instances
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Started instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/start [post]
func (h *Handler) StartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.StartInstance(name)
if err != nil {
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// StopInstance godoc
// @Summary Stop a running instance
// @Description Stops a specific instance by name
// @Tags instances
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Stopped instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/stop [post]
func (h *Handler) StopInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.StopInstance(name)
if err != nil {
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// RestartInstance godoc
// @Summary Restart a running instance
// @Description Restarts a specific instance by name
// @Tags instances
// @Produces json
// @Param name path string true "Instance Name"
// @Success 200 {object} Instance "Restarted instance details"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/restart [post]
func (h *Handler) RestartInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.RestartInstance(name)
if err != nil {
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(instance); err != nil {
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
return
}
}
}
// DeleteInstance godoc
// @Summary Delete an instance
// @Description Stops and removes a specific instance by name
// @Tags instances
// @Param name path string true "Instance Name"
// @Success 204 "No Content"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name} [delete]
func (h *Handler) DeleteInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
if err := h.InstanceManager.DeleteInstance(name); err != nil {
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
return
}
w.WriteHeader(http.StatusNoContent)
}
}
// GetInstanceLogs godoc
// @Summary Get logs from a specific instance
// @Description Returns the logs from a specific instance by name with optional line limit
// @Tags instances
// @Param name path string true "Instance Name"
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
// @Produces text/plain
// @Success 200 {string} string "Instance logs"
// @Failure 400 {string} string "Invalid name format or lines parameter"
// @Failure 500 {string} string "Internal Server Error"
// @Router /instances/{name}/logs [get]
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
lines := r.URL.Query().Get("lines")
if lines == "" {
lines = "-1"
}
num_lines, err := strconv.Atoi(lines)
if err != nil {
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
logs, err := instance.GetLogs(num_lines)
if err != nil {
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "text/plain")
w.Write([]byte(logs))
}
}
// ProxyToInstance godoc
// @Summary Proxy requests to a specific instance
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
// @Tags instances
// @Param name path string true "Instance Name"
// @Success 200 "Request successfully proxied to instance"
// @Failure 400 {string} string "Invalid name format"
// @Failure 500 {string} string "Internal Server Error"
// @Failure 503 {string} string "Instance is not running"
// @Router /instances/{name}/proxy [get]
func (h *Handler) ProxyToInstance() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
name := chi.URLParam(r, "name")
if name == "" {
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
return
}
instance, err := h.InstanceManager.GetInstance(name)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
// Get the cached proxy for this instance
proxy, err := instance.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
proxyPath := r.URL.Path[len(prefix):]
// Ensure the proxy path starts with "/"
if !strings.HasPrefix(proxyPath, "/") {
proxyPath = "/" + proxyPath
}
// Modify the request to remove the proxy prefix
originalPath := r.URL.Path
r.URL.Path = proxyPath
// Set forwarded headers
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
r.Header.Set("X-Forwarded-Proto", "http")
// Restore original path for logging purposes
defer func() {
r.URL.Path = originalPath
}()
// Forward the request using the cached proxy
proxy.ServeHTTP(w, r)
}
}
// OpenAIProxy godoc
func (h *Handler) OpenAIProxy() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
// Extract model name from request body
var requestBody map[string]any
if err := json.NewDecoder(r.Body).Decode(&requestBody); err != nil {
http.Error(w, "Invalid request body", http.StatusBadRequest)
return
}
modelName, ok := requestBody["model"].(string)
if !ok || modelName == "" {
http.Error(w, "Model name is required", http.StatusBadRequest)
return
}
// Route to the appropriate instance based on model name
instance, err := h.InstanceManager.GetInstance(modelName)
if err != nil {
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
return
}
if !instance.Running {
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
return
}
proxy, err := instance.GetProxy()
if err != nil {
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
return
}
proxy.ServeHTTP(w, r)
}
}

247
pkg/instance.go Normal file
View File

@@ -0,0 +1,247 @@
package llamactl
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http/httputil"
"net/url"
"os/exec"
"sync"
)
type CreateInstanceOptions struct {
// Auto restart
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
// RestartDelay duration in seconds
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
LlamaServerOptions `json:",inline"`
}
// UnmarshalJSON implements custom JSON unmarshaling for CreateInstanceOptions
// This is needed because the embedded LlamaServerOptions has its own UnmarshalJSON
// which can interfere with proper unmarshaling of the pointer fields
func (c *CreateInstanceOptions) UnmarshalJSON(data []byte) error {
// First, unmarshal into a temporary struct without the embedded type
type tempCreateOptions struct {
AutoRestart *bool `json:"auto_restart,omitempty"`
MaxRestarts *int `json:"max_restarts,omitempty"`
RestartDelay *int `json:"restart_delay_seconds,omitempty"`
}
var temp tempCreateOptions
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy the pointer fields
c.AutoRestart = temp.AutoRestart
c.MaxRestarts = temp.MaxRestarts
c.RestartDelay = temp.RestartDelay
// Now unmarshal the embedded LlamaServerOptions
if err := json.Unmarshal(data, &c.LlamaServerOptions); err != nil {
return err
}
return nil
}
// Instance represents a running instance of the llama server
type Instance struct {
Name string `json:"name"`
options *CreateInstanceOptions `json:"-"`
globalSettings *InstancesConfig
// Status
Running bool `json:"running"`
// Logging file
logger *InstanceLogger `json:"-"`
// internal
cmd *exec.Cmd `json:"-"` // Command to run the instance
ctx context.Context `json:"-"` // Context for managing the instance lifecycle
cancel context.CancelFunc `json:"-"` // Function to cancel the context
stdout io.ReadCloser `json:"-"` // Standard output stream
stderr io.ReadCloser `json:"-"` // Standard error stream
mu sync.RWMutex `json:"-"` // RWMutex for better read/write separation
restarts int `json:"-"` // Number of restarts
proxy *httputil.ReverseProxy `json:"-"` // Reverse proxy for this instance
// Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
}
// validateAndCopyOptions validates and creates a deep copy of the provided options
// It applies validation rules and returns a safe copy
func validateAndCopyOptions(name string, options *CreateInstanceOptions) *CreateInstanceOptions {
optionsCopy := &CreateInstanceOptions{}
if options != nil {
// Copy the embedded LlamaServerOptions
optionsCopy.LlamaServerOptions = options.LlamaServerOptions
// Copy and validate pointer fields
if options.AutoRestart != nil {
autoRestart := *options.AutoRestart
optionsCopy.AutoRestart = &autoRestart
}
if options.MaxRestarts != nil {
maxRestarts := *options.MaxRestarts
if maxRestarts < 0 {
log.Printf("Instance %s MaxRestarts value (%d) cannot be negative, setting to 0", name, maxRestarts)
maxRestarts = 0
}
optionsCopy.MaxRestarts = &maxRestarts
}
if options.RestartDelay != nil {
restartDelay := *options.RestartDelay
if restartDelay < 0 {
log.Printf("Instance %s RestartDelay value (%d) cannot be negative, setting to 0 seconds", name, restartDelay)
restartDelay = 0
}
optionsCopy.RestartDelay = &restartDelay
}
}
return optionsCopy
}
// applyDefaultOptions applies default values from global settings to any nil options
func applyDefaultOptions(options *CreateInstanceOptions, globalSettings *InstancesConfig) {
if globalSettings == nil {
return
}
if options.AutoRestart == nil {
defaultAutoRestart := globalSettings.DefaultAutoRestart
options.AutoRestart = &defaultAutoRestart
}
if options.MaxRestarts == nil {
defaultMaxRestarts := globalSettings.DefaultMaxRestarts
options.MaxRestarts = &defaultMaxRestarts
}
if options.RestartDelay == nil {
defaultRestartDelay := globalSettings.DefaultRestartDelay
options.RestartDelay = &defaultRestartDelay
}
}
// NewInstance creates a new instance with the given name, log path, and options
func NewInstance(name string, globalSettings *InstancesConfig, options *CreateInstanceOptions) *Instance {
// Validate and copy options
optionsCopy := validateAndCopyOptions(name, options)
// Apply defaults
applyDefaultOptions(optionsCopy, globalSettings)
// Create the instance logger
logger := NewInstanceLogger(name, globalSettings.LogDirectory)
return &Instance{
Name: name,
options: optionsCopy,
globalSettings: globalSettings,
logger: logger,
Running: false,
}
}
func (i *Instance) GetOptions() *CreateInstanceOptions {
i.mu.RLock()
defer i.mu.RUnlock()
return i.options
}
func (i *Instance) SetOptions(options *CreateInstanceOptions) {
i.mu.Lock()
defer i.mu.Unlock()
if options == nil {
log.Println("Warning: Attempted to set nil options on instance", i.Name)
return
}
// Validate and copy options and apply defaults
optionsCopy := validateAndCopyOptions(i.Name, options)
applyDefaultOptions(optionsCopy, i.globalSettings)
i.options = optionsCopy
// Clear the proxy so it gets recreated with new options
i.proxy = nil
}
// GetProxy returns the reverse proxy for this instance, creating it if needed
func (i *Instance) GetProxy() (*httputil.ReverseProxy, error) {
i.mu.Lock()
defer i.mu.Unlock()
if i.proxy == nil {
if i.options == nil {
return nil, fmt.Errorf("instance %s has no options set", i.Name)
}
targetURL, err := url.Parse(fmt.Sprintf("http://%s:%d", i.options.Host, i.options.Port))
if err != nil {
return nil, fmt.Errorf("failed to parse target URL for instance %s: %w", i.Name, err)
}
i.proxy = httputil.NewSingleHostReverseProxy(targetURL)
}
return i.proxy, nil
}
// MarshalJSON implements json.Marshaler for Instance
func (i *Instance) MarshalJSON() ([]byte, error) {
// Use read lock since we're only reading data
i.mu.RLock()
defer i.mu.RUnlock()
// Create a temporary struct with exported fields for JSON marshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{
Name: i.Name,
Options: i.options,
Running: i.Running,
}
return json.Marshal(temp)
}
// UnmarshalJSON implements json.Unmarshaler for Instance
func (i *Instance) UnmarshalJSON(data []byte) error {
// Create a temporary struct for unmarshalling
temp := struct {
Name string `json:"name"`
Options *CreateInstanceOptions `json:"options,omitempty"`
Running bool `json:"running"`
}{}
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Set the fields
i.Name = temp.Name
i.Running = temp.Running
// Handle options with validation but no defaults
if temp.Options != nil {
i.options = validateAndCopyOptions(i.Name, temp.Options)
}
return nil
}

257
pkg/lifecycle.go Normal file
View File

@@ -0,0 +1,257 @@
package llamactl
import (
"context"
"fmt"
"log"
"os/exec"
"runtime"
"syscall"
"time"
)
// Start starts the llama server instance and returns an error if it fails.
func (i *Instance) Start() error {
i.mu.Lock()
defer i.mu.Unlock()
if i.Running {
return fmt.Errorf("instance %s is already running", i.Name)
}
// Safety check: ensure options are valid
if i.options == nil {
return fmt.Errorf("instance %s has no options set", i.Name)
}
// Reset restart counter when manually starting (not during auto-restart)
// We can detect auto-restart by checking if restartCancel is set
if i.restartCancel == nil {
i.restarts = 0
}
// Create log files
if err := i.logger.Create(); err != nil {
return fmt.Errorf("failed to create log files: %w", err)
}
args := i.options.BuildCommandArgs()
i.ctx, i.cancel = context.WithCancel(context.Background())
i.cmd = exec.CommandContext(i.ctx, "llama-server", args...)
if runtime.GOOS != "windows" {
if i.cmd.SysProcAttr == nil {
i.cmd.SysProcAttr = &syscall.SysProcAttr{}
}
i.cmd.SysProcAttr.Setpgid = true
}
var err error
i.stdout, err = i.cmd.StdoutPipe()
if err != nil {
i.logger.Close()
return fmt.Errorf("failed to get stdout pipe: %w", err)
}
i.stderr, err = i.cmd.StderrPipe()
if err != nil {
i.stdout.Close()
i.logger.Close()
return fmt.Errorf("failed to get stderr pipe: %w", err)
}
if err := i.cmd.Start(); err != nil {
return fmt.Errorf("failed to start instance %s: %w", i.Name, err)
}
i.Running = true
// Create channel for monitor completion signaling
i.monitorDone = make(chan struct{})
go i.logger.readOutput(i.stdout)
go i.logger.readOutput(i.stderr)
go i.monitorProcess()
return nil
}
// Stop terminates the subprocess
func (i *Instance) Stop() error {
i.mu.Lock()
if !i.Running {
// Even if not running, cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
log.Printf("Cancelled pending restart for instance %s", i.Name)
}
i.mu.Unlock()
return fmt.Errorf("instance %s is not running", i.Name)
}
// Cancel any pending restart
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
}
// Set running to false first to signal intentional stop
i.Running = false
// Clean up the proxy
i.proxy = nil
// Get the monitor done channel before releasing the lock
monitorDone := i.monitorDone
i.mu.Unlock()
// Stop the process with SIGINT
if i.cmd.Process != nil {
if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
}
}
select {
case <-monitorDone:
// Process exited normally
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if i.cmd.Process != nil {
killErr := i.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
}
log.Printf("Instance %s did not stop in time, force killed", i.Name)
// Wait a bit more for the monitor to finish after force kill
select {
case <-monitorDone:
// Monitor completed after force kill
case <-time.After(2 * time.Second):
log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
}
}
}
i.logger.Close()
return nil
}
func (i *Instance) monitorProcess() {
defer func() {
if i.monitorDone != nil {
close(i.monitorDone)
}
}()
err := i.cmd.Wait()
i.mu.Lock()
// Check if the instance was intentionally stopped
if !i.Running {
i.mu.Unlock()
return
}
i.Running = false
i.logger.Close()
// Cancel any existing restart context since we're handling a new exit
if i.restartCancel != nil {
i.restartCancel()
i.restartCancel = nil
}
// Log the exit
if err != nil {
log.Printf("Instance %s crashed with error: %v", i.Name, err)
// Handle restart while holding the lock, then release it
i.handleRestart()
} else {
log.Printf("Instance %s exited cleanly", i.Name)
i.mu.Unlock()
}
}
// handleRestart manages the restart process while holding the lock
func (i *Instance) handleRestart() {
// Validate restart conditions and get safe parameters
shouldRestart, maxRestarts, restartDelay := i.validateRestartConditions()
if !shouldRestart {
i.mu.Unlock()
return
}
i.restarts++
log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v",
i.Name, i.restarts, maxRestarts, time.Duration(restartDelay)*time.Second)
// Create a cancellable context for the restart delay
restartCtx, cancel := context.WithCancel(context.Background())
i.restartCancel = cancel
// Release the lock before sleeping
i.mu.Unlock()
// Use context-aware sleep so it can be cancelled
select {
case <-time.After(time.Duration(restartDelay) * time.Second):
// Sleep completed normally, continue with restart
case <-restartCtx.Done():
// Restart was cancelled
log.Printf("Restart cancelled for instance %s", i.Name)
return
}
// Restart the instance
if err := i.Start(); err != nil {
log.Printf("Failed to restart instance %s: %v", i.Name, err)
} else {
log.Printf("Successfully restarted instance %s", i.Name)
// Clear the cancel function
i.mu.Lock()
i.restartCancel = nil
i.mu.Unlock()
}
}
// validateRestartConditions checks if the instance should be restarted and returns the parameters
func (i *Instance) validateRestartConditions() (shouldRestart bool, maxRestarts int, restartDelay int) {
if i.options == nil {
log.Printf("Instance %s not restarting: options are nil", i.Name)
return false, 0, 0
}
if i.options.AutoRestart == nil || !*i.options.AutoRestart {
log.Printf("Instance %s not restarting: AutoRestart is disabled", i.Name)
return false, 0, 0
}
if i.options.MaxRestarts == nil {
log.Printf("Instance %s not restarting: MaxRestarts is nil", i.Name)
return false, 0, 0
}
if i.options.RestartDelay == nil {
log.Printf("Instance %s not restarting: RestartDelay is nil", i.Name)
return false, 0, 0
}
// Values are already validated during unmarshaling/SetOptions
maxRestarts = *i.options.MaxRestarts
restartDelay = *i.options.RestartDelay
if i.restarts >= maxRestarts {
log.Printf("Instance %s exceeded max restart attempts (%d)", i.Name, maxRestarts)
return false, 0, 0
}
return true, maxRestarts, restartDelay
}

364
pkg/llama.go Normal file
View File

@@ -0,0 +1,364 @@
package llamactl
import (
"encoding/json"
"reflect"
"strconv"
"strings"
)
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
Threads int `json:"threads,omitempty"`
ThreadsBatch int `json:"threads_batch,omitempty"`
CPUMask string `json:"cpu_mask,omitempty"`
CPURange string `json:"cpu_range,omitempty"`
CPUStrict int `json:"cpu_strict,omitempty"`
Priority int `json:"priority,omitempty"`
Poll int `json:"poll,omitempty"`
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
PriorityBatch int `json:"priority_batch,omitempty"`
PollBatch int `json:"poll_batch,omitempty"`
CtxSize int `json:"ctx_size,omitempty"`
Predict int `json:"predict,omitempty"`
BatchSize int `json:"batch_size,omitempty"`
UBatchSize int `json:"ubatch_size,omitempty"`
Keep int `json:"keep,omitempty"`
FlashAttn bool `json:"flash_attn,omitempty"`
NoPerf bool `json:"no_perf,omitempty"`
Escape bool `json:"escape,omitempty"`
NoEscape bool `json:"no_escape,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeScale float64 `json:"rope_scale,omitempty"`
RopeFreqBase float64 `json:"rope_freq_base,omitempty"`
RopeFreqScale float64 `json:"rope_freq_scale,omitempty"`
YarnOrigCtx int `json:"yarn_orig_ctx,omitempty"`
YarnExtFactor float64 `json:"yarn_ext_factor,omitempty"`
YarnAttnFactor float64 `json:"yarn_attn_factor,omitempty"`
YarnBetaSlow float64 `json:"yarn_beta_slow,omitempty"`
YarnBetaFast float64 `json:"yarn_beta_fast,omitempty"`
DumpKVCache bool `json:"dump_kv_cache,omitempty"`
NoKVOffload bool `json:"no_kv_offload,omitempty"`
CacheTypeK string `json:"cache_type_k,omitempty"`
CacheTypeV string `json:"cache_type_v,omitempty"`
DefragThold float64 `json:"defrag_thold,omitempty"`
Parallel int `json:"parallel,omitempty"`
Mlock bool `json:"mlock,omitempty"`
NoMmap bool `json:"no_mmap,omitempty"`
Numa string `json:"numa,omitempty"`
Device string `json:"device,omitempty"`
OverrideTensor []string `json:"override_tensor,omitempty"`
GPULayers int `json:"gpu_layers,omitempty"`
SplitMode string `json:"split_mode,omitempty"`
TensorSplit string `json:"tensor_split,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
CheckTensors bool `json:"check_tensors,omitempty"`
OverrideKV []string `json:"override_kv,omitempty"`
Lora []string `json:"lora,omitempty"`
LoraScaled []string `json:"lora_scaled,omitempty"`
ControlVector []string `json:"control_vector,omitempty"`
ControlVectorScaled []string `json:"control_vector_scaled,omitempty"`
ControlVectorLayerRange string `json:"control_vector_layer_range,omitempty"`
Model string `json:"model,omitempty"`
ModelURL string `json:"model_url,omitempty"`
HFRepo string `json:"hf_repo,omitempty"`
HFRepoDraft string `json:"hf_repo_draft,omitempty"`
HFFile string `json:"hf_file,omitempty"`
HFRepoV string `json:"hf_repo_v,omitempty"`
HFFileV string `json:"hf_file_v,omitempty"`
HFToken string `json:"hf_token,omitempty"`
LogDisable bool `json:"log_disable,omitempty"`
LogFile string `json:"log_file,omitempty"`
LogColors bool `json:"log_colors,omitempty"`
Verbose bool `json:"verbose,omitempty"`
Verbosity int `json:"verbosity,omitempty"`
LogPrefix bool `json:"log_prefix,omitempty"`
LogTimestamps bool `json:"log_timestamps,omitempty"`
// Sampling params
Samplers string `json:"samplers,omitempty"`
Seed int `json:"seed,omitempty"`
SamplingSeq string `json:"sampling_seq,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float64 `json:"top_p,omitempty"`
MinP float64 `json:"min_p,omitempty"`
XTCProbability float64 `json:"xtc_probability,omitempty"`
XTCThreshold float64 `json:"xtc_threshold,omitempty"`
Typical float64 `json:"typical,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
RepeatPenalty float64 `json:"repeat_penalty,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
DryMultiplier float64 `json:"dry_multiplier,omitempty"`
DryBase float64 `json:"dry_base,omitempty"`
DryAllowedLength int `json:"dry_allowed_length,omitempty"`
DryPenaltyLastN int `json:"dry_penalty_last_n,omitempty"`
DrySequenceBreaker []string `json:"dry_sequence_breaker,omitempty"`
DynatempRange float64 `json:"dynatemp_range,omitempty"`
DynatempExp float64 `json:"dynatemp_exp,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatLR float64 `json:"mirostat_lr,omitempty"`
MirostatEnt float64 `json:"mirostat_ent,omitempty"`
LogitBias []string `json:"logit_bias,omitempty"`
Grammar string `json:"grammar,omitempty"`
GrammarFile string `json:"grammar_file,omitempty"`
JSONSchema string `json:"json_schema,omitempty"`
JSONSchemaFile string `json:"json_schema_file,omitempty"`
// Server/Example-specific params
NoContextShift bool `json:"no_context_shift,omitempty"`
Special bool `json:"special,omitempty"`
NoWarmup bool `json:"no_warmup,omitempty"`
SPMInfill bool `json:"spm_infill,omitempty"`
Pooling string `json:"pooling,omitempty"`
ContBatching bool `json:"cont_batching,omitempty"`
NoContBatching bool `json:"no_cont_batching,omitempty"`
MMProj string `json:"mmproj,omitempty"`
MMProjURL string `json:"mmproj_url,omitempty"`
NoMMProj bool `json:"no_mmproj,omitempty"`
NoMMProjOffload bool `json:"no_mmproj_offload,omitempty"`
Alias string `json:"alias,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
Path string `json:"path,omitempty"`
NoWebUI bool `json:"no_webui,omitempty"`
Embedding bool `json:"embedding,omitempty"`
Reranking bool `json:"reranking,omitempty"`
APIKey string `json:"api_key,omitempty"`
APIKeyFile string `json:"api_key_file,omitempty"`
SSLKeyFile string `json:"ssl_key_file,omitempty"`
SSLCertFile string `json:"ssl_cert_file,omitempty"`
ChatTemplateKwargs string `json:"chat_template_kwargs,omitempty"`
Timeout int `json:"timeout,omitempty"`
ThreadsHTTP int `json:"threads_http,omitempty"`
CacheReuse int `json:"cache_reuse,omitempty"`
Metrics bool `json:"metrics,omitempty"`
Slots bool `json:"slots,omitempty"`
Props bool `json:"props,omitempty"`
NoSlots bool `json:"no_slots,omitempty"`
SlotSavePath string `json:"slot_save_path,omitempty"`
Jinja bool `json:"jinja,omitempty"`
ReasoningFormat string `json:"reasoning_format,omitempty"`
ReasoningBudget int `json:"reasoning_budget,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateFile string `json:"chat_template_file,omitempty"`
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
// Speculative decoding params
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
// Audio/TTS params
ModelVocoder string `json:"model_vocoder,omitempty"`
TTSUseGuideTokens bool `json:"tts_use_guide_tokens,omitempty"`
// Default model params
EmbdBGESmallEnDefault bool `json:"embd_bge_small_en_default,omitempty"`
EmbdE5SmallEnDefault bool `json:"embd_e5_small_en_default,omitempty"`
EmbdGTESmallDefault bool `json:"embd_gte_small_default,omitempty"`
FIMQwen1_5BDefault bool `json:"fim_qwen_1_5b_default,omitempty"`
FIMQwen3BDefault bool `json:"fim_qwen_3b_default,omitempty"`
FIMQwen7BDefault bool `json:"fim_qwen_7b_default,omitempty"`
FIMQwen7BSpec bool `json:"fim_qwen_7b_spec,omitempty"`
FIMQwen14BSpec bool `json:"fim_qwen_14b_spec,omitempty"`
}
// UnmarshalJSON implements custom JSON unmarshaling to support multiple field names
func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
// First unmarshal into a map to handle multiple field names
var raw map[string]interface{}
if err := json.Unmarshal(data, &raw); err != nil {
return err
}
// Create a temporary struct for standard unmarshaling
type tempOptions LlamaServerOptions
temp := tempOptions{}
// Standard unmarshal first
if err := json.Unmarshal(data, &temp); err != nil {
return err
}
// Copy to our struct
*o = LlamaServerOptions(temp)
// Handle alternative field names
fieldMappings := map[string]string{
// Threads alternatives
"t": "threads",
"tb": "threads_batch",
"threads-batch": "threads_batch",
// Context size alternatives
"c": "ctx_size",
"ctx-size": "ctx_size",
// Predict alternatives
"n": "predict",
"n-predict": "predict",
"n_predict": "predict",
// Batch size alternatives
"b": "batch_size",
"batch-size": "batch_size",
// GPU layers alternatives
"ngl": "gpu_layers",
"gpu-layers": "gpu_layers",
"n-gpu-layers": "gpu_layers",
"n_gpu_layers": "gpu_layers",
// Model alternatives
"m": "model",
// Seed alternatives
"s": "seed",
// Flash attention alternatives
"fa": "flash_attn",
"flash-attn": "flash_attn",
// Verbose alternatives
"v": "verbose",
"log-verbose": "verbose",
// Verbosity alternatives
"lv": "verbosity",
"log-verbosity": "verbosity",
// Temperature alternatives
"temp": "temperature",
// Top-k alternatives
"top-k": "top_k",
// Top-p alternatives
"top-p": "top_p",
// Min-p alternatives
"min-p": "min_p",
// Additional mappings can be added here
}
// Process alternative field names
for altName, canonicalName := range fieldMappings {
if value, exists := raw[altName]; exists {
// Use reflection to set the field value
v := reflect.ValueOf(o).Elem()
field := v.FieldByNameFunc(func(fieldName string) bool {
field, _ := v.Type().FieldByName(fieldName)
jsonTag := field.Tag.Get("json")
return jsonTag == canonicalName+",omitempty" || jsonTag == canonicalName
})
if field.IsValid() && field.CanSet() {
switch field.Kind() {
case reflect.Int:
if intVal, ok := value.(float64); ok {
field.SetInt(int64(intVal))
} else if strVal, ok := value.(string); ok {
if intVal, err := strconv.Atoi(strVal); err == nil {
field.SetInt(int64(intVal))
}
}
case reflect.Float64:
if floatVal, ok := value.(float64); ok {
field.SetFloat(floatVal)
} else if strVal, ok := value.(string); ok {
if floatVal, err := strconv.ParseFloat(strVal, 64); err == nil {
field.SetFloat(floatVal)
}
}
case reflect.String:
if strVal, ok := value.(string); ok {
field.SetString(strVal)
}
case reflect.Bool:
if boolVal, ok := value.(bool); ok {
field.SetBool(boolVal)
}
}
}
}
}
return nil
}
// BuildCommandArgs converts InstanceOptions to command line arguments
func (o *LlamaServerOptions) BuildCommandArgs() []string {
var args []string
v := reflect.ValueOf(o).Elem()
t := v.Type()
for i := 0; i < v.NumField(); i++ {
field := v.Field(i)
fieldType := t.Field(i)
// Skip unexported fields
if !field.CanInterface() {
continue
}
// Get the JSON tag to determine the flag name
jsonTag := fieldType.Tag.Get("json")
if jsonTag == "" || jsonTag == "-" {
continue
}
// Remove ",omitempty" from the tag
flagName := jsonTag
if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
flagName = jsonTag[:commaIndex]
}
// Convert snake_case to kebab-case for CLI flags
flagName = strings.ReplaceAll(flagName, "_", "-")
// Add the appropriate arguments based on field type and value
switch field.Kind() {
case reflect.Bool:
if field.Bool() {
args = append(args, "--"+flagName)
}
case reflect.Int:
if field.Int() != 0 {
args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
}
case reflect.Float64:
if field.Float() != 0 {
args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
}
case reflect.String:
if field.String() != "" {
args = append(args, "--"+flagName, field.String())
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
// Handle []string fields
for j := 0; j < field.Len(); j++ {
args = append(args, "--"+flagName, field.Index(j).String())
}
}
}
}
return args
}

118
pkg/logging.go Normal file
View File

@@ -0,0 +1,118 @@
package llamactl
import (
"bufio"
"fmt"
"io"
"os"
"strings"
"time"
)
type InstanceLogger struct {
name string
logDir string
logFile *os.File
logFilePath string
}
func NewInstanceLogger(name string, logDir string) *InstanceLogger {
return &InstanceLogger{
name: name,
logDir: logDir,
}
}
// Create creates and opens the log files for stdout and stderr
func (i *InstanceLogger) Create() error {
if i.logDir == "" {
return fmt.Errorf("logDir is empty for instance %s", i.name)
}
// Set up instance logs
logPath := i.logDir + "/" + i.name + ".log"
i.logFilePath = logPath
if err := os.MkdirAll(i.logDir, 0755); err != nil {
return fmt.Errorf("failed to create log directory: %w", err)
}
logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0644)
if err != nil {
return fmt.Errorf("failed to create stdout log file: %w", err)
}
i.logFile = logFile
// Write a startup marker to both files
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "\n=== Instance %s started at %s ===\n", i.name, timestamp)
return nil
}
// GetLogs retrieves the last n lines of logs from the instance
func (i *Instance) GetLogs(num_lines int) (string, error) {
i.mu.RLock()
logFileName := i.logger.logFilePath
i.mu.RUnlock()
if logFileName == "" {
return "", fmt.Errorf("log file not created for instance %s", i.Name)
}
file, err := os.Open(logFileName)
if err != nil {
return "", fmt.Errorf("failed to open log file: %w", err)
}
defer file.Close()
if num_lines <= 0 {
content, err := io.ReadAll(file)
if err != nil {
return "", fmt.Errorf("failed to read log file: %w", err)
}
return string(content), nil
}
var lines []string
scanner := bufio.NewScanner(file)
// Read all lines into a slice
for scanner.Scan() {
lines = append(lines, scanner.Text())
}
if err := scanner.Err(); err != nil {
return "", fmt.Errorf("error reading file: %w", err)
}
// Return the last N lines
start := max(len(lines)-num_lines, 0)
return strings.Join(lines[start:], "\n"), nil
}
// closeLogFile closes the log files
func (i *InstanceLogger) Close() {
if i.logFile != nil {
timestamp := time.Now().Format("2006-01-02 15:04:05")
fmt.Fprintf(i.logFile, "=== Instance %s stopped at %s ===\n\n", i.name, timestamp)
i.logFile.Close()
i.logFile = nil
}
}
// readOutput reads from the given reader and writes lines to the log file
func (i *InstanceLogger) readOutput(reader io.ReadCloser) {
defer reader.Close()
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
if i.logFile != nil {
fmt.Fprintln(i.logFile, line)
i.logFile.Sync() // Ensure data is written to disk
}
}
}

237
pkg/manager.go Normal file
View File

@@ -0,0 +1,237 @@
package llamactl
import (
"fmt"
"sync"
)
// InstanceManager defines the interface for managing instances of the llama server.
type InstanceManager interface {
ListInstances() ([]*Instance, error)
CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
GetInstance(name string) (*Instance, error)
UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error)
DeleteInstance(name string) error
StartInstance(name string) (*Instance, error)
StopInstance(name string) (*Instance, error)
RestartInstance(name string) (*Instance, error)
GetInstanceLogs(name string) (string, error)
}
type instanceManager struct {
mu sync.RWMutex
instances map[string]*Instance
ports map[int]bool
instancesConfig InstancesConfig
}
// NewInstanceManager creates a new instance of InstanceManager.
func NewInstanceManager(instancesConfig InstancesConfig) InstanceManager {
return &instanceManager{
instances: make(map[string]*Instance),
ports: make(map[int]bool),
instancesConfig: instancesConfig,
}
}
// ListInstances returns a list of all instances managed by the instance manager.
func (im *instanceManager) ListInstances() ([]*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instances := make([]*Instance, 0, len(im.instances))
for _, instance := range im.instances {
instances = append(instances, instance)
}
return instances, nil
}
// CreateInstance creates a new instance with the given options and returns it.
// The instance is initially in a "stopped" state.
func (im *instanceManager) CreateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := ValidateInstanceName(name)
if err != nil {
return nil, err
}
err = ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
im.mu.Lock()
defer im.mu.Unlock()
// Check if instance with this name already exists
if im.instances[name] != nil {
return nil, fmt.Errorf("instance with name %s already exists", name)
}
// Assign a port if not specified
if options.Port == 0 {
port, err := im.getNextAvailablePort()
if err != nil {
return nil, fmt.Errorf("failed to get next available port: %w", err)
}
options.Port = port
}
instance := NewInstance(name, &im.instancesConfig, options)
im.instances[instance.Name] = instance
return instance, nil
}
// GetInstance retrieves an instance by its name.
func (im *instanceManager) GetInstance(name string) (*Instance, error) {
im.mu.RLock()
defer im.mu.RUnlock()
instance, exists := im.instances[name]
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
return instance, nil
}
// UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if options == nil {
return nil, fmt.Errorf("instance options cannot be nil")
}
err := ValidateInstanceOptions(options)
if err != nil {
return nil, err
}
// Check if instance is running before updating options
wasRunning := instance.Running
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
return instance, nil
}
// DeleteInstance removes stopped instance by its name.
func (im *instanceManager) DeleteInstance(name string) error {
im.mu.Lock()
defer im.mu.Unlock()
_, exists := im.instances[name]
if !exists {
return fmt.Errorf("instance with name %s not found", name)
}
if im.instances[name].Running {
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
}
delete(im.instances, name)
return nil
}
// StartInstance starts a stopped instance and returns it.
// If the instance is already running, it returns an error.
func (im *instanceManager) StartInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if instance.Running {
return instance, fmt.Errorf("instance with name %s is already running", name)
}
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
}
return instance, nil
}
// StopInstance stops a running instance and returns it.
func (im *instanceManager) StopInstance(name string) (*Instance, error) {
im.mu.RLock()
instance, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return nil, fmt.Errorf("instance with name %s not found", name)
}
if !instance.Running {
return instance, fmt.Errorf("instance with name %s is already stopped", name)
}
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
}
return instance, nil
}
// RestartInstance stops and then starts an instance, returning the updated instance.
func (im *instanceManager) RestartInstance(name string) (*Instance, error) {
instance, err := im.StopInstance(name)
if err != nil {
return nil, err
}
return im.StartInstance(instance.Name)
}
// GetInstanceLogs retrieves the logs for a specific instance by its name.
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
im.mu.RLock()
_, exists := im.instances[name]
im.mu.RUnlock()
if !exists {
return "", fmt.Errorf("instance with name %s not found", name)
}
// TODO: Implement actual log retrieval logic
return fmt.Sprintf("Logs for instance %s", name), nil
}
func (im *instanceManager) getNextAvailablePort() (int, error) {
portRange := im.instancesConfig.PortRange
for port := portRange[0]; port <= portRange[1]; port++ {
if !im.ports[port] {
im.ports[port] = true
return port, nil
}
}
return 0, fmt.Errorf("no available ports in the specified range")
}

59
pkg/routes.go Normal file
View File

@@ -0,0 +1,59 @@
package llamactl
import (
"github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
httpSwagger "github.com/swaggo/http-swagger"
_ "llamactl/docs"
)
func SetupRouter(handler *Handler) *chi.Mux {
r := chi.NewRouter()
r.Use(middleware.Logger)
r.Get("/swagger/*", httpSwagger.Handler(
httpSwagger.URL("/swagger/doc.json"),
))
// Define routes
r.Route("/api/v1", func(r chi.Router) {
r.Route("/server", func(r chi.Router) {
r.Get("/help", handler.HelpHandler())
r.Get("/version", handler.VersionHandler())
r.Get("/devices", handler.ListDevicesHandler())
})
// Instance management endpoints
r.Route("/instances", func(r chi.Router) {
r.Get("/", handler.ListInstances()) // List all instances
r.Route("/{name}", func(r chi.Router) {
// Instance management
r.Get("/", handler.GetInstance()) // Get instance details
r.Post("/", handler.CreateInstance()) // Create and start new instance
r.Put("/", handler.UpdateInstance()) // Update instance configuration
r.Delete("/", handler.DeleteInstance()) // Stop and remove instance
r.Post("/start", handler.StartInstance()) // Start stopped instance
r.Post("/stop", handler.StopInstance()) // Stop running instance
r.Post("/restart", handler.RestartInstance()) // Restart instance
r.Get("/logs", handler.GetInstanceLogs()) // Get instance logs
// Llama.cpp server proxy endpoints (proxied to the actual llama.cpp server)
r.Route("/proxy", func(r chi.Router) {
r.HandleFunc("/*", handler.ProxyToInstance()) // Proxy all llama.cpp server requests
})
})
})
})
// OpenAI-compatible endpoints (model name in request body determines routing)
r.Post("/v1/", handler.OpenAIProxy()) // Proxy to OpenAI-compatible endpoints based on instance name in request body
// r.Post("/v1/completions", handler.OpenAICompletions()) // Route based on model name in request
// r.Post("/v1/chat/completions", handler.OpenAIChatCompletions()) // Route based on model name in request
// r.Post("/v1/embeddings", handler.OpenAIEmbeddings()) // Route based on model name in request (if supported)
// r.Post("/v1/rerank", handler.OpenAIRerank()) // Route based on model name in request (if supported)
// r.Post("/v1/reranking", handler.OpenAIReranking()) // Route based on model name in request (if supported)
return r
}

115
pkg/validation.go Normal file
View File

@@ -0,0 +1,115 @@
package llamactl
import (
"fmt"
"reflect"
"regexp"
)
// Simple security validation that focuses only on actual injection risks
var (
// Block shell metacharacters that could enable command injection
dangerousPatterns = []*regexp.Regexp{
regexp.MustCompile(`[;&|$` + "`" + `]`), // Shell metacharacters
regexp.MustCompile(`\$\(.*\)`), // Command substitution $(...)
regexp.MustCompile("`.*`"), // Command substitution backticks
}
// Simple validation for instance names
validNamePattern = regexp.MustCompile(`^[a-zA-Z0-9_-]+$`)
)
type ValidationError error
// validateStringForInjection checks if a string contains dangerous patterns
func validateStringForInjection(value string) error {
for _, pattern := range dangerousPatterns {
if pattern.MatchString(value) {
return ValidationError(fmt.Errorf("value contains potentially dangerous characters: %s", value))
}
}
return nil
}
// ValidateInstanceOptions performs minimal security validation
func ValidateInstanceOptions(options *CreateInstanceOptions) error {
if options == nil {
return ValidationError(fmt.Errorf("options cannot be nil"))
}
// Use reflection to check all string fields for injection patterns
if err := validateStructStrings(&options.LlamaServerOptions, ""); err != nil {
return err
}
// Basic network validation - only check for reasonable ranges
if options.Port < 0 || options.Port > 65535 {
return ValidationError(fmt.Errorf("invalid port range"))
}
return nil
}
// validateStructStrings recursively validates all string fields in a struct
func validateStructStrings(v any, fieldPath string) error {
val := reflect.ValueOf(v)
if val.Kind() == reflect.Ptr {
val = val.Elem()
}
if val.Kind() != reflect.Struct {
return nil
}
typ := val.Type()
for i := 0; i < val.NumField(); i++ {
field := val.Field(i)
fieldType := typ.Field(i)
if !field.CanInterface() {
continue
}
fieldName := fieldType.Name
if fieldPath != "" {
fieldName = fieldPath + "." + fieldName
}
switch field.Kind() {
case reflect.String:
if err := validateStringForInjection(field.String()); err != nil {
return ValidationError(fmt.Errorf("field %s: %w", fieldName, err))
}
case reflect.Slice:
if field.Type().Elem().Kind() == reflect.String {
for j := 0; j < field.Len(); j++ {
if err := validateStringForInjection(field.Index(j).String()); err != nil {
return ValidationError(fmt.Errorf("field %s[%d]: %w", fieldName, j, err))
}
}
}
case reflect.Struct:
if err := validateStructStrings(field.Interface(), fieldName); err != nil {
return err
}
}
}
return nil
}
func ValidateInstanceName(name string) error {
// Validate instance name
if name == "" {
return ValidationError(fmt.Errorf("name cannot be empty"))
}
if !validNamePattern.MatchString(name) {
return ValidationError(fmt.Errorf("name contains invalid characters (only alphanumeric, hyphens, underscores allowed)"))
}
if len(name) > 50 {
return ValidationError(fmt.Errorf("name too long (max 50 characters)"))
}
return nil
}