diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yaml similarity index 100% rename from .github/workflows/codeql.yml rename to .github/workflows/codeql.yaml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yaml similarity index 100% rename from .github/workflows/docs.yml rename to .github/workflows/docs.yaml diff --git a/README.md b/README.md index 0f27290..d9fea15 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # llamactl -![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) +![Build and Release](https://github.com/lordmathis/llamactl/actions/workflows/release.yaml/badge.svg) ![Go Tests](https://github.com/lordmathis/llamactl/actions/workflows/go_test.yaml/badge.svg) ![WebUI Tests](https://github.com/lordmathis/llamactl/actions/workflows/webui_test.yaml/badge.svg) ![User Docs](https://github.com/lordmathis/llamactl/actions/workflows/docs.yaml/badge.svg) **Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.** @@ -23,7 +23,12 @@ ### ⚡ Smart Operations - **Instance Monitoring**: Health checks, auto-restart, log management - **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits -- **Environment Variables**: Set custom environment variables per instance for advanced configuration +- **Environment Variables**: Set custom environment variables per instance for advanced configuration + +### 🔗 Remote Instance Deployment +- **Remote Node Support**: Deploy instances on remote hosts +- **Central Management**: Manage remote instances from a single dashboard +- **Seamless Routing**: Automatic request routing to remote instances ![Dashboard Screenshot](docs/images/dashboard.png) diff --git a/cmd/server/main.go 
b/cmd/server/main.go index e245ebf..de080c7 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -58,7 +58,7 @@ func main() { } // Initialize the instance manager - instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances) + instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes) // Create a new handler with the instance manager handler := server.NewHandler(instanceManager, cfg) diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md index be4fc6d..c43efc6 100644 --- a/docs/getting-started/configuration.md +++ b/docs/getting-started/configuration.md @@ -70,6 +70,10 @@ auth: inference_keys: [] # Keys for inference endpoints require_management_auth: true # Require auth for management endpoints management_keys: [] # Keys for management endpoints + +local_node: "main" # Name of the local node (default: "main") +nodes: # Node configuration for multi-node deployment + main: # Default local node (empty config) ``` ## Configuration Files @@ -235,18 +239,32 @@ auth: management_keys: [] # List of valid management API keys ``` -**Environment Variables:** -- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false) -- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys -- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false) -- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys +**Environment Variables:** +- `LLAMACTL_REQUIRE_INFERENCE_AUTH` - Require auth for OpenAI endpoints (true/false) +- `LLAMACTL_INFERENCE_KEYS` - Comma-separated inference API keys +- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false) +- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys -## Command Line Options +### Remote Node Configuration -View all available command line options: +llamactl supports remote node deployments. 
Configure remote nodes to deploy instances on remote hosts and manage them centrally. -```bash -llamactl --help +```yaml +local_node: "main" # Name of the local node (default: "main") +nodes: # Node configuration map + main: # Local node (empty address means local) + address: "" # Not used for local node + api_key: "" # Not used for local node + worker1: # Remote worker node + address: "http://192.168.1.10:8080" + api_key: "worker1-api-key" # Management API key for authentication ``` -You can also override configuration using command line flags when starting llamactl. +**Node Configuration Fields:** +- `local_node`: Specifies which node in the `nodes` map represents the local node +- `nodes`: Map of node configurations + - `address`: HTTP/HTTPS URL of the remote node (empty for local node) + - `api_key`: Management API key for authenticating with the remote node + +**Environment Variables:** +- `LLAMACTL_LOCAL_NODE` - Name of the local node diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md index f64146f..04e0dfd 100644 --- a/docs/getting-started/installation.md +++ b/docs/getting-started/installation.md @@ -157,6 +157,12 @@ cd webui && npm ci && npm run build && cd .. go build -o llamactl ./cmd/server ``` +## Remote Node Installation + +For deployments with remote nodes: +- Install llamactl on each node using any of the methods above +- Configure API keys for authentication between nodes + ## Verification Verify your installation by checking the version: @@ -168,3 +174,5 @@ llamactl --version ## Next Steps Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running! + +For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions. 
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md index 26e01e4..472cd0b 100644 --- a/docs/user-guide/api-reference.md +++ b/docs/user-guide/api-reference.md @@ -126,6 +126,7 @@ POST /api/v1/instances/{name} - `on_demand_start`: Start instance when receiving requests - `idle_timeout`: Idle timeout in minutes - `environment`: Environment variables as key-value pairs +- `nodes`: Array containing a single node name on which to deploy the instance (for remote deployments) See [Managing Instances](managing-instances.md) for complete configuration options. @@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \ http://localhost:8080/api/v1/instances/my-model ``` +### Remote Node Instance Example + +```bash +# Create instance on specific remote node +curl -X POST http://localhost:8080/api/v1/instances/remote-model \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-api-key" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/models/llama-2-7b.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1"] + }' + +# Check status of remote instance +curl -H "Authorization: Bearer your-api-key" \ + http://localhost:8080/api/v1/instances/remote-model + +# Use remote instance with OpenAI-compatible API +curl -X POST http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer your-inference-api-key" \ + -d '{ + "model": "remote-model", + "messages": [ + {"role": "user", "content": "Hello from remote node!"} + ] + }' +``` + ### Using the Proxy Endpoint You can also directly proxy requests to the llama-server instance: diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md index 824c4fe..b02de2d 100644 --- a/docs/user-guide/managing-instances.md +++ b/docs/user-guide/managing-instances.md @@ -39,26 +39,27 @@ Each instance is displayed as a card showing: 1. Click the **"Create Instance"** button on the dashboard 2. 
Enter a unique **Name** for your instance (only required field) -3. **Choose Backend Type**: +3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown +4. **Choose Backend Type**: - **llama.cpp**: For GGUF models using llama-server - **MLX**: For MLX-optimized models (macOS only) - **vLLM**: For distributed serving and high-throughput inference -4. Configure model source: +5. Configure model source: - **For llama.cpp**: GGUF model path or HuggingFace repo - **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`) - **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`) -5. Configure optional instance management settings: +6. Configure optional instance management settings: - **Auto Restart**: Automatically restart instance on failure - **Max Restarts**: Maximum number of restart attempts - **Restart Delay**: Delay in seconds between restart attempts - **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint - **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable) - **Environment Variables**: Set custom environment variables for the instance process -6. Configure backend-specific options: +7. Configure backend-specific options: - **llama.cpp**: Threads, context size, GPU layers, port, etc. - **MLX**: Temperature, top-p, adapter path, Python environment, etc. - **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc. -7. Click **"Create"** to save the instance +8. 
Click **"Create"** to save the instance ### Via API @@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \ "gpu_layers": 32 } }' + +# Create instance on specific remote node +curl -X POST http://localhost:8080/api/instances/remote-llama \ + -H "Content-Type: application/json" \ + -d '{ + "backend_type": "llama_cpp", + "backend_options": { + "model": "/models/llama-7b.gguf", + "gpu_layers": 32 + }, + "nodes": ["worker1"] + }' ``` ## Start Instance @@ -227,3 +240,4 @@ Check the health status of your instances: ```bash curl http://localhost:8080/api/instances/{name}/proxy/health ``` + diff --git a/docs/user-guide/troubleshooting.md b/docs/user-guide/troubleshooting.md index 5608139..4b7a507 100644 --- a/docs/user-guide/troubleshooting.md +++ b/docs/user-guide/troubleshooting.md @@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama. http://localhost:8080/api/v1/instances ``` +## Remote Node Issues + +### Node Configuration + +**Problem:** Remote instances do not appear or cannot be managed + +**Solutions:** +1. **Verify node configuration:** + ```yaml + local_node: "main" # Must match a key in nodes map + nodes: + main: + address: "" # Empty for local node + worker1: + address: "http://worker1.internal:8080" + api_key: "secure-key" # Must match worker1's management key + ``` + +2. 
**Test remote node connectivity:** + ```bash + curl -H "Authorization: Bearer remote-node-key" \ + http://remote-node:8080/api/v1/instances + ``` + ## Debugging and Logs ### Viewing Instance Logs diff --git a/pkg/config/config.go b/pkg/config/config.go index ee57cd2..d6ee420 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -37,13 +37,15 @@ type BackendConfig struct { // AppConfig represents the configuration for llamactl type AppConfig struct { - Server ServerConfig `yaml:"server"` - Backends BackendConfig `yaml:"backends"` - Instances InstancesConfig `yaml:"instances"` - Auth AuthConfig `yaml:"auth"` - Version string `yaml:"-"` - CommitHash string `yaml:"-"` - BuildTime string `yaml:"-"` + Server ServerConfig `yaml:"server"` + Backends BackendConfig `yaml:"backends"` + Instances InstancesConfig `yaml:"instances"` + Auth AuthConfig `yaml:"auth"` + LocalNode string `yaml:"local_node,omitempty"` + Nodes map[string]NodeConfig `yaml:"nodes,omitempty"` + Version string `yaml:"-"` + CommitHash string `yaml:"-"` + BuildTime string `yaml:"-"` } // ServerConfig contains HTTP server configuration @@ -128,6 +130,11 @@ type AuthConfig struct { ManagementKeys []string `yaml:"management_keys"` } +type NodeConfig struct { + Address string `yaml:"address"` + APIKey string `yaml:"api_key,omitempty"` +} + // LoadConfig loads configuration with the following precedence: // 1. Hardcoded defaults // 2. 
Config file @@ -142,6 +149,10 @@ func LoadConfig(configPath string) (AppConfig, error) { AllowedHeaders: []string{"*"}, // Default to allow all headers EnableSwagger: false, }, + LocalNode: "main", + Nodes: map[string]NodeConfig{ + "main": {}, // Local node with empty config + }, Backends: BackendConfig{ LlamaCpp: BackendSettings{ Command: "llama-server", @@ -469,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) { if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" { cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",") } + + // Local node config + if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" { + cfg.LocalNode = localNode + } } // ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000" diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index ad800ed..964708e 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) { t.Errorf("Expected empty command for invalid backend, got %q", settings.Command) } } + +func TestLoadConfig_LocalNode(t *testing.T) { + t.Run("default local node", func(t *testing.T) { + cfg, err := config.LoadConfig("nonexistent-file.yaml") + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + if cfg.LocalNode != "main" { + t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode) + } + }) + + t.Run("local node from file", func(t *testing.T) { + tempDir := t.TempDir() + configFile := filepath.Join(tempDir, "test-config.yaml") + + configContent := ` +local_node: "worker1" +nodes: + worker1: + address: "" + worker2: + address: "http://192.168.1.10:8080" + api_key: "test-key" +` + + err := os.WriteFile(configFile, []byte(configContent), 0644) + if err != nil { + t.Fatalf("Failed to write test config file: %v", err) + } + + cfg, err := config.LoadConfig(configFile) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + if 
cfg.LocalNode != "worker1" { + t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode) + } + + // Verify nodes map (includes default "main" + worker1 + worker2) + if len(cfg.Nodes) != 3 { + t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes)) + } + + // Verify local node exists and is empty + localNode, exists := cfg.Nodes["worker1"] + if !exists { + t.Error("Expected local node 'worker1' to exist in nodes map") + } + if localNode.Address != "" { + t.Errorf("Expected local node address to be empty, got %q", localNode.Address) + } + if localNode.APIKey != "" { + t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey) + } + + // Verify remote node + remoteNode, exists := cfg.Nodes["worker2"] + if !exists { + t.Error("Expected remote node 'worker2' to exist in nodes map") + } + if remoteNode.Address != "http://192.168.1.10:8080" { + t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address) + } + + // Verify default main node still exists + _, exists = cfg.Nodes["main"] + if !exists { + t.Error("Expected default 'main' node to still exist in nodes map") + } + }) + + t.Run("custom local node name in config", func(t *testing.T) { + tempDir := t.TempDir() + configFile := filepath.Join(tempDir, "test-config.yaml") + + configContent := ` +local_node: "primary" +nodes: + primary: + address: "" + worker1: + address: "http://192.168.1.10:8080" +` + + err := os.WriteFile(configFile, []byte(configContent), 0644) + if err != nil { + t.Fatalf("Failed to write test config file: %v", err) + } + + cfg, err := config.LoadConfig(configFile) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + if cfg.LocalNode != "primary" { + t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode) + } + + // Verify nodes map includes default "main" + primary + worker1 + if len(cfg.Nodes) != 3 { + t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", 
len(cfg.Nodes)) + } + + localNode, exists := cfg.Nodes["primary"] + if !exists { + t.Error("Expected local node 'primary' to exist in nodes map") + } + if localNode.Address != "" { + t.Errorf("Expected local node address to be empty, got %q", localNode.Address) + } + }) + + t.Run("local node from environment variable", func(t *testing.T) { + os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node") + defer os.Unsetenv("LLAMACTL_LOCAL_NODE") + + cfg, err := config.LoadConfig("nonexistent-file.yaml") + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + if cfg.LocalNode != "custom-node" { + t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode) + } + }) +} diff --git a/pkg/instance/instance.go b/pkg/instance/instance.go index 228f382..dcebef4 100644 --- a/pkg/instance/instance.go +++ b/pkg/instance/instance.go @@ -171,6 +171,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) { return nil, fmt.Errorf("instance %s has no options set", i.Name) } + // Remote instances should not use local proxy - they are handled by RemoteInstanceProxy + if len(i.options.Nodes) > 0 { + return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name) + } + var host string var port int switch i.options.BackendType { @@ -285,5 +290,24 @@ func (i *Process) UnmarshalJSON(data []byte) error { i.options = aux.Options } + // Initialize fields that are not serialized + if i.timeProvider == nil { + i.timeProvider = realTimeProvider{} + } + if i.logger == nil && i.globalInstanceSettings != nil { + i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir) + } + return nil } + +func (i *Process) IsRemote() bool { + i.mu.RLock() + defer i.mu.RUnlock() + + if i.options == nil { + return false + } + + return len(i.options.Nodes) > 0 +} diff --git a/pkg/instance/options.go b/pkg/instance/options.go index 62181dd..439f426 100644 --- a/pkg/instance/options.go +++ b/pkg/instance/options.go @@ -27,6 +27,8 @@ type 
CreateInstanceOptions struct { BackendType backends.BackendType `json:"backend_type"` BackendOptions map[string]any `json:"backend_options,omitempty"` + Nodes []string `json:"nodes,omitempty"` + // Backend-specific options LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"` MlxServerOptions *mlx.MlxServerOptions `json:"-"` diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index 725b1a9..b944ef3 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -6,6 +6,7 @@ import ( "llamactl/pkg/config" "llamactl/pkg/instance" "log" + "net/http" "os" "path/filepath" "strings" @@ -25,10 +26,22 @@ type InstanceManager interface { StopInstance(name string) (*instance.Process, error) EvictLRUInstance() error RestartInstance(name string) (*instance.Process, error) - GetInstanceLogs(name string) (string, error) + GetInstanceLogs(name string, numLines int) (string, error) Shutdown() } +type RemoteManager interface { + ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error) + CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) + GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error) + UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) + DeleteRemoteInstance(node *config.NodeConfig, name string) error + StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error) + StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error) + RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error) + GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error) +} + type instanceManager struct { mu sync.RWMutex instances map[string]*instance.Process @@ -42,13 +55,26 @@ type instanceManager struct { shutdownChan chan struct{} shutdownDone chan struct{} isShutdown bool + + // 
Remote instance management + httpClient *http.Client + instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config + nodeConfigMap map[string]*config.NodeConfig // Maps node name to node config for quick lookup } // NewInstanceManager creates a new instance of InstanceManager. -func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager { +func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig) InstanceManager { if instancesConfig.TimeoutCheckInterval <= 0 { instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set } + + // Build node config map for quick lookup + nodeConfigMap := make(map[string]*config.NodeConfig) + for name := range nodesConfig { + nodeCopy := nodesConfig[name] + nodeConfigMap[name] = &nodeCopy + } + im := &instanceManager{ instances: make(map[string]*instance.Process), runningInstances: make(map[string]struct{}), @@ -59,6 +85,13 @@ func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig con timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute), shutdownChan: make(chan struct{}), shutdownDone: make(chan struct{}), + + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + + instanceNodeMap: make(map[string]*config.NodeConfig), + nodeConfigMap: nodeConfigMap, } // Load existing instances from disk @@ -238,24 +271,43 @@ func (im *instanceManager) loadInstance(name, path string) error { return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name) } - statusCallback := func(oldStatus, newStatus instance.InstanceStatus) { - im.onStatusChange(persistedInstance.Name, oldStatus, newStatus) + options := persistedInstance.GetOptions() + + // Check if this is a remote instance + isRemote := options != nil && len(options.Nodes) > 0 + + var statusCallback 
func(oldStatus, newStatus instance.InstanceStatus) + if !isRemote { + // Only set status callback for local instances + statusCallback = func(oldStatus, newStatus instance.InstanceStatus) { + im.onStatusChange(persistedInstance.Name, oldStatus, newStatus) + } } // Create new inst using NewInstance (handles validation, defaults, setup) - inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback) + inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback) // Restore persisted fields that NewInstance doesn't set inst.Created = persistedInstance.Created inst.SetStatus(persistedInstance.Status) - // Check for port conflicts and add to maps - if inst.GetPort() > 0 { - port := inst.GetPort() - if im.ports[port] { - return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port) + // Handle remote instance mapping + if isRemote { + nodeName := options.Nodes[0] + nodeConfig, exists := im.nodeConfigMap[nodeName] + if !exists { + return fmt.Errorf("node %s not found for remote instance %s", nodeName, name) + } + im.instanceNodeMap[name] = nodeConfig + } else { + // Check for port conflicts only for local instances + if inst.GetPort() > 0 { + port := inst.GetPort() + if im.ports[port] { + return fmt.Errorf("port conflict: instance %s wants port %d which is already in use", name, port) + } + im.ports[port] = true } - im.ports[port] = true } im.instances[name] = inst @@ -293,8 +345,18 @@ func (im *instanceManager) autoStartInstances() { log.Printf("Auto-starting instance %s", inst.Name) // Reset running state before starting (since Start() expects stopped instance) inst.SetStatus(instance.Stopped) - if err := inst.Start(); err != nil { - log.Printf("Failed to auto-start instance %s: %v", inst.Name, err) + + // Check if this is a remote instance + if node := im.getNodeForInstance(inst); node != nil { + // Remote instance - use 
StartRemoteInstance + if _, err := im.StartRemoteInstance(node, inst.Name); err != nil { + log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err) + } + } else { + // Local instance - call Start() directly + if err := inst.Start(); err != nil { + log.Printf("Failed to auto-start instance %s: %v", inst.Name, err) + } } } } @@ -309,3 +371,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst delete(im.runningInstances, name) } } + +// getNodeForInstance returns the node configuration for a remote instance +// Returns nil if the instance is not remote or the node is not found +func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig { + if !inst.IsRemote() { + return nil + } + + // Check if we have a cached mapping + if nodeConfig, exists := im.instanceNodeMap[inst.Name]; exists { + return nodeConfig + } + + return nil +} diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index 3b683d6..e59e2eb 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) { TimeoutCheckInterval: 5, } - mgr := manager.NewInstanceManager(backendConfig, cfg) + mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) if mgr == nil { t.Fatal("NewInstanceManager returned nil") } @@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) { } // Test instance persistence on creation - manager1 := manager.NewInstanceManager(backendConfig, cfg) + manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) options := &instance.CreateInstanceOptions{ BackendType: backends.BackendTypeLlamaCpp, LlamaServerOptions: &llamacpp.LlamaServerOptions{ @@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) { } // Test loading instances from disk - manager2 := manager.NewInstanceManager(backendConfig, cfg) + manager2 := manager.NewInstanceManager(backendConfig, cfg, 
map[string]config.NodeConfig{}) instances, err := manager2.ListInstances() if err != nil { t.Fatalf("ListInstances failed: %v", err) @@ -207,7 +207,7 @@ func createTestManager() manager.InstanceManager { DefaultRestartDelay: 5, TimeoutCheckInterval: 5, } - return manager.NewInstanceManager(backendConfig, cfg) + return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) } func TestAutoRestartDisabledInstanceStatus(t *testing.T) { @@ -227,7 +227,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) { } // Create first manager and instance with auto-restart disabled - manager1 := manager.NewInstanceManager(backendConfig, cfg) + manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) autoRestart := false options := &instance.CreateInstanceOptions{ @@ -252,7 +252,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) { manager1.Shutdown() // Create second manager (simulating restart of llamactl) - manager2 := manager.NewInstanceManager(backendConfig, cfg) + manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) // Get the loaded instance loadedInst, err := manager2.GetInstance("test-instance") diff --git a/pkg/manager/operations.go b/pkg/manager/operations.go index b3c0d13..a8b5c3f 100644 --- a/pkg/manager/operations.go +++ b/pkg/manager/operations.go @@ -3,6 +3,7 @@ package manager import ( "fmt" "llamactl/pkg/backends" + "llamactl/pkg/config" "llamactl/pkg/instance" "llamactl/pkg/validation" "os" @@ -11,16 +12,65 @@ import ( type MaxRunningInstancesError error +// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance +// while preserving the Nodes field to maintain remote instance tracking +func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) { + if localInst == nil || remoteInst == nil { + return + } + + // Get the remote instance options + 
remoteOptions := remoteInst.GetOptions() + if remoteOptions == nil { + return + } + + // Preserve the Nodes field from the local instance + localOptions := localInst.GetOptions() + var preservedNodes []string + if localOptions != nil && len(localOptions.Nodes) > 0 { + preservedNodes = make([]string, len(localOptions.Nodes)) + copy(preservedNodes, localOptions.Nodes) + } + + // Create a copy of remote options and restore the Nodes field + updatedOptions := *remoteOptions + updatedOptions.Nodes = preservedNodes + + // Update the local instance with all remote data + localInst.SetOptions(&updatedOptions) + localInst.Status = remoteInst.Status + localInst.Created = remoteInst.Created +} + // ListInstances returns a list of all instances managed by the instance manager. +// For remote instances, this fetches the live state from remote nodes and updates local stubs. func (im *instanceManager) ListInstances() ([]*instance.Process, error) { im.mu.RLock() - defer im.mu.RUnlock() - - instances := make([]*instance.Process, 0, len(im.instances)) + localInstances := make([]*instance.Process, 0, len(im.instances)) for _, inst := range im.instances { - instances = append(instances, inst) + localInstances = append(localInstances, inst) } - return instances, nil + im.mu.RUnlock() + + // Update remote instances with live state + for _, inst := range localInstances { + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.GetRemoteInstance(node, inst.Name) + if err != nil { + // Ignore the error (nothing is logged here) and keep the stale local data + // Don't fail the entire list operation due to one remote failure + continue + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + } + } + + return localInstances, nil } // CreateInstance creates a new instance with the given options and returns it. 
@@ -43,16 +93,56 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI im.mu.Lock() defer im.mu.Unlock() - // Check max instances limit after acquiring the lock - if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 { - return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances) - } - - // Check if instance with this name already exists + // Check if instance with this name already exists (must be globally unique) if im.instances[name] != nil { return nil, fmt.Errorf("instance with name %s already exists", name) } + // Check if this is a remote instance + isRemote := len(options.Nodes) > 0 + var nodeConfig *config.NodeConfig + + if isRemote { + // Validate that the node exists + nodeName := options.Nodes[0] // Use first node for now + var exists bool + nodeConfig, exists = im.nodeConfigMap[nodeName] + if !exists { + return nil, fmt.Errorf("node %s not found", nodeName) + } + + // Create the remote instance on the remote node + remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options) + if err != nil { + return nil, err + } + + // Create a local stub that preserves the Nodes field for tracking + // We keep the original options (with Nodes) so IsRemote() works correctly + inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, nil) + + // Update the local stub with all remote data (preserving Nodes) + im.updateLocalInstanceFromRemote(inst, remoteInst) + + // Add to local tracking maps (but don't count towards limits) + im.instances[name] = inst + im.instanceNodeMap[name] = nodeConfig + + // Persist the remote instance locally for tracking across restarts + if err := im.persistInstance(inst); err != nil { + return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err) + } + + return inst, nil + } + + // Local instance creation + // Check max instances limit for local instances only + 
localInstanceCount := len(im.instances) - len(im.instanceNodeMap) + if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 { + return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances) + } + // Assign and validate port for backend-specific options if err := im.assignAndValidatePort(options); err != nil { return nil, err @@ -73,28 +163,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI } // GetInstance retrieves an instance by its name. +// For remote instances, this fetches the live state from the remote node and updates the local stub. func (im *instanceManager) GetInstance(name string) (*instance.Process, error) { im.mu.RLock() - defer im.mu.RUnlock() + inst, exists := im.instances[name] + im.mu.RUnlock() - instance, exists := im.instances[name] if !exists { return nil, fmt.Errorf("instance with name %s not found", name) } - return instance, nil + + // Check if instance is remote and fetch live state + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.GetRemoteInstance(node, name) + if err != nil { + return nil, err + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + + // Return the local stub (preserving Nodes field) + return inst, nil + } + + return inst, nil } // UpdateInstance updates the options of an existing instance and returns it. // If the instance is running, it will be restarted to apply the new options. 
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) { im.mu.RLock() - instance, exists := im.instances[name] + inst, exists := im.instances[name] im.mu.RUnlock() if !exists { return nil, fmt.Errorf("instance with name %s not found", name) } + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.UpdateRemoteInstance(node, name, options) + if err != nil { + return nil, err + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + + // Persist the updated remote instance locally + im.mu.Lock() + defer im.mu.Unlock() + if err := im.persistInstance(inst); err != nil { + return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err) + } + + return inst, nil + } + if options == nil { return nil, fmt.Errorf("instance options cannot be nil") } @@ -105,55 +235,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI } // Check if instance is running before updating options - wasRunning := instance.IsRunning() + wasRunning := inst.IsRunning() // If the instance is running, stop it first if wasRunning { - if err := instance.Stop(); err != nil { + if err := inst.Stop(); err != nil { return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err) } } // Now update the options while the instance is stopped - instance.SetOptions(options) + inst.SetOptions(options) // If it was running before, start it again with the new options if wasRunning { - if err := instance.Start(); err != nil { + if err := inst.Start(); err != nil { return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err) } } im.mu.Lock() defer im.mu.Unlock() - if err := im.persistInstance(instance); err != nil { + if err := im.persistInstance(inst); err != nil { return 
nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err) } - return instance, nil + return inst, nil } // DeleteInstance removes stopped instance by its name. func (im *instanceManager) DeleteInstance(name string) error { im.mu.Lock() - defer im.mu.Unlock() + inst, exists := im.instances[name] + im.mu.Unlock() - instance, exists := im.instances[name] if !exists { return fmt.Errorf("instance with name %s not found", name) } - if instance.IsRunning() { + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + err := im.DeleteRemoteInstance(node, name) + if err != nil { + return err + } + + // Clean up local tracking + im.mu.Lock() + defer im.mu.Unlock() + delete(im.instances, name) + delete(im.instanceNodeMap, name) + + // Delete the instance's config file if persistence is enabled + // Re-validate instance name for security (defense in depth) + validatedName, err := validation.ValidateInstanceName(name) + if err != nil { + return fmt.Errorf("invalid instance name for file deletion: %w", err) + } + instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json") + if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) { + return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err) + } + + return nil + } + + if inst.IsRunning() { return fmt.Errorf("instance with name %s is still running, stop it before deleting", name) } - delete(im.ports, instance.GetPort()) + im.mu.Lock() + defer im.mu.Unlock() + + delete(im.ports, inst.GetPort()) delete(im.instances, name) // Delete the instance's config file if persistence is enabled - instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json") + // Re-validate instance name for security (defense in depth) + validatedName, err := validation.ValidateInstanceName(inst.Name) + if err != nil { + return fmt.Errorf("invalid instance name for file deletion: %w", 
err) + } + instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json") if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) { - return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err) + return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err) } return nil @@ -163,33 +328,59 @@ func (im *instanceManager) DeleteInstance(name string) error { // If the instance is already running, it returns an error. func (im *instanceManager) StartInstance(name string) (*instance.Process, error) { im.mu.RLock() - instance, exists := im.instances[name] - maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 + inst, exists := im.instances[name] im.mu.RUnlock() if !exists { return nil, fmt.Errorf("instance with name %s not found", name) } - if instance.IsRunning() { - return instance, fmt.Errorf("instance with name %s is already running", name) + + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.StartRemoteInstance(node, name) + if err != nil { + return nil, err + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + + return inst, nil } + if inst.IsRunning() { + return inst, fmt.Errorf("instance with name %s is already running", name) + } + + // Check max running instances limit for local instances only + im.mu.RLock() + localRunningCount := 0 + for instName := range im.runningInstances { + if _, isRemote := im.instanceNodeMap[instName]; !isRemote { + localRunningCount++ + } + } + maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1 + im.mu.RUnlock() + if maxRunningExceeded { return nil, 
MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances)) } - if err := instance.Start(); err != nil { + if err := inst.Start(); err != nil { return nil, fmt.Errorf("failed to start instance %s: %w", name, err) } im.mu.Lock() defer im.mu.Unlock() - err := im.persistInstance(instance) + err := im.persistInstance(inst) if err != nil { return nil, fmt.Errorf("failed to persist instance %s: %w", name, err) } - return instance, nil + return inst, nil } func (im *instanceManager) IsMaxRunningInstancesReached() bool { @@ -206,51 +397,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool { // StopInstance stops a running instance and returns it. func (im *instanceManager) StopInstance(name string) (*instance.Process, error) { im.mu.RLock() - instance, exists := im.instances[name] + inst, exists := im.instances[name] im.mu.RUnlock() if !exists { return nil, fmt.Errorf("instance with name %s not found", name) } - if !instance.IsRunning() { - return instance, fmt.Errorf("instance with name %s is already stopped", name) + + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.StopRemoteInstance(node, name) + if err != nil { + return nil, err + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + + return inst, nil } - if err := instance.Stop(); err != nil { + if !inst.IsRunning() { + return inst, fmt.Errorf("instance with name %s is already stopped", name) + } + + if err := inst.Stop(); err != nil { return nil, fmt.Errorf("failed to stop instance %s: %w", name, err) } im.mu.Lock() defer im.mu.Unlock() - err := im.persistInstance(instance) + err := im.persistInstance(inst) if err != nil { return nil, fmt.Errorf("failed to persist instance %s: %w", name, err) } - return instance, nil + return inst, nil 
} // RestartInstance stops and then starts an instance, returning the updated instance. func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) { - instance, err := im.StopInstance(name) + im.mu.RLock() + inst, exists := im.instances[name] + im.mu.RUnlock() + + if !exists { + return nil, fmt.Errorf("instance with name %s not found", name) + } + + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + remoteInst, err := im.RestartRemoteInstance(node, name) + if err != nil { + return nil, err + } + + // Update the local stub with all remote data (preserving Nodes) + im.mu.Lock() + im.updateLocalInstanceFromRemote(inst, remoteInst) + im.mu.Unlock() + + return inst, nil + } + + inst, err := im.StopInstance(name) if err != nil { return nil, err } - return im.StartInstance(instance.Name) + return im.StartInstance(inst.Name) } // GetInstanceLogs retrieves the logs for a specific instance by its name. -func (im *instanceManager) GetInstanceLogs(name string) (string, error) { +func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) { im.mu.RLock() - _, exists := im.instances[name] + inst, exists := im.instances[name] im.mu.RUnlock() if !exists { return "", fmt.Errorf("instance with name %s not found", name) } - // TODO: Implement actual log retrieval logic - return fmt.Sprintf("Logs for instance %s", name), nil + // Check if instance is remote and delegate to remote operation + if node := im.getNodeForInstance(inst); node != nil { + return im.GetRemoteInstanceLogs(node, name, numLines) + } + + // Get logs from the local instance + return inst.GetLogs(numLines) } // getPortFromOptions extracts the port from backend-specific options diff --git a/pkg/manager/operations_test.go b/pkg/manager/operations_test.go index 97358c5..fdeb44f 100644 --- a/pkg/manager/operations_test.go +++ b/pkg/manager/operations_test.go @@ -75,7 +75,7 @@ func 
TestCreateInstance_ValidationAndLimits(t *testing.T) { MaxInstances: 1, // Very low limit for testing TimeoutCheckInterval: 5, } - limitedManager := manager.NewInstanceManager(backendConfig, cfg) + limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) _, err = limitedManager.CreateInstance("instance1", options) if err != nil { diff --git a/pkg/manager/remote_ops.go b/pkg/manager/remote_ops.go new file mode 100644 index 0000000..40b2384 --- /dev/null +++ b/pkg/manager/remote_ops.go @@ -0,0 +1,243 @@ +package manager + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "llamactl/pkg/config" + "llamactl/pkg/instance" + "net/http" +) + +// stripNodesFromOptions creates a copy of the instance options without the Nodes field +// to prevent routing loops when sending requests to remote nodes +func (im *instanceManager) stripNodesFromOptions(options *instance.CreateInstanceOptions) *instance.CreateInstanceOptions { + if options == nil { + return nil + } + + // Create a copy of the options struct + optionsCopy := *options + + // Clear the Nodes field to prevent the remote node from trying to route further + optionsCopy.Nodes = nil + + return &optionsCopy +} + +// makeRemoteRequest is a helper function to make HTTP requests to a remote node +func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) { + var reqBody io.Reader + if body != nil { + // Strip nodes from CreateInstanceOptions to prevent routing loops + if options, ok := body.(*instance.CreateInstanceOptions); ok { + body = im.stripNodesFromOptions(options) + } + + jsonData, err := json.Marshal(body) + if err != nil { + return nil, fmt.Errorf("failed to marshal request body: %w", err) + } + reqBody = bytes.NewBuffer(jsonData) + } + + url := fmt.Sprintf("%s%s", nodeConfig.Address, path) + req, err := http.NewRequest(method, url, reqBody) + if err != nil { + return nil, fmt.Errorf("failed to create 
request: %w", err) + } + + if body != nil { + req.Header.Set("Content-Type", "application/json") + } + + if nodeConfig.APIKey != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey)) + } + + resp, err := im.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to execute request: %w", err) + } + + return resp, nil +} + +// parseRemoteResponse is a helper function to parse API responses +func parseRemoteResponse(resp *http.Response, result any) error { + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return fmt.Errorf("failed to read response body: %w", err) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + if result != nil { + if err := json.Unmarshal(body, result); err != nil { + return fmt.Errorf("failed to unmarshal response: %w", err) + } + } + + return nil +} + +// ListRemoteInstances lists all instances on the remote node +func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) { + resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil) + if err != nil { + return nil, err + } + + var instances []*instance.Process + if err := parseRemoteResponse(resp, &instances); err != nil { + return nil, err + } + + return instances, nil +} + +// CreateRemoteInstance creates a new instance on the remote node +func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/", name) + + resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// GetRemoteInstance retrieves an instance 
by name from the remote node +func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/", name) + resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// UpdateRemoteInstance updates an existing instance on the remote node +func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/", name) + + resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// DeleteRemoteInstance deletes an instance from the remote node +func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error { + path := fmt.Sprintf("/api/v1/instances/%s/", name) + resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil) + if err != nil { + return err + } + + return parseRemoteResponse(resp, nil) +} + +// StartRemoteInstance starts an instance on the remote node +func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/start", name) + resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// StopRemoteInstance stops an instance on the remote node +func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) 
(*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/stop", name) + resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// RestartRemoteInstance restarts an instance on the remote node +func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) { + path := fmt.Sprintf("/api/v1/instances/%s/restart", name) + resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil) + if err != nil { + return nil, err + } + + var inst instance.Process + if err := parseRemoteResponse(resp, &inst); err != nil { + return nil, err + } + + return &inst, nil +} + +// GetRemoteInstanceLogs retrieves logs for an instance from the remote node +func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) { + path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines) + resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil) + if err != nil { + return "", err + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("failed to read response body: %w", err) + } + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body)) + } + + // Logs endpoint might return plain text or JSON + // Try to parse as JSON first (in case it's wrapped in a response object) + var logResponse struct { + Logs string `json:"logs"` + } + if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" { + return logResponse.Logs, nil + } + + // Otherwise, return as plain text + return string(body), nil +} diff --git a/pkg/manager/remote_ops_test.go b/pkg/manager/remote_ops_test.go new file mode 100644 
index 0000000..94db40b --- /dev/null +++ b/pkg/manager/remote_ops_test.go @@ -0,0 +1,39 @@ +package manager + +import ( + "llamactl/pkg/backends" + "llamactl/pkg/instance" + "testing" +) + +func TestStripNodesFromOptions(t *testing.T) { + im := &instanceManager{} + + // Test nil case + if result := im.stripNodesFromOptions(nil); result != nil { + t.Errorf("Expected nil, got %+v", result) + } + + // Test main case: nodes should be stripped, other fields preserved + options := &instance.CreateInstanceOptions{ + BackendType: backends.BackendTypeLlamaCpp, + Nodes: []string{"node1", "node2"}, + Environment: map[string]string{"TEST": "value"}, + } + + result := im.stripNodesFromOptions(options) + + if result.Nodes != nil { + t.Errorf("Expected Nodes to be nil, got %+v", result.Nodes) + } + if result.BackendType != backends.BackendTypeLlamaCpp { + t.Errorf("Expected BackendType preserved") + } + if result.Environment["TEST"] != "value" { + t.Errorf("Expected Environment preserved") + } + // Original should not be modified + if len(options.Nodes) != 2 { + t.Errorf("Original options should not be modified") + } +} diff --git a/pkg/manager/timeout.go b/pkg/manager/timeout.go index 0ee9c11..50b1c10 100644 --- a/pkg/manager/timeout.go +++ b/pkg/manager/timeout.go @@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() { // Identify instances that should timeout for _, inst := range im.instances { + // Skip remote instances - they are managed by their respective nodes + if inst.IsRemote() { + continue + } + if inst.ShouldTimeout() { timeoutInstances = append(timeoutInstances, inst.Name) } @@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error { continue } + // Skip remote instances - they are managed by their respective nodes + if inst.IsRemote() { + continue + } + if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 { continue // Skip instances without idle timeout } diff --git 
a/pkg/manager/timeout_test.go b/pkg/manager/timeout_test.go index 08d500c..55cd781 100644 --- a/pkg/manager/timeout_test.go +++ b/pkg/manager/timeout_test.go @@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) { MaxInstances: 5, } - manager := manager.NewInstanceManager(backendConfig, cfg) + manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{}) if manager == nil { t.Fatal("Manager should be initialized with timeout checker") } diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go index 8f0b509..9e31df9 100644 --- a/pkg/server/handlers.go +++ b/pkg/server/handlers.go @@ -1,865 +1,29 @@ package server import ( - "bytes" - "encoding/json" - "fmt" - "io" - "llamactl/pkg/backends" - "llamactl/pkg/backends/llamacpp" - "llamactl/pkg/backends/mlx" - "llamactl/pkg/backends/vllm" "llamactl/pkg/config" - "llamactl/pkg/instance" "llamactl/pkg/manager" "net/http" - "os/exec" - "strconv" - "strings" - - "github.com/go-chi/chi/v5" + "net/http/httputil" + "sync" + "time" ) type Handler struct { InstanceManager manager.InstanceManager cfg config.AppConfig + httpClient *http.Client + remoteProxies map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name + remoteProxiesMu sync.RWMutex } func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler { return &Handler{ InstanceManager: im, cfg: cfg, - } -} - -// VersionHandler godoc -// @Summary Get llamactl version -// @Description Returns the version of the llamactl command -// @Tags version -// @Security ApiKeyAuth -// @Produces text/plain -// @Success 200 {string} string "Version information" -// @Failure 500 {string} string "Internal Server Error" -// @Router /version [get] -func (h *Handler) VersionHandler() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/plain") - fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime) - } -} 
- -// LlamaServerHelpHandler godoc -// @Summary Get help for llama server -// @Description Returns the help text for the llama server command -// @Tags backends -// @Security ApiKeyAuth -// @Produces text/plain -// @Success 200 {string} string "Help text" -// @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/help [get] -func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - helpCmd := exec.Command("llama-server", "--help") - output, err := helpCmd.CombinedOutput() - if err != nil { - http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError) - return - } - w.Header().Set("Content-Type", "text/plain") - w.Write(output) - } -} - -// LlamaServerVersionHandler godoc -// @Summary Get version of llama server -// @Description Returns the version of the llama server command -// @Tags backends -// @Security ApiKeyAuth -// @Produces text/plain -// @Success 200 {string} string "Version information" -// @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/version [get] -func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - versionCmd := exec.Command("llama-server", "--version") - output, err := versionCmd.CombinedOutput() - if err != nil { - http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError) - return - } - w.Header().Set("Content-Type", "text/plain") - w.Write(output) - } -} - -// LlamaServerListDevicesHandler godoc -// @Summary List available devices for llama server -// @Description Returns a list of available devices for the llama server -// @Tags backends -// @Security ApiKeyAuth -// @Produces text/plain -// @Success 200 {string} string "List of devices" -// @Failure 500 {string} string "Internal Server Error" -// @Router /backends/llama-cpp/devices [get] -func (h *Handler) LlamaServerListDevicesHandler() 
http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - listCmd := exec.Command("llama-server", "--list-devices") - output, err := listCmd.CombinedOutput() - if err != nil { - http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError) - return - } - w.Header().Set("Content-Type", "text/plain") - w.Write(output) - } -} - -// ListInstances godoc -// @Summary List all instances -// @Description Returns a list of all instances managed by the server -// @Tags instances -// @Security ApiKeyAuth -// @Produces json -// @Success 200 {array} instance.Process "List of instances" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances [get] -func (h *Handler) ListInstances() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - instances, err := h.InstanceManager.ListInstances() - if err != nil { - http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) - return - } - - // Marshal to bytes first to set Content-Length header - data, err := json.Marshal(instances) - if err != nil { - http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - w.Header().Set("Content-Length", strconv.Itoa(len(data))) - w.Write(data) - } -} - -// CreateInstance godoc -// @Summary Create and start a new instance -// @Description Creates a new instance with the provided configuration options -// @Tags instances -// @Security ApiKeyAuth -// @Accept json -// @Produces json -// @Param name path string true "Instance Name" -// @Param options body instance.CreateInstanceOptions true "Instance configuration options" -// @Success 201 {object} instance.Process "Created instance details" -// @Failure 400 {string} string "Invalid request body" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [post] -func (h *Handler) CreateInstance() 
http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - var options instance.CreateInstanceOptions - if err := json.NewDecoder(r.Body).Decode(&options); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.CreateInstance(name, &options) - if err != nil { - http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(http.StatusCreated) - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// GetInstance godoc -// @Summary Get details of a specific instance -// @Description Returns the details of a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Produces json -// @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Instance details" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [get] -func (h *Handler) GetInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.GetInstance(name) - if err != nil { - http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) - return - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// UpdateInstance godoc -// @Summary Update an instance's 
configuration -// @Description Updates the configuration of a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Accept json -// @Produces json -// @Param name path string true "Instance Name" -// @Param options body instance.CreateInstanceOptions true "Instance configuration options" -// @Success 200 {object} instance.Process "Updated instance details" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [put] -func (h *Handler) UpdateInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - var options instance.CreateInstanceOptions - if err := json.NewDecoder(r.Body).Decode(&options); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.UpdateInstance(name, &options) - if err != nil { - http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// StartInstance godoc -// @Summary Start a stopped instance -// @Description Starts a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Produces json -// @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Started instance details" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/start [post] -func (h *Handler) StartInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - 
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.StartInstance(name) - if err != nil { - // Check if error is due to maximum running instances limit - if _, ok := err.(manager.MaxRunningInstancesError); ok { - http.Error(w, err.Error(), http.StatusConflict) - return - } - - http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// StopInstance godoc -// @Summary Stop a running instance -// @Description Stops a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Produces json -// @Param name path string true "Instance Name" -// @Success 200 {object} instance.Process "Stopped instance details" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/stop [post] -func (h *Handler) StopInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.StopInstance(name) - if err != nil { - http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// RestartInstance godoc -// @Summary Restart a running instance -// @Description Restarts a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Produces json -// @Param name path string true "Instance Name" 
-// @Success 200 {object} instance.Process "Restarted instance details" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/restart [post] -func (h *Handler) RestartInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.RestartInstance(name) - if err != nil { - http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(inst); err != nil { - http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// DeleteInstance godoc -// @Summary Delete an instance -// @Description Stops and removes a specific instance by name -// @Tags instances -// @Security ApiKeyAuth -// @Param name path string true "Instance Name" -// @Success 204 "No Content" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name} [delete] -func (h *Handler) DeleteInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - if err := h.InstanceManager.DeleteInstance(name); err != nil { - http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusNoContent) - } -} - -// GetInstanceLogs godoc -// @Summary Get logs from a specific instance -// @Description Returns the logs from a specific instance by name with optional line limit -// @Tags instances -// @Security ApiKeyAuth -// @Param name path string true "Instance 
Name" -// @Param lines query string false "Number of lines to retrieve (default: all lines)" -// @Produces text/plain -// @Success 200 {string} string "Instance logs" -// @Failure 400 {string} string "Invalid name format or lines parameter" -// @Failure 500 {string} string "Internal Server Error" -// @Router /instances/{name}/logs [get] -func (h *Handler) GetInstanceLogs() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - lines := r.URL.Query().Get("lines") - if lines == "" { - lines = "-1" - } - - num_lines, err := strconv.Atoi(lines) - if err != nil { - http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.GetInstance(name) - if err != nil { - http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) - return - } - - logs, err := inst.GetLogs(num_lines) - if err != nil { - http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError) - return - } - - w.Header().Set("Content-Type", "text/plain") - w.Write([]byte(logs)) - } -} - -// ProxyToInstance godoc -// @Summary Proxy requests to a specific instance -// @Description Forwards HTTP requests to the llama-server instance running on a specific port -// @Tags instances -// @Security ApiKeyAuth -// @Param name path string true "Instance Name" -// @Success 200 "Request successfully proxied to instance" -// @Failure 400 {string} string "Invalid name format" -// @Failure 500 {string} string "Internal Server Error" -// @Failure 503 {string} string "Instance is not running" -// @Router /instances/{name}/proxy [get] -// @Router /instances/{name}/proxy [post] -func (h *Handler) ProxyToInstance() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, 
"Instance name cannot be empty", http.StatusBadRequest) - return - } - - inst, err := h.InstanceManager.GetInstance(name) - if err != nil { - http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) - return - } - - if !inst.IsRunning() { - http.Error(w, "Instance is not running", http.StatusServiceUnavailable) - return - } - - // Get the cached proxy for this instance - proxy, err := inst.GetProxy() - if err != nil { - http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) - return - } - - // Strip the "/api/v1/instances//proxy" prefix from the request URL - prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name) - r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) - - // Update the last request time for the instance - inst.UpdateLastRequestTime() - - // Set forwarded headers - r.Header.Set("X-Forwarded-Host", r.Header.Get("Host")) - r.Header.Set("X-Forwarded-Proto", "http") - - // Forward the request using the cached proxy - proxy.ServeHTTP(w, r) - } -} - -// OpenAIListInstances godoc -// @Summary List instances in OpenAI-compatible format -// @Description Returns a list of instances in a format compatible with OpenAI API -// @Tags openai -// @Security ApiKeyAuth -// @Produces json -// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances" -// @Failure 500 {string} string "Internal Server Error" -// @Router /v1/models [get] -func (h *Handler) OpenAIListInstances() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - instances, err := h.InstanceManager.ListInstances() - if err != nil { - http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) - return - } - - openaiInstances := make([]OpenAIInstance, len(instances)) - for i, inst := range instances { - openaiInstances[i] = OpenAIInstance{ - ID: inst.Name, - Object: "model", - Created: inst.Created, - OwnedBy: "llamactl", - } - } - - openaiResponse := 
OpenAIListInstancesResponse{ - Object: "list", - Data: openaiInstances, - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(openaiResponse); err != nil { - http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError) - return - } - } -} - -// OpenAIProxy godoc -// @Summary OpenAI-compatible proxy endpoint -// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header. -// @Tags openai -// @Security ApiKeyAuth -// @Accept json -// @Produces json -// @Success 200 "OpenAI response" -// @Failure 400 {string} string "Invalid request body or instance name" -// @Failure 500 {string} string "Internal Server Error" -// @Router /v1/ [post] -func (h *Handler) OpenAIProxy() http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - // Read the entire body first - bodyBytes, err := io.ReadAll(r.Body) - if err != nil { - http.Error(w, "Failed to read request body", http.StatusBadRequest) - return - } - r.Body.Close() - - // Parse the body to extract instance name - var requestBody map[string]any - if err := json.Unmarshal(bodyBytes, &requestBody); err != nil { - http.Error(w, "Invalid request body", http.StatusBadRequest) - return - } - - modelName, ok := requestBody["model"].(string) - if !ok || modelName == "" { - http.Error(w, "Instance name is required", http.StatusBadRequest) - return - } - - // Route to the appropriate inst based on instance name - inst, err := h.InstanceManager.GetInstance(modelName) - if err != nil { - http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) - return - } - - if !inst.IsRunning() { - options := inst.GetOptions() - allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart - if !allowOnDemand { - http.Error(w, "Instance is not running", http.StatusServiceUnavailable) - return - } - - if 
h.InstanceManager.IsMaxRunningInstancesReached() { - if h.cfg.Instances.EnableLRUEviction { - err := h.InstanceManager.EvictLRUInstance() - if err != nil { - http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) - return - } - } else { - http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict) - return - } - } - - // If on-demand start is enabled, start the instance - if _, err := h.InstanceManager.StartInstance(modelName); err != nil { - http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) - return - } - - // Wait for the instance to become healthy before proceeding - if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout - http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) - return - } - } - - proxy, err := inst.GetProxy() - if err != nil { - http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) - return - } - - // Update last request time for the instance - inst.UpdateLastRequestTime() - - // Recreate the request body from the bytes we read - r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) - r.ContentLength = int64(len(bodyBytes)) - - proxy.ServeHTTP(w, r) - } -} - -func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - - // Get the instance name from the URL parameter - name := chi.URLParam(r, "name") - if name == "" { - http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) - return - } - - // Route to the appropriate inst based on instance name - inst, err := h.InstanceManager.GetInstance(name) - if err != nil { - http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) - return - } - - options := inst.GetOptions() - if options == nil { - http.Error(w, "Cannot obtain Instance's options", 
http.StatusInternalServerError) - return - } - - if options.BackendType != backends.BackendTypeLlamaCpp { - http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest) - return - } - - if !inst.IsRunning() { - - if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) { - http.Error(w, "Instance is not running", http.StatusServiceUnavailable) - return - } - - if h.InstanceManager.IsMaxRunningInstancesReached() { - if h.cfg.Instances.EnableLRUEviction { - err := h.InstanceManager.EvictLRUInstance() - if err != nil { - http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) - return - } - } else { - http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict) - return - } - } - - // If on-demand start is enabled, start the instance - if _, err := h.InstanceManager.StartInstance(name); err != nil { - http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) - return - } - - // Wait for the instance to become healthy before proceeding - if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout - http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) - return - } - } - - proxy, err := inst.GetProxy() - if err != nil { - http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) - return - } - - // Strip the "/llama-cpp/" prefix from the request URL - prefix := fmt.Sprintf("/llama-cpp/%s", name) - r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) - - // Update the last request time for the instance - inst.UpdateLastRequestTime() - - proxy.ServeHTTP(w, r) - } -} - -// ParseCommandRequest represents the request body for command parsing -type ParseCommandRequest struct { - Command string `json:"command"` -} - -// ParseLlamaCommand godoc -// @Summary Parse llama-server command -// @Description Parses a 
llama-server command string into instance options -// @Tags backends -// @Security ApiKeyAuth -// @Accept json -// @Produce json -// @Param request body ParseCommandRequest true "Command to parse" -// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" -// @Failure 400 {object} map[string]string "Invalid request or command" -// @Failure 500 {object} map[string]string "Internal Server Error" -// @Router /backends/llama-cpp/parse-command [post] -func (h *Handler) ParseLlamaCommand() http.HandlerFunc { - type errorResponse struct { - Error string `json:"error"` - Details string `json:"details,omitempty"` - } - writeError := func(w http.ResponseWriter, status int, code, details string) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) - } - return func(w http.ResponseWriter, r *http.Request) { - var req ParseCommandRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") - return - } - if strings.TrimSpace(req.Command) == "" { - writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") - return - } - llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command) - if err != nil { - writeError(w, http.StatusBadRequest, "parse_error", err.Error()) - return - } - options := &instance.CreateInstanceOptions{ - BackendType: backends.BackendTypeLlamaCpp, - LlamaServerOptions: llamaOptions, - } - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(options); err != nil { - writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) - } - } -} - -// ParseMlxCommand godoc -// @Summary Parse mlx_lm.server command -// @Description Parses MLX-LM server command string into instance options -// @Tags backends -// @Security ApiKeyAuth -// @Accept json -// @Produce json -// @Param request body 
ParseCommandRequest true "Command to parse" -// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" -// @Failure 400 {object} map[string]string "Invalid request or command" -// @Router /backends/mlx/parse-command [post] -func (h *Handler) ParseMlxCommand() http.HandlerFunc { - type errorResponse struct { - Error string `json:"error"` - Details string `json:"details,omitempty"` - } - writeError := func(w http.ResponseWriter, status int, code, details string) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) - } - return func(w http.ResponseWriter, r *http.Request) { - var req ParseCommandRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") - return - } - - if strings.TrimSpace(req.Command) == "" { - writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") - return - } - - mlxOptions, err := mlx.ParseMlxCommand(req.Command) - if err != nil { - writeError(w, http.StatusBadRequest, "parse_error", err.Error()) - return - } - - // Currently only support mlx_lm backend type - backendType := backends.BackendTypeMlxLm - - options := &instance.CreateInstanceOptions{ - BackendType: backendType, - MlxServerOptions: mlxOptions, - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(options); err != nil { - writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) - } - } -} - -// ParseVllmCommand godoc -// @Summary Parse vllm serve command -// @Description Parses a vLLM serve command string into instance options -// @Tags backends -// @Security ApiKeyAuth -// @Accept json -// @Produce json -// @Param request body ParseCommandRequest true "Command to parse" -// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" -// @Failure 400 {object} map[string]string 
"Invalid request or command" -// @Router /backends/vllm/parse-command [post] -func (h *Handler) ParseVllmCommand() http.HandlerFunc { - type errorResponse struct { - Error string `json:"error"` - Details string `json:"details,omitempty"` - } - writeError := func(w http.ResponseWriter, status int, code, details string) { - w.Header().Set("Content-Type", "application/json") - w.WriteHeader(status) - _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) - } - return func(w http.ResponseWriter, r *http.Request) { - var req ParseCommandRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") - return - } - - if strings.TrimSpace(req.Command) == "" { - writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") - return - } - - vllmOptions, err := vllm.ParseVllmCommand(req.Command) - if err != nil { - writeError(w, http.StatusBadRequest, "parse_error", err.Error()) - return - } - - backendType := backends.BackendTypeVllm - - options := &instance.CreateInstanceOptions{ - BackendType: backendType, - VllmServerOptions: vllmOptions, - } - - w.Header().Set("Content-Type", "application/json") - if err := json.NewEncoder(w).Encode(options); err != nil { - writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) - } + httpClient: &http.Client{ + Timeout: 30 * time.Second, + }, + remoteProxies: make(map[string]*httputil.ReverseProxy), } } diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go new file mode 100644 index 0000000..7d6cab0 --- /dev/null +++ b/pkg/server/handlers_backends.go @@ -0,0 +1,320 @@ +package server + +import ( + "encoding/json" + "fmt" + "llamactl/pkg/backends" + "llamactl/pkg/backends/llamacpp" + "llamactl/pkg/backends/mlx" + "llamactl/pkg/backends/vllm" + "llamactl/pkg/instance" + "net/http" + "os/exec" + "strings" + + "github.com/go-chi/chi/v5" +) + +// ParseCommandRequest 
represents the request body for command parsing +type ParseCommandRequest struct { + Command string `json:"command"` +} + +func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + + // Get the instance name from the URL parameter + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + // Route to the appropriate inst based on instance name + inst, err := h.InstanceManager.GetInstance(name) + if err != nil { + http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) + return + } + + options := inst.GetOptions() + if options == nil { + http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError) + return + } + + if options.BackendType != backends.BackendTypeLlamaCpp { + http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest) + return + } + + if !inst.IsRunning() { + + if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } + + if h.InstanceManager.IsMaxRunningInstancesReached() { + if h.cfg.Instances.EnableLRUEviction { + err := h.InstanceManager.EvictLRUInstance() + if err != nil { + http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) + return + } + } else { + http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict) + return + } + } + + // If on-demand start is enabled, start the instance + if _, err := h.InstanceManager.StartInstance(name); err != nil { + http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) + return + } + + // Wait for the instance to become healthy before proceeding + if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout + http.Error(w, "Instance 
failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) + return + } + } + + proxy, err := inst.GetProxy() + if err != nil { + http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) + return + } + + // Strip the "/llama-cpp/" prefix from the request URL + prefix := fmt.Sprintf("/llama-cpp/%s", name) + r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) + + // Update the last request time for the instance + inst.UpdateLastRequestTime() + + proxy.ServeHTTP(w, r) + } +} + +// ParseLlamaCommand godoc +// @Summary Parse llama-server command +// @Description Parses a llama-server command string into instance options +// @Tags backends +// @Security ApiKeyAuth +// @Accept json +// @Produce json +// @Param request body ParseCommandRequest true "Command to parse" +// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" +// @Failure 400 {object} map[string]string "Invalid request or command" +// @Failure 500 {object} map[string]string "Internal Server Error" +// @Router /backends/llama-cpp/parse-command [post] +func (h *Handler) ParseLlamaCommand() http.HandlerFunc { + type errorResponse struct { + Error string `json:"error"` + Details string `json:"details,omitempty"` + } + writeError := func(w http.ResponseWriter, status int, code, details string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) + } + return func(w http.ResponseWriter, r *http.Request) { + var req ParseCommandRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") + return + } + if strings.TrimSpace(req.Command) == "" { + writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") + return + } + llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command) + if err != nil { + writeError(w, http.StatusBadRequest, "parse_error", 
err.Error()) + return + } + options := &instance.CreateInstanceOptions{ + BackendType: backends.BackendTypeLlamaCpp, + LlamaServerOptions: llamaOptions, + } + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(options); err != nil { + writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) + } + } +} + +// ParseMlxCommand godoc +// @Summary Parse mlx_lm.server command +// @Description Parses MLX-LM server command string into instance options +// @Tags backends +// @Security ApiKeyAuth +// @Accept json +// @Produce json +// @Param request body ParseCommandRequest true "Command to parse" +// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" +// @Failure 400 {object} map[string]string "Invalid request or command" +// @Router /backends/mlx/parse-command [post] +func (h *Handler) ParseMlxCommand() http.HandlerFunc { + type errorResponse struct { + Error string `json:"error"` + Details string `json:"details,omitempty"` + } + writeError := func(w http.ResponseWriter, status int, code, details string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) + } + return func(w http.ResponseWriter, r *http.Request) { + var req ParseCommandRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") + return + } + + if strings.TrimSpace(req.Command) == "" { + writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") + return + } + + mlxOptions, err := mlx.ParseMlxCommand(req.Command) + if err != nil { + writeError(w, http.StatusBadRequest, "parse_error", err.Error()) + return + } + + // Currently only support mlx_lm backend type + backendType := backends.BackendTypeMlxLm + + options := &instance.CreateInstanceOptions{ + BackendType: backendType, + MlxServerOptions: mlxOptions, + } + + 
w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(options); err != nil { + writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) + } + } +} + +// ParseVllmCommand godoc +// @Summary Parse vllm serve command +// @Description Parses a vLLM serve command string into instance options +// @Tags backends +// @Security ApiKeyAuth +// @Accept json +// @Produce json +// @Param request body ParseCommandRequest true "Command to parse" +// @Success 200 {object} instance.CreateInstanceOptions "Parsed options" +// @Failure 400 {object} map[string]string "Invalid request or command" +// @Router /backends/vllm/parse-command [post] +func (h *Handler) ParseVllmCommand() http.HandlerFunc { + type errorResponse struct { + Error string `json:"error"` + Details string `json:"details,omitempty"` + } + writeError := func(w http.ResponseWriter, status int, code, details string) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + _ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details}) + } + return func(w http.ResponseWriter, r *http.Request) { + var req ParseCommandRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body") + return + } + + if strings.TrimSpace(req.Command) == "" { + writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty") + return + } + + vllmOptions, err := vllm.ParseVllmCommand(req.Command) + if err != nil { + writeError(w, http.StatusBadRequest, "parse_error", err.Error()) + return + } + + backendType := backends.BackendTypeVllm + + options := &instance.CreateInstanceOptions{ + BackendType: backendType, + VllmServerOptions: vllmOptions, + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(options); err != nil { + writeError(w, http.StatusInternalServerError, "encode_error", err.Error()) + } + } +} + +// 
LlamaServerHelpHandler godoc +// @Summary Get help for llama server +// @Description Returns the help text for the llama server command +// @Tags backends +// @Security ApiKeyAuth +// @Produces text/plain +// @Success 200 {string} string "Help text" +// @Failure 500 {string} string "Internal Server Error" +// @Router /backends/llama-cpp/help [get] +func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + helpCmd := exec.Command("llama-server", "--help") + output, err := helpCmd.CombinedOutput() + if err != nil { + http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "text/plain") + w.Write(output) + } +} + +// LlamaServerVersionHandler godoc +// @Summary Get version of llama server +// @Description Returns the version of the llama server command +// @Tags backends +// @Security ApiKeyAuth +// @Produces text/plain +// @Success 200 {string} string "Version information" +// @Failure 500 {string} string "Internal Server Error" +// @Router /backends/llama-cpp/version [get] +func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + versionCmd := exec.Command("llama-server", "--version") + output, err := versionCmd.CombinedOutput() + if err != nil { + http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "text/plain") + w.Write(output) + } +} + +// LlamaServerListDevicesHandler godoc +// @Summary List available devices for llama server +// @Description Returns a list of available devices for the llama server +// @Tags backends +// @Security ApiKeyAuth +// @Produces text/plain +// @Success 200 {string} string "List of devices" +// @Failure 500 {string} string "Internal Server Error" +// @Router /backends/llama-cpp/devices [get] +func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc { + 
return func(w http.ResponseWriter, r *http.Request) { + listCmd := exec.Command("llama-server", "--list-devices") + output, err := listCmd.CombinedOutput() + if err != nil { + http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "text/plain") + w.Write(output) + } +} diff --git a/pkg/server/handlers_instances.go b/pkg/server/handlers_instances.go new file mode 100644 index 0000000..be3cf4a --- /dev/null +++ b/pkg/server/handlers_instances.go @@ -0,0 +1,445 @@ +package server + +import ( + "encoding/json" + "fmt" + "llamactl/pkg/instance" + "llamactl/pkg/manager" + "net/http" + "net/http/httputil" + "net/url" + "strconv" + "strings" + + "github.com/go-chi/chi/v5" +) + +// ListInstances godoc +// @Summary List all instances +// @Description Returns a list of all instances managed by the server +// @Tags instances +// @Security ApiKeyAuth +// @Produces json +// @Success 200 {array} instance.Process "List of instances" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances [get] +func (h *Handler) ListInstances() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + instances, err := h.InstanceManager.ListInstances() + if err != nil { + http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(instances); err != nil { + http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// CreateInstance godoc +// @Summary Create and start a new instance +// @Description Creates a new instance with the provided configuration options +// @Tags instances +// @Security ApiKeyAuth +// @Accept json +// @Produces json +// @Param name path string true "Instance Name" +// @Param options body instance.CreateInstanceOptions true "Instance configuration options" +// @Success 
201 {object} instance.Process "Created instance details" +// @Failure 400 {string} string "Invalid request body" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name} [post] +func (h *Handler) CreateInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + var options instance.CreateInstanceOptions + if err := json.NewDecoder(r.Body).Decode(&options); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.CreateInstance(name, &options) + if err != nil { + http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusCreated) + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// GetInstance godoc +// @Summary Get details of a specific instance +// @Description Returns the details of a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Produces json +// @Param name path string true "Instance Name" +// @Success 200 {object} instance.Process "Instance details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name} [get] +func (h *Handler) GetInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.GetInstance(name) + if err != nil { + http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) + return + } + + w.Header().Set("Content-Type", 
"application/json") + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// UpdateInstance godoc +// @Summary Update an instance's configuration +// @Description Updates the configuration of a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Accept json +// @Produces json +// @Param name path string true "Instance Name" +// @Param options body instance.CreateInstanceOptions true "Instance configuration options" +// @Success 200 {object} instance.Process "Updated instance details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name} [put] +func (h *Handler) UpdateInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + var options instance.CreateInstanceOptions + if err := json.NewDecoder(r.Body).Decode(&options); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.UpdateInstance(name, &options) + if err != nil { + http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// StartInstance godoc +// @Summary Start a stopped instance +// @Description Starts a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Produces json +// @Param name path string true "Instance Name" +// @Success 200 {object} instance.Process "Started instance details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 
{string} string "Internal Server Error" +// @Router /instances/{name}/start [post] +func (h *Handler) StartInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.StartInstance(name) + if err != nil { + // Check if error is due to maximum running instances limit + if _, ok := err.(manager.MaxRunningInstancesError); ok { + http.Error(w, err.Error(), http.StatusConflict) + return + } + + http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// StopInstance godoc +// @Summary Stop a running instance +// @Description Stops a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Produces json +// @Param name path string true "Instance Name" +// @Success 200 {object} instance.Process "Stopped instance details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name}/stop [post] +func (h *Handler) StopInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.StopInstance(name) + if err != nil { + http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return 
+ } + } +} + +// RestartInstance godoc +// @Summary Restart a running instance +// @Description Restarts a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Produces json +// @Param name path string true "Instance Name" +// @Success 200 {object} instance.Process "Restarted instance details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name}/restart [post] +func (h *Handler) RestartInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.RestartInstance(name) + if err != nil { + http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(inst); err != nil { + http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// DeleteInstance godoc +// @Summary Delete an instance +// @Description Stops and removes a specific instance by name +// @Tags instances +// @Security ApiKeyAuth +// @Param name path string true "Instance Name" +// @Success 204 "No Content" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name} [delete] +func (h *Handler) DeleteInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + if err := h.InstanceManager.DeleteInstance(name); err != nil { + http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusNoContent) + } +} + +// 
GetInstanceLogs godoc +// @Summary Get logs from a specific instance +// @Description Returns the logs from a specific instance by name with optional line limit +// @Tags instances +// @Security ApiKeyAuth +// @Param name path string true "Instance Name" +// @Param lines query string false "Number of lines to retrieve (default: all lines)" +// @Produces text/plain +// @Success 200 {string} string "Instance logs" +// @Failure 400 {string} string "Invalid name format or lines parameter" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name}/logs [get] +func (h *Handler) GetInstanceLogs() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + lines := r.URL.Query().Get("lines") + numLines := -1 // Default to all lines + if lines != "" { + parsedLines, err := strconv.Atoi(lines) + if err != nil { + http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest) + return + } + numLines = parsedLines + } + + // Use the instance manager which handles both local and remote instances + logs, err := h.InstanceManager.GetInstanceLogs(name, numLines) + if err != nil { + http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "text/plain") + w.Write([]byte(logs)) + } +} + +// ProxyToInstance godoc +// @Summary Proxy requests to a specific instance +// @Description Forwards HTTP requests to the llama-server instance running on a specific port +// @Tags instances +// @Security ApiKeyAuth +// @Param name path string true "Instance Name" +// @Success 200 "Request successfully proxied to instance" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Failure 503 {string} string "Instance is not running" +// @Router /instances/{name}/proxy [get] 
+// @Router /instances/{name}/proxy [post] +func (h *Handler) ProxyToInstance() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Instance name cannot be empty", http.StatusBadRequest) + return + } + + inst, err := h.InstanceManager.GetInstance(name) + if err != nil { + http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError) + return + } + + // Check if this is a remote instance + if inst.IsRemote() { + h.RemoteInstanceProxy(w, r, name, inst) + return + } + + if !inst.IsRunning() { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } + + // Get the cached proxy for this instance + proxy, err := inst.GetProxy() + if err != nil { + http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) + return + } + + // Strip the "/api/v1/instances/{name}/proxy" prefix from the request URL + prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name) + r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix) + + // Update the last request time for the instance + inst.UpdateLastRequestTime() + + // Set forwarded headers (net/http moves the incoming Host header into r.Host, so r.Header.Get("Host") is always empty) + r.Header.Set("X-Forwarded-Host", r.Host) + r.Header.Set("X-Forwarded-Proto", "http") + + // Forward the request using the cached proxy + proxy.ServeHTTP(w, r) + } +} + +// RemoteInstanceProxy proxies requests to a remote instance +func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) { + // Get the node name from instance options + options := inst.GetOptions() + if options == nil || len(options.Nodes) == 0 { + http.Error(w, "Instance has no node configured", http.StatusInternalServerError) + return + } + + nodeName := options.Nodes[0] + + // Check if we have a cached proxy for this node + h.remoteProxiesMu.RLock() + proxy, exists := h.remoteProxies[nodeName] + h.remoteProxiesMu.RUnlock() + + if !exists { + // Find node 
configuration + nodeConfig, exists := h.cfg.Nodes[nodeName] + if !exists { + http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError) + return + } + + // Create reverse proxy to remote node + targetURL, err := url.Parse(nodeConfig.Address) + if err != nil { + http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError) + return + } + + proxy = httputil.NewSingleHostReverseProxy(targetURL) + + // Modify request before forwarding + originalDirector := proxy.Director + apiKey := nodeConfig.APIKey // Capture for closure + proxy.Director = func(req *http.Request) { + originalDirector(req) + // Add API key if configured + if apiKey != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey)) + } + } + + // Cache the proxy by node name + h.remoteProxiesMu.Lock() + h.remoteProxies[nodeName] = proxy + h.remoteProxiesMu.Unlock() + } + + // Forward the request using the cached proxy + proxy.ServeHTTP(w, r) +} diff --git a/pkg/server/handlers_nodes.go b/pkg/server/handlers_nodes.go new file mode 100644 index 0000000..98a4b43 --- /dev/null +++ b/pkg/server/handlers_nodes.go @@ -0,0 +1,79 @@ +package server + +import ( + "encoding/json" + "net/http" + + "github.com/go-chi/chi/v5" +) + +// NodeResponse represents a sanitized node configuration for API responses +type NodeResponse struct { + Address string `json:"address"` +} + +// ListNodes godoc +// @Summary List all configured nodes +// @Description Returns a map of all nodes configured in the server (node name -> node config) +// @Tags nodes +// @Security ApiKeyAuth +// @Produces json +// @Success 200 {object} map[string]NodeResponse "Map of nodes" +// @Failure 500 {string} string "Internal Server Error" +// @Router /nodes [get] +func (h *Handler) ListNodes() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Convert to sanitized response format (map of name -> NodeResponse) + nodeResponses := 
make(map[string]NodeResponse, len(h.cfg.Nodes)) + for name, node := range h.cfg.Nodes { + nodeResponses[name] = NodeResponse{ + Address: node.Address, + } + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(nodeResponses); err != nil { + http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// GetNode godoc +// @Summary Get details of a specific node +// @Description Returns the details of a specific node by name +// @Tags nodes +// @Security ApiKeyAuth +// @Produces json +// @Param name path string true "Node Name" +// @Success 200 {object} NodeResponse "Node details" +// @Failure 400 {string} string "Invalid name format" +// @Failure 404 {string} string "Node not found" +// @Failure 500 {string} string "Internal Server Error" +// @Router /nodes/{name} [get] +func (h *Handler) GetNode() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + name := chi.URLParam(r, "name") + if name == "" { + http.Error(w, "Node name cannot be empty", http.StatusBadRequest) + return + } + + nodeConfig, exists := h.cfg.Nodes[name] + if !exists { + http.Error(w, "Node not found", http.StatusNotFound) + return + } + + // Convert to sanitized response format + nodeResponse := NodeResponse{ + Address: nodeConfig.Address, + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(nodeResponse); err != nil { + http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError) + return + } + } +} diff --git a/pkg/server/handlers_openai.go b/pkg/server/handlers_openai.go new file mode 100644 index 0000000..c6e56e9 --- /dev/null +++ b/pkg/server/handlers_openai.go @@ -0,0 +1,206 @@ +package server + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "llamactl/pkg/instance" + "net/http" + "net/http/httputil" + "net/url" +) + +// OpenAIListInstances godoc +// @Summary List instances in OpenAI-compatible format +// 
@Description Returns a list of instances in a format compatible with OpenAI API +// @Tags openai +// @Security ApiKeyAuth +// @Produces json +// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances" +// @Failure 500 {string} string "Internal Server Error" +// @Router /v1/models [get] +func (h *Handler) OpenAIListInstances() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + instances, err := h.InstanceManager.ListInstances() + if err != nil { + http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError) + return + } + + openaiInstances := make([]OpenAIInstance, len(instances)) + for i, inst := range instances { + openaiInstances[i] = OpenAIInstance{ + ID: inst.Name, + Object: "model", + Created: inst.Created, + OwnedBy: "llamactl", + } + } + + openaiResponse := OpenAIListInstancesResponse{ + Object: "list", + Data: openaiInstances, + } + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(openaiResponse); err != nil { + http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError) + return + } + } +} + +// OpenAIProxy godoc +// @Summary OpenAI-compatible proxy endpoint +// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header. 
+// @Tags openai +// @Security ApiKeyAuth +// @Accept json +// @Produces json +// @Success 200 "OpenAI response" +// @Failure 400 {string} string "Invalid request body or instance name" +// @Failure 500 {string} string "Internal Server Error" +// @Router /v1/ [post] +func (h *Handler) OpenAIProxy() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Read the entire body first + bodyBytes, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, "Failed to read request body", http.StatusBadRequest) + return + } + r.Body.Close() + + // Parse the body to extract instance name + var requestBody map[string]any + if err := json.Unmarshal(bodyBytes, &requestBody); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + modelName, ok := requestBody["model"].(string) + if !ok || modelName == "" { + http.Error(w, "Instance name is required", http.StatusBadRequest) + return + } + + // Route to the appropriate inst based on instance name + inst, err := h.InstanceManager.GetInstance(modelName) + if err != nil { + http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest) + return + } + + // Check if this is a remote instance + if inst.IsRemote() { + // Restore the body for the remote proxy + r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + h.RemoteOpenAIProxy(w, r, modelName, inst) + return + } + + if !inst.IsRunning() { + options := inst.GetOptions() + allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart + if !allowOnDemand { + http.Error(w, "Instance is not running", http.StatusServiceUnavailable) + return + } + + if h.InstanceManager.IsMaxRunningInstancesReached() { + if h.cfg.Instances.EnableLRUEviction { + err := h.InstanceManager.EvictLRUInstance() + if err != nil { + http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError) + return + } + } else { + http.Error(w, "Cannot start Instance, maximum number 
of instances reached", http.StatusConflict) + return + } + } + + // If on-demand start is enabled, start the instance + if _, err := h.InstanceManager.StartInstance(modelName); err != nil { + http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError) + return + } + + // Wait for the instance to become healthy before proceeding + if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // timeout is taken from the instances config (OnDemandStartTimeout), not a fixed duration + http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable) + return + } + } + + proxy, err := inst.GetProxy() + if err != nil { + http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError) + return + } + + // Update last request time for the instance + inst.UpdateLastRequestTime() + + // Recreate the request body from the bytes we read + r.Body = io.NopCloser(bytes.NewReader(bodyBytes)) + r.ContentLength = int64(len(bodyBytes)) + + proxy.ServeHTTP(w, r) + } +} + +// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance +func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) { + // Get the node name from instance options + options := inst.GetOptions() + if options == nil || len(options.Nodes) == 0 { + http.Error(w, "Instance has no node configured", http.StatusInternalServerError) + return + } + + nodeName := options.Nodes[0] + + // Check if we have a cached proxy for this node + h.remoteProxiesMu.RLock() + proxy, exists := h.remoteProxies[nodeName] + h.remoteProxiesMu.RUnlock() + + if !exists { + // Find node configuration + nodeConfig, exists := h.cfg.Nodes[nodeName] + if !exists { + http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError) + return + } + + // Create reverse proxy to remote node + targetURL, err := url.Parse(nodeConfig.Address) + if err != nil { + http.Error(w, "Failed to parse node address: "+err.Error(), 
http.StatusInternalServerError) + return + } + + proxy = httputil.NewSingleHostReverseProxy(targetURL) + + // Modify request before forwarding + originalDirector := proxy.Director + apiKey := nodeConfig.APIKey // Capture for closure + proxy.Director = func(req *http.Request) { + originalDirector(req) + // Add API key if configured + if apiKey != "" { + req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey)) + } + } + + // Cache the proxy + h.remoteProxiesMu.Lock() + h.remoteProxies[nodeName] = proxy + h.remoteProxiesMu.Unlock() + } + + // Forward the request using the cached proxy + proxy.ServeHTTP(w, r) +} diff --git a/pkg/server/handlers_system.go b/pkg/server/handlers_system.go new file mode 100644 index 0000000..e3bb016 --- /dev/null +++ b/pkg/server/handlers_system.go @@ -0,0 +1,22 @@ +package server + +import ( + "fmt" + "net/http" +) + +// VersionHandler godoc +// @Summary Get llamactl version +// @Description Returns the version of the llamactl command +// @Tags version +// @Security ApiKeyAuth +// @Produces text/plain +// @Success 200 {string} string "Version information" +// @Failure 500 {string} string "Internal Server Error" +// @Router /version [get] +func (h *Handler) VersionHandler() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/plain") + fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime) + } +} diff --git a/pkg/server/routes.go b/pkg/server/routes.go index 8d5068b..6ced6a7 100644 --- a/pkg/server/routes.go +++ b/pkg/server/routes.go @@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux { }) }) + // Node management endpoints + r.Route("/nodes", func(r chi.Router) { + r.Get("/", handler.ListNodes()) // List all nodes + + r.Route("/{name}", func(r chi.Router) { + r.Get("/", handler.GetNode()) + }) + }) + // Instance management endpoints r.Route("/instances", func(r chi.Router) { r.Get("/", 
handler.ListInstances()) // List all instances diff --git a/webui/src/components/InstanceDialog.tsx b/webui/src/components/InstanceDialog.tsx index d9b731c..4a54f7a 100644 --- a/webui/src/components/InstanceDialog.tsx +++ b/webui/src/components/InstanceDialog.tsx @@ -106,7 +106,7 @@ const InstanceDialog: React.FC = ({ // Clean up undefined values to avoid sending empty fields const cleanOptions: CreateInstanceOptions = {}; Object.entries(formData).forEach(([key, value]) => { - if (key === 'backend_options' && value && typeof value === 'object') { + if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) { // Handle backend_options specially - clean nested object const cleanBackendOptions: any = {}; Object.entries(value).forEach(([backendKey, backendValue]) => { @@ -118,13 +118,17 @@ const InstanceDialog: React.FC = ({ cleanBackendOptions[backendKey] = backendValue; } }); - + // Only include backend_options if it has content if (Object.keys(cleanBackendOptions).length > 0) { (cleanOptions as any)[key] = cleanBackendOptions; } - } else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) { - // Handle arrays - don't include empty arrays + } else if (value !== undefined && value !== null) { + // Skip empty strings + if (typeof value === 'string' && value.trim() === "") { + return; + } + // Skip empty arrays if (Array.isArray(value) && value.length === 0) { return; } diff --git a/webui/src/components/instance/InstanceSettingsCard.tsx b/webui/src/components/instance/InstanceSettingsCard.tsx index c85eda9..a89ee90 100644 --- a/webui/src/components/instance/InstanceSettingsCard.tsx +++ b/webui/src/components/instance/InstanceSettingsCard.tsx @@ -1,4 +1,4 @@ -import React from 'react' +import React, { useState, useEffect } from 'react' import type { CreateInstanceOptions } from '@/types/instance' import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card' import { Label } 
from '@/components/ui/label' @@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura import NumberInput from '@/components/form/NumberInput' import CheckboxInput from '@/components/form/CheckboxInput' import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput' +import SelectInput from '@/components/form/SelectInput' +import { nodesApi, type NodesMap } from '@/lib/api' interface InstanceSettingsCardProps { instanceName: string @@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC = ({ onNameChange, onChange }) => { + const [nodes, setNodes] = useState({}) + const [loadingNodes, setLoadingNodes] = useState(true) + + useEffect(() => { + const fetchNodes = async () => { + try { + const fetchedNodes = await nodesApi.list() + setNodes(fetchedNodes) + + // Auto-select first node if none selected + const nodeNames = Object.keys(fetchedNodes) + if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) { + onChange('nodes', [nodeNames[0]]) + } + } catch (error) { + console.error('Failed to fetch nodes:', error) + } finally { + setLoadingNodes(false) + } + } + + void fetchNodes() + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []) + + const nodeOptions = Object.keys(nodes).map(nodeName => ({ + value: nodeName, + label: nodeName + })) + + const handleNodeChange = (value: string | undefined) => { + if (value) { + onChange('nodes', [value]) + } else { + onChange('nodes', undefined) + } + } + + const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : '' + return ( @@ -50,6 +92,18 @@ const InstanceSettingsCard: React.FC = ({

+ {/* Node Selection */} + {!loadingNodes && Object.keys(nodes).length > 0 && ( + + )} + {/* Auto Restart Configuration */} ; + +// Node API functions +export const nodesApi = { + // GET /nodes - returns map of node name to NodeResponse + list: () => apiCall("/nodes"), + + // GET /nodes/{name} + get: (name: string) => apiCall(`/nodes/${name}`), +}; + // Instance API functions export const instancesApi = { // GET /instances diff --git a/webui/src/schemas/instanceOptions.ts b/webui/src/schemas/instanceOptions.ts index 0af09c1..3cbf523 100644 --- a/webui/src/schemas/instanceOptions.ts +++ b/webui/src/schemas/instanceOptions.ts @@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({ // Backend configuration backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(), backend_options: BackendOptionsSchema.optional(), + + // Node configuration + nodes: z.array(z.string()).optional(), }) // Re-export types and schemas from backend files