mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-05 16:44:22 +00:00
Merge pull request #59 from lordmathis/feat/multi-host
feat: Implement multi node support
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
# llamactl
|
# llamactl
|
||||||
|
|
||||||
  
|
   
|
||||||
|
|
||||||
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
|
**Unified management and routing for llama.cpp, MLX and vLLM models with web dashboard.**
|
||||||
|
|
||||||
@@ -25,6 +25,11 @@
|
|||||||
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
- **Smart Resource Management**: Idle timeout, LRU eviction, and configurable instance limits
|
||||||
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
|
- **Environment Variables**: Set custom environment variables per instance for advanced configuration
|
||||||
|
|
||||||
|
### 🔗 Remote Instance Deployment
|
||||||
|
- **Remote Node Support**: Deploy instances on remote hosts
|
||||||
|
- **Central Management**: Manage remote instances from a single dashboard
|
||||||
|
- **Seamless Routing**: Automatic request routing to remote instances
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|||||||
@@ -58,7 +58,7 @@ func main() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the instance manager
|
// Initialize the instance manager
|
||||||
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances)
|
instanceManager := manager.NewInstanceManager(cfg.Backends, cfg.Instances, cfg.Nodes)
|
||||||
|
|
||||||
// Create a new handler with the instance manager
|
// Create a new handler with the instance manager
|
||||||
handler := server.NewHandler(instanceManager, cfg)
|
handler := server.NewHandler(instanceManager, cfg)
|
||||||
|
|||||||
@@ -70,6 +70,10 @@ auth:
|
|||||||
inference_keys: [] # Keys for inference endpoints
|
inference_keys: [] # Keys for inference endpoints
|
||||||
require_management_auth: true # Require auth for management endpoints
|
require_management_auth: true # Require auth for management endpoints
|
||||||
management_keys: [] # Keys for management endpoints
|
management_keys: [] # Keys for management endpoints
|
||||||
|
|
||||||
|
local_node: "main" # Name of the local node (default: "main")
|
||||||
|
nodes: # Node configuration for multi-node deployment
|
||||||
|
main: # Default local node (empty config)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Configuration Files
|
## Configuration Files
|
||||||
@@ -241,12 +245,26 @@ auth:
|
|||||||
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
|
- `LLAMACTL_REQUIRE_MANAGEMENT_AUTH` - Require auth for management endpoints (true/false)
|
||||||
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
|
- `LLAMACTL_MANAGEMENT_KEYS` - Comma-separated management API keys
|
||||||
|
|
||||||
## Command Line Options
|
### Remote Node Configuration
|
||||||
|
|
||||||
View all available command line options:
|
llamactl supports remote node deployments. Configure remote nodes to deploy instances on remote hosts and manage them centrally.
|
||||||
|
|
||||||
```bash
|
```yaml
|
||||||
llamactl --help
|
local_node: "main" # Name of the local node (default: "main")
|
||||||
|
nodes: # Node configuration map
|
||||||
|
main: # Local node (empty address means local)
|
||||||
|
address: "" # Not used for local node
|
||||||
|
api_key: "" # Not used for local node
|
||||||
|
worker1: # Remote worker node
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
api_key: "worker1-api-key" # Management API key for authentication
|
||||||
```
|
```
|
||||||
|
|
||||||
You can also override configuration using command line flags when starting llamactl.
|
**Node Configuration Fields:**
|
||||||
|
- `local_node`: Specifies which node in the `nodes` map represents the local node
|
||||||
|
- `nodes`: Map of node configurations
|
||||||
|
- `address`: HTTP/HTTPS URL of the remote node (empty for local node)
|
||||||
|
- `api_key`: Management API key for authenticating with the remote node
|
||||||
|
|
||||||
|
**Environment Variables:**
|
||||||
|
- `LLAMACTL_LOCAL_NODE` - Name of the local node
|
||||||
|
|||||||
@@ -157,6 +157,12 @@ cd webui && npm ci && npm run build && cd ..
|
|||||||
go build -o llamactl ./cmd/server
|
go build -o llamactl ./cmd/server
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Remote Node Installation
|
||||||
|
|
||||||
|
For deployments with remote nodes:
|
||||||
|
- Install llamactl on each node using any of the methods above
|
||||||
|
- Configure API keys for authentication between nodes
|
||||||
|
|
||||||
## Verification
|
## Verification
|
||||||
|
|
||||||
Verify your installation by checking the version:
|
Verify your installation by checking the version:
|
||||||
@@ -168,3 +174,5 @@ llamactl --version
|
|||||||
## Next Steps
|
## Next Steps
|
||||||
|
|
||||||
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
|
Now that Llamactl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
|
||||||
|
|
||||||
|
For remote node deployments, see the [Configuration Guide](configuration.md) for node setup instructions.
|
||||||
|
|||||||
@@ -126,6 +126,7 @@ POST /api/v1/instances/{name}
|
|||||||
- `on_demand_start`: Start instance when receiving requests
|
- `on_demand_start`: Start instance when receiving requests
|
||||||
- `idle_timeout`: Idle timeout in minutes
|
- `idle_timeout`: Idle timeout in minutes
|
||||||
- `environment`: Environment variables as key-value pairs
|
- `environment`: Environment variables as key-value pairs
|
||||||
|
- `nodes`: Array containing a single node name on which to deploy the instance (for remote deployments)
|
||||||
|
|
||||||
See [Managing Instances](managing-instances.md) for complete configuration options.
|
See [Managing Instances](managing-instances.md) for complete configuration options.
|
||||||
|
|
||||||
@@ -405,6 +406,38 @@ curl -X DELETE -H "Authorization: Bearer your-api-key" \
|
|||||||
http://localhost:8080/api/v1/instances/my-model
|
http://localhost:8080/api/v1/instances/my-model
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Remote Node Instance Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Create instance on specific remote node
|
||||||
|
curl -X POST http://localhost:8080/api/v1/instances/remote-model \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer your-api-key" \
|
||||||
|
-d '{
|
||||||
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-2-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"nodes": ["worker1"]
|
||||||
|
}'
|
||||||
|
|
||||||
|
# Check status of remote instance
|
||||||
|
curl -H "Authorization: Bearer your-api-key" \
|
||||||
|
http://localhost:8080/api/v1/instances/remote-model
|
||||||
|
|
||||||
|
# Use remote instance with OpenAI-compatible API
|
||||||
|
curl -X POST http://localhost:8080/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer your-inference-api-key" \
|
||||||
|
-d '{
|
||||||
|
"model": "remote-model",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello from remote node!"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
### Using the Proxy Endpoint
|
### Using the Proxy Endpoint
|
||||||
|
|
||||||
You can also directly proxy requests to the llama-server instance:
|
You can also directly proxy requests to the llama-server instance:
|
||||||
|
|||||||
@@ -39,26 +39,27 @@ Each instance is displayed as a card showing:
|
|||||||
|
|
||||||
1. Click the **"Create Instance"** button on the dashboard
|
1. Click the **"Create Instance"** button on the dashboard
|
||||||
2. Enter a unique **Name** for your instance (only required field)
|
2. Enter a unique **Name** for your instance (only required field)
|
||||||
3. **Choose Backend Type**:
|
3. **Select Target Node**: Choose which node to deploy the instance to from the dropdown
|
||||||
|
4. **Choose Backend Type**:
|
||||||
- **llama.cpp**: For GGUF models using llama-server
|
- **llama.cpp**: For GGUF models using llama-server
|
||||||
- **MLX**: For MLX-optimized models (macOS only)
|
- **MLX**: For MLX-optimized models (macOS only)
|
||||||
- **vLLM**: For distributed serving and high-throughput inference
|
- **vLLM**: For distributed serving and high-throughput inference
|
||||||
4. Configure model source:
|
5. Configure model source:
|
||||||
- **For llama.cpp**: GGUF model path or HuggingFace repo
|
- **For llama.cpp**: GGUF model path or HuggingFace repo
|
||||||
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
|
- **For MLX**: MLX model path or identifier (e.g., `mlx-community/Mistral-7B-Instruct-v0.3-4bit`)
|
||||||
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
|
- **For vLLM**: HuggingFace model identifier (e.g., `microsoft/DialoGPT-medium`)
|
||||||
5. Configure optional instance management settings:
|
6. Configure optional instance management settings:
|
||||||
- **Auto Restart**: Automatically restart instance on failure
|
- **Auto Restart**: Automatically restart instance on failure
|
||||||
- **Max Restarts**: Maximum number of restart attempts
|
- **Max Restarts**: Maximum number of restart attempts
|
||||||
- **Restart Delay**: Delay in seconds between restart attempts
|
- **Restart Delay**: Delay in seconds between restart attempts
|
||||||
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
- **On Demand Start**: Start instance when receiving a request to the OpenAI compatible endpoint
|
||||||
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
- **Idle Timeout**: Minutes before stopping idle instance (set to 0 to disable)
|
||||||
- **Environment Variables**: Set custom environment variables for the instance process
|
- **Environment Variables**: Set custom environment variables for the instance process
|
||||||
6. Configure backend-specific options:
|
7. Configure backend-specific options:
|
||||||
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
- **llama.cpp**: Threads, context size, GPU layers, port, etc.
|
||||||
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
- **MLX**: Temperature, top-p, adapter path, Python environment, etc.
|
||||||
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
|
- **vLLM**: Tensor parallel size, GPU memory utilization, quantization, etc.
|
||||||
7. Click **"Create"** to save the instance
|
8. Click **"Create"** to save the instance
|
||||||
|
|
||||||
### Via API
|
### Via API
|
||||||
|
|
||||||
@@ -121,6 +122,18 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
|
|||||||
"gpu_layers": 32
|
"gpu_layers": 32
|
||||||
}
|
}
|
||||||
}'
|
}'
|
||||||
|
|
||||||
|
# Create instance on specific remote node
|
||||||
|
curl -X POST http://localhost:8080/api/instances/remote-llama \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"backend_type": "llama_cpp",
|
||||||
|
"backend_options": {
|
||||||
|
"model": "/models/llama-7b.gguf",
|
||||||
|
"gpu_layers": 32
|
||||||
|
},
|
||||||
|
"nodes": ["worker1"]
|
||||||
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
## Start Instance
|
## Start Instance
|
||||||
@@ -227,3 +240,4 @@ Check the health status of your instances:
|
|||||||
```bash
|
```bash
|
||||||
curl http://localhost:8080/api/instances/{name}/proxy/health
|
curl http://localhost:8080/api/instances/{name}/proxy/health
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -125,6 +125,30 @@ This helps determine if the issue is with llamactl or with the underlying llama.
|
|||||||
http://localhost:8080/api/v1/instances
|
http://localhost:8080/api/v1/instances
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Remote Node Issues
|
||||||
|
|
||||||
|
### Node Configuration
|
||||||
|
|
||||||
|
**Problem:** Remote instances do not appear or cannot be managed
|
||||||
|
|
||||||
|
**Solutions:**
|
||||||
|
1. **Verify node configuration:**
|
||||||
|
```yaml
|
||||||
|
local_node: "main" # Must match a key in nodes map
|
||||||
|
nodes:
|
||||||
|
main:
|
||||||
|
address: "" # Empty for local node
|
||||||
|
worker1:
|
||||||
|
address: "http://worker1.internal:8080"
|
||||||
|
api_key: "secure-key" # Must match worker1's management key
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Test remote node connectivity:**
|
||||||
|
```bash
|
||||||
|
curl -H "Authorization: Bearer remote-node-key" \
|
||||||
|
http://remote-node:8080/api/v1/instances
|
||||||
|
```
|
||||||
|
|
||||||
## Debugging and Logs
|
## Debugging and Logs
|
||||||
|
|
||||||
### Viewing Instance Logs
|
### Viewing Instance Logs
|
||||||
|
|||||||
@@ -41,6 +41,8 @@ type AppConfig struct {
|
|||||||
Backends BackendConfig `yaml:"backends"`
|
Backends BackendConfig `yaml:"backends"`
|
||||||
Instances InstancesConfig `yaml:"instances"`
|
Instances InstancesConfig `yaml:"instances"`
|
||||||
Auth AuthConfig `yaml:"auth"`
|
Auth AuthConfig `yaml:"auth"`
|
||||||
|
LocalNode string `yaml:"local_node,omitempty"`
|
||||||
|
Nodes map[string]NodeConfig `yaml:"nodes,omitempty"`
|
||||||
Version string `yaml:"-"`
|
Version string `yaml:"-"`
|
||||||
CommitHash string `yaml:"-"`
|
CommitHash string `yaml:"-"`
|
||||||
BuildTime string `yaml:"-"`
|
BuildTime string `yaml:"-"`
|
||||||
@@ -128,6 +130,11 @@ type AuthConfig struct {
|
|||||||
ManagementKeys []string `yaml:"management_keys"`
|
ManagementKeys []string `yaml:"management_keys"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NodeConfig describes one entry of the `nodes` map in the configuration file.
// The default local node is registered with an empty NodeConfig; a remote node
// carries the URL and management API key used to reach it.
type NodeConfig struct {
|
||||||
|
// Address is the HTTP/HTTPS URL of the node; left empty for the local node.
Address string `yaml:"address"`
|
||||||
|
// APIKey is the management API key used to authenticate with the remote node.
// It is omitted from the YAML output when empty.
APIKey string `yaml:"api_key,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
// LoadConfig loads configuration with the following precedence:
|
// LoadConfig loads configuration with the following precedence:
|
||||||
// 1. Hardcoded defaults
|
// 1. Hardcoded defaults
|
||||||
// 2. Config file
|
// 2. Config file
|
||||||
@@ -142,6 +149,10 @@ func LoadConfig(configPath string) (AppConfig, error) {
|
|||||||
AllowedHeaders: []string{"*"}, // Default to allow all headers
|
AllowedHeaders: []string{"*"}, // Default to allow all headers
|
||||||
EnableSwagger: false,
|
EnableSwagger: false,
|
||||||
},
|
},
|
||||||
|
LocalNode: "main",
|
||||||
|
Nodes: map[string]NodeConfig{
|
||||||
|
"main": {}, // Local node with empty config
|
||||||
|
},
|
||||||
Backends: BackendConfig{
|
Backends: BackendConfig{
|
||||||
LlamaCpp: BackendSettings{
|
LlamaCpp: BackendSettings{
|
||||||
Command: "llama-server",
|
Command: "llama-server",
|
||||||
@@ -469,6 +480,11 @@ func loadEnvVars(cfg *AppConfig) {
|
|||||||
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
|
if managementKeys := os.Getenv("LLAMACTL_MANAGEMENT_KEYS"); managementKeys != "" {
|
||||||
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
|
cfg.Auth.ManagementKeys = strings.Split(managementKeys, ",")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Local node config
|
||||||
|
if localNode := os.Getenv("LLAMACTL_LOCAL_NODE"); localNode != "" {
|
||||||
|
cfg.LocalNode = localNode
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
|
// ParsePortRange parses port range from string formats like "8000-9000" or "8000,9000"
|
||||||
|
|||||||
@@ -510,3 +510,132 @@ func TestGetBackendSettings_InvalidBackendType(t *testing.T) {
|
|||||||
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
|
t.Errorf("Expected empty command for invalid backend, got %q", settings.Command)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestLoadConfig_LocalNode(t *testing.T) {
|
||||||
|
t.Run("default local node", func(t *testing.T) {
|
||||||
|
cfg, err := config.LoadConfig("nonexistent-file.yaml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "main" {
|
||||||
|
t.Errorf("Expected default local node 'main', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("local node from file", func(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
configFile := filepath.Join(tempDir, "test-config.yaml")
|
||||||
|
|
||||||
|
configContent := `
|
||||||
|
local_node: "worker1"
|
||||||
|
nodes:
|
||||||
|
worker1:
|
||||||
|
address: ""
|
||||||
|
worker2:
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
api_key: "test-key"
|
||||||
|
`
|
||||||
|
|
||||||
|
err := os.WriteFile(configFile, []byte(configContent), 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to write test config file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig(configFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "worker1" {
|
||||||
|
t.Errorf("Expected local node 'worker1', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify nodes map (includes default "main" + worker1 + worker2)
|
||||||
|
if len(cfg.Nodes) != 3 {
|
||||||
|
t.Errorf("Expected 3 nodes (default main + worker1 + worker2), got %d", len(cfg.Nodes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify local node exists and is empty
|
||||||
|
localNode, exists := cfg.Nodes["worker1"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected local node 'worker1' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if localNode.Address != "" {
|
||||||
|
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
|
||||||
|
}
|
||||||
|
if localNode.APIKey != "" {
|
||||||
|
t.Errorf("Expected local node api_key to be empty, got %q", localNode.APIKey)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify remote node
|
||||||
|
remoteNode, exists := cfg.Nodes["worker2"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected remote node 'worker2' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if remoteNode.Address != "http://192.168.1.10:8080" {
|
||||||
|
t.Errorf("Expected remote node address 'http://192.168.1.10:8080', got %q", remoteNode.Address)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify default main node still exists
|
||||||
|
_, exists = cfg.Nodes["main"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected default 'main' node to still exist in nodes map")
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("custom local node name in config", func(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
configFile := filepath.Join(tempDir, "test-config.yaml")
|
||||||
|
|
||||||
|
configContent := `
|
||||||
|
local_node: "primary"
|
||||||
|
nodes:
|
||||||
|
primary:
|
||||||
|
address: ""
|
||||||
|
worker1:
|
||||||
|
address: "http://192.168.1.10:8080"
|
||||||
|
`
|
||||||
|
|
||||||
|
err := os.WriteFile(configFile, []byte(configContent), 0644)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to write test config file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig(configFile)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "primary" {
|
||||||
|
t.Errorf("Expected local node 'primary', got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify nodes map includes default "main" + primary + worker1
|
||||||
|
if len(cfg.Nodes) != 3 {
|
||||||
|
t.Errorf("Expected 3 nodes (default main + primary + worker1), got %d", len(cfg.Nodes))
|
||||||
|
}
|
||||||
|
|
||||||
|
localNode, exists := cfg.Nodes["primary"]
|
||||||
|
if !exists {
|
||||||
|
t.Error("Expected local node 'primary' to exist in nodes map")
|
||||||
|
}
|
||||||
|
if localNode.Address != "" {
|
||||||
|
t.Errorf("Expected local node address to be empty, got %q", localNode.Address)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("local node from environment variable", func(t *testing.T) {
|
||||||
|
os.Setenv("LLAMACTL_LOCAL_NODE", "custom-node")
|
||||||
|
defer os.Unsetenv("LLAMACTL_LOCAL_NODE")
|
||||||
|
|
||||||
|
cfg, err := config.LoadConfig("nonexistent-file.yaml")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("LoadConfig failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.LocalNode != "custom-node" {
|
||||||
|
t.Errorf("Expected local node 'custom-node' from env var, got %q", cfg.LocalNode)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|||||||
@@ -171,6 +171,11 @@ func (i *Process) GetProxy() (*httputil.ReverseProxy, error) {
|
|||||||
return nil, fmt.Errorf("instance %s has no options set", i.Name)
|
return nil, fmt.Errorf("instance %s has no options set", i.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Remote instances should not use local proxy - they are handled by RemoteInstanceProxy
|
||||||
|
if len(i.options.Nodes) > 0 {
|
||||||
|
return nil, fmt.Errorf("instance %s is a remote instance and should not use local proxy", i.Name)
|
||||||
|
}
|
||||||
|
|
||||||
var host string
|
var host string
|
||||||
var port int
|
var port int
|
||||||
switch i.options.BackendType {
|
switch i.options.BackendType {
|
||||||
@@ -285,5 +290,24 @@ func (i *Process) UnmarshalJSON(data []byte) error {
|
|||||||
i.options = aux.Options
|
i.options = aux.Options
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Initialize fields that are not serialized
|
||||||
|
if i.timeProvider == nil {
|
||||||
|
i.timeProvider = realTimeProvider{}
|
||||||
|
}
|
||||||
|
if i.logger == nil && i.globalInstanceSettings != nil {
|
||||||
|
i.logger = NewInstanceLogger(i.Name, i.globalInstanceSettings.LogsDir)
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (i *Process) IsRemote() bool {
|
||||||
|
i.mu.RLock()
|
||||||
|
defer i.mu.RUnlock()
|
||||||
|
|
||||||
|
if i.options == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return len(i.options.Nodes) > 0
|
||||||
|
}
|
||||||
|
|||||||
@@ -27,6 +27,8 @@ type CreateInstanceOptions struct {
|
|||||||
BackendType backends.BackendType `json:"backend_type"`
|
BackendType backends.BackendType `json:"backend_type"`
|
||||||
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
BackendOptions map[string]any `json:"backend_options,omitempty"`
|
||||||
|
|
||||||
|
Nodes []string `json:"nodes,omitempty"`
|
||||||
|
|
||||||
// Backend-specific options
|
// Backend-specific options
|
||||||
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
|
LlamaServerOptions *llamacpp.LlamaServerOptions `json:"-"`
|
||||||
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
|
MlxServerOptions *mlx.MlxServerOptions `json:"-"`
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import (
|
|||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
"llamactl/pkg/instance"
|
||||||
"log"
|
"log"
|
||||||
|
"net/http"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -25,10 +26,22 @@ type InstanceManager interface {
|
|||||||
StopInstance(name string) (*instance.Process, error)
|
StopInstance(name string) (*instance.Process, error)
|
||||||
EvictLRUInstance() error
|
EvictLRUInstance() error
|
||||||
RestartInstance(name string) (*instance.Process, error)
|
RestartInstance(name string) (*instance.Process, error)
|
||||||
GetInstanceLogs(name string) (string, error)
|
GetInstanceLogs(name string, numLines int) (string, error)
|
||||||
Shutdown()
|
Shutdown()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RemoteManager is the set of instance operations that target a specific node.
// Every method takes the node's configuration as its first argument; the
// remaining arguments identify the instance by name and, where relevant, carry
// its options or the number of log lines requested.
// NOTE(review): the implementation is not visible here — presumably each call
// is forwarded to the node's management API; confirm against the implementation.
type RemoteManager interface {
|
||||||
|
// ListRemoteInstances returns the instances reported for the given node.
ListRemoteInstances(node *config.NodeConfig) ([]*instance.Process, error)
|
||||||
|
// CreateRemoteInstance creates the named instance on the node with the given options.
CreateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
|
||||||
|
// GetRemoteInstance fetches the named instance from the node.
GetRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
// UpdateRemoteInstance applies new options to the named instance on the node.
UpdateRemoteInstance(node *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error)
|
||||||
|
// DeleteRemoteInstance removes the named instance from the node.
DeleteRemoteInstance(node *config.NodeConfig, name string) error
|
||||||
|
// StartRemoteInstance starts the named instance on the node.
StartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
// StopRemoteInstance stops the named instance on the node.
StopRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
// RestartRemoteInstance restarts the named instance on the node.
RestartRemoteInstance(node *config.NodeConfig, name string) (*instance.Process, error)
|
||||||
|
// GetRemoteInstanceLogs returns log output for the named instance; numLines
// presumably limits how many lines are returned — confirm against the implementation.
GetRemoteInstanceLogs(node *config.NodeConfig, name string, numLines int) (string, error)
|
||||||
|
}
|
||||||
|
|
||||||
type instanceManager struct {
|
type instanceManager struct {
|
||||||
mu sync.RWMutex
|
mu sync.RWMutex
|
||||||
instances map[string]*instance.Process
|
instances map[string]*instance.Process
|
||||||
@@ -42,13 +55,26 @@ type instanceManager struct {
|
|||||||
shutdownChan chan struct{}
|
shutdownChan chan struct{}
|
||||||
shutdownDone chan struct{}
|
shutdownDone chan struct{}
|
||||||
isShutdown bool
|
isShutdown bool
|
||||||
|
|
||||||
|
// Remote instance management
|
||||||
|
httpClient *http.Client
|
||||||
|
instanceNodeMap map[string]*config.NodeConfig // Maps instance name to its node config
|
||||||
|
nodeConfigMap map[string]*config.NodeConfig // Maps node name to node config for quick lookup
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewInstanceManager creates a new instance of InstanceManager.
|
// NewInstanceManager creates a new instance of InstanceManager.
|
||||||
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig) InstanceManager {
|
func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig config.InstancesConfig, nodesConfig map[string]config.NodeConfig) InstanceManager {
|
||||||
if instancesConfig.TimeoutCheckInterval <= 0 {
|
if instancesConfig.TimeoutCheckInterval <= 0 {
|
||||||
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
instancesConfig.TimeoutCheckInterval = 5 // Default to 5 minutes if not set
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build node config map for quick lookup
|
||||||
|
nodeConfigMap := make(map[string]*config.NodeConfig)
|
||||||
|
for name := range nodesConfig {
|
||||||
|
nodeCopy := nodesConfig[name]
|
||||||
|
nodeConfigMap[name] = &nodeCopy
|
||||||
|
}
|
||||||
|
|
||||||
im := &instanceManager{
|
im := &instanceManager{
|
||||||
instances: make(map[string]*instance.Process),
|
instances: make(map[string]*instance.Process),
|
||||||
runningInstances: make(map[string]struct{}),
|
runningInstances: make(map[string]struct{}),
|
||||||
@@ -59,6 +85,13 @@ func NewInstanceManager(backendsConfig config.BackendConfig, instancesConfig con
|
|||||||
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
timeoutChecker: time.NewTicker(time.Duration(instancesConfig.TimeoutCheckInterval) * time.Minute),
|
||||||
shutdownChan: make(chan struct{}),
|
shutdownChan: make(chan struct{}),
|
||||||
shutdownDone: make(chan struct{}),
|
shutdownDone: make(chan struct{}),
|
||||||
|
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
|
|
||||||
|
instanceNodeMap: make(map[string]*config.NodeConfig),
|
||||||
|
nodeConfigMap: nodeConfigMap,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load existing instances from disk
|
// Load existing instances from disk
|
||||||
@@ -238,18 +271,36 @@ func (im *instanceManager) loadInstance(name, path string) error {
|
|||||||
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
|
return fmt.Errorf("instance name mismatch: file=%s, instance.Name=%s", name, persistedInstance.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
statusCallback := func(oldStatus, newStatus instance.InstanceStatus) {
|
options := persistedInstance.GetOptions()
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
isRemote := options != nil && len(options.Nodes) > 0
|
||||||
|
|
||||||
|
var statusCallback func(oldStatus, newStatus instance.InstanceStatus)
|
||||||
|
if !isRemote {
|
||||||
|
// Only set status callback for local instances
|
||||||
|
statusCallback = func(oldStatus, newStatus instance.InstanceStatus) {
|
||||||
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
|
im.onStatusChange(persistedInstance.Name, oldStatus, newStatus)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create new inst using NewInstance (handles validation, defaults, setup)
|
// Create new inst using NewInstance (handles validation, defaults, setup)
|
||||||
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, persistedInstance.GetOptions(), statusCallback)
|
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, statusCallback)
|
||||||
|
|
||||||
// Restore persisted fields that NewInstance doesn't set
|
// Restore persisted fields that NewInstance doesn't set
|
||||||
inst.Created = persistedInstance.Created
|
inst.Created = persistedInstance.Created
|
||||||
inst.SetStatus(persistedInstance.Status)
|
inst.SetStatus(persistedInstance.Status)
|
||||||
|
|
||||||
// Check for port conflicts and add to maps
|
// Handle remote instance mapping
|
||||||
|
if isRemote {
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
nodeConfig, exists := im.nodeConfigMap[nodeName]
|
||||||
|
if !exists {
|
||||||
|
return fmt.Errorf("node %s not found for remote instance %s", nodeName, name)
|
||||||
|
}
|
||||||
|
im.instanceNodeMap[name] = nodeConfig
|
||||||
|
} else {
|
||||||
|
// Check for port conflicts only for local instances
|
||||||
if inst.GetPort() > 0 {
|
if inst.GetPort() > 0 {
|
||||||
port := inst.GetPort()
|
port := inst.GetPort()
|
||||||
if im.ports[port] {
|
if im.ports[port] {
|
||||||
@@ -257,6 +308,7 @@ func (im *instanceManager) loadInstance(name, path string) error {
|
|||||||
}
|
}
|
||||||
im.ports[port] = true
|
im.ports[port] = true
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
im.instances[name] = inst
|
im.instances[name] = inst
|
||||||
return nil
|
return nil
|
||||||
@@ -293,10 +345,20 @@ func (im *instanceManager) autoStartInstances() {
|
|||||||
log.Printf("Auto-starting instance %s", inst.Name)
|
log.Printf("Auto-starting instance %s", inst.Name)
|
||||||
// Reset running state before starting (since Start() expects stopped instance)
|
// Reset running state before starting (since Start() expects stopped instance)
|
||||||
inst.SetStatus(instance.Stopped)
|
inst.SetStatus(instance.Stopped)
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
// Remote instance - use StartRemoteInstance
|
||||||
|
if _, err := im.StartRemoteInstance(node, inst.Name); err != nil {
|
||||||
|
log.Printf("Failed to auto-start remote instance %s: %v", inst.Name, err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Local instance - call Start() directly
|
||||||
if err := inst.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
|
log.Printf("Failed to auto-start instance %s: %v", inst.Name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
|
func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus instance.InstanceStatus) {
|
||||||
@@ -309,3 +371,18 @@ func (im *instanceManager) onStatusChange(name string, oldStatus, newStatus inst
|
|||||||
delete(im.runningInstances, name)
|
delete(im.runningInstances, name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getNodeForInstance returns the node configuration for a remote instance
|
||||||
|
// Returns nil if the instance is not remote or the node is not found
|
||||||
|
func (im *instanceManager) getNodeForInstance(inst *instance.Process) *config.NodeConfig {
|
||||||
|
if !inst.IsRemote() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if we have a cached mapping
|
||||||
|
if nodeConfig, exists := im.instanceNodeMap[inst.Name]; exists {
|
||||||
|
return nodeConfig
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ func TestNewInstanceManager(t *testing.T) {
|
|||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
mgr := manager.NewInstanceManager(backendConfig, cfg)
|
mgr := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
if mgr == nil {
|
if mgr == nil {
|
||||||
t.Fatal("NewInstanceManager returned nil")
|
t.Fatal("NewInstanceManager returned nil")
|
||||||
}
|
}
|
||||||
@@ -69,7 +69,7 @@ func TestPersistence(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test instance persistence on creation
|
// Test instance persistence on creation
|
||||||
manager1 := manager.NewInstanceManager(backendConfig, cfg)
|
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
options := &instance.CreateInstanceOptions{
|
options := &instance.CreateInstanceOptions{
|
||||||
BackendType: backends.BackendTypeLlamaCpp,
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
LlamaServerOptions: &llamacpp.LlamaServerOptions{
|
||||||
@@ -90,7 +90,7 @@ func TestPersistence(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Test loading instances from disk
|
// Test loading instances from disk
|
||||||
manager2 := manager.NewInstanceManager(backendConfig, cfg)
|
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
instances, err := manager2.ListInstances()
|
instances, err := manager2.ListInstances()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("ListInstances failed: %v", err)
|
t.Fatalf("ListInstances failed: %v", err)
|
||||||
@@ -207,7 +207,7 @@ func createTestManager() manager.InstanceManager {
|
|||||||
DefaultRestartDelay: 5,
|
DefaultRestartDelay: 5,
|
||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
return manager.NewInstanceManager(backendConfig, cfg)
|
return manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
||||||
@@ -227,7 +227,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Create first manager and instance with auto-restart disabled
|
// Create first manager and instance with auto-restart disabled
|
||||||
manager1 := manager.NewInstanceManager(backendConfig, cfg)
|
manager1 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
|
|
||||||
autoRestart := false
|
autoRestart := false
|
||||||
options := &instance.CreateInstanceOptions{
|
options := &instance.CreateInstanceOptions{
|
||||||
@@ -252,7 +252,7 @@ func TestAutoRestartDisabledInstanceStatus(t *testing.T) {
|
|||||||
manager1.Shutdown()
|
manager1.Shutdown()
|
||||||
|
|
||||||
// Create second manager (simulating restart of llamactl)
|
// Create second manager (simulating restart of llamactl)
|
||||||
manager2 := manager.NewInstanceManager(backendConfig, cfg)
|
manager2 := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
|
|
||||||
// Get the loaded instance
|
// Get the loaded instance
|
||||||
loadedInst, err := manager2.GetInstance("test-instance")
|
loadedInst, err := manager2.GetInstance("test-instance")
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package manager
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"llamactl/pkg/backends"
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
"llamactl/pkg/instance"
|
||||||
"llamactl/pkg/validation"
|
"llamactl/pkg/validation"
|
||||||
"os"
|
"os"
|
||||||
@@ -11,16 +12,65 @@ import (
|
|||||||
|
|
||||||
type MaxRunningInstancesError error
|
type MaxRunningInstancesError error
|
||||||
|
|
||||||
|
// updateLocalInstanceFromRemote updates the local stub instance with data from the remote instance
|
||||||
|
// while preserving the Nodes field to maintain remote instance tracking
|
||||||
|
func (im *instanceManager) updateLocalInstanceFromRemote(localInst *instance.Process, remoteInst *instance.Process) {
|
||||||
|
if localInst == nil || remoteInst == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the remote instance options
|
||||||
|
remoteOptions := remoteInst.GetOptions()
|
||||||
|
if remoteOptions == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Preserve the Nodes field from the local instance
|
||||||
|
localOptions := localInst.GetOptions()
|
||||||
|
var preservedNodes []string
|
||||||
|
if localOptions != nil && len(localOptions.Nodes) > 0 {
|
||||||
|
preservedNodes = make([]string, len(localOptions.Nodes))
|
||||||
|
copy(preservedNodes, localOptions.Nodes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a copy of remote options and restore the Nodes field
|
||||||
|
updatedOptions := *remoteOptions
|
||||||
|
updatedOptions.Nodes = preservedNodes
|
||||||
|
|
||||||
|
// Update the local instance with all remote data
|
||||||
|
localInst.SetOptions(&updatedOptions)
|
||||||
|
localInst.Status = remoteInst.Status
|
||||||
|
localInst.Created = remoteInst.Created
|
||||||
|
}
|
||||||
|
|
||||||
// ListInstances returns a list of all instances managed by the instance manager.
|
// ListInstances returns a list of all instances managed by the instance manager.
|
||||||
|
// For remote instances, this fetches the live state from remote nodes and updates local stubs.
|
||||||
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
|
func (im *instanceManager) ListInstances() ([]*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
defer im.mu.RUnlock()
|
localInstances := make([]*instance.Process, 0, len(im.instances))
|
||||||
|
|
||||||
instances := make([]*instance.Process, 0, len(im.instances))
|
|
||||||
for _, inst := range im.instances {
|
for _, inst := range im.instances {
|
||||||
instances = append(instances, inst)
|
localInstances = append(localInstances, inst)
|
||||||
}
|
}
|
||||||
return instances, nil
|
im.mu.RUnlock()
|
||||||
|
|
||||||
|
// Update remote instances with live state
|
||||||
|
for _, inst := range localInstances {
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.GetRemoteInstance(node, inst.Name)
|
||||||
|
if err != nil {
|
||||||
|
// Log error but continue with stale data
|
||||||
|
// Don't fail the entire list operation due to one remote failure
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return localInstances, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// CreateInstance creates a new instance with the given options and returns it.
|
// CreateInstance creates a new instance with the given options and returns it.
|
||||||
@@ -43,16 +93,56 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
// Check max instances limit after acquiring the lock
|
// Check if instance with this name already exists (must be globally unique)
|
||||||
if len(im.instances) >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
|
||||||
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if instance with this name already exists
|
|
||||||
if im.instances[name] != nil {
|
if im.instances[name] != nil {
|
||||||
return nil, fmt.Errorf("instance with name %s already exists", name)
|
return nil, fmt.Errorf("instance with name %s already exists", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
isRemote := len(options.Nodes) > 0
|
||||||
|
var nodeConfig *config.NodeConfig
|
||||||
|
|
||||||
|
if isRemote {
|
||||||
|
// Validate that the node exists
|
||||||
|
nodeName := options.Nodes[0] // Use first node for now
|
||||||
|
var exists bool
|
||||||
|
nodeConfig, exists = im.nodeConfigMap[nodeName]
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("node %s not found", nodeName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create the remote instance on the remote node
|
||||||
|
remoteInst, err := im.CreateRemoteInstance(nodeConfig, name, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a local stub that preserves the Nodes field for tracking
|
||||||
|
// We keep the original options (with Nodes) so IsRemote() works correctly
|
||||||
|
inst := instance.NewInstance(name, &im.backendsConfig, &im.instancesConfig, options, nil)
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
|
||||||
|
// Add to local tracking maps (but don't count towards limits)
|
||||||
|
im.instances[name] = inst
|
||||||
|
im.instanceNodeMap[name] = nodeConfig
|
||||||
|
|
||||||
|
// Persist the remote instance locally for tracking across restarts
|
||||||
|
if err := im.persistInstance(inst); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to persist remote instance %s: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Local instance creation
|
||||||
|
// Check max instances limit for local instances only
|
||||||
|
localInstanceCount := len(im.instances) - len(im.instanceNodeMap)
|
||||||
|
if localInstanceCount >= im.instancesConfig.MaxInstances && im.instancesConfig.MaxInstances != -1 {
|
||||||
|
return nil, fmt.Errorf("maximum number of instances (%d) reached", im.instancesConfig.MaxInstances)
|
||||||
|
}
|
||||||
|
|
||||||
// Assign and validate port for backend-specific options
|
// Assign and validate port for backend-specific options
|
||||||
if err := im.assignAndValidatePort(options); err != nil {
|
if err := im.assignAndValidatePort(options); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -73,28 +163,68 @@ func (im *instanceManager) CreateInstance(name string, options *instance.CreateI
|
|||||||
}
|
}
|
||||||
|
|
||||||
// GetInstance retrieves an instance by its name.
|
// GetInstance retrieves an instance by its name.
|
||||||
|
// For remote instances, this fetches the live state from the remote node and updates the local stub.
|
||||||
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) GetInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
defer im.mu.RUnlock()
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
instance, exists := im.instances[name]
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
return instance, nil
|
|
||||||
|
// Check if instance is remote and fetch live state
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.GetRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
// Return the local stub (preserving Nodes field)
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateInstance updates the options of an existing instance and returns it.
|
// UpdateInstance updates the options of an existing instance and returns it.
|
||||||
// If the instance is running, it will be restarted to apply the new options.
|
// If the instance is running, it will be restarted to apply the new options.
|
||||||
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
func (im *instanceManager) UpdateInstance(name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.UpdateRemoteInstance(node, name, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
// Persist the updated remote instance locally
|
||||||
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
if err := im.persistInstance(inst); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to persist updated remote instance %s: %w", name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
if options == nil {
|
if options == nil {
|
||||||
return nil, fmt.Errorf("instance options cannot be nil")
|
return nil, fmt.Errorf("instance options cannot be nil")
|
||||||
}
|
}
|
||||||
@@ -105,55 +235,90 @@ func (im *instanceManager) UpdateInstance(name string, options *instance.CreateI
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check if instance is running before updating options
|
// Check if instance is running before updating options
|
||||||
wasRunning := instance.IsRunning()
|
wasRunning := inst.IsRunning()
|
||||||
|
|
||||||
// If the instance is running, stop it first
|
// If the instance is running, stop it first
|
||||||
if wasRunning {
|
if wasRunning {
|
||||||
if err := instance.Stop(); err != nil {
|
if err := inst.Stop(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
|
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now update the options while the instance is stopped
|
// Now update the options while the instance is stopped
|
||||||
instance.SetOptions(options)
|
inst.SetOptions(options)
|
||||||
|
|
||||||
// If it was running before, start it again with the new options
|
// If it was running before, start it again with the new options
|
||||||
if wasRunning {
|
if wasRunning {
|
||||||
if err := instance.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
|
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
if err := im.persistInstance(instance); err != nil {
|
if err := im.persistInstance(inst); err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist updated instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// DeleteInstance removes stopped instance by its name.
|
// DeleteInstance removes stopped instance by its name.
|
||||||
func (im *instanceManager) DeleteInstance(name string) error {
|
func (im *instanceManager) DeleteInstance(name string) error {
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
instance, exists := im.instances[name]
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return fmt.Errorf("instance with name %s not found", name)
|
return fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
if instance.IsRunning() {
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
err := im.DeleteRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean up local tracking
|
||||||
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
delete(im.instances, name)
|
||||||
|
delete(im.instanceNodeMap, name)
|
||||||
|
|
||||||
|
// Delete the instance's config file if persistence is enabled
|
||||||
|
// Re-validate instance name for security (defense in depth)
|
||||||
|
validatedName, err := validation.ValidateInstanceName(name)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid instance name for file deletion: %w", err)
|
||||||
|
}
|
||||||
|
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
|
||||||
|
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("failed to delete config file for remote instance %s: %w", validatedName, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if inst.IsRunning() {
|
||||||
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
|
return fmt.Errorf("instance with name %s is still running, stop it before deleting", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
delete(im.ports, instance.GetPort())
|
im.mu.Lock()
|
||||||
|
defer im.mu.Unlock()
|
||||||
|
|
||||||
|
delete(im.ports, inst.GetPort())
|
||||||
delete(im.instances, name)
|
delete(im.instances, name)
|
||||||
|
|
||||||
// Delete the instance's config file if persistence is enabled
|
// Delete the instance's config file if persistence is enabled
|
||||||
instancePath := filepath.Join(im.instancesConfig.InstancesDir, instance.Name+".json")
|
// Re-validate instance name for security (defense in depth)
|
||||||
|
validatedName, err := validation.ValidateInstanceName(inst.Name)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("invalid instance name for file deletion: %w", err)
|
||||||
|
}
|
||||||
|
instancePath := filepath.Join(im.instancesConfig.InstancesDir, validatedName+".json")
|
||||||
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
if err := os.Remove(instancePath); err != nil && !os.IsNotExist(err) {
|
||||||
return fmt.Errorf("failed to delete config file for instance %s: %w", instance.Name, err)
|
return fmt.Errorf("failed to delete config file for instance %s: %w", validatedName, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -163,33 +328,59 @@ func (im *instanceManager) DeleteInstance(name string) error {
|
|||||||
// If the instance is already running, it returns an error.
|
// If the instance is already running, it returns an error.
|
||||||
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) StartInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
maxRunningExceeded := len(im.runningInstances) >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
|
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
if instance.IsRunning() {
|
|
||||||
return instance, fmt.Errorf("instance with name %s is already running", name)
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.StartRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if inst.IsRunning() {
|
||||||
|
return inst, fmt.Errorf("instance with name %s is already running", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check max running instances limit for local instances only
|
||||||
|
im.mu.RLock()
|
||||||
|
localRunningCount := 0
|
||||||
|
for instName := range im.runningInstances {
|
||||||
|
if _, isRemote := im.instanceNodeMap[instName]; !isRemote {
|
||||||
|
localRunningCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
maxRunningExceeded := localRunningCount >= im.instancesConfig.MaxRunningInstances && im.instancesConfig.MaxRunningInstances != -1
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if maxRunningExceeded {
|
if maxRunningExceeded {
|
||||||
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
|
return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.instancesConfig.MaxRunningInstances))
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := instance.Start(); err != nil {
|
if err := inst.Start(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to start instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
err := im.persistInstance(instance)
|
err := im.persistInstance(inst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
||||||
@@ -206,51 +397,95 @@ func (im *instanceManager) IsMaxRunningInstancesReached() bool {
|
|||||||
// StopInstance stops a running instance and returns it.
|
// StopInstance stops a running instance and returns it.
|
||||||
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) StopInstance(name string) (*instance.Process, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
instance, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return nil, fmt.Errorf("instance with name %s not found", name)
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
if !instance.IsRunning() {
|
|
||||||
return instance, fmt.Errorf("instance with name %s is already stopped", name)
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.StopRemoteInstance(node, name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := instance.Stop(); err != nil {
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
return inst, fmt.Errorf("instance with name %s is already stopped", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := inst.Stop(); err != nil {
|
||||||
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to stop instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
im.mu.Lock()
|
im.mu.Lock()
|
||||||
defer im.mu.Unlock()
|
defer im.mu.Unlock()
|
||||||
err := im.persistInstance(instance)
|
err := im.persistInstance(inst)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
return nil, fmt.Errorf("failed to persist instance %s: %w", name, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return instance, nil
|
return inst, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RestartInstance stops and then starts an instance, returning the updated instance.
|
// RestartInstance stops and then starts an instance, returning the updated instance.
|
||||||
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
|
func (im *instanceManager) RestartInstance(name string) (*instance.Process, error) {
|
||||||
instance, err := im.StopInstance(name)
|
im.mu.RLock()
|
||||||
|
inst, exists := im.instances[name]
|
||||||
|
im.mu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
return nil, fmt.Errorf("instance with name %s not found", name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if instance is remote and delegate to remote operation
|
||||||
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
remoteInst, err := im.RestartRemoteInstance(node, name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return im.StartInstance(instance.Name)
|
|
||||||
|
// Update the local stub with all remote data (preserving Nodes)
|
||||||
|
im.mu.Lock()
|
||||||
|
im.updateLocalInstanceFromRemote(inst, remoteInst)
|
||||||
|
im.mu.Unlock()
|
||||||
|
|
||||||
|
return inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := im.StopInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return im.StartInstance(inst.Name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetInstanceLogs retrieves the logs for a specific instance by its name.
|
// GetInstanceLogs retrieves the logs for a specific instance by its name.
|
||||||
func (im *instanceManager) GetInstanceLogs(name string) (string, error) {
|
func (im *instanceManager) GetInstanceLogs(name string, numLines int) (string, error) {
|
||||||
im.mu.RLock()
|
im.mu.RLock()
|
||||||
_, exists := im.instances[name]
|
inst, exists := im.instances[name]
|
||||||
im.mu.RUnlock()
|
im.mu.RUnlock()
|
||||||
|
|
||||||
if !exists {
|
if !exists {
|
||||||
return "", fmt.Errorf("instance with name %s not found", name)
|
return "", fmt.Errorf("instance with name %s not found", name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Implement actual log retrieval logic
|
// Check if instance is remote and delegate to remote operation
|
||||||
return fmt.Sprintf("Logs for instance %s", name), nil
|
if node := im.getNodeForInstance(inst); node != nil {
|
||||||
|
return im.GetRemoteInstanceLogs(node, name, numLines)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get logs from the local instance
|
||||||
|
return inst.GetLogs(numLines)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getPortFromOptions extracts the port from backend-specific options
|
// getPortFromOptions extracts the port from backend-specific options
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ func TestCreateInstance_ValidationAndLimits(t *testing.T) {
|
|||||||
MaxInstances: 1, // Very low limit for testing
|
MaxInstances: 1, // Very low limit for testing
|
||||||
TimeoutCheckInterval: 5,
|
TimeoutCheckInterval: 5,
|
||||||
}
|
}
|
||||||
limitedManager := manager.NewInstanceManager(backendConfig, cfg)
|
limitedManager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
|
|
||||||
_, err = limitedManager.CreateInstance("instance1", options)
|
_, err = limitedManager.CreateInstance("instance1", options)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
243
pkg/manager/remote_ops.go
Normal file
243
pkg/manager/remote_ops.go
Normal file
@@ -0,0 +1,243 @@
|
|||||||
|
package manager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"llamactl/pkg/config"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// stripNodesFromOptions creates a copy of the instance options without the Nodes field
|
||||||
|
// to prevent routing loops when sending requests to remote nodes
|
||||||
|
func (im *instanceManager) stripNodesFromOptions(options *instance.CreateInstanceOptions) *instance.CreateInstanceOptions {
|
||||||
|
if options == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a copy of the options struct
|
||||||
|
optionsCopy := *options
|
||||||
|
|
||||||
|
// Clear the Nodes field to prevent the remote node from trying to route further
|
||||||
|
optionsCopy.Nodes = nil
|
||||||
|
|
||||||
|
return &optionsCopy
|
||||||
|
}
|
||||||
|
|
||||||
|
// makeRemoteRequest is a helper function to make HTTP requests to a remote node
|
||||||
|
func (im *instanceManager) makeRemoteRequest(nodeConfig *config.NodeConfig, method, path string, body any) (*http.Response, error) {
|
||||||
|
var reqBody io.Reader
|
||||||
|
if body != nil {
|
||||||
|
// Strip nodes from CreateInstanceOptions to prevent routing loops
|
||||||
|
if options, ok := body.(*instance.CreateInstanceOptions); ok {
|
||||||
|
body = im.stripNodesFromOptions(options)
|
||||||
|
}
|
||||||
|
|
||||||
|
jsonData, err := json.Marshal(body)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to marshal request body: %w", err)
|
||||||
|
}
|
||||||
|
reqBody = bytes.NewBuffer(jsonData)
|
||||||
|
}
|
||||||
|
|
||||||
|
url := fmt.Sprintf("%s%s", nodeConfig.Address, path)
|
||||||
|
req, err := http.NewRequest(method, url, reqBody)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if body != nil {
|
||||||
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
}
|
||||||
|
|
||||||
|
if nodeConfig.APIKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", nodeConfig.APIKey))
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := im.httpClient.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to execute request: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// parseRemoteResponse is a helper function to parse API responses
|
||||||
|
func parseRemoteResponse(resp *http.Response, result any) error {
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
return fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
if result != nil {
|
||||||
|
if err := json.Unmarshal(body, result); err != nil {
|
||||||
|
return fmt.Errorf("failed to unmarshal response: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ListRemoteInstances lists all instances on the remote node
|
||||||
|
func (im *instanceManager) ListRemoteInstances(nodeConfig *config.NodeConfig) ([]*instance.Process, error) {
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", "/api/v1/instances/", nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var instances []*instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &instances); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return instances, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateRemoteInstance creates a new instance on the remote node
|
||||||
|
func (im *instanceManager) CreateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRemoteInstance retrieves an instance by name from the remote node
|
||||||
|
func (im *instanceManager) GetRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateRemoteInstance updates an existing instance on the remote node
|
||||||
|
func (im *instanceManager) UpdateRemoteInstance(nodeConfig *config.NodeConfig, name string, options *instance.CreateInstanceOptions) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "PUT", path, options)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteRemoteInstance deletes an instance from the remote node
|
||||||
|
func (im *instanceManager) DeleteRemoteInstance(nodeConfig *config.NodeConfig, name string) error {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "DELETE", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
return parseRemoteResponse(resp, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartRemoteInstance starts an instance on the remote node
|
||||||
|
func (im *instanceManager) StartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/start", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopRemoteInstance stops an instance on the remote node
|
||||||
|
func (im *instanceManager) StopRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/stop", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RestartRemoteInstance restarts an instance on the remote node
|
||||||
|
func (im *instanceManager) RestartRemoteInstance(nodeConfig *config.NodeConfig, name string) (*instance.Process, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/restart", name)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "POST", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var inst instance.Process
|
||||||
|
if err := parseRemoteResponse(resp, &inst); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return &inst, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetRemoteInstanceLogs retrieves logs for an instance from the remote node
|
||||||
|
func (im *instanceManager) GetRemoteInstanceLogs(nodeConfig *config.NodeConfig, name string, numLines int) (string, error) {
|
||||||
|
path := fmt.Sprintf("/api/v1/instances/%s/logs?lines=%d", name, numLines)
|
||||||
|
resp, err := im.makeRemoteRequest(nodeConfig, "GET", path, nil)
|
||||||
|
if err != nil {
|
||||||
|
return "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return "", fmt.Errorf("failed to read response body: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
|
||||||
|
return "", fmt.Errorf("API request failed with status %d: %s", resp.StatusCode, string(body))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Logs endpoint might return plain text or JSON
|
||||||
|
// Try to parse as JSON first (in case it's wrapped in a response object)
|
||||||
|
var logResponse struct {
|
||||||
|
Logs string `json:"logs"`
|
||||||
|
}
|
||||||
|
if err := json.Unmarshal(body, &logResponse); err == nil && logResponse.Logs != "" {
|
||||||
|
return logResponse.Logs, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, return as plain text
|
||||||
|
return string(body), nil
|
||||||
|
}
|
||||||
39
pkg/manager/remote_ops_test.go
Normal file
39
pkg/manager/remote_ops_test.go
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
package manager
|
||||||
|
|
||||||
|
import (
|
||||||
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestStripNodesFromOptions(t *testing.T) {
|
||||||
|
im := &instanceManager{}
|
||||||
|
|
||||||
|
// Test nil case
|
||||||
|
if result := im.stripNodesFromOptions(nil); result != nil {
|
||||||
|
t.Errorf("Expected nil, got %+v", result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test main case: nodes should be stripped, other fields preserved
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
Nodes: []string{"node1", "node2"},
|
||||||
|
Environment: map[string]string{"TEST": "value"},
|
||||||
|
}
|
||||||
|
|
||||||
|
result := im.stripNodesFromOptions(options)
|
||||||
|
|
||||||
|
if result.Nodes != nil {
|
||||||
|
t.Errorf("Expected Nodes to be nil, got %+v", result.Nodes)
|
||||||
|
}
|
||||||
|
if result.BackendType != backends.BackendTypeLlamaCpp {
|
||||||
|
t.Errorf("Expected BackendType preserved")
|
||||||
|
}
|
||||||
|
if result.Environment["TEST"] != "value" {
|
||||||
|
t.Errorf("Expected Environment preserved")
|
||||||
|
}
|
||||||
|
// Original should not be modified
|
||||||
|
if len(options.Nodes) != 2 {
|
||||||
|
t.Errorf("Original options should not be modified")
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -12,6 +12,11 @@ func (im *instanceManager) checkAllTimeouts() {
|
|||||||
|
|
||||||
// Identify instances that should timeout
|
// Identify instances that should timeout
|
||||||
for _, inst := range im.instances {
|
for _, inst := range im.instances {
|
||||||
|
// Skip remote instances - they are managed by their respective nodes
|
||||||
|
if inst.IsRemote() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if inst.ShouldTimeout() {
|
if inst.ShouldTimeout() {
|
||||||
timeoutInstances = append(timeoutInstances, inst.Name)
|
timeoutInstances = append(timeoutInstances, inst.Name)
|
||||||
}
|
}
|
||||||
@@ -40,6 +45,11 @@ func (im *instanceManager) EvictLRUInstance() error {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Skip remote instances - they are managed by their respective nodes
|
||||||
|
if inst.IsRemote() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
|
if inst.GetOptions() != nil && inst.GetOptions().IdleTimeout != nil && *inst.GetOptions().IdleTimeout <= 0 {
|
||||||
continue // Skip instances without idle timeout
|
continue // Skip instances without idle timeout
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ func TestTimeoutFunctionality(t *testing.T) {
|
|||||||
MaxInstances: 5,
|
MaxInstances: 5,
|
||||||
}
|
}
|
||||||
|
|
||||||
manager := manager.NewInstanceManager(backendConfig, cfg)
|
manager := manager.NewInstanceManager(backendConfig, cfg, map[string]config.NodeConfig{})
|
||||||
if manager == nil {
|
if manager == nil {
|
||||||
t.Fatal("Manager should be initialized with timeout checker")
|
t.Fatal("Manager should be initialized with timeout checker")
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,865 +1,29 @@
|
|||||||
package server
|
package server
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"encoding/json"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"llamactl/pkg/backends"
|
|
||||||
"llamactl/pkg/backends/llamacpp"
|
|
||||||
"llamactl/pkg/backends/mlx"
|
|
||||||
"llamactl/pkg/backends/vllm"
|
|
||||||
"llamactl/pkg/config"
|
"llamactl/pkg/config"
|
||||||
"llamactl/pkg/instance"
|
|
||||||
"llamactl/pkg/manager"
|
"llamactl/pkg/manager"
|
||||||
"net/http"
|
"net/http"
|
||||||
"os/exec"
|
"net/http/httputil"
|
||||||
"strconv"
|
"sync"
|
||||||
"strings"
|
"time"
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Handler struct {
|
type Handler struct {
|
||||||
InstanceManager manager.InstanceManager
|
InstanceManager manager.InstanceManager
|
||||||
cfg config.AppConfig
|
cfg config.AppConfig
|
||||||
|
httpClient *http.Client
|
||||||
|
remoteProxies map[string]*httputil.ReverseProxy // Cache of remote proxies by instance name
|
||||||
|
remoteProxiesMu sync.RWMutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
|
func NewHandler(im manager.InstanceManager, cfg config.AppConfig) *Handler {
|
||||||
return &Handler{
|
return &Handler{
|
||||||
InstanceManager: im,
|
InstanceManager: im,
|
||||||
cfg: cfg,
|
cfg: cfg,
|
||||||
}
|
httpClient: &http.Client{
|
||||||
}
|
Timeout: 30 * time.Second,
|
||||||
|
},
|
||||||
// VersionHandler godoc
|
remoteProxies: make(map[string]*httputil.ReverseProxy),
|
||||||
// @Summary Get llamactl version
|
|
||||||
// @Description Returns the version of the llamactl command
|
|
||||||
// @Tags version
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Version information"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /version [get]
|
|
||||||
func (h *Handler) VersionHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerHelpHandler godoc
|
|
||||||
// @Summary Get help for llama server
|
|
||||||
// @Description Returns the help text for the llama server command
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Help text"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/help [get]
|
|
||||||
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
helpCmd := exec.Command("llama-server", "--help")
|
|
||||||
output, err := helpCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerVersionHandler godoc
|
|
||||||
// @Summary Get version of llama server
|
|
||||||
// @Description Returns the version of the llama server command
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Version information"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/version [get]
|
|
||||||
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
versionCmd := exec.Command("llama-server", "--version")
|
|
||||||
output, err := versionCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// LlamaServerListDevicesHandler godoc
|
|
||||||
// @Summary List available devices for llama server
|
|
||||||
// @Description Returns a list of available devices for the llama server
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "List of devices"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/devices [get]
|
|
||||||
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
listCmd := exec.Command("llama-server", "--list-devices")
|
|
||||||
output, err := listCmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write(output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ListInstances godoc
|
|
||||||
// @Summary List all instances
|
|
||||||
// @Description Returns a list of all instances managed by the server
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 {array} instance.Process "List of instances"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances [get]
|
|
||||||
func (h *Handler) ListInstances() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
instances, err := h.InstanceManager.ListInstances()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Marshal to bytes first to set Content-Length header
|
|
||||||
data, err := json.Marshal(instances)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.Header().Set("Content-Length", strconv.Itoa(len(data)))
|
|
||||||
w.Write(data)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateInstance godoc
|
|
||||||
// @Summary Create and start a new instance
|
|
||||||
// @Description Creates a new instance with the provided configuration options
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
|
||||||
// @Success 201 {object} instance.Process "Created instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid request body"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [post]
|
|
||||||
func (h *Handler) CreateInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var options instance.CreateInstanceOptions
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.CreateInstance(name, &options)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(http.StatusCreated)
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetInstance godoc
|
|
||||||
// @Summary Get details of a specific instance
|
|
||||||
// @Description Returns the details of a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [get]
|
|
||||||
func (h *Handler) GetInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// UpdateInstance godoc
|
|
||||||
// @Summary Update an instance's configuration
|
|
||||||
// @Description Updates the configuration of a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
|
||||||
// @Success 200 {object} instance.Process "Updated instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [put]
|
|
||||||
func (h *Handler) UpdateInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
var options instance.CreateInstanceOptions
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.UpdateInstance(name, &options)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StartInstance godoc
|
|
||||||
// @Summary Start a stopped instance
|
|
||||||
// @Description Starts a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Started instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/start [post]
|
|
||||||
func (h *Handler) StartInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.StartInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
// Check if error is due to maximum running instances limit
|
|
||||||
if _, ok := err.(manager.MaxRunningInstancesError); ok {
|
|
||||||
http.Error(w, err.Error(), http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// StopInstance godoc
|
|
||||||
// @Summary Stop a running instance
|
|
||||||
// @Description Stops a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Stopped instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/stop [post]
|
|
||||||
func (h *Handler) StopInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.StopInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RestartInstance godoc
|
|
||||||
// @Summary Restart a running instance
|
|
||||||
// @Description Restarts a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 {object} instance.Process "Restarted instance details"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/restart [post]
|
|
||||||
func (h *Handler) RestartInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.RestartInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// DeleteInstance godoc
|
|
||||||
// @Summary Delete an instance
|
|
||||||
// @Description Stops and removes a specific instance by name
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 204 "No Content"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name} [delete]
|
|
||||||
func (h *Handler) DeleteInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := h.InstanceManager.DeleteInstance(name); err != nil {
|
|
||||||
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.WriteHeader(http.StatusNoContent)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetInstanceLogs godoc
|
|
||||||
// @Summary Get logs from a specific instance
|
|
||||||
// @Description Returns the logs from a specific instance by name with optional line limit
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
|
|
||||||
// @Produces text/plain
|
|
||||||
// @Success 200 {string} string "Instance logs"
|
|
||||||
// @Failure 400 {string} string "Invalid name format or lines parameter"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /instances/{name}/logs [get]
|
|
||||||
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
lines := r.URL.Query().Get("lines")
|
|
||||||
if lines == "" {
|
|
||||||
lines = "-1"
|
|
||||||
}
|
|
||||||
|
|
||||||
num_lines, err := strconv.Atoi(lines)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
logs, err := inst.GetLogs(num_lines)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "text/plain")
|
|
||||||
w.Write([]byte(logs))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ProxyToInstance godoc
|
|
||||||
// @Summary Proxy requests to a specific instance
|
|
||||||
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
|
|
||||||
// @Tags instances
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Param name path string true "Instance Name"
|
|
||||||
// @Success 200 "Request successfully proxied to instance"
|
|
||||||
// @Failure 400 {string} string "Invalid name format"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Failure 503 {string} string "Instance is not running"
|
|
||||||
// @Router /instances/{name}/proxy [get]
|
|
||||||
// @Router /instances/{name}/proxy [post]
|
|
||||||
func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the cached proxy for this instance
|
|
||||||
proxy, err := inst.GetProxy()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
|
|
||||||
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
|
|
||||||
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
|
||||||
|
|
||||||
// Update the last request time for the instance
|
|
||||||
inst.UpdateLastRequestTime()
|
|
||||||
|
|
||||||
// Set forwarded headers
|
|
||||||
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
|
|
||||||
r.Header.Set("X-Forwarded-Proto", "http")
|
|
||||||
|
|
||||||
// Forward the request using the cached proxy
|
|
||||||
proxy.ServeHTTP(w, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAIListInstances godoc
|
|
||||||
// @Summary List instances in OpenAI-compatible format
|
|
||||||
// @Description Returns a list of instances in a format compatible with OpenAI API
|
|
||||||
// @Tags openai
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /v1/models [get]
|
|
||||||
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
instances, err := h.InstanceManager.ListInstances()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
openaiInstances := make([]OpenAIInstance, len(instances))
|
|
||||||
for i, inst := range instances {
|
|
||||||
openaiInstances[i] = OpenAIInstance{
|
|
||||||
ID: inst.Name,
|
|
||||||
Object: "model",
|
|
||||||
Created: inst.Created,
|
|
||||||
OwnedBy: "llamactl",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
openaiResponse := OpenAIListInstancesResponse{
|
|
||||||
Object: "list",
|
|
||||||
Data: openaiInstances,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
|
|
||||||
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// OpenAIProxy godoc
|
|
||||||
// @Summary OpenAI-compatible proxy endpoint
|
|
||||||
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
|
|
||||||
// @Tags openai
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produces json
|
|
||||||
// @Success 200 "OpenAI response"
|
|
||||||
// @Failure 400 {string} string "Invalid request body or instance name"
|
|
||||||
// @Failure 500 {string} string "Internal Server Error"
|
|
||||||
// @Router /v1/ [post]
|
|
||||||
func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
// Read the entire body first
|
|
||||||
bodyBytes, err := io.ReadAll(r.Body)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to read request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
r.Body.Close()
|
|
||||||
|
|
||||||
// Parse the body to extract instance name
|
|
||||||
var requestBody map[string]any
|
|
||||||
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
|
|
||||||
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
modelName, ok := requestBody["model"].(string)
|
|
||||||
if !ok || modelName == "" {
|
|
||||||
http.Error(w, "Instance name is required", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Route to the appropriate inst based on instance name
|
|
||||||
inst, err := h.InstanceManager.GetInstance(modelName)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
|
||||||
options := inst.GetOptions()
|
|
||||||
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
|
|
||||||
if !allowOnDemand {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
|
||||||
if h.cfg.Instances.EnableLRUEviction {
|
|
||||||
err := h.InstanceManager.EvictLRUInstance()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If on-demand start is enabled, start the instance
|
|
||||||
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy, err := inst.GetProxy()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Update last request time for the instance
|
|
||||||
inst.UpdateLastRequestTime()
|
|
||||||
|
|
||||||
// Recreate the request body from the bytes we read
|
|
||||||
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
|
||||||
r.ContentLength = int64(len(bodyBytes))
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
|
|
||||||
// Get the instance name from the URL parameter
|
|
||||||
name := chi.URLParam(r, "name")
|
|
||||||
if name == "" {
|
|
||||||
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Route to the appropriate inst based on instance name
|
|
||||||
inst, err := h.InstanceManager.GetInstance(name)
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
options := inst.GetOptions()
|
|
||||||
if options == nil {
|
|
||||||
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if options.BackendType != backends.BackendTypeLlamaCpp {
|
|
||||||
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if !inst.IsRunning() {
|
|
||||||
|
|
||||||
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
|
|
||||||
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
|
||||||
if h.cfg.Instances.EnableLRUEviction {
|
|
||||||
err := h.InstanceManager.EvictLRUInstance()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If on-demand start is enabled, start the instance
|
|
||||||
if _, err := h.InstanceManager.StartInstance(name); err != nil {
|
|
||||||
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Wait for the instance to become healthy before proceeding
|
|
||||||
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
|
||||||
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
proxy, err := inst.GetProxy()
|
|
||||||
if err != nil {
|
|
||||||
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strip the "/llama-cpp/<name>" prefix from the request URL
|
|
||||||
prefix := fmt.Sprintf("/llama-cpp/%s", name)
|
|
||||||
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
|
||||||
|
|
||||||
// Update the last request time for the instance
|
|
||||||
inst.UpdateLastRequestTime()
|
|
||||||
|
|
||||||
proxy.ServeHTTP(w, r)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseCommandRequest represents the request body for command parsing.
// It is the JSON payload decoded by the Parse*Command handlers.
type ParseCommandRequest struct {
	// Command is the raw command line to parse; the handlers reject a value
	// that is empty after trimming whitespace.
	Command string `json:"command"`
}
|
|
||||||
|
|
||||||
// ParseLlamaCommand godoc
|
|
||||||
// @Summary Parse llama-server command
|
|
||||||
// @Description Parses a llama-server command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Failure 500 {object} map[string]string "Internal Server Error"
|
|
||||||
// @Router /backends/llama-cpp/parse-command [post]
|
|
||||||
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backends.BackendTypeLlamaCpp,
|
|
||||||
LlamaServerOptions: llamaOptions,
|
|
||||||
}
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseMlxCommand godoc
|
|
||||||
// @Summary Parse mlx_lm.server command
|
|
||||||
// @Description Parses MLX-LM server command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Router /backends/mlx/parse-command [post]
|
|
||||||
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Currently only support mlx_lm backend type
|
|
||||||
backendType := backends.BackendTypeMlxLm
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backendType,
|
|
||||||
MlxServerOptions: mlxOptions,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ParseVllmCommand godoc
|
|
||||||
// @Summary Parse vllm serve command
|
|
||||||
// @Description Parses a vLLM serve command string into instance options
|
|
||||||
// @Tags backends
|
|
||||||
// @Security ApiKeyAuth
|
|
||||||
// @Accept json
|
|
||||||
// @Produce json
|
|
||||||
// @Param request body ParseCommandRequest true "Command to parse"
|
|
||||||
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
|
||||||
// @Failure 400 {object} map[string]string "Invalid request or command"
|
|
||||||
// @Router /backends/vllm/parse-command [post]
|
|
||||||
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
|
|
||||||
type errorResponse struct {
|
|
||||||
Error string `json:"error"`
|
|
||||||
Details string `json:"details,omitempty"`
|
|
||||||
}
|
|
||||||
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
w.WriteHeader(status)
|
|
||||||
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
|
||||||
}
|
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
|
||||||
var req ParseCommandRequest
|
|
||||||
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if strings.TrimSpace(req.Command) == "" {
|
|
||||||
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
|
|
||||||
if err != nil {
|
|
||||||
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
backendType := backends.BackendTypeVllm
|
|
||||||
|
|
||||||
options := &instance.CreateInstanceOptions{
|
|
||||||
BackendType: backendType,
|
|
||||||
VllmServerOptions: vllmOptions,
|
|
||||||
}
|
|
||||||
|
|
||||||
w.Header().Set("Content-Type", "application/json")
|
|
||||||
if err := json.NewEncoder(w).Encode(options); err != nil {
|
|
||||||
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
320
pkg/server/handlers_backends.go
Normal file
320
pkg/server/handlers_backends.go
Normal file
@@ -0,0 +1,320 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"llamactl/pkg/backends"
|
||||||
|
"llamactl/pkg/backends/llamacpp"
|
||||||
|
"llamactl/pkg/backends/mlx"
|
||||||
|
"llamactl/pkg/backends/vllm"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
"os/exec"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ParseCommandRequest represents the request body for command parsing.
// It is the JSON payload decoded by the Parse*Command handlers.
type ParseCommandRequest struct {
	// Command is the raw command line to parse; the handlers reject a value
	// that is empty after trimming whitespace.
	Command string `json:"command"`
}
|
||||||
|
|
||||||
|
func (h *Handler) LlamaCppProxy(onDemandStart bool) http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
|
// Get the instance name from the URL parameter
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Route to the appropriate inst based on instance name
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil {
|
||||||
|
http.Error(w, "Cannot obtain Instance's options", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if options.BackendType != backends.BackendTypeLlamaCpp {
|
||||||
|
http.Error(w, "Instance is not a llama.cpp server.", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
|
||||||
|
if !(onDemandStart && options.OnDemandStart != nil && *options.OnDemandStart) {
|
||||||
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||||
|
if h.cfg.Instances.EnableLRUEviction {
|
||||||
|
err := h.InstanceManager.EvictLRUInstance()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If on-demand start is enabled, start the instance
|
||||||
|
if _, err := h.InstanceManager.StartInstance(name); err != nil {
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the instance to become healthy before proceeding
|
||||||
|
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
||||||
|
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy, err := inst.GetProxy()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip the "/llama-cpp/<name>" prefix from the request URL
|
||||||
|
prefix := fmt.Sprintf("/llama-cpp/%s", name)
|
||||||
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||||
|
|
||||||
|
// Update the last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseLlamaCommand godoc
|
||||||
|
// @Summary Parse llama-server command
|
||||||
|
// @Description Parses a llama-server command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Failure 500 {object} map[string]string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/parse-command [post]
|
||||||
|
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backends.BackendTypeLlamaCpp,
|
||||||
|
LlamaServerOptions: llamaOptions,
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseMlxCommand godoc
|
||||||
|
// @Summary Parse mlx_lm.server command
|
||||||
|
// @Description Parses MLX-LM server command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Router /backends/mlx/parse-command [post]
|
||||||
|
func (h *Handler) ParseMlxCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
mlxOptions, err := mlx.ParseMlxCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Currently only support mlx_lm backend type
|
||||||
|
backendType := backends.BackendTypeMlxLm
|
||||||
|
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backendType,
|
||||||
|
MlxServerOptions: mlxOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ParseVllmCommand godoc
|
||||||
|
// @Summary Parse vllm serve command
|
||||||
|
// @Description Parses a vLLM serve command string into instance options
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produce json
|
||||||
|
// @Param request body ParseCommandRequest true "Command to parse"
|
||||||
|
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
|
||||||
|
// @Failure 400 {object} map[string]string "Invalid request or command"
|
||||||
|
// @Router /backends/vllm/parse-command [post]
|
||||||
|
func (h *Handler) ParseVllmCommand() http.HandlerFunc {
|
||||||
|
type errorResponse struct {
|
||||||
|
Error string `json:"error"`
|
||||||
|
Details string `json:"details,omitempty"`
|
||||||
|
}
|
||||||
|
writeError := func(w http.ResponseWriter, status int, code, details string) {
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(status)
|
||||||
|
_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
|
||||||
|
}
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var req ParseCommandRequest
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if strings.TrimSpace(req.Command) == "" {
|
||||||
|
writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
vllmOptions, err := vllm.ParseVllmCommand(req.Command)
|
||||||
|
if err != nil {
|
||||||
|
writeError(w, http.StatusBadRequest, "parse_error", err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
backendType := backends.BackendTypeVllm
|
||||||
|
|
||||||
|
options := &instance.CreateInstanceOptions{
|
||||||
|
BackendType: backendType,
|
||||||
|
VllmServerOptions: vllmOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(options); err != nil {
|
||||||
|
writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerHelpHandler godoc
|
||||||
|
// @Summary Get help for llama server
|
||||||
|
// @Description Returns the help text for the llama server command
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Help text"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/help [get]
|
||||||
|
func (h *Handler) LlamaServerHelpHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
helpCmd := exec.Command("llama-server", "--help")
|
||||||
|
output, err := helpCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get help: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerVersionHandler godoc
|
||||||
|
// @Summary Get version of llama server
|
||||||
|
// @Description Returns the version of the llama server command
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Version information"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/version [get]
|
||||||
|
func (h *Handler) LlamaServerVersionHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
versionCmd := exec.Command("llama-server", "--version")
|
||||||
|
output, err := versionCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get version: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// LlamaServerListDevicesHandler godoc
|
||||||
|
// @Summary List available devices for llama server
|
||||||
|
// @Description Returns a list of available devices for the llama server
|
||||||
|
// @Tags backends
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "List of devices"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /backends/llama-cpp/devices [get]
|
||||||
|
func (h *Handler) LlamaServerListDevicesHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
listCmd := exec.Command("llama-server", "--list-devices")
|
||||||
|
output, err := listCmd.CombinedOutput()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list devices: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write(output)
|
||||||
|
}
|
||||||
|
}
|
||||||
445
pkg/server/handlers_instances.go
Normal file
445
pkg/server/handlers_instances.go
Normal file
@@ -0,0 +1,445 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"llamactl/pkg/manager"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httputil"
|
||||||
|
"net/url"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ListInstances godoc
|
||||||
|
// @Summary List all instances
|
||||||
|
// @Description Returns a list of all instances managed by the server
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {array} instance.Process "List of instances"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances [get]
|
||||||
|
func (h *Handler) ListInstances() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
instances, err := h.InstanceManager.ListInstances()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(instances); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// CreateInstance godoc
|
||||||
|
// @Summary Create and start a new instance
|
||||||
|
// @Description Creates a new instance with the provided configuration options
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
||||||
|
// @Success 201 {object} instance.Process "Created instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid request body"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [post]
|
||||||
|
func (h *Handler) CreateInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var options instance.CreateInstanceOptions
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.CreateInstance(name, &options)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to create instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
w.WriteHeader(http.StatusCreated)
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInstance godoc
|
||||||
|
// @Summary Get details of a specific instance
|
||||||
|
// @Description Returns the details of a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [get]
|
||||||
|
func (h *Handler) GetInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateInstance godoc
|
||||||
|
// @Summary Update an instance's configuration
|
||||||
|
// @Description Updates the configuration of a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param options body instance.CreateInstanceOptions true "Instance configuration options"
|
||||||
|
// @Success 200 {object} instance.Process "Updated instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [put]
|
||||||
|
func (h *Handler) UpdateInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
var options instance.CreateInstanceOptions
|
||||||
|
if err := json.NewDecoder(r.Body).Decode(&options); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.UpdateInstance(name, &options)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to update instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StartInstance godoc
|
||||||
|
// @Summary Start a stopped instance
|
||||||
|
// @Description Starts a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Started instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/start [post]
|
||||||
|
func (h *Handler) StartInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.StartInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
// Check if error is due to maximum running instances limit
|
||||||
|
if _, ok := err.(manager.MaxRunningInstancesError); ok {
|
||||||
|
http.Error(w, err.Error(), http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// StopInstance godoc
|
||||||
|
// @Summary Stop a running instance
|
||||||
|
// @Description Stops a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Stopped instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/stop [post]
|
||||||
|
func (h *Handler) StopInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.StopInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to stop instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RestartInstance godoc
|
||||||
|
// @Summary Restart a running instance
|
||||||
|
// @Description Restarts a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 {object} instance.Process "Restarted instance details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/restart [post]
|
||||||
|
func (h *Handler) RestartInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.RestartInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to restart instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(inst); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeleteInstance godoc
|
||||||
|
// @Summary Delete an instance
|
||||||
|
// @Description Stops and removes a specific instance by name
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 204 "No Content"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name} [delete]
|
||||||
|
func (h *Handler) DeleteInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := h.InstanceManager.DeleteInstance(name); err != nil {
|
||||||
|
http.Error(w, "Failed to delete instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.WriteHeader(http.StatusNoContent)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetInstanceLogs godoc
|
||||||
|
// @Summary Get logs from a specific instance
|
||||||
|
// @Description Returns the logs from a specific instance by name with optional line limit
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Param lines query string false "Number of lines to retrieve (default: all lines)"
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Instance logs"
|
||||||
|
// @Failure 400 {string} string "Invalid name format or lines parameter"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /instances/{name}/logs [get]
|
||||||
|
func (h *Handler) GetInstanceLogs() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
lines := r.URL.Query().Get("lines")
|
||||||
|
numLines := -1 // Default to all lines
|
||||||
|
if lines != "" {
|
||||||
|
parsedLines, err := strconv.Atoi(lines)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid lines parameter: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
numLines = parsedLines
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the instance manager which handles both local and remote instances
|
||||||
|
logs, err := h.InstanceManager.GetInstanceLogs(name, numLines)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get logs: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
w.Write([]byte(logs))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ProxyToInstance godoc
|
||||||
|
// @Summary Proxy requests to a specific instance
|
||||||
|
// @Description Forwards HTTP requests to the llama-server instance running on a specific port
|
||||||
|
// @Tags instances
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Param name path string true "Instance Name"
|
||||||
|
// @Success 200 "Request successfully proxied to instance"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Failure 503 {string} string "Instance is not running"
|
||||||
|
// @Router /instances/{name}/proxy [get]
|
||||||
|
// @Router /instances/{name}/proxy [post]
|
||||||
|
func (h *Handler) ProxyToInstance() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Instance name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
inst, err := h.InstanceManager.GetInstance(name)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
if inst.IsRemote() {
|
||||||
|
h.RemoteInstanceProxy(w, r, name, inst)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the cached proxy for this instance
|
||||||
|
proxy, err := inst.GetProxy()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strip the "/api/v1/instances/<name>/proxy" prefix from the request URL
|
||||||
|
prefix := fmt.Sprintf("/api/v1/instances/%s/proxy", name)
|
||||||
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, prefix)
|
||||||
|
|
||||||
|
// Update the last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
// Set forwarded headers
|
||||||
|
r.Header.Set("X-Forwarded-Host", r.Header.Get("Host"))
|
||||||
|
r.Header.Set("X-Forwarded-Proto", "http")
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemoteInstanceProxy proxies requests to a remote instance
|
||||||
|
func (h *Handler) RemoteInstanceProxy(w http.ResponseWriter, r *http.Request, name string, inst *instance.Process) {
|
||||||
|
// Get the node name from instance options
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil || len(options.Nodes) == 0 {
|
||||||
|
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
|
||||||
|
// Check if we have a cached proxy for this node
|
||||||
|
h.remoteProxiesMu.RLock()
|
||||||
|
proxy, exists := h.remoteProxies[nodeName]
|
||||||
|
h.remoteProxiesMu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
// Find node configuration
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[nodeName]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create reverse proxy to remote node
|
||||||
|
targetURL, err := url.Parse(nodeConfig.Address)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy = httputil.NewSingleHostReverseProxy(targetURL)
|
||||||
|
|
||||||
|
// Modify request before forwarding
|
||||||
|
originalDirector := proxy.Director
|
||||||
|
apiKey := nodeConfig.APIKey // Capture for closure
|
||||||
|
proxy.Director = func(req *http.Request) {
|
||||||
|
originalDirector(req)
|
||||||
|
// Add API key if configured
|
||||||
|
if apiKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the proxy by node name
|
||||||
|
h.remoteProxiesMu.Lock()
|
||||||
|
h.remoteProxies[nodeName] = proxy
|
||||||
|
h.remoteProxiesMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
79
pkg/server/handlers_nodes.go
Normal file
79
pkg/server/handlers_nodes.go
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/go-chi/chi/v5"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NodeResponse is the API-facing view of a configured node. It carries only
// the node address; the handlers in this file copy no other field of the
// node configuration into it.
type NodeResponse struct {
	// Address is copied from the node configuration's Address field.
	Address string `json:"address"`
}
|
||||||
|
|
||||||
|
// ListNodes godoc
|
||||||
|
// @Summary List all configured nodes
|
||||||
|
// @Description Returns a map of all nodes configured in the server (node name -> node config)
|
||||||
|
// @Tags nodes
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {object} map[string]NodeResponse "Map of nodes"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /nodes [get]
|
||||||
|
func (h *Handler) ListNodes() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
// Convert to sanitized response format (map of name -> NodeResponse)
|
||||||
|
nodeResponses := make(map[string]NodeResponse, len(h.cfg.Nodes))
|
||||||
|
for name, node := range h.cfg.Nodes {
|
||||||
|
nodeResponses[name] = NodeResponse{
|
||||||
|
Address: node.Address,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(nodeResponses); err != nil {
|
||||||
|
http.Error(w, "Failed to encode nodes: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetNode godoc
|
||||||
|
// @Summary Get details of a specific node
|
||||||
|
// @Description Returns the details of a specific node by name
|
||||||
|
// @Tags nodes
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Param name path string true "Node Name"
|
||||||
|
// @Success 200 {object} NodeResponse "Node details"
|
||||||
|
// @Failure 400 {string} string "Invalid name format"
|
||||||
|
// @Failure 404 {string} string "Node not found"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /nodes/{name} [get]
|
||||||
|
func (h *Handler) GetNode() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
name := chi.URLParam(r, "name")
|
||||||
|
if name == "" {
|
||||||
|
http.Error(w, "Node name cannot be empty", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[name]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, "Node not found", http.StatusNotFound)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to sanitized response format
|
||||||
|
nodeResponse := NodeResponse{
|
||||||
|
Address: nodeConfig.Address,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(nodeResponse); err != nil {
|
||||||
|
http.Error(w, "Failed to encode node: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
206
pkg/server/handlers_openai.go
Normal file
206
pkg/server/handlers_openai.go
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"llamactl/pkg/instance"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httputil"
|
||||||
|
"net/url"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenAIListInstances godoc
|
||||||
|
// @Summary List instances in OpenAI-compatible format
|
||||||
|
// @Description Returns a list of instances in a format compatible with OpenAI API
|
||||||
|
// @Tags openai
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 {object} OpenAIListInstancesResponse "List of OpenAI-compatible instances"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /v1/models [get]
|
||||||
|
func (h *Handler) OpenAIListInstances() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
instances, err := h.InstanceManager.ListInstances()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to list instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
openaiInstances := make([]OpenAIInstance, len(instances))
|
||||||
|
for i, inst := range instances {
|
||||||
|
openaiInstances[i] = OpenAIInstance{
|
||||||
|
ID: inst.Name,
|
||||||
|
Object: "model",
|
||||||
|
Created: inst.Created,
|
||||||
|
OwnedBy: "llamactl",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
openaiResponse := OpenAIListInstancesResponse{
|
||||||
|
Object: "list",
|
||||||
|
Data: openaiInstances,
|
||||||
|
}
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
if err := json.NewEncoder(w).Encode(openaiResponse); err != nil {
|
||||||
|
http.Error(w, "Failed to encode instances: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// OpenAIProxy godoc
|
||||||
|
// @Summary OpenAI-compatible proxy endpoint
|
||||||
|
// @Description Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.
|
||||||
|
// @Tags openai
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Accept json
|
||||||
|
// @Produces json
|
||||||
|
// @Success 200 "OpenAI response"
|
||||||
|
// @Failure 400 {string} string "Invalid request body or instance name"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /v1/ [post]
|
||||||
|
func (h *Handler) OpenAIProxy() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
// Read the entire body first
|
||||||
|
bodyBytes, err := io.ReadAll(r.Body)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to read request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
r.Body.Close()
|
||||||
|
|
||||||
|
// Parse the body to extract instance name
|
||||||
|
var requestBody map[string]any
|
||||||
|
if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
|
||||||
|
http.Error(w, "Invalid request body", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
modelName, ok := requestBody["model"].(string)
|
||||||
|
if !ok || modelName == "" {
|
||||||
|
http.Error(w, "Instance name is required", http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Route to the appropriate inst based on instance name
|
||||||
|
inst, err := h.InstanceManager.GetInstance(modelName)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Invalid instance: "+err.Error(), http.StatusBadRequest)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a remote instance
|
||||||
|
if inst.IsRemote() {
|
||||||
|
// Restore the body for the remote proxy
|
||||||
|
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||||
|
h.RemoteOpenAIProxy(w, r, modelName, inst)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !inst.IsRunning() {
|
||||||
|
options := inst.GetOptions()
|
||||||
|
allowOnDemand := options != nil && options.OnDemandStart != nil && *options.OnDemandStart
|
||||||
|
if !allowOnDemand {
|
||||||
|
http.Error(w, "Instance is not running", http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if h.InstanceManager.IsMaxRunningInstancesReached() {
|
||||||
|
if h.cfg.Instances.EnableLRUEviction {
|
||||||
|
err := h.InstanceManager.EvictLRUInstance()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Cannot start Instance, failed to evict instance "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
http.Error(w, "Cannot start Instance, maximum number of instances reached", http.StatusConflict)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If on-demand start is enabled, start the instance
|
||||||
|
if _, err := h.InstanceManager.StartInstance(modelName); err != nil {
|
||||||
|
http.Error(w, "Failed to start instance: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Wait for the instance to become healthy before proceeding
|
||||||
|
if err := inst.WaitForHealthy(h.cfg.Instances.OnDemandStartTimeout); err != nil { // 2 minutes timeout
|
||||||
|
http.Error(w, "Instance failed to become healthy: "+err.Error(), http.StatusServiceUnavailable)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy, err := inst.GetProxy()
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to get proxy: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update last request time for the instance
|
||||||
|
inst.UpdateLastRequestTime()
|
||||||
|
|
||||||
|
// Recreate the request body from the bytes we read
|
||||||
|
r.Body = io.NopCloser(bytes.NewReader(bodyBytes))
|
||||||
|
r.ContentLength = int64(len(bodyBytes))
|
||||||
|
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RemoteOpenAIProxy proxies OpenAI-compatible requests to a remote instance
|
||||||
|
func (h *Handler) RemoteOpenAIProxy(w http.ResponseWriter, r *http.Request, modelName string, inst *instance.Process) {
|
||||||
|
// Get the node name from instance options
|
||||||
|
options := inst.GetOptions()
|
||||||
|
if options == nil || len(options.Nodes) == 0 {
|
||||||
|
http.Error(w, "Instance has no node configured", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
nodeName := options.Nodes[0]
|
||||||
|
|
||||||
|
// Check if we have a cached proxy for this node
|
||||||
|
h.remoteProxiesMu.RLock()
|
||||||
|
proxy, exists := h.remoteProxies[nodeName]
|
||||||
|
h.remoteProxiesMu.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
// Find node configuration
|
||||||
|
nodeConfig, exists := h.cfg.Nodes[nodeName]
|
||||||
|
if !exists {
|
||||||
|
http.Error(w, fmt.Sprintf("Node %s not found", nodeName), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create reverse proxy to remote node
|
||||||
|
targetURL, err := url.Parse(nodeConfig.Address)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Failed to parse node address: "+err.Error(), http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
proxy = httputil.NewSingleHostReverseProxy(targetURL)
|
||||||
|
|
||||||
|
// Modify request before forwarding
|
||||||
|
originalDirector := proxy.Director
|
||||||
|
apiKey := nodeConfig.APIKey // Capture for closure
|
||||||
|
proxy.Director = func(req *http.Request) {
|
||||||
|
originalDirector(req)
|
||||||
|
// Add API key if configured
|
||||||
|
if apiKey != "" {
|
||||||
|
req.Header.Set("Authorization", fmt.Sprintf("Bearer %s", apiKey))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache the proxy
|
||||||
|
h.remoteProxiesMu.Lock()
|
||||||
|
h.remoteProxies[nodeName] = proxy
|
||||||
|
h.remoteProxiesMu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Forward the request using the cached proxy
|
||||||
|
proxy.ServeHTTP(w, r)
|
||||||
|
}
|
||||||
22
pkg/server/handlers_system.go
Normal file
22
pkg/server/handlers_system.go
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
package server
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// VersionHandler godoc
|
||||||
|
// @Summary Get llamactl version
|
||||||
|
// @Description Returns the version of the llamactl command
|
||||||
|
// @Tags version
|
||||||
|
// @Security ApiKeyAuth
|
||||||
|
// @Produces text/plain
|
||||||
|
// @Success 200 {string} string "Version information"
|
||||||
|
// @Failure 500 {string} string "Internal Server Error"
|
||||||
|
// @Router /version [get]
|
||||||
|
func (h *Handler) VersionHandler() http.HandlerFunc {
|
||||||
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "text/plain")
|
||||||
|
fmt.Fprintf(w, "Version: %s\nCommit: %s\nBuild Time: %s\n", h.cfg.Version, h.cfg.CommitHash, h.cfg.BuildTime)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -60,6 +60,15 @@ func SetupRouter(handler *Handler) *chi.Mux {
|
|||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Node management endpoints
|
||||||
|
r.Route("/nodes", func(r chi.Router) {
|
||||||
|
r.Get("/", handler.ListNodes()) // List all nodes
|
||||||
|
|
||||||
|
r.Route("/{name}", func(r chi.Router) {
|
||||||
|
r.Get("/", handler.GetNode())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
||||||
// Instance management endpoints
|
// Instance management endpoints
|
||||||
r.Route("/instances", func(r chi.Router) {
|
r.Route("/instances", func(r chi.Router) {
|
||||||
r.Get("/", handler.ListInstances()) // List all instances
|
r.Get("/", handler.ListInstances()) // List all instances
|
||||||
|
|||||||
@@ -106,7 +106,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
|
|||||||
// Clean up undefined values to avoid sending empty fields
|
// Clean up undefined values to avoid sending empty fields
|
||||||
const cleanOptions: CreateInstanceOptions = {};
|
const cleanOptions: CreateInstanceOptions = {};
|
||||||
Object.entries(formData).forEach(([key, value]) => {
|
Object.entries(formData).forEach(([key, value]) => {
|
||||||
if (key === 'backend_options' && value && typeof value === 'object') {
|
if (key === 'backend_options' && value && typeof value === 'object' && !Array.isArray(value)) {
|
||||||
// Handle backend_options specially - clean nested object
|
// Handle backend_options specially - clean nested object
|
||||||
const cleanBackendOptions: any = {};
|
const cleanBackendOptions: any = {};
|
||||||
Object.entries(value).forEach(([backendKey, backendValue]) => {
|
Object.entries(value).forEach(([backendKey, backendValue]) => {
|
||||||
@@ -123,8 +123,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
|
|||||||
if (Object.keys(cleanBackendOptions).length > 0) {
|
if (Object.keys(cleanBackendOptions).length > 0) {
|
||||||
(cleanOptions as any)[key] = cleanBackendOptions;
|
(cleanOptions as any)[key] = cleanBackendOptions;
|
||||||
}
|
}
|
||||||
} else if (value !== undefined && value !== null && (typeof value !== 'string' || value.trim() !== "")) {
|
} else if (value !== undefined && value !== null) {
|
||||||
// Handle arrays - don't include empty arrays
|
// Skip empty strings
|
||||||
|
if (typeof value === 'string' && value.trim() === "") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Skip empty arrays
|
||||||
if (Array.isArray(value) && value.length === 0) {
|
if (Array.isArray(value) && value.length === 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import React from 'react'
|
import React, { useState, useEffect } from 'react'
|
||||||
import type { CreateInstanceOptions } from '@/types/instance'
|
import type { CreateInstanceOptions } from '@/types/instance'
|
||||||
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
import { Card, CardContent, CardHeader, CardTitle } from '@/components/ui/card'
|
||||||
import { Label } from '@/components/ui/label'
|
import { Label } from '@/components/ui/label'
|
||||||
@@ -7,6 +7,8 @@ import AutoRestartConfiguration from '@/components/instance/AutoRestartConfigura
|
|||||||
import NumberInput from '@/components/form/NumberInput'
|
import NumberInput from '@/components/form/NumberInput'
|
||||||
import CheckboxInput from '@/components/form/CheckboxInput'
|
import CheckboxInput from '@/components/form/CheckboxInput'
|
||||||
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
|
import EnvironmentVariablesInput from '@/components/form/EnvironmentVariablesInput'
|
||||||
|
import SelectInput from '@/components/form/SelectInput'
|
||||||
|
import { nodesApi, type NodesMap } from '@/lib/api'
|
||||||
|
|
||||||
interface InstanceSettingsCardProps {
|
interface InstanceSettingsCardProps {
|
||||||
instanceName: string
|
instanceName: string
|
||||||
@@ -25,6 +27,46 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
onNameChange,
|
onNameChange,
|
||||||
onChange
|
onChange
|
||||||
}) => {
|
}) => {
|
||||||
|
const [nodes, setNodes] = useState<NodesMap>({})
|
||||||
|
const [loadingNodes, setLoadingNodes] = useState(true)
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
const fetchNodes = async () => {
|
||||||
|
try {
|
||||||
|
const fetchedNodes = await nodesApi.list()
|
||||||
|
setNodes(fetchedNodes)
|
||||||
|
|
||||||
|
// Auto-select first node if none selected
|
||||||
|
const nodeNames = Object.keys(fetchedNodes)
|
||||||
|
if (nodeNames.length > 0 && (!formData.nodes || formData.nodes.length === 0)) {
|
||||||
|
onChange('nodes', [nodeNames[0]])
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to fetch nodes:', error)
|
||||||
|
} finally {
|
||||||
|
setLoadingNodes(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void fetchNodes()
|
||||||
|
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||||
|
}, [])
|
||||||
|
|
||||||
|
const nodeOptions = Object.keys(nodes).map(nodeName => ({
|
||||||
|
value: nodeName,
|
||||||
|
label: nodeName
|
||||||
|
}))
|
||||||
|
|
||||||
|
const handleNodeChange = (value: string | undefined) => {
|
||||||
|
if (value) {
|
||||||
|
onChange('nodes', [value])
|
||||||
|
} else {
|
||||||
|
onChange('nodes', undefined)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedNode = formData.nodes && formData.nodes.length > 0 ? formData.nodes[0] : ''
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Card>
|
<Card>
|
||||||
<CardHeader>
|
<CardHeader>
|
||||||
@@ -50,6 +92,18 @@ const InstanceSettingsCard: React.FC<InstanceSettingsCardProps> = ({
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{/* Node Selection */}
|
||||||
|
{!loadingNodes && Object.keys(nodes).length > 0 && (
|
||||||
|
<SelectInput
|
||||||
|
id="node"
|
||||||
|
label="Node"
|
||||||
|
value={selectedNode}
|
||||||
|
onChange={handleNodeChange}
|
||||||
|
options={nodeOptions}
|
||||||
|
description="Select the node where the instance will run (default: main node)"
|
||||||
|
/>
|
||||||
|
)}
|
||||||
|
|
||||||
{/* Auto Restart Configuration */}
|
{/* Auto Restart Configuration */}
|
||||||
<AutoRestartConfiguration
|
<AutoRestartConfiguration
|
||||||
formData={formData}
|
formData={formData}
|
||||||
|
|||||||
@@ -103,6 +103,22 @@ export const backendsApi = {
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Node API types
|
||||||
|
export interface NodeResponse {
|
||||||
|
address: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export type NodesMap = Record<string, NodeResponse>;
|
||||||
|
|
||||||
|
// Node API functions
|
||||||
|
export const nodesApi = {
|
||||||
|
// GET /nodes - returns map of node name to NodeResponse
|
||||||
|
list: () => apiCall<NodesMap>("/nodes"),
|
||||||
|
|
||||||
|
// GET /nodes/{name}
|
||||||
|
get: (name: string) => apiCall<NodeResponse>(`/nodes/${name}`),
|
||||||
|
};
|
||||||
|
|
||||||
// Instance API functions
|
// Instance API functions
|
||||||
export const instancesApi = {
|
export const instancesApi = {
|
||||||
// GET /instances
|
// GET /instances
|
||||||
|
|||||||
@@ -39,6 +39,9 @@ export const CreateInstanceOptionsSchema = z.object({
|
|||||||
// Backend configuration
|
// Backend configuration
|
||||||
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
||||||
backend_options: BackendOptionsSchema.optional(),
|
backend_options: BackendOptionsSchema.optional(),
|
||||||
|
|
||||||
|
// Node configuration
|
||||||
|
nodes: z.array(z.string()).optional(),
|
||||||
})
|
})
|
||||||
|
|
||||||
// Re-export types and schemas from backend files
|
// Re-export types and schemas from backend files
|
||||||
|
|||||||
Reference in New Issue
Block a user