Merge pull request #31 from lordmathis/feat/parse-command
feat: Implement command parsing in Create Instance
pkg/backends/llamacpp/parser.go (new file, 286 lines)
@@ -0,0 +1,286 @@
package llamacpp

import (
	"encoding/json"
	"errors"
	"fmt"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// ParseLlamaCommand parses a llama-server command string into LlamaServerOptions
// Supports multiple formats:
// 1. Full command: "llama-server --model file.gguf"
// 2. Full path: "/usr/local/bin/llama-server --model file.gguf"
// 3. Args only: "--model file.gguf --gpu-layers 32"
// 4. Multiline commands with backslashes
func ParseLlamaCommand(command string) (*LlamaServerOptions, error) {
	// 1. Normalize the command - handle multiline with backslashes
	trimmed := normalizeMultilineCommand(command)
	if trimmed == "" {
		return nil, fmt.Errorf("command cannot be empty")
	}

	// 2. Extract arguments from command
	args, err := extractArgumentsFromCommand(trimmed)
	if err != nil {
		return nil, err
	}

	// 3. Parse arguments into map
	options := make(map[string]any)

	// Known multi-valued flags (snake_case form)
	multiValued := map[string]struct{}{
		"override_tensor":       {},
		"override_kv":           {},
		"lora":                  {},
		"lora_scaled":           {},
		"control_vector":        {},
		"control_vector_scaled": {},
		"dry_sequence_breaker":  {},
		"logit_bias":            {},
	}

	i := 0
	for i < len(args) {
		arg := args[i]

		if !strings.HasPrefix(arg, "-") { // skip positional / stray values
			i++
			continue
		}

		// Reject malformed flags with more than two leading dashes (e.g. ---model) to surface user mistakes
		if strings.HasPrefix(arg, "---") {
			return nil, fmt.Errorf("malformed flag: %s", arg)
		}

		// Unified parsing for --flag=value vs --flag value
		var rawFlag, rawValue string
		hasEquals := false
		if strings.Contains(arg, "=") {
			parts := strings.SplitN(arg, "=", 2)
			rawFlag = parts[0]
			rawValue = parts[1] // may be empty string
			hasEquals = true
		} else {
			rawFlag = arg
		}

		flagCore := strings.TrimPrefix(strings.TrimPrefix(rawFlag, "-"), "-")
		flagName := strings.ReplaceAll(flagCore, "-", "_")

		// Detect value if not in equals form
		valueProvided := hasEquals
		if !hasEquals {
			if i+1 < len(args) && !isFlag(args[i+1]) { // next token is value
				rawValue = args[i+1]
				valueProvided = true
			}
		}

		// Determine if multi-valued flag
		_, isMulti := multiValued[flagName]

		// Normalization helper: ensure slice for multi-valued flags
		appendValue := func(valStr string) {
			if existing, ok := options[flagName]; ok {
				// Existing value; ensure slice semantics for multi-valued flags or repeated occurrences
				if slice, ok := existing.([]string); ok {
					options[flagName] = append(slice, valStr)
					return
				}
				// Convert scalar to slice
				options[flagName] = []string{fmt.Sprintf("%v", existing), valStr}
				return
			}
			// First value
			if isMulti {
				options[flagName] = []string{valStr}
			} else {
				// We'll parse type below for single-valued flags
				options[flagName] = valStr
			}
		}

		if valueProvided {
			// Use raw token for multi-valued flags; else allow typed parsing
			appendValue(rawValue)
			if !isMulti { // convert to typed value if scalar
				if strVal, ok := options[flagName].(string); ok { // still scalar
					options[flagName] = parseValue(strVal)
				}
			}
			// Advance index: if we consumed a following token as value (non equals form), skip it
			if !hasEquals && i+1 < len(args) && rawValue == args[i+1] {
				i += 2
			} else {
				i++
			}
			continue
		}

		// Boolean flag (no value)
		options[flagName] = true
		i++
	}

	// 4. Convert to LlamaServerOptions using existing UnmarshalJSON
	jsonData, err := json.Marshal(options)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal parsed options: %w", err)
	}

	var llamaOptions LlamaServerOptions
	if err := json.Unmarshal(jsonData, &llamaOptions); err != nil {
		return nil, fmt.Errorf("failed to parse command options: %w", err)
	}

	// 5. Return LlamaServerOptions
	return &llamaOptions, nil
}

// parseValue attempts to parse a string value into the most appropriate type
func parseValue(value string) any {
	// Strip surrounding matching quotes (single or double)
	if l := len(value); l >= 2 {
		if (value[0] == '"' && value[l-1] == '"') || (value[0] == '\'' && value[l-1] == '\'') {
			value = value[1 : l-1]
		}
	}

	lower := strings.ToLower(value)
	if lower == "true" {
		return true
	}
	if lower == "false" {
		return false
	}

	if intVal, err := strconv.Atoi(value); err == nil {
		return intVal
	}
	if floatVal, err := strconv.ParseFloat(value, 64); err == nil {
		return floatVal
	}
	return value
}

// normalizeMultilineCommand handles multiline commands with backslashes
func normalizeMultilineCommand(command string) string {
	// Handle escaped newlines (backslash followed by newline)
	re := regexp.MustCompile(`\\\s*\n\s*`)
	normalized := re.ReplaceAllString(command, " ")

	// Clean up extra whitespace
	re = regexp.MustCompile(`\s+`)
	normalized = re.ReplaceAllString(normalized, " ")

	return strings.TrimSpace(normalized)
}

// extractArgumentsFromCommand extracts arguments from various command formats
func extractArgumentsFromCommand(command string) ([]string, error) {
	// Split command into tokens respecting quotes
	tokens, err := splitCommandTokens(command)
	if err != nil {
		return nil, err
	}

	if len(tokens) == 0 {
		return nil, fmt.Errorf("no command tokens found")
	}

	// Check if first token looks like an executable
	firstToken := tokens[0]

	// Case 1: Full path to executable (contains path separator or ends with llama-server)
	if strings.Contains(firstToken, string(filepath.Separator)) ||
		strings.HasSuffix(filepath.Base(firstToken), "llama-server") {
		return tokens[1:], nil // Return everything except the executable
	}

	// Case 2: Just "llama-server" command
	if strings.ToLower(firstToken) == "llama-server" {
		return tokens[1:], nil // Return everything except the command
	}

	// Case 3: Arguments only (starts with a flag)
	if strings.HasPrefix(firstToken, "-") {
		return tokens, nil // Return all tokens as arguments
	}

	// Case 4: Unknown format - might be a different executable name
	// Be permissive and assume it's the executable
	return tokens[1:], nil
}

// splitCommandTokens splits a command string into tokens, respecting quotes
func splitCommandTokens(command string) ([]string, error) {
	var tokens []string
	var current strings.Builder
	inQuotes := false
	quoteChar := byte(0)
	escaped := false

	for i := 0; i < len(command); i++ {
		c := command[i]

		if escaped {
			current.WriteByte(c)
			escaped = false
			continue
		}

		if c == '\\' {
			escaped = true
			current.WriteByte(c)
			continue
		}

		if !inQuotes && (c == '"' || c == '\'') {
			inQuotes = true
			quoteChar = c
			current.WriteByte(c)
		} else if inQuotes && c == quoteChar {
			inQuotes = false
			quoteChar = 0
			current.WriteByte(c)
		} else if !inQuotes && (c == ' ' || c == '\t') {
			if current.Len() > 0 {
				tokens = append(tokens, current.String())
				current.Reset()
			}
		} else {
			current.WriteByte(c)
		}
	}

	if inQuotes {
		return nil, errors.New("unterminated quoted string")
	}

	if current.Len() > 0 {
		tokens = append(tokens, current.String())
	}

	return tokens, nil
}

// isFlag determines if a string is a command line flag or a value
// Handles the special case where negative numbers should be treated as values, not flags
func isFlag(arg string) bool {
	if !strings.HasPrefix(arg, "-") {
		return false
	}

	// Special case: if it's a negative number, treat it as a value
	if _, err := strconv.ParseFloat(arg, 64); err == nil {
		return false
	}

	return true
}
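For orientation, a minimal usage sketch (not part of this commit): it calls ParseLlamaCommand from the package above and reads a few of the resulting fields (Model, GPULayers, Lora), which are the same fields the tests below assert on. The model path and adapter names are made up, and the imports ("fmt", "log", and the llamacpp package path) are assumed.

	// Hypothetical caller sketch: parse a pasted llama-server command
	// and inspect a few of the parsed options.
	cmd := `llama-server --model /models/llama-3.gguf \
	    --gpu-layers 32 --ctx-size 4096 --lora a.bin --lora b.bin`

	opts, err := llamacpp.ParseLlamaCommand(cmd)
	if err != nil {
		log.Fatalf("parse failed: %v", err)
	}
	fmt.Println(opts.Model)     // "/models/llama-3.gguf"
	fmt.Println(opts.GPULayers) // 32
	fmt.Println(opts.Lora)      // ["a.bin" "b.bin"] (multi-valued flag collected into a slice)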
pkg/backends/llamacpp/parser_test.go (new file, 413 lines)
@@ -0,0 +1,413 @@
package llamacpp

import (
	"testing"
)

func TestParseLlamaCommand(t *testing.T) {
	tests := []struct {
		name      string
		command   string
		expectErr bool
	}{
		{
			name:      "basic command with model",
			command:   "llama-server --model /path/to/model.gguf",
			expectErr: false,
		},
		{
			name:      "command with multiple flags",
			command:   "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096",
			expectErr: false,
		},
		{
			name:      "command with short flags",
			command:   "llama-server -m /path/to/model.gguf -ngl 32 -c 4096",
			expectErr: false,
		},
		{
			name:      "command with equals format",
			command:   "llama-server --model=/path/to/model.gguf --gpu-layers=32",
			expectErr: false,
		},
		{
			name:      "command with boolean flags",
			command:   "llama-server --model /path/to/model.gguf --verbose --no-mmap",
			expectErr: false,
		},
		{
			name:      "empty command",
			command:   "",
			expectErr: true,
		},
		{
			name:      "case insensitive command",
			command:   "LLAMA-SERVER --model /path/to/model.gguf",
			expectErr: false,
		},
		// New test cases for improved functionality
		{
			name:      "args only without llama-server",
			command:   "--model /path/to/model.gguf --gpu-layers 32",
			expectErr: false,
		},
		{
			name:      "full path to executable",
			command:   "/usr/local/bin/llama-server --model /path/to/model.gguf",
			expectErr: false,
		},
		{
			name:      "negative number handling",
			command:   "llama-server --gpu-layers -1 --model test.gguf",
			expectErr: false,
		},
		{
			name:      "multiline command with backslashes",
			command:   "llama-server --model /path/to/model.gguf \\\n --ctx-size 4096 \\\n --batch-size 512",
			expectErr: false,
		},
		{
			name:      "quoted string with special characters",
			command:   `llama-server --model test.gguf --chat-template "{% for message in messages %}{{ message.role }}: {{ message.content }}\n{% endfor %}"`,
			expectErr: false,
		},
		{
			name:      "unterminated quoted string",
			command:   `llama-server --model test.gguf --chat-template "unterminated quote`,
			expectErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			result, err := ParseLlamaCommand(tt.command)

			if tt.expectErr {
				if err == nil {
					t.Errorf("expected error but got none")
				}
				return
			}

			if err != nil {
				t.Errorf("unexpected error: %v", err)
				return
			}

			if result == nil {
				t.Errorf("expected result but got nil")
				return
			}
		})
	}
}

func TestParseLlamaCommandSpecificValues(t *testing.T) {
	// Test specific value parsing
	command := "llama-server --model /test/model.gguf --gpu-layers 32 --ctx-size 4096 --verbose"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/test/model.gguf" {
		t.Errorf("expected model '/test/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if !result.Verbose {
		t.Errorf("expected verbose to be true, got %v", result.Verbose)
	}
}

func TestParseLlamaCommandArrayFlags(t *testing.T) {
	// Test array flag handling (critical for lora, override-tensor, etc.)
	command := "llama-server --model test.gguf --lora adapter1.bin --lora adapter2.bin"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if len(result.Lora) != 2 {
		t.Errorf("expected 2 lora adapters, got %d", len(result.Lora))
	}

	if result.Lora[0] != "adapter1.bin" || result.Lora[1] != "adapter2.bin" {
		t.Errorf("expected lora adapters [adapter1.bin, adapter2.bin], got %v", result.Lora)
	}
}

func TestParseLlamaCommandMixedFormats(t *testing.T) {
	// Test mixing --flag=value and --flag value formats
	command := "llama-server --model=/path/model.gguf --gpu-layers 16 --batch-size=512 --verbose"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/model.gguf" {
		t.Errorf("expected model '/path/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 16 {
		t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if !result.Verbose {
		t.Errorf("expected verbose to be true, got %v", result.Verbose)
	}
}

func TestParseLlamaCommandTypeConversion(t *testing.T) {
	// Test that values are converted to appropriate types
	command := "llama-server --model test.gguf --temp 0.7 --top-k 40 --no-mmap"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Temperature != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", result.Temperature)
	}

	if result.TopK != 40 {
		t.Errorf("expected top_k 40, got %d", result.TopK)
	}

	if !result.NoMmap {
		t.Errorf("expected no_mmap to be true, got %v", result.NoMmap)
	}
}

func TestParseLlamaCommandArgsOnly(t *testing.T) {
	// Test parsing arguments without llama-server command
	command := "--model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}
}

func TestParseLlamaCommandFullPath(t *testing.T) {
	// Test full path to executable
	command := "/usr/local/bin/llama-server --model test.gguf --gpu-layers 16"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test.gguf" {
		t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
	}

	if result.GPULayers != 16 {
		t.Errorf("expected gpu_layers 16, got %d", result.GPULayers)
	}
}

func TestParseLlamaCommandNegativeNumbers(t *testing.T) {
	// Test negative number parsing
	command := "llama-server --model test.gguf --gpu-layers -1 --seed -12345"
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.GPULayers != -1 {
		t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
	}

	if result.Seed != -12345 {
		t.Errorf("expected seed -12345, got %d", result.Seed)
	}
}

func TestParseLlamaCommandMultiline(t *testing.T) {
	// Test multiline command with backslashes
	command := `llama-server --model /path/to/model.gguf \
		--ctx-size 4096 \
		--batch-size 512 \
		--gpu-layers 32`

	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if result.GPULayers != 32 {
		t.Errorf("expected gpu_layers 32, got %d", result.GPULayers)
	}
}

func TestParseLlamaCommandQuotedStrings(t *testing.T) {
	// Test quoted strings with special characters
	command := `llama-server --model test.gguf --api-key "sk-1234567890abcdef" --chat-template "User: {user}\nAssistant: "`
	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if result.Model != "test.gguf" {
		t.Errorf("expected model 'test.gguf', got '%s'", result.Model)
	}

	if result.APIKey != "sk-1234567890abcdef" {
		t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
	}

	expectedTemplate := "User: {user}\\nAssistant: "
	if result.ChatTemplate != expectedTemplate {
		t.Errorf("expected chat_template '%s', got '%s'", expectedTemplate, result.ChatTemplate)
	}
}

func TestParseLlamaCommandUnslothExample(t *testing.T) {
	// Test with realistic unsloth-style command
	command := `llama-server --model /path/to/model.gguf \
		--ctx-size 4096 \
		--batch-size 512 \
		--gpu-layers -1 \
		--temp 0.7 \
		--repeat-penalty 1.1 \
		--top-k 40 \
		--top-p 0.95 \
		--host 0.0.0.0 \
		--port 8000 \
		--api-key "sk-1234567890abcdef"`

	result, err := ParseLlamaCommand(command)

	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// Verify key fields
	if result.Model != "/path/to/model.gguf" {
		t.Errorf("expected model '/path/to/model.gguf', got '%s'", result.Model)
	}

	if result.CtxSize != 4096 {
		t.Errorf("expected ctx_size 4096, got %d", result.CtxSize)
	}

	if result.BatchSize != 512 {
		t.Errorf("expected batch_size 512, got %d", result.BatchSize)
	}

	if result.GPULayers != -1 {
		t.Errorf("expected gpu_layers -1, got %d", result.GPULayers)
	}

	if result.Temperature != 0.7 {
		t.Errorf("expected temperature 0.7, got %f", result.Temperature)
	}

	if result.RepeatPenalty != 1.1 {
		t.Errorf("expected repeat_penalty 1.1, got %f", result.RepeatPenalty)
	}

	if result.TopK != 40 {
		t.Errorf("expected top_k 40, got %d", result.TopK)
	}

	if result.TopP != 0.95 {
		t.Errorf("expected top_p 0.95, got %f", result.TopP)
	}

	if result.Host != "0.0.0.0" {
		t.Errorf("expected host '0.0.0.0', got '%s'", result.Host)
	}

	if result.Port != 8000 {
		t.Errorf("expected port 8000, got %d", result.Port)
	}

	if result.APIKey != "sk-1234567890abcdef" {
		t.Errorf("expected api_key 'sk-1234567890abcdef', got '%s'", result.APIKey)
	}
}

// Focused additional edge case tests (kept minimal per guidance)
func TestParseLlamaCommandSingleQuotedValue(t *testing.T) {
	cmd := "llama-server --model 'my model.gguf' --alias 'Test Alias'"
	result, err := ParseLlamaCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if result.Model != "my model.gguf" {
		t.Errorf("expected model 'my model.gguf', got '%s'", result.Model)
	}
	if result.Alias != "Test Alias" {
		t.Errorf("expected alias 'Test Alias', got '%s'", result.Alias)
	}
}

func TestParseLlamaCommandMixedArrayForms(t *testing.T) {
	// Same multi-value flag using --flag value and --flag=value forms
	cmd := "llama-server --lora adapter1.bin --lora=adapter2.bin --lora adapter3.bin"
	result, err := ParseLlamaCommand(cmd)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(result.Lora) != 3 {
		t.Fatalf("expected 3 lora values, got %d (%v)", len(result.Lora), result.Lora)
	}
	expected := []string{"adapter1.bin", "adapter2.bin", "adapter3.bin"}
	for i, v := range expected {
		if result.Lora[i] != v {
			t.Errorf("expected lora[%d]=%s got %s", i, v, result.Lora[i])
		}
	}
}

func TestParseLlamaCommandMalformedFlag(t *testing.T) {
	cmd := "llama-server ---model test.gguf"
	_, err := ParseLlamaCommand(cmd)
	if err == nil {
		t.Fatalf("expected error for malformed flag but got none")
	}
}
@@ -5,6 +5,8 @@ import (
	"encoding/json"
	"fmt"
	"io"
	"llamactl/pkg/backends"
	"llamactl/pkg/backends/llamacpp"
	"llamactl/pkg/config"
	"llamactl/pkg/instance"
	"llamactl/pkg/manager"
@@ -629,3 +631,56 @@ func (h *Handler) OpenAIProxy() http.HandlerFunc {
		proxy.ServeHTTP(w, r)
	}
}

// ParseCommandRequest represents the request body for command parsing
type ParseCommandRequest struct {
	Command string `json:"command"`
}

// ParseLlamaCommand godoc
// @Summary Parse llama-server command
// @Description Parses a llama-server command string into instance options
// @Tags backends
// @Security ApiKeyAuth
// @Accept json
// @Produce json
// @Param request body ParseCommandRequest true "Command to parse"
// @Success 200 {object} instance.CreateInstanceOptions "Parsed options"
// @Failure 400 {object} map[string]string "Invalid request or command"
// @Failure 500 {object} map[string]string "Internal Server Error"
// @Router /backends/llama-cpp/parse-command [post]
func (h *Handler) ParseLlamaCommand() http.HandlerFunc {
	type errorResponse struct {
		Error   string `json:"error"`
		Details string `json:"details,omitempty"`
	}
	writeError := func(w http.ResponseWriter, status int, code, details string) {
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(status)
		_ = json.NewEncoder(w).Encode(errorResponse{Error: code, Details: details})
	}
	return func(w http.ResponseWriter, r *http.Request) {
		var req ParseCommandRequest
		if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
			writeError(w, http.StatusBadRequest, "invalid_request", "Invalid JSON body")
			return
		}
		if strings.TrimSpace(req.Command) == "" {
			writeError(w, http.StatusBadRequest, "invalid_command", "Command cannot be empty")
			return
		}
		llamaOptions, err := llamacpp.ParseLlamaCommand(req.Command)
		if err != nil {
			writeError(w, http.StatusBadRequest, "parse_error", err.Error())
			return
		}
		options := &instance.CreateInstanceOptions{
			BackendType:        backends.BackendTypeLlamaCpp,
			LlamaServerOptions: llamaOptions,
		}
		w.Header().Set("Content-Type", "application/json")
		if err := json.NewEncoder(w).Encode(options); err != nil {
			writeError(w, http.StatusInternalServerError, "encode_error", err.Error())
		}
	}
}
@@ -50,6 +50,13 @@ func SetupRouter(handler *Handler) *chi.Mux {
		r.Get("/devices", handler.LlamaServerListDevicesHandler())
	})

	// Backend-specific endpoints
	r.Route("/backends", func(r chi.Router) {
		r.Route("/llama-cpp", func(r chi.Router) {
			r.Post("/parse-command", handler.ParseLlamaCommand())
		})
	})

	// Instance management endpoints
	r.Route("/instances", func(r chi.Router) {
		r.Get("/", handler.ListInstances()) // List all instances
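For illustration, a hedged Go client sketch of calling the new endpoint mounted above. The base URL, any API prefix the chi router is mounted under, and the Authorization header are assumptions and not taken from this diff; only the /backends/llama-cpp/parse-command route and the {"command": ...} request body come from the code.

	// Hypothetical client sketch: POST a raw command string and decode the
	// CreateInstanceOptions the handler returns.
	body, _ := json.Marshal(map[string]string{
		"command": "llama-server --model /models/llama-3.gguf --gpu-layers 32",
	})
	req, _ := http.NewRequest(http.MethodPost,
		"http://localhost:8080/api/v1/backends/llama-cpp/parse-command", // host and prefix assumed
		bytes.NewReader(body))
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer <api-key>") // only if API key auth is enabled

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var options instance.CreateInstanceOptions
	if err := json.NewDecoder(resp.Body).Decode(&options); err != nil {
		log.Fatal(err)
	}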
webui/package-lock.json (generated, 11 lines)
@@ -19,6 +19,7 @@
        "lucide-react": "^0.525.0",
        "react": "^19.1.0",
        "react-dom": "^19.1.0",
        "sonner": "^2.0.7",
        "tailwind-merge": "^3.3.1",
        "tailwindcss": "^4.1.11",
        "zod": "^4.0.5"
@@ -6750,6 +6751,16 @@
        "node": ">=18"
      }
    },
    "node_modules/sonner": {
      "version": "2.0.7",
      "resolved": "https://registry.npmjs.org/sonner/-/sonner-2.0.7.tgz",
      "integrity": "sha512-W6ZN4p58k8aDKA4XPcx2hpIQXBRAgyiWVkYhT7CvK6D3iAu7xjvVyhQHg2/iaKJZ1XVJ4r7XuwGL+WGEK37i9w==",
      "license": "MIT",
      "peerDependencies": {
        "react": "^18.0.0 || ^19.0.0 || ^19.0.0-rc",
        "react-dom": "^18.0.0 || ^19.0.0 || ^19.0.0-rc"
      }
    },
    "node_modules/source-map-js": {
      "version": "1.2.1",
      "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz",
@@ -28,6 +28,7 @@
    "lucide-react": "^0.525.0",
    "react": "^19.1.0",
    "react-dom": "^19.1.0",
    "sonner": "^2.0.7",
    "tailwind-merge": "^3.3.1",
    "tailwindcss": "^4.1.11",
    "zod": "^4.0.5"
@@ -8,6 +8,7 @@ import { type CreateInstanceOptions, type Instance } from "@/types/instance";
import { useInstances } from "@/contexts/InstancesContext";
import { useAuth } from "@/contexts/AuthContext";
import { ThemeProvider } from "@/contexts/ThemeContext";
import { Toaster } from "sonner";

function App() {
  const { isAuthenticated, isLoading: authLoading } = useAuth();
@@ -85,6 +86,8 @@ function App() {
          open={isSystemInfoModalOpen}
          onOpenChange={setIsSystemInfoModalOpen}
        />

        <Toaster />
      </div>
    </ThemeProvider>
  );
@@ -12,9 +12,10 @@ import {
} from "@/components/ui/dialog";
import { BackendType, type CreateInstanceOptions, type Instance } from "@/types/instance";
import { getBasicFields, getAdvancedFields, getBasicBackendFields, getAdvancedBackendFields } from "@/lib/zodFormUtils";
import { ChevronDown, ChevronRight } from "lucide-react";
import { ChevronDown, ChevronRight, Terminal } from "lucide-react";
import ZodFormField from "@/components/ZodFormField";
import BackendFormField from "@/components/BackendFormField";
import ParseCommandDialog from "@/components/ParseCommandDialog";

interface InstanceDialogProps {
  open: boolean;
@@ -35,6 +36,7 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
  const [formData, setFormData] = useState<CreateInstanceOptions>({});
  const [showAdvanced, setShowAdvanced] = useState(false);
  const [nameError, setNameError] = useState("");
  const [showParseDialog, setShowParseDialog] = useState(false);

  // Get field lists dynamically from the type
  const basicFields = getBasicFields();
@@ -142,6 +144,14 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
    setShowAdvanced(!showAdvanced);
  };

  const handleCommandParsed = (parsedOptions: CreateInstanceOptions) => {
    setFormData(prev => ({
      ...prev,
      ...parsedOptions,
    }));
    setShowParseDialog(false);
  };

  // Check if auto_restart is enabled
  const isAutoRestartEnabled = formData.auto_restart === true;

@@ -258,28 +268,39 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({

          {/* Advanced Fields Toggle */}
          <div className="border-t pt-4">
            <Button
              variant="ghost"
              onClick={toggleAdvanced}
              className="flex items-center gap-2 p-0 h-auto font-medium"
            >
              {showAdvanced ? (
                <ChevronDown className="h-4 w-4" />
              ) : (
                <ChevronRight className="h-4 w-4" />
              )}
              Advanced Configuration
              <span className="text-muted-foreground text-sm font-normal">
                (
                {
                  advancedFields.filter(
                    (f) =>
                      !["max_restarts", "restart_delay", "backend_options"].includes(f as string)
                  ).length + advancedBackendFields.length
                }{" "}
                options)
              </span>
            </Button>
            <div className="flex items-center justify-between">
              <Button
                variant="outline"
                onClick={() => setShowParseDialog(true)}
                className="flex items-center gap-2"
              >
                <Terminal className="h-4 w-4" />
                Parse Command
              </Button>

              <Button
                variant="ghost"
                onClick={toggleAdvanced}
                className="flex items-center gap-2 p-0 h-auto font-medium"
              >
                {showAdvanced ? (
                  <ChevronDown className="h-4 w-4" />
                ) : (
                  <ChevronRight className="h-4 w-4" />
                )}
                Advanced Configuration
                <span className="text-muted-foreground text-sm font-normal">
                  (
                  {
                    advancedFields.filter(
                      (f) =>
                        !["max_restarts", "restart_delay", "backend_options"].includes(f as string)
                    ).length + advancedBackendFields.length
                  }{" "}
                  options)
                </span>
              </Button>
            </div>
          </div>

          {/* Advanced Fields - Automatically generated from type (excluding restart options) */}
@@ -352,6 +373,12 @@ const InstanceDialog: React.FC<InstanceDialogProps> = ({
          </Button>
        </DialogFooter>
      </DialogContent>

      <ParseCommandDialog
        open={showParseDialog}
        onOpenChange={setShowParseDialog}
        onParsed={handleCommandParsed}
      />
    </Dialog>
  );
};
webui/src/components/ParseCommandDialog.tsx (new file, 117 lines)
@@ -0,0 +1,117 @@
import React, { useState } from "react";
import { Button } from "@/components/ui/button";
import { Label } from "@/components/ui/label";
import {
  Dialog,
  DialogContent,
  DialogDescription,
  DialogFooter,
  DialogHeader,
  DialogTitle,
} from "@/components/ui/dialog";
import { type CreateInstanceOptions } from "@/types/instance";
import { backendsApi } from "@/lib/api";
import { toast } from "sonner";

interface ParseCommandDialogProps {
  open: boolean;
  onOpenChange: (open: boolean) => void;
  onParsed: (options: CreateInstanceOptions) => void;
}

const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
  open,
  onOpenChange,
  onParsed,
}) => {
  const [command, setCommand] = useState('');
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const handleParse = async () => {
    if (!command.trim()) {
      setError("Command cannot be empty");
      return;
    }

    setLoading(true);
    setError(null);

    try {
      const options = await backendsApi.llamaCpp.parseCommand(command);
      onParsed(options);
      onOpenChange(false);
      // Reset form
      setCommand('');
      setError(null);
      // Show success toast
      toast.success('Command parsed successfully');
    } catch (err) {
      const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
      setError(errorMessage);
      // Show error toast
      toast.error('Failed to parse command', {
        description: errorMessage
      });
    } finally {
      setLoading(false);
    }
  };

  const handleOpenChange = (open: boolean) => {
    if (!open) {
      // Reset form when closing
      setCommand('');
      setError(null);
    }
    onOpenChange(open);
  };

  return (
    <Dialog open={open} onOpenChange={handleOpenChange}>
      <DialogContent className="sm:max-w-[600px]">
        <DialogHeader>
          <DialogTitle>Parse Llama Server Command</DialogTitle>
          <DialogDescription>
            Paste your llama-server command to automatically populate the form fields
          </DialogDescription>
        </DialogHeader>

        <div className="space-y-4">
          <div>
            <Label htmlFor="command">Command</Label>
            <textarea
              id="command"
              value={command}
              onChange={(e) => setCommand(e.target.value)}
              placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
              className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
            />
          </div>

          {error && (
            <div className="text-destructive text-sm bg-destructive/10 p-3 rounded-md">
              {error}
            </div>
          )}
        </div>

        <DialogFooter>
          <Button variant="outline" onClick={() => handleOpenChange(false)}>
            Cancel
          </Button>
          <Button
            onClick={() => {
              handleParse().catch(console.error);
            }}
            disabled={!command.trim() || loading}
          >
            {loading ? 'Parsing...' : 'Parse Command'}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
};

export default ParseCommandDialog;
@@ -83,6 +83,18 @@ export const serverApi = {
  getDevices: () => apiCall<string>("/server/devices", {}, "text"),
};

// Backend API functions
export const backendsApi = {
  llamaCpp: {
    // POST /backends/llama-cpp/parse-command
    parseCommand: (command: string) =>
      apiCall<CreateInstanceOptions>('/backends/llama-cpp/parse-command', {
        method: 'POST',
        body: JSON.stringify({ command }),
      }),
  },
};

// Instance API functions
export const instancesApi = {
  // GET /instances