Update API documentation and instance configuration

2025-12-23 09:34:23 +00:00 · 2025-07-19 22:04:13 +02:00
parent 154e1f36f9
commit 80a9971652
6 changed files with 1779 additions and 263 deletions
--- a/server/docs/docs.go
+++ b/server/docs/docs.go
@@ -22,9 +22,6 @@ const docTemplate = `{
        "/instances": {
            "get": {
                "description": "Returns a list of all instances managed by the server",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -46,50 +43,6 @@ const docTemplate = `{
                        }
                    }
                }
-            },
-            "post": {
-                "description": "Creates a new instance with the provided configuration options",
-                "consumes": [
-                    "application/json"
-                ],
-                "produces": [
-                    "application/json"
-                ],
-                "tags": [
-                    "instances"
-                ],
-                "summary": "Create and start a new instance",
-                "parameters": [
-                    {
-                        "description": "Instance configuration options",
-                        "name": "options",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/llamactl.InstanceOptions"
-                        }
-                    }
-                ],
-                "responses": {
-                    "201": {
-                        "description": "Created instance details",
-                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request body",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "500": {
-                        "description": "Internal Server Error",
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                }
            }
        },
        "/instances/{name}": {
@@ -134,9 +87,6 @@ const docTemplate = `{
                "consumes": [
                    "application/json"
                ],
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -155,7 +105,7 @@ const docTemplate = `{
                        "in": "body",
                        "required": true,
                        "schema": {
-                            "$ref": "#/definitions/llamactl.InstanceOptions"
+                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
                        }
                    }
                ],
@@ -180,14 +130,59 @@ const docTemplate = `{
                    }
                }
            },
-            "delete": {
-                "description": "Stops and removes a specific instance by name",
-                "produces": [
+            "post": {
+                "description": "Creates a new instance with the provided configuration options",
+                "consumes": [
                    "application/json"
                ],
                "tags": [
                    "instances"
                ],
+                "summary": "Create and start a new instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "description": "Instance configuration options",
+                        "name": "options",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                        }
+                    }
+                ],
+                "responses": {
+                    "201": {
+                        "description": "Created instance details",
+                        "schema": {
+                            "$ref": "#/definitions/llamactl.Instance"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request body",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            },
+            "delete": {
+                "description": "Stops and removes a specific instance by name",
+                "tags": [
+                    "instances"
+                ],
                "summary": "Delete an instance",
                "parameters": [
                    {
@@ -217,12 +212,94 @@ const docTemplate = `{
                }
            }
        },
+        "/instances/{name}/logs": {
+            "get": {
+                "description": "Returns the logs from a specific instance by name with optional line limit",
+                "tags": [
+                    "instances"
+                ],
+                "summary": "Get logs from a specific instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Number of lines to retrieve (default: all lines)",
+                        "name": "lines",
+                        "in": "query"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Instance logs",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid name format or lines parameter",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/instances/{name}/proxy": {
+            "get": {
+                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
+                "tags": [
+                    "instances"
+                ],
+                "summary": "Proxy requests to a specific instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Request successfully proxied to instance"
+                    },
+                    "400": {
+                        "description": "Invalid name format",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "503": {
+                        "description": "Instance is not running",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
        "/instances/{name}/restart": {
            "post": {
                "description": "Restarts a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -261,9 +338,6 @@ const docTemplate = `{
        "/instances/{name}/start": {
            "post": {
                "description": "Starts a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -302,9 +376,6 @@ const docTemplate = `{
        "/instances/{name}/stop": {
            "post": {
                "description": "Stops a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -411,6 +482,520 @@ const docTemplate = `{
        }
    },
    "definitions": {
+        "llamactl.CreateInstanceOptions": {
+            "type": "object",
+            "properties": {
+                "alias": {
+                    "type": "string"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "api_key_file": {
+                    "type": "string"
+                },
+                "auto_restart": {
+                    "description": "Auto restart",
+                    "type": "boolean"
+                },
+                "batch_size": {
+                    "type": "integer"
+                },
+                "cache_reuse": {
+                    "type": "integer"
+                },
+                "cache_type_k": {
+                    "type": "string"
+                },
+                "cache_type_k_draft": {
+                    "type": "string"
+                },
+                "cache_type_v": {
+                    "type": "string"
+                },
+                "cache_type_v_draft": {
+                    "type": "string"
+                },
+                "chat_template": {
+                    "type": "string"
+                },
+                "chat_template_file": {
+                    "type": "string"
+                },
+                "chat_template_kwargs": {
+                    "type": "string"
+                },
+                "check_tensors": {
+                    "type": "boolean"
+                },
+                "cont_batching": {
+                    "type": "boolean"
+                },
+                "control_vector": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "control_vector_layer_range": {
+                    "type": "string"
+                },
+                "control_vector_scaled": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "cpu_mask": {
+                    "type": "string"
+                },
+                "cpu_mask_batch": {
+                    "type": "string"
+                },
+                "cpu_range": {
+                    "type": "string"
+                },
+                "cpu_range_batch": {
+                    "type": "string"
+                },
+                "cpu_strict": {
+                    "type": "integer"
+                },
+                "cpu_strict_batch": {
+                    "type": "integer"
+                },
+                "ctx_size": {
+                    "type": "integer"
+                },
+                "ctx_size_draft": {
+                    "type": "integer"
+                },
+                "defrag_thold": {
+                    "type": "number"
+                },
+                "device": {
+                    "type": "string"
+                },
+                "device_draft": {
+                    "type": "string"
+                },
+                "draft_max": {
+                    "description": "Speculative decoding params",
+                    "type": "integer"
+                },
+                "draft_min": {
+                    "type": "integer"
+                },
+                "draft_p_min": {
+                    "type": "number"
+                },
+                "dry_allowed_length": {
+                    "type": "integer"
+                },
+                "dry_base": {
+                    "type": "number"
+                },
+                "dry_multiplier": {
+                    "type": "number"
+                },
+                "dry_penalty_last_n": {
+                    "type": "integer"
+                },
+                "dry_sequence_breaker": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "dump_kv_cache": {
+                    "type": "boolean"
+                },
+                "dynatemp_exp": {
+                    "type": "number"
+                },
+                "dynatemp_range": {
+                    "type": "number"
+                },
+                "embd_bge_small_en_default": {
+                    "description": "Default model params",
+                    "type": "boolean"
+                },
+                "embd_e5_small_en_default": {
+                    "type": "boolean"
+                },
+                "embd_gte_small_default": {
+                    "type": "boolean"
+                },
+                "embedding": {
+                    "type": "boolean"
+                },
+                "escape": {
+                    "type": "boolean"
+                },
+                "fim_qwen_14b_spec": {
+                    "type": "boolean"
+                },
+                "fim_qwen_1_5b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_3b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_7b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_7b_spec": {
+                    "type": "boolean"
+                },
+                "flash_attn": {
+                    "type": "boolean"
+                },
+                "frequency_penalty": {
+                    "type": "number"
+                },
+                "gpu_layers": {
+                    "type": "integer"
+                },
+                "gpu_layers_draft": {
+                    "type": "integer"
+                },
+                "grammar": {
+                    "type": "string"
+                },
+                "grammar_file": {
+                    "type": "string"
+                },
+                "hf_file": {
+                    "type": "string"
+                },
+                "hf_file_v": {
+                    "type": "string"
+                },
+                "hf_repo": {
+                    "type": "string"
+                },
+                "hf_repo_draft": {
+                    "type": "string"
+                },
+                "hf_repo_v": {
+                    "type": "string"
+                },
+                "hf_token": {
+                    "type": "string"
+                },
+                "host": {
+                    "type": "string"
+                },
+                "ignore_eos": {
+                    "type": "boolean"
+                },
+                "jinja": {
+                    "type": "boolean"
+                },
+                "json_schema": {
+                    "type": "string"
+                },
+                "json_schema_file": {
+                    "type": "string"
+                },
+                "keep": {
+                    "type": "integer"
+                },
+                "log_colors": {
+                    "type": "boolean"
+                },
+                "log_disable": {
+                    "type": "boolean"
+                },
+                "log_file": {
+                    "type": "string"
+                },
+                "log_prefix": {
+                    "type": "boolean"
+                },
+                "log_timestamps": {
+                    "type": "boolean"
+                },
+                "logit_bias": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "lora": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "lora_init_without_apply": {
+                    "type": "boolean"
+                },
+                "lora_scaled": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "main_gpu": {
+                    "type": "integer"
+                },
+                "max_restarts": {
+                    "type": "integer"
+                },
+                "metrics": {
+                    "type": "boolean"
+                },
+                "min_p": {
+                    "type": "number"
+                },
+                "mirostat": {
+                    "type": "integer"
+                },
+                "mirostat_ent": {
+                    "type": "number"
+                },
+                "mirostat_lr": {
+                    "type": "number"
+                },
+                "mlock": {
+                    "type": "boolean"
+                },
+                "mmproj": {
+                    "type": "string"
+                },
+                "mmproj_url": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "model_draft": {
+                    "type": "string"
+                },
+                "model_url": {
+                    "type": "string"
+                },
+                "model_vocoder": {
+                    "description": "Audio/TTS params",
+                    "type": "string"
+                },
+                "no_cont_batching": {
+                    "type": "boolean"
+                },
+                "no_context_shift": {
+                    "description": "Server/Example-specific params",
+                    "type": "boolean"
+                },
+                "no_escape": {
+                    "type": "boolean"
+                },
+                "no_kv_offload": {
+                    "type": "boolean"
+                },
+                "no_mmap": {
+                    "type": "boolean"
+                },
+                "no_mmproj": {
+                    "type": "boolean"
+                },
+                "no_mmproj_offload": {
+                    "type": "boolean"
+                },
+                "no_perf": {
+                    "type": "boolean"
+                },
+                "no_prefill_assistant": {
+                    "type": "boolean"
+                },
+                "no_slots": {
+                    "type": "boolean"
+                },
+                "no_warmup": {
+                    "type": "boolean"
+                },
+                "no_webui": {
+                    "type": "boolean"
+                },
+                "numa": {
+                    "type": "string"
+                },
+                "override_kv": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "override_tensor": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "parallel": {
+                    "type": "integer"
+                },
+                "path": {
+                    "type": "string"
+                },
+                "poll": {
+                    "type": "integer"
+                },
+                "poll_batch": {
+                    "type": "integer"
+                },
+                "pooling": {
+                    "type": "string"
+                },
+                "port": {
+                    "type": "integer"
+                },
+                "predict": {
+                    "type": "integer"
+                },
+                "presence_penalty": {
+                    "type": "number"
+                },
+                "priority": {
+                    "type": "integer"
+                },
+                "priority_batch": {
+                    "type": "integer"
+                },
+                "props": {
+                    "type": "boolean"
+                },
+                "reasoning_budget": {
+                    "type": "integer"
+                },
+                "reasoning_format": {
+                    "type": "string"
+                },
+                "repeat_last_n": {
+                    "type": "integer"
+                },
+                "repeat_penalty": {
+                    "type": "number"
+                },
+                "reranking": {
+                    "type": "boolean"
+                },
+                "restart_delay_seconds": {
+                    "description": "RestartDelay duration in seconds",
+                    "type": "integer"
+                },
+                "rope_freq_base": {
+                    "type": "number"
+                },
+                "rope_freq_scale": {
+                    "type": "number"
+                },
+                "rope_scale": {
+                    "type": "number"
+                },
+                "rope_scaling": {
+                    "type": "string"
+                },
+                "samplers": {
+                    "description": "Sampling params",
+                    "type": "string"
+                },
+                "sampling_seq": {
+                    "type": "string"
+                },
+                "seed": {
+                    "type": "integer"
+                },
+                "slot_prompt_similarity": {
+                    "type": "number"
+                },
+                "slot_save_path": {
+                    "type": "string"
+                },
+                "slots": {
+                    "type": "boolean"
+                },
+                "special": {
+                    "type": "boolean"
+                },
+                "split_mode": {
+                    "type": "string"
+                },
+                "spm_infill": {
+                    "type": "boolean"
+                },
+                "ssl_cert_file": {
+                    "type": "string"
+                },
+                "ssl_key_file": {
+                    "type": "string"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tensor_split": {
+                    "type": "string"
+                },
+                "threads": {
+                    "type": "integer"
+                },
+                "threads_batch": {
+                    "type": "integer"
+                },
+                "threads_http": {
+                    "type": "integer"
+                },
+                "timeout": {
+                    "type": "integer"
+                },
+                "top_k": {
+                    "type": "integer"
+                },
+                "top_p": {
+                    "type": "number"
+                },
+                "tts_use_guide_tokens": {
+                    "type": "boolean"
+                },
+                "typical": {
+                    "type": "number"
+                },
+                "ubatch_size": {
+                    "type": "integer"
+                },
+                "verbose": {
+                    "type": "boolean"
+                },
+                "verbose_prompt": {
+                    "description": "Common params",
+                    "type": "boolean"
+                },
+                "verbosity": {
+                    "type": "integer"
+                },
+                "xtc_probability": {
+                    "type": "number"
+                },
+                "xtc_threshold": {
+                    "type": "number"
+                },
+                "yarn_attn_factor": {
+                    "type": "number"
+                },
+                "yarn_beta_fast": {
+                    "type": "number"
+                },
+                "yarn_beta_slow": {
+                    "type": "number"
+                },
+                "yarn_ext_factor": {
+                    "type": "number"
+                },
+                "yarn_orig_ctx": {
+                    "type": "integer"
+                }
+            }
+        },
        "llamactl.Instance": {
            "type": "object",
            "properties": {
@@ -420,19 +1005,8 @@ const docTemplate = `{
                "running": {
                    "description": "Status",
                    "type": "boolean"
-                },
-                "stdErrChan": {
-                    "description": "Channel for sending error messages",
-                    "type": "object"
-                },
-                "stdOutChan": {
-                    "description": "Output channels",
-                    "type": "object"
                }
            }
-        },
-        "llamactl.InstanceOptions": {
-            "type": "object"
        }
    }
 }`
--- a/server/docs/swagger.json
+++ b/server/docs/swagger.json
@@ -15,9 +15,6 @@
        "/instances": {
            "get": {
                "description": "Returns a list of all instances managed by the server",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -39,50 +36,6 @@
                        }
                    }
                }
-            },
-            "post": {
-                "description": "Creates a new instance with the provided configuration options",
-                "consumes": [
-                    "application/json"
-                ],
-                "produces": [
-                    "application/json"
-                ],
-                "tags": [
-                    "instances"
-                ],
-                "summary": "Create and start a new instance",
-                "parameters": [
-                    {
-                        "description": "Instance configuration options",
-                        "name": "options",
-                        "in": "body",
-                        "required": true,
-                        "schema": {
-                            "$ref": "#/definitions/llamactl.InstanceOptions"
-                        }
-                    }
-                ],
-                "responses": {
-                    "201": {
-                        "description": "Created instance details",
-                        "schema": {
-                            "$ref": "#/definitions/llamactl.Instance"
-                        }
-                    },
-                    "400": {
-                        "description": "Invalid request body",
-                        "schema": {
-                            "type": "string"
-                        }
-                    },
-                    "500": {
-                        "description": "Internal Server Error",
-                        "schema": {
-                            "type": "string"
-                        }
-                    }
-                }
            }
        },
        "/instances/{name}": {
@@ -127,9 +80,6 @@
                "consumes": [
                    "application/json"
                ],
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -148,7 +98,7 @@
                        "in": "body",
                        "required": true,
                        "schema": {
-                            "$ref": "#/definitions/llamactl.InstanceOptions"
+                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
                        }
                    }
                ],
@@ -173,14 +123,59 @@
                    }
                }
            },
-            "delete": {
-                "description": "Stops and removes a specific instance by name",
-                "produces": [
+            "post": {
+                "description": "Creates a new instance with the provided configuration options",
+                "consumes": [
                    "application/json"
                ],
                "tags": [
                    "instances"
                ],
+                "summary": "Create and start a new instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "description": "Instance configuration options",
+                        "name": "options",
+                        "in": "body",
+                        "required": true,
+                        "schema": {
+                            "$ref": "#/definitions/llamactl.CreateInstanceOptions"
+                        }
+                    }
+                ],
+                "responses": {
+                    "201": {
+                        "description": "Created instance details",
+                        "schema": {
+                            "$ref": "#/definitions/llamactl.Instance"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid request body",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            },
+            "delete": {
+                "description": "Stops and removes a specific instance by name",
+                "tags": [
+                    "instances"
+                ],
                "summary": "Delete an instance",
                "parameters": [
                    {
@@ -210,12 +205,94 @@
                }
            }
        },
+        "/instances/{name}/logs": {
+            "get": {
+                "description": "Returns the logs from a specific instance by name with optional line limit",
+                "tags": [
+                    "instances"
+                ],
+                "summary": "Get logs from a specific instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    },
+                    {
+                        "type": "string",
+                        "description": "Number of lines to retrieve (default: all lines)",
+                        "name": "lines",
+                        "in": "query"
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Instance logs",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "400": {
+                        "description": "Invalid name format or lines parameter",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
+        "/instances/{name}/proxy": {
+            "get": {
+                "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
+                "tags": [
+                    "instances"
+                ],
+                "summary": "Proxy requests to a specific instance",
+                "parameters": [
+                    {
+                        "type": "string",
+                        "description": "Instance Name",
+                        "name": "name",
+                        "in": "path",
+                        "required": true
+                    }
+                ],
+                "responses": {
+                    "200": {
+                        "description": "Request successfully proxied to instance"
+                    },
+                    "400": {
+                        "description": "Invalid name format",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "500": {
+                        "description": "Internal Server Error",
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    "503": {
+                        "description": "Instance is not running",
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                }
+            }
+        },
        "/instances/{name}/restart": {
            "post": {
                "description": "Restarts a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -254,9 +331,6 @@
        "/instances/{name}/start": {
            "post": {
                "description": "Starts a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -295,9 +369,6 @@
        "/instances/{name}/stop": {
            "post": {
                "description": "Stops a specific instance by name",
-                "produces": [
-                    "application/json"
-                ],
                "tags": [
                    "instances"
                ],
@@ -404,6 +475,520 @@
        }
    },
    "definitions": {
+        "llamactl.CreateInstanceOptions": {
+            "type": "object",
+            "properties": {
+                "alias": {
+                    "type": "string"
+                },
+                "api_key": {
+                    "type": "string"
+                },
+                "api_key_file": {
+                    "type": "string"
+                },
+                "auto_restart": {
+                    "description": "Auto restart",
+                    "type": "boolean"
+                },
+                "batch_size": {
+                    "type": "integer"
+                },
+                "cache_reuse": {
+                    "type": "integer"
+                },
+                "cache_type_k": {
+                    "type": "string"
+                },
+                "cache_type_k_draft": {
+                    "type": "string"
+                },
+                "cache_type_v": {
+                    "type": "string"
+                },
+                "cache_type_v_draft": {
+                    "type": "string"
+                },
+                "chat_template": {
+                    "type": "string"
+                },
+                "chat_template_file": {
+                    "type": "string"
+                },
+                "chat_template_kwargs": {
+                    "type": "string"
+                },
+                "check_tensors": {
+                    "type": "boolean"
+                },
+                "cont_batching": {
+                    "type": "boolean"
+                },
+                "control_vector": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "control_vector_layer_range": {
+                    "type": "string"
+                },
+                "control_vector_scaled": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "cpu_mask": {
+                    "type": "string"
+                },
+                "cpu_mask_batch": {
+                    "type": "string"
+                },
+                "cpu_range": {
+                    "type": "string"
+                },
+                "cpu_range_batch": {
+                    "type": "string"
+                },
+                "cpu_strict": {
+                    "type": "integer"
+                },
+                "cpu_strict_batch": {
+                    "type": "integer"
+                },
+                "ctx_size": {
+                    "type": "integer"
+                },
+                "ctx_size_draft": {
+                    "type": "integer"
+                },
+                "defrag_thold": {
+                    "type": "number"
+                },
+                "device": {
+                    "type": "string"
+                },
+                "device_draft": {
+                    "type": "string"
+                },
+                "draft_max": {
+                    "description": "Speculative decoding params",
+                    "type": "integer"
+                },
+                "draft_min": {
+                    "type": "integer"
+                },
+                "draft_p_min": {
+                    "type": "number"
+                },
+                "dry_allowed_length": {
+                    "type": "integer"
+                },
+                "dry_base": {
+                    "type": "number"
+                },
+                "dry_multiplier": {
+                    "type": "number"
+                },
+                "dry_penalty_last_n": {
+                    "type": "integer"
+                },
+                "dry_sequence_breaker": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "dump_kv_cache": {
+                    "type": "boolean"
+                },
+                "dynatemp_exp": {
+                    "type": "number"
+                },
+                "dynatemp_range": {
+                    "type": "number"
+                },
+                "embd_bge_small_en_default": {
+                    "description": "Default model params",
+                    "type": "boolean"
+                },
+                "embd_e5_small_en_default": {
+                    "type": "boolean"
+                },
+                "embd_gte_small_default": {
+                    "type": "boolean"
+                },
+                "embedding": {
+                    "type": "boolean"
+                },
+                "escape": {
+                    "type": "boolean"
+                },
+                "fim_qwen_14b_spec": {
+                    "type": "boolean"
+                },
+                "fim_qwen_1_5b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_3b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_7b_default": {
+                    "type": "boolean"
+                },
+                "fim_qwen_7b_spec": {
+                    "type": "boolean"
+                },
+                "flash_attn": {
+                    "type": "boolean"
+                },
+                "frequency_penalty": {
+                    "type": "number"
+                },
+                "gpu_layers": {
+                    "type": "integer"
+                },
+                "gpu_layers_draft": {
+                    "type": "integer"
+                },
+                "grammar": {
+                    "type": "string"
+                },
+                "grammar_file": {
+                    "type": "string"
+                },
+                "hf_file": {
+                    "type": "string"
+                },
+                "hf_file_v": {
+                    "type": "string"
+                },
+                "hf_repo": {
+                    "type": "string"
+                },
+                "hf_repo_draft": {
+                    "type": "string"
+                },
+                "hf_repo_v": {
+                    "type": "string"
+                },
+                "hf_token": {
+                    "type": "string"
+                },
+                "host": {
+                    "type": "string"
+                },
+                "ignore_eos": {
+                    "type": "boolean"
+                },
+                "jinja": {
+                    "type": "boolean"
+                },
+                "json_schema": {
+                    "type": "string"
+                },
+                "json_schema_file": {
+                    "type": "string"
+                },
+                "keep": {
+                    "type": "integer"
+                },
+                "log_colors": {
+                    "type": "boolean"
+                },
+                "log_disable": {
+                    "type": "boolean"
+                },
+                "log_file": {
+                    "type": "string"
+                },
+                "log_prefix": {
+                    "type": "boolean"
+                },
+                "log_timestamps": {
+                    "type": "boolean"
+                },
+                "logit_bias": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "lora": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "lora_init_without_apply": {
+                    "type": "boolean"
+                },
+                "lora_scaled": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "main_gpu": {
+                    "type": "integer"
+                },
+                "max_restarts": {
+                    "type": "integer"
+                },
+                "metrics": {
+                    "type": "boolean"
+                },
+                "min_p": {
+                    "type": "number"
+                },
+                "mirostat": {
+                    "type": "integer"
+                },
+                "mirostat_ent": {
+                    "type": "number"
+                },
+                "mirostat_lr": {
+                    "type": "number"
+                },
+                "mlock": {
+                    "type": "boolean"
+                },
+                "mmproj": {
+                    "type": "string"
+                },
+                "mmproj_url": {
+                    "type": "string"
+                },
+                "model": {
+                    "type": "string"
+                },
+                "model_draft": {
+                    "type": "string"
+                },
+                "model_url": {
+                    "type": "string"
+                },
+                "model_vocoder": {
+                    "description": "Audio/TTS params",
+                    "type": "string"
+                },
+                "no_cont_batching": {
+                    "type": "boolean"
+                },
+                "no_context_shift": {
+                    "description": "Server/Example-specific params",
+                    "type": "boolean"
+                },
+                "no_escape": {
+                    "type": "boolean"
+                },
+                "no_kv_offload": {
+                    "type": "boolean"
+                },
+                "no_mmap": {
+                    "type": "boolean"
+                },
+                "no_mmproj": {
+                    "type": "boolean"
+                },
+                "no_mmproj_offload": {
+                    "type": "boolean"
+                },
+                "no_perf": {
+                    "type": "boolean"
+                },
+                "no_prefill_assistant": {
+                    "type": "boolean"
+                },
+                "no_slots": {
+                    "type": "boolean"
+                },
+                "no_warmup": {
+                    "type": "boolean"
+                },
+                "no_webui": {
+                    "type": "boolean"
+                },
+                "numa": {
+                    "type": "string"
+                },
+                "override_kv": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "override_tensor": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                },
+                "parallel": {
+                    "type": "integer"
+                },
+                "path": {
+                    "type": "string"
+                },
+                "poll": {
+                    "type": "integer"
+                },
+                "poll_batch": {
+                    "type": "integer"
+                },
+                "pooling": {
+                    "type": "string"
+                },
+                "port": {
+                    "type": "integer"
+                },
+                "predict": {
+                    "type": "integer"
+                },
+                "presence_penalty": {
+                    "type": "number"
+                },
+                "priority": {
+                    "type": "integer"
+                },
+                "priority_batch": {
+                    "type": "integer"
+                },
+                "props": {
+                    "type": "boolean"
+                },
+                "reasoning_budget": {
+                    "type": "integer"
+                },
+                "reasoning_format": {
+                    "type": "string"
+                },
+                "repeat_last_n": {
+                    "type": "integer"
+                },
+                "repeat_penalty": {
+                    "type": "number"
+                },
+                "reranking": {
+                    "type": "boolean"
+                },
+                "restart_delay_seconds": {
+                    "description": "RestartDelay duration in seconds",
+                    "type": "integer"
+                },
+                "rope_freq_base": {
+                    "type": "number"
+                },
+                "rope_freq_scale": {
+                    "type": "number"
+                },
+                "rope_scale": {
+                    "type": "number"
+                },
+                "rope_scaling": {
+                    "type": "string"
+                },
+                "samplers": {
+                    "description": "Sampling params",
+                    "type": "string"
+                },
+                "sampling_seq": {
+                    "type": "string"
+                },
+                "seed": {
+                    "type": "integer"
+                },
+                "slot_prompt_similarity": {
+                    "type": "number"
+                },
+                "slot_save_path": {
+                    "type": "string"
+                },
+                "slots": {
+                    "type": "boolean"
+                },
+                "special": {
+                    "type": "boolean"
+                },
+                "split_mode": {
+                    "type": "string"
+                },
+                "spm_infill": {
+                    "type": "boolean"
+                },
+                "ssl_cert_file": {
+                    "type": "string"
+                },
+                "ssl_key_file": {
+                    "type": "string"
+                },
+                "temperature": {
+                    "type": "number"
+                },
+                "tensor_split": {
+                    "type": "string"
+                },
+                "threads": {
+                    "type": "integer"
+                },
+                "threads_batch": {
+                    "type": "integer"
+                },
+                "threads_http": {
+                    "type": "integer"
+                },
+                "timeout": {
+                    "type": "integer"
+                },
+                "top_k": {
+                    "type": "integer"
+                },
+                "top_p": {
+                    "type": "number"
+                },
+                "tts_use_guide_tokens": {
+                    "type": "boolean"
+                },
+                "typical": {
+                    "type": "number"
+                },
+                "ubatch_size": {
+                    "type": "integer"
+                },
+                "verbose": {
+                    "type": "boolean"
+                },
+                "verbose_prompt": {
+                    "description": "Common params",
+                    "type": "boolean"
+                },
+                "verbosity": {
+                    "type": "integer"
+                },
+                "xtc_probability": {
+                    "type": "number"
+                },
+                "xtc_threshold": {
+                    "type": "number"
+                },
+                "yarn_attn_factor": {
+                    "type": "number"
+                },
+                "yarn_beta_fast": {
+                    "type": "number"
+                },
+                "yarn_beta_slow": {
+                    "type": "number"
+                },
+                "yarn_ext_factor": {
+                    "type": "number"
+                },
+                "yarn_orig_ctx": {
+                    "type": "integer"
+                }
+            }
+        },
        "llamactl.Instance": {
            "type": "object",
            "properties": {
@@ -413,19 +998,8 @@
                "running": {
                    "description": "Status",
                    "type": "boolean"
-                },
-                "stdErrChan": {
-                    "description": "Channel for sending error messages",
-                    "type": "object"
-                },
-                "stdOutChan": {
-                    "description": "Output channels",
-                    "type": "object"
                }
            }
-        },
-        "llamactl.InstanceOptions": {
-            "type": "object"
        }
    }
 }
--- a/server/docs/swagger.yaml
+++ b/server/docs/swagger.yaml
@@ -1,5 +1,350 @@
 basePath: /api/v1
 definitions:
+  llamactl.CreateInstanceOptions:
+    properties:
+      alias:
+        type: string
+      api_key:
+        type: string
+      api_key_file:
+        type: string
+      auto_restart:
+        description: Auto restart
+        type: boolean
+      batch_size:
+        type: integer
+      cache_reuse:
+        type: integer
+      cache_type_k:
+        type: string
+      cache_type_k_draft:
+        type: string
+      cache_type_v:
+        type: string
+      cache_type_v_draft:
+        type: string
+      chat_template:
+        type: string
+      chat_template_file:
+        type: string
+      chat_template_kwargs:
+        type: string
+      check_tensors:
+        type: boolean
+      cont_batching:
+        type: boolean
+      control_vector:
+        items:
+          type: string
+        type: array
+      control_vector_layer_range:
+        type: string
+      control_vector_scaled:
+        items:
+          type: string
+        type: array
+      cpu_mask:
+        type: string
+      cpu_mask_batch:
+        type: string
+      cpu_range:
+        type: string
+      cpu_range_batch:
+        type: string
+      cpu_strict:
+        type: integer
+      cpu_strict_batch:
+        type: integer
+      ctx_size:
+        type: integer
+      ctx_size_draft:
+        type: integer
+      defrag_thold:
+        type: number
+      device:
+        type: string
+      device_draft:
+        type: string
+      draft_max:
+        description: Speculative decoding params
+        type: integer
+      draft_min:
+        type: integer
+      draft_p_min:
+        type: number
+      dry_allowed_length:
+        type: integer
+      dry_base:
+        type: number
+      dry_multiplier:
+        type: number
+      dry_penalty_last_n:
+        type: integer
+      dry_sequence_breaker:
+        items:
+          type: string
+        type: array
+      dump_kv_cache:
+        type: boolean
+      dynatemp_exp:
+        type: number
+      dynatemp_range:
+        type: number
+      embd_bge_small_en_default:
+        description: Default model params
+        type: boolean
+      embd_e5_small_en_default:
+        type: boolean
+      embd_gte_small_default:
+        type: boolean
+      embedding:
+        type: boolean
+      escape:
+        type: boolean
+      fim_qwen_1_5b_default:
+        type: boolean
+      fim_qwen_3b_default:
+        type: boolean
+      fim_qwen_7b_default:
+        type: boolean
+      fim_qwen_7b_spec:
+        type: boolean
+      fim_qwen_14b_spec:
+        type: boolean
+      flash_attn:
+        type: boolean
+      frequency_penalty:
+        type: number
+      gpu_layers:
+        type: integer
+      gpu_layers_draft:
+        type: integer
+      grammar:
+        type: string
+      grammar_file:
+        type: string
+      hf_file:
+        type: string
+      hf_file_v:
+        type: string
+      hf_repo:
+        type: string
+      hf_repo_draft:
+        type: string
+      hf_repo_v:
+        type: string
+      hf_token:
+        type: string
+      host:
+        type: string
+      ignore_eos:
+        type: boolean
+      jinja:
+        type: boolean
+      json_schema:
+        type: string
+      json_schema_file:
+        type: string
+      keep:
+        type: integer
+      log_colors:
+        type: boolean
+      log_disable:
+        type: boolean
+      log_file:
+        type: string
+      log_prefix:
+        type: boolean
+      log_timestamps:
+        type: boolean
+      logit_bias:
+        items:
+          type: string
+        type: array
+      lora:
+        items:
+          type: string
+        type: array
+      lora_init_without_apply:
+        type: boolean
+      lora_scaled:
+        items:
+          type: string
+        type: array
+      main_gpu:
+        type: integer
+      max_restarts:
+        type: integer
+      metrics:
+        type: boolean
+      min_p:
+        type: number
+      mirostat:
+        type: integer
+      mirostat_ent:
+        type: number
+      mirostat_lr:
+        type: number
+      mlock:
+        type: boolean
+      mmproj:
+        type: string
+      mmproj_url:
+        type: string
+      model:
+        type: string
+      model_draft:
+        type: string
+      model_url:
+        type: string
+      model_vocoder:
+        description: Audio/TTS params
+        type: string
+      no_cont_batching:
+        type: boolean
+      no_context_shift:
+        description: Server/Example-specific params
+        type: boolean
+      no_escape:
+        type: boolean
+      no_kv_offload:
+        type: boolean
+      no_mmap:
+        type: boolean
+      no_mmproj:
+        type: boolean
+      no_mmproj_offload:
+        type: boolean
+      no_perf:
+        type: boolean
+      no_prefill_assistant:
+        type: boolean
+      no_slots:
+        type: boolean
+      no_warmup:
+        type: boolean
+      no_webui:
+        type: boolean
+      numa:
+        type: string
+      override_kv:
+        items:
+          type: string
+        type: array
+      override_tensor:
+        items:
+          type: string
+        type: array
+      parallel:
+        type: integer
+      path:
+        type: string
+      poll:
+        type: integer
+      poll_batch:
+        type: integer
+      pooling:
+        type: string
+      port:
+        type: integer
+      predict:
+        type: integer
+      presence_penalty:
+        type: number
+      priority:
+        type: integer
+      priority_batch:
+        type: integer
+      props:
+        type: boolean
+      reasoning_budget:
+        type: integer
+      reasoning_format:
+        type: string
+      repeat_last_n:
+        type: integer
+      repeat_penalty:
+        type: number
+      reranking:
+        type: boolean
+      restart_delay_seconds:
+        description: RestartDelay duration in seconds
+        type: integer
+      rope_freq_base:
+        type: number
+      rope_freq_scale:
+        type: number
+      rope_scale:
+        type: number
+      rope_scaling:
+        type: string
+      samplers:
+        description: Sampling params
+        type: string
+      sampling_seq:
+        type: string
+      seed:
+        type: integer
+      slot_prompt_similarity:
+        type: number
+      slot_save_path:
+        type: string
+      slots:
+        type: boolean
+      special:
+        type: boolean
+      split_mode:
+        type: string
+      spm_infill:
+        type: boolean
+      ssl_cert_file:
+        type: string
+      ssl_key_file:
+        type: string
+      temperature:
+        type: number
+      tensor_split:
+        type: string
+      threads:
+        type: integer
+      threads_batch:
+        type: integer
+      threads_http:
+        type: integer
+      timeout:
+        type: integer
+      top_k:
+        type: integer
+      top_p:
+        type: number
+      tts_use_guide_tokens:
+        type: boolean
+      typical:
+        type: number
+      ubatch_size:
+        type: integer
+      verbose:
+        type: boolean
+      verbose_prompt:
+        description: Common params
+        type: boolean
+      verbosity:
+        type: integer
+      xtc_probability:
+        type: number
+      xtc_threshold:
+        type: number
+      yarn_attn_factor:
+        type: number
+      yarn_beta_fast:
+        type: number
+      yarn_beta_slow:
+        type: number
+      yarn_ext_factor:
+        type: number
+      yarn_orig_ctx:
+        type: integer
+    type: object
  llamactl.Instance:
    properties:
      name:
@@ -7,14 +352,6 @@ definitions:
      running:
        description: Status
        type: boolean
-      stdErrChan:
-        description: Channel for sending error messages
-        type: object
-      stdOutChan:
-        description: Output channels
-        type: object
-    type: object
-  llamactl.InstanceOptions:
    type: object
 info:
  contact: {}
@@ -28,8 +365,6 @@ paths:
  /instances:
    get:
      description: Returns a list of all instances managed by the server
-      produces:
-      - application/json
      responses:
        "200":
          description: List of instances
@@ -44,35 +379,6 @@ paths:
      summary: List all instances
      tags:
      - instances
-    post:
-      consumes:
-      - application/json
-      description: Creates a new instance with the provided configuration options
-      parameters:
-      - description: Instance configuration options
-        in: body
-        name: options
-        required: true
-        schema:
-          $ref: '#/definitions/llamactl.InstanceOptions'
-      produces:
-      - application/json
-      responses:
-        "201":
-          description: Created instance details
-          schema:
-            $ref: '#/definitions/llamactl.Instance'
-        "400":
-          description: Invalid request body
-          schema:
-            type: string
-        "500":
-          description: Internal Server Error
-          schema:
-            type: string
-      summary: Create and start a new instance
-      tags:
-      - instances
  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
@@ -82,8 +388,6 @@ paths:
        name: name
        required: true
        type: string
-      produces:
-      - application/json
      responses:
        "204":
          description: No Content
@@ -122,6 +426,38 @@ paths:
      summary: Get details of a specific instance
      tags:
      - instances
+    post:
+      consumes:
+      - application/json
+      description: Creates a new instance with the provided configuration options
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      - description: Instance configuration options
+        in: body
+        name: options
+        required: true
+        schema:
+          $ref: '#/definitions/llamactl.CreateInstanceOptions'
+      responses:
+        "201":
+          description: Created instance details
+          schema:
+            $ref: '#/definitions/llamactl.Instance'
+        "400":
+          description: Invalid request body
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      summary: Create and start a new instance
+      tags:
+      - instances
    put:
      consumes:
      - application/json
@@ -137,9 +473,7 @@ paths:
        name: options
        required: true
        schema:
-          $ref: '#/definitions/llamactl.InstanceOptions'
-      produces:
-      - application/json
+          $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
@@ -156,6 +490,64 @@ paths:
      summary: Update an instance's configuration
      tags:
      - instances
+  /instances/{name}/logs:
+    get:
+      description: Returns the logs from a specific instance by name with optional
+        line limit
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      - description: 'Number of lines to retrieve (default: all lines)'
+        in: query
+        name: lines
+        type: string
+      responses:
+        "200":
+          description: Instance logs
+          schema:
+            type: string
+        "400":
+          description: Invalid name format or lines parameter
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+      summary: Get logs from a specific instance
+      tags:
+      - instances
+  /instances/{name}/proxy:
+    get:
+      description: Forwards HTTP requests to the llama-server instance running on
+        a specific port
+      parameters:
+      - description: Instance Name
+        in: path
+        name: name
+        required: true
+        type: string
+      responses:
+        "200":
+          description: Request successfully proxied to instance
+        "400":
+          description: Invalid name format
+          schema:
+            type: string
+        "500":
+          description: Internal Server Error
+          schema:
+            type: string
+        "503":
+          description: Instance is not running
+          schema:
+            type: string
+      summary: Proxy requests to a specific instance
+      tags:
+      - instances
  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
@@ -165,8 +557,6 @@ paths:
        name: name
        required: true
        type: string
-      produces:
-      - application/json
      responses:
        "200":
          description: Restarted instance details
@@ -192,8 +582,6 @@ paths:
        name: name
        required: true
        type: string
-      produces:
-      - application/json
      responses:
        "200":
          description: Started instance details
@@ -219,8 +607,6 @@ paths:
        name: name
        required: true
        type: string
-      produces:
-      - application/json
      responses:
        "200":
          description: Stopped instance details