diff --git a/server/docs/docs.go b/server/docs/docs.go index 9f83d5e..851dee8 100644 --- a/server/docs/docs.go +++ b/server/docs/docs.go @@ -22,9 +22,6 @@ const docTemplate = `{ "/instances": { "get": { "description": "Returns a list of all instances managed by the server", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -46,50 +43,6 @@ const docTemplate = `{ } } } - }, - "post": { - "description": "Creates a new instance with the provided configuration options", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "instances" - ], - "summary": "Create and start a new instance", - "parameters": [ - { - "description": "Instance configuration options", - "name": "options", - "in": "body", - "required": true, - "schema": { - "$ref": "#/definitions/llamactl.InstanceOptions" - } - } - ], - "responses": { - "201": { - "description": "Created instance details", - "schema": { - "$ref": "#/definitions/llamactl.Instance" - } - }, - "400": { - "description": "Invalid request body", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } } }, "/instances/{name}": { @@ -134,9 +87,6 @@ const docTemplate = `{ "consumes": [ "application/json" ], - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -155,7 +105,7 @@ const docTemplate = `{ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/llamactl.InstanceOptions" + "$ref": "#/definitions/llamactl.CreateInstanceOptions" } } ], @@ -180,14 +130,59 @@ const docTemplate = `{ } } }, - "delete": { - "description": "Stops and removes a specific instance by name", - "produces": [ + "post": { + "description": "Creates a new instance with the provided configuration options", + "consumes": [ "application/json" ], "tags": [ "instances" ], + "summary": "Create and start a new instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Instance configuration options", + "name": "options", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/llamactl.CreateInstanceOptions" + } + } + ], + "responses": { + "201": { + "description": "Created instance details", + "schema": { + "$ref": "#/definitions/llamactl.Instance" + } + }, + "400": { + "description": "Invalid request body", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "delete": { + "description": "Stops and removes a specific instance by name", + "tags": [ + "instances" + ], "summary": "Delete an instance", "parameters": [ { @@ -217,12 +212,94 @@ const docTemplate = `{ } } }, + "/instances/{name}/logs": { + "get": { + "description": "Returns the logs from a specific instance by name with optional line limit", + "tags": [ + "instances" + ], + "summary": "Get logs from a specific instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + }, + { + "type": "string", + "description": "Number of lines to retrieve (default: all lines)", + "name": "lines", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Instance logs", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid name format or lines parameter", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/instances/{name}/proxy": { + "get": { + "description": "Forwards HTTP requests to the llama-server instance running on a specific port", + "tags": [ + "instances" + ], + "summary": "Proxy requests to a specific instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Request successfully proxied to instance" + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + }, + "503": { + "description": "Instance is not running", + "schema": { + "type": "string" + } + } + } + } + }, "/instances/{name}/restart": { "post": { "description": "Restarts a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -261,9 +338,6 @@ const docTemplate = `{ "/instances/{name}/start": { "post": { "description": "Starts a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -302,9 +376,6 @@ const docTemplate = `{ "/instances/{name}/stop": { "post": { "description": "Stops a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -411,6 +482,520 @@ const docTemplate = `{ } }, "definitions": { + "llamactl.CreateInstanceOptions": { + "type": "object", + "properties": { + "alias": { + "type": "string" + }, + "api_key": { + "type": "string" + }, + "api_key_file": { + "type": "string" + }, + "auto_restart": { + "description": "Auto restart", + "type": "boolean" + }, + "batch_size": { + "type": "integer" + }, + "cache_reuse": { + "type": "integer" + }, + "cache_type_k": { + "type": "string" + }, + "cache_type_k_draft": { + "type": "string" + }, + "cache_type_v": { + "type": "string" + }, + "cache_type_v_draft": { + "type": "string" + }, + "chat_template": { + "type": "string" + }, + "chat_template_file": { + "type": "string" + }, + "chat_template_kwargs": { + "type": "string" + }, + "check_tensors": { + "type": "boolean" + }, + "cont_batching": { + "type": "boolean" + }, + "control_vector": { + "type": "array", + "items": { + "type": "string" + } + }, + "control_vector_layer_range": { + "type": "string" + }, + "control_vector_scaled": { + "type": "array", + "items": { + "type": "string" + } + }, + "cpu_mask": { + "type": "string" + }, + "cpu_mask_batch": { + "type": "string" + }, + "cpu_range": { + "type": "string" + }, + "cpu_range_batch": { + "type": "string" + }, + "cpu_strict": { + "type": "integer" + }, + "cpu_strict_batch": { + "type": "integer" + }, + "ctx_size": { + "type": "integer" + }, + "ctx_size_draft": { + "type": "integer" + }, + "defrag_thold": { + "type": "number" + }, + "device": { + "type": "string" + }, + "device_draft": { + "type": "string" + }, + "draft_max": { + "description": "Speculative decoding params", + "type": "integer" + }, + "draft_min": { + "type": "integer" + }, + "draft_p_min": { + "type": "number" + }, + "dry_allowed_length": { + "type": "integer" + }, + "dry_base": { + "type": "number" + }, + "dry_multiplier": { + "type": "number" + }, + "dry_penalty_last_n": { + "type": "integer" + }, + "dry_sequence_breaker": { + "type": "array", + "items": { + "type": "string" + } + }, + "dump_kv_cache": { + "type": "boolean" + }, + "dynatemp_exp": { + "type": "number" + }, + "dynatemp_range": { + "type": "number" + }, + "embd_bge_small_en_default": { + "description": "Default model params", + "type": "boolean" + }, + "embd_e5_small_en_default": { + "type": "boolean" + }, + "embd_gte_small_default": { + "type": "boolean" + }, + "embedding": { + "type": "boolean" + }, + "escape": { + "type": "boolean" + }, + "fim_qwen_14b_spec": { + "type": "boolean" + }, + "fim_qwen_1_5b_default": { + "type": "boolean" + }, + "fim_qwen_3b_default": { + "type": "boolean" + }, + "fim_qwen_7b_default": { + "type": "boolean" + }, + "fim_qwen_7b_spec": { + "type": "boolean" + }, + "flash_attn": { + "type": "boolean" + }, + "frequency_penalty": { + "type": "number" + }, + "gpu_layers": { + "type": "integer" + }, + "gpu_layers_draft": { + "type": "integer" + }, + "grammar": { + "type": "string" + }, + "grammar_file": { + "type": "string" + }, + "hf_file": { + "type": "string" + }, + "hf_file_v": { + "type": "string" + }, + "hf_repo": { + "type": "string" + }, + "hf_repo_draft": { + "type": "string" + }, + "hf_repo_v": { + "type": "string" + }, + "hf_token": { + "type": "string" + }, + "host": { + "type": "string" + }, + "ignore_eos": { + "type": "boolean" + }, + "jinja": { + "type": "boolean" + }, + "json_schema": { + "type": "string" + }, + "json_schema_file": { + "type": "string" + }, + "keep": { + "type": "integer" + }, + "log_colors": { + "type": "boolean" + }, + "log_disable": { + "type": "boolean" + }, + "log_file": { + "type": "string" + }, + "log_prefix": { + "type": "boolean" + }, + "log_timestamps": { + "type": "boolean" + }, + "logit_bias": { + "type": "array", + "items": { + "type": "string" + } + }, + "lora": { + "type": "array", + "items": { + "type": "string" + } + }, + "lora_init_without_apply": { + "type": "boolean" + }, + "lora_scaled": { + "type": "array", + "items": { + "type": "string" + } + }, + "main_gpu": { + "type": "integer" + }, + "max_restarts": { + "type": "integer" + }, + "metrics": { + "type": "boolean" + }, + "min_p": { + "type": "number" + }, + "mirostat": { + "type": "integer" + }, + "mirostat_ent": { + "type": "number" + }, + "mirostat_lr": { + "type": "number" + }, + "mlock": { + "type": "boolean" + }, + "mmproj": { + "type": "string" + }, + "mmproj_url": { + "type": "string" + }, + "model": { + "type": "string" + }, + "model_draft": { + "type": "string" + }, + "model_url": { + "type": "string" + }, + "model_vocoder": { + "description": "Audio/TTS params", + "type": "string" + }, + "no_cont_batching": { + "type": "boolean" + }, + "no_context_shift": { + "description": "Server/Example-specific params", + "type": "boolean" + }, + "no_escape": { + "type": "boolean" + }, + "no_kv_offload": { + "type": "boolean" + }, + "no_mmap": { + "type": "boolean" + }, + "no_mmproj": { + "type": "boolean" + }, + "no_mmproj_offload": { + "type": "boolean" + }, + "no_perf": { + "type": "boolean" + }, + "no_prefill_assistant": { + "type": "boolean" + }, + "no_slots": { + "type": "boolean" + }, + "no_warmup": { + "type": "boolean" + }, + "no_webui": { + "type": "boolean" + }, + "numa": { + "type": "string" + }, + "override_kv": { + "type": "array", + "items": { + "type": "string" + } + }, + "override_tensor": { + "type": "array", + "items": { + "type": "string" + } + }, + "parallel": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "poll": { + "type": "integer" + }, + "poll_batch": { + "type": "integer" + }, + "pooling": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "predict": { + "type": "integer" + }, + "presence_penalty": { + "type": "number" + }, + "priority": { + "type": "integer" + }, + "priority_batch": { + "type": "integer" + }, + "props": { + "type": "boolean" + }, + "reasoning_budget": { + "type": "integer" + }, + "reasoning_format": { + "type": "string" + }, + "repeat_last_n": { + "type": "integer" + }, + "repeat_penalty": { + "type": "number" + }, + "reranking": { + "type": "boolean" + }, + "restart_delay_seconds": { + "description": "RestartDelay duration in seconds", + "type": "integer" + }, + "rope_freq_base": { + "type": "number" + }, + "rope_freq_scale": { + "type": "number" + }, + "rope_scale": { + "type": "number" + }, + "rope_scaling": { + "type": "string" + }, + "samplers": { + "description": "Sampling params", + "type": "string" + }, + "sampling_seq": { + "type": "string" + }, + "seed": { + "type": "integer" + }, + "slot_prompt_similarity": { + "type": "number" + }, + "slot_save_path": { + "type": "string" + }, + "slots": { + "type": "boolean" + }, + "special": { + "type": "boolean" + }, + "split_mode": { + "type": "string" + }, + "spm_infill": { + "type": "boolean" + }, + "ssl_cert_file": { + "type": "string" + }, + "ssl_key_file": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "tensor_split": { + "type": "string" + }, + "threads": { + "type": "integer" + }, + "threads_batch": { + "type": "integer" + }, + "threads_http": { + "type": "integer" + }, + "timeout": { + "type": "integer" + }, + "top_k": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "tts_use_guide_tokens": { + "type": "boolean" + }, + "typical": { + "type": "number" + }, + "ubatch_size": { + "type": "integer" + }, + "verbose": { + "type": "boolean" + }, + "verbose_prompt": { + "description": "Common params", + "type": "boolean" + }, + "verbosity": { + "type": "integer" + }, + "xtc_probability": { + "type": "number" + }, + "xtc_threshold": { + "type": "number" + }, + "yarn_attn_factor": { + "type": "number" + }, + "yarn_beta_fast": { + "type": "number" + }, + "yarn_beta_slow": { + "type": "number" + }, + "yarn_ext_factor": { + "type": "number" + }, + "yarn_orig_ctx": { + "type": "integer" + } + } + }, "llamactl.Instance": { "type": "object", "properties": { @@ -420,19 +1005,8 @@ const docTemplate = `{ "running": { "description": "Status", "type": "boolean" - }, - "stdErrChan": { - "description": "Channel for sending error messages", - "type": "object" - }, - "stdOutChan": { - "description": "Output channels", - "type": "object" } } - }, - "llamactl.InstanceOptions": { - "type": "object" } } }` diff --git a/server/docs/swagger.json b/server/docs/swagger.json index 62116f2..e9ad2b1 100644 --- a/server/docs/swagger.json +++ b/server/docs/swagger.json @@ -15,9 +15,6 @@ "/instances": { "get": { "description": "Returns a list of all instances managed by the server", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -39,50 +36,6 @@ } } } - }, - "post": { - "description": "Creates a new instance with the provided configuration options", - "consumes": [ - "application/json" - ], - "produces": [ - "application/json" - ], - "tags": [ - "instances" - ], - "summary": "Create and start a new instance", - "parameters": [ - { - "description": "Instance configuration options", - "name": "options", - "in": "body", - "required": true, - "schema": { - "$ref": "#/definitions/llamactl.InstanceOptions" - } - } - ], - "responses": { - "201": { - "description": "Created instance details", - "schema": { - "$ref": "#/definitions/llamactl.Instance" - } - }, - "400": { - "description": "Invalid request body", - "schema": { - "type": "string" - } - }, - "500": { - "description": "Internal Server Error", - "schema": { - "type": "string" - } - } - } } }, "/instances/{name}": { @@ -127,9 +80,6 @@ "consumes": [ "application/json" ], - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -148,7 +98,7 @@ "in": "body", "required": true, "schema": { - "$ref": "#/definitions/llamactl.InstanceOptions" + "$ref": "#/definitions/llamactl.CreateInstanceOptions" } } ], @@ -173,14 +123,59 @@ } } }, - "delete": { - "description": "Stops and removes a specific instance by name", - "produces": [ + "post": { + "description": "Creates a new instance with the provided configuration options", + "consumes": [ "application/json" ], "tags": [ "instances" ], + "summary": "Create and start a new instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + }, + { + "description": "Instance configuration options", + "name": "options", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/llamactl.CreateInstanceOptions" + } + } + ], + "responses": { + "201": { + "description": "Created instance details", + "schema": { + "$ref": "#/definitions/llamactl.Instance" + } + }, + "400": { + "description": "Invalid request body", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + }, + "delete": { + "description": "Stops and removes a specific instance by name", + "tags": [ + "instances" + ], "summary": "Delete an instance", "parameters": [ { @@ -210,12 +205,94 @@ } } }, + "/instances/{name}/logs": { + "get": { + "description": "Returns the logs from a specific instance by name with optional line limit", + "tags": [ + "instances" + ], + "summary": "Get logs from a specific instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + }, + { + "type": "string", + "description": "Number of lines to retrieve (default: all lines)", + "name": "lines", + "in": "query" + } + ], + "responses": { + "200": { + "description": "Instance logs", + "schema": { + "type": "string" + } + }, + "400": { + "description": "Invalid name format or lines parameter", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + } + } + } + }, + "/instances/{name}/proxy": { + "get": { + "description": "Forwards HTTP requests to the llama-server instance running on a specific port", + "tags": [ + "instances" + ], + "summary": "Proxy requests to a specific instance", + "parameters": [ + { + "type": "string", + "description": "Instance Name", + "name": "name", + "in": "path", + "required": true + } + ], + "responses": { + "200": { + "description": "Request successfully proxied to instance" + }, + "400": { + "description": "Invalid name format", + "schema": { + "type": "string" + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "string" + } + }, + "503": { + "description": "Instance is not running", + "schema": { + "type": "string" + } + } + } + } + }, "/instances/{name}/restart": { "post": { "description": "Restarts a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -254,9 +331,6 @@ "/instances/{name}/start": { "post": { "description": "Starts a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -295,9 +369,6 @@ "/instances/{name}/stop": { "post": { "description": "Stops a specific instance by name", - "produces": [ - "application/json" - ], "tags": [ "instances" ], @@ -404,6 +475,520 @@ } }, "definitions": { + "llamactl.CreateInstanceOptions": { + "type": "object", + "properties": { + "alias": { + "type": "string" + }, + "api_key": { + "type": "string" + }, + "api_key_file": { + "type": "string" + }, + "auto_restart": { + "description": "Auto restart", + "type": "boolean" + }, + "batch_size": { + "type": "integer" + }, + "cache_reuse": { + "type": "integer" + }, + "cache_type_k": { + "type": "string" + }, + "cache_type_k_draft": { + "type": "string" + }, + "cache_type_v": { + "type": "string" + }, + "cache_type_v_draft": { + "type": "string" + }, + "chat_template": { + "type": "string" + }, + "chat_template_file": { + "type": "string" + }, + "chat_template_kwargs": { + "type": "string" + }, + "check_tensors": { + "type": "boolean" + }, + "cont_batching": { + "type": "boolean" + }, + "control_vector": { + "type": "array", + "items": { + "type": "string" + } + }, + "control_vector_layer_range": { + "type": "string" + }, + "control_vector_scaled": { + "type": "array", + "items": { + "type": "string" + } + }, + "cpu_mask": { + "type": "string" + }, + "cpu_mask_batch": { + "type": "string" + }, + "cpu_range": { + "type": "string" + }, + "cpu_range_batch": { + "type": "string" + }, + "cpu_strict": { + "type": "integer" + }, + "cpu_strict_batch": { + "type": "integer" + }, + "ctx_size": { + "type": "integer" + }, + "ctx_size_draft": { + "type": "integer" + }, + "defrag_thold": { + "type": "number" + }, + "device": { + "type": "string" + }, + "device_draft": { + "type": "string" + }, + "draft_max": { + "description": "Speculative decoding params", + "type": "integer" + }, + "draft_min": { + "type": "integer" + }, + "draft_p_min": { + "type": "number" + }, + "dry_allowed_length": { + "type": "integer" + }, + "dry_base": { + "type": "number" + }, + "dry_multiplier": { + "type": "number" + }, + "dry_penalty_last_n": { + "type": "integer" + }, + "dry_sequence_breaker": { + "type": "array", + "items": { + "type": "string" + } + }, + "dump_kv_cache": { + "type": "boolean" + }, + "dynatemp_exp": { + "type": "number" + }, + "dynatemp_range": { + "type": "number" + }, + "embd_bge_small_en_default": { + "description": "Default model params", + "type": "boolean" + }, + "embd_e5_small_en_default": { + "type": "boolean" + }, + "embd_gte_small_default": { + "type": "boolean" + }, + "embedding": { + "type": "boolean" + }, + "escape": { + "type": "boolean" + }, + "fim_qwen_14b_spec": { + "type": "boolean" + }, + "fim_qwen_1_5b_default": { + "type": "boolean" + }, + "fim_qwen_3b_default": { + "type": "boolean" + }, + "fim_qwen_7b_default": { + "type": "boolean" + }, + "fim_qwen_7b_spec": { + "type": "boolean" + }, + "flash_attn": { + "type": "boolean" + }, + "frequency_penalty": { + "type": "number" + }, + "gpu_layers": { + "type": "integer" + }, + "gpu_layers_draft": { + "type": "integer" + }, + "grammar": { + "type": "string" + }, + "grammar_file": { + "type": "string" + }, + "hf_file": { + "type": "string" + }, + "hf_file_v": { + "type": "string" + }, + "hf_repo": { + "type": "string" + }, + "hf_repo_draft": { + "type": "string" + }, + "hf_repo_v": { + "type": "string" + }, + "hf_token": { + "type": "string" + }, + "host": { + "type": "string" + }, + "ignore_eos": { + "type": "boolean" + }, + "jinja": { + "type": "boolean" + }, + "json_schema": { + "type": "string" + }, + "json_schema_file": { + "type": "string" + }, + "keep": { + "type": "integer" + }, + "log_colors": { + "type": "boolean" + }, + "log_disable": { + "type": "boolean" + }, + "log_file": { + "type": "string" + }, + "log_prefix": { + "type": "boolean" + }, + "log_timestamps": { + "type": "boolean" + }, + "logit_bias": { + "type": "array", + "items": { + "type": "string" + } + }, + "lora": { + "type": "array", + "items": { + "type": "string" + } + }, + "lora_init_without_apply": { + "type": "boolean" + }, + "lora_scaled": { + "type": "array", + "items": { + "type": "string" + } + }, + "main_gpu": { + "type": "integer" + }, + "max_restarts": { + "type": "integer" + }, + "metrics": { + "type": "boolean" + }, + "min_p": { + "type": "number" + }, + "mirostat": { + "type": "integer" + }, + "mirostat_ent": { + "type": "number" + }, + "mirostat_lr": { + "type": "number" + }, + "mlock": { + "type": "boolean" + }, + "mmproj": { + "type": "string" + }, + "mmproj_url": { + "type": "string" + }, + "model": { + "type": "string" + }, + "model_draft": { + "type": "string" + }, + "model_url": { + "type": "string" + }, + "model_vocoder": { + "description": "Audio/TTS params", + "type": "string" + }, + "no_cont_batching": { + "type": "boolean" + }, + "no_context_shift": { + "description": "Server/Example-specific params", + "type": "boolean" + }, + "no_escape": { + "type": "boolean" + }, + "no_kv_offload": { + "type": "boolean" + }, + "no_mmap": { + "type": "boolean" + }, + "no_mmproj": { + "type": "boolean" + }, + "no_mmproj_offload": { + "type": "boolean" + }, + "no_perf": { + "type": "boolean" + }, + "no_prefill_assistant": { + "type": "boolean" + }, + "no_slots": { + "type": "boolean" + }, + "no_warmup": { + "type": "boolean" + }, + "no_webui": { + "type": "boolean" + }, + "numa": { + "type": "string" + }, + "override_kv": { + "type": "array", + "items": { + "type": "string" + } + }, + "override_tensor": { + "type": "array", + "items": { + "type": "string" + } + }, + "parallel": { + "type": "integer" + }, + "path": { + "type": "string" + }, + "poll": { + "type": "integer" + }, + "poll_batch": { + "type": "integer" + }, + "pooling": { + "type": "string" + }, + "port": { + "type": "integer" + }, + "predict": { + "type": "integer" + }, + "presence_penalty": { + "type": "number" + }, + "priority": { + "type": "integer" + }, + "priority_batch": { + "type": "integer" + }, + "props": { + "type": "boolean" + }, + "reasoning_budget": { + "type": "integer" + }, + "reasoning_format": { + "type": "string" + }, + "repeat_last_n": { + "type": "integer" + }, + "repeat_penalty": { + "type": "number" + }, + "reranking": { + "type": "boolean" + }, + "restart_delay_seconds": { + "description": "RestartDelay duration in seconds", + "type": "integer" + }, + "rope_freq_base": { + "type": "number" + }, + "rope_freq_scale": { + "type": "number" + }, + "rope_scale": { + "type": "number" + }, + "rope_scaling": { + "type": "string" + }, + "samplers": { + "description": "Sampling params", + "type": "string" + }, + "sampling_seq": { + "type": "string" + }, + "seed": { + "type": "integer" + }, + "slot_prompt_similarity": { + "type": "number" + }, + "slot_save_path": { + "type": "string" + }, + "slots": { + "type": "boolean" + }, + "special": { + "type": "boolean" + }, + "split_mode": { + "type": "string" + }, + "spm_infill": { + "type": "boolean" + }, + "ssl_cert_file": { + "type": "string" + }, + "ssl_key_file": { + "type": "string" + }, + "temperature": { + "type": "number" + }, + "tensor_split": { + "type": "string" + }, + "threads": { + "type": "integer" + }, + "threads_batch": { + "type": "integer" + }, + "threads_http": { + "type": "integer" + }, + "timeout": { + "type": "integer" + }, + "top_k": { + "type": "integer" + }, + "top_p": { + "type": "number" + }, + "tts_use_guide_tokens": { + "type": "boolean" + }, + "typical": { + "type": "number" + }, + "ubatch_size": { + "type": "integer" + }, + "verbose": { + "type": "boolean" + }, + "verbose_prompt": { + "description": "Common params", + "type": "boolean" + }, + "verbosity": { + "type": "integer" + }, + "xtc_probability": { + "type": "number" + }, + "xtc_threshold": { + "type": "number" + }, + "yarn_attn_factor": { + "type": "number" + }, + "yarn_beta_fast": { + "type": "number" + }, + "yarn_beta_slow": { + "type": "number" + }, + "yarn_ext_factor": { + "type": "number" + }, + "yarn_orig_ctx": { + "type": "integer" + } + } + }, "llamactl.Instance": { "type": "object", "properties": { @@ -413,19 +998,8 @@ "running": { "description": "Status", "type": "boolean" - }, - "stdErrChan": { - "description": "Channel for sending error messages", - "type": "object" - }, - "stdOutChan": { - "description": "Output channels", - "type": "object" } } - }, - "llamactl.InstanceOptions": { - "type": "object" } } } \ No newline at end of file diff --git a/server/docs/swagger.yaml b/server/docs/swagger.yaml index 20ce023..0d373ff 100644 --- a/server/docs/swagger.yaml +++ b/server/docs/swagger.yaml @@ -1,5 +1,350 @@ basePath: /api/v1 definitions: + llamactl.CreateInstanceOptions: + properties: + alias: + type: string + api_key: + type: string + api_key_file: + type: string + auto_restart: + description: Auto restart + type: boolean + batch_size: + type: integer + cache_reuse: + type: integer + cache_type_k: + type: string + cache_type_k_draft: + type: string + cache_type_v: + type: string + cache_type_v_draft: + type: string + chat_template: + type: string + chat_template_file: + type: string + chat_template_kwargs: + type: string + check_tensors: + type: boolean + cont_batching: + type: boolean + control_vector: + items: + type: string + type: array + control_vector_layer_range: + type: string + control_vector_scaled: + items: + type: string + type: array + cpu_mask: + type: string + cpu_mask_batch: + type: string + cpu_range: + type: string + cpu_range_batch: + type: string + cpu_strict: + type: integer + cpu_strict_batch: + type: integer + ctx_size: + type: integer + ctx_size_draft: + type: integer + defrag_thold: + type: number + device: + type: string + device_draft: + type: string + draft_max: + description: Speculative decoding params + type: integer + draft_min: + type: integer + draft_p_min: + type: number + dry_allowed_length: + type: integer + dry_base: + type: number + dry_multiplier: + type: number + dry_penalty_last_n: + type: integer + dry_sequence_breaker: + items: + type: string + type: array + dump_kv_cache: + type: boolean + dynatemp_exp: + type: number + dynatemp_range: + type: number + embd_bge_small_en_default: + description: Default model params + type: boolean + embd_e5_small_en_default: + type: boolean + embd_gte_small_default: + type: boolean + embedding: + type: boolean + escape: + type: boolean + fim_qwen_1_5b_default: + type: boolean + fim_qwen_3b_default: + type: boolean + fim_qwen_7b_default: + type: boolean + fim_qwen_7b_spec: + type: boolean + fim_qwen_14b_spec: + type: boolean + flash_attn: + type: boolean + frequency_penalty: + type: number + gpu_layers: + type: integer + gpu_layers_draft: + type: integer + grammar: + type: string + grammar_file: + type: string + hf_file: + type: string + hf_file_v: + type: string + hf_repo: + type: string + hf_repo_draft: + type: string + hf_repo_v: + type: string + hf_token: + type: string + host: + type: string + ignore_eos: + type: boolean + jinja: + type: boolean + json_schema: + type: string + json_schema_file: + type: string + keep: + type: integer + log_colors: + type: boolean + log_disable: + type: boolean + log_file: + type: string + log_prefix: + type: boolean + log_timestamps: + type: boolean + logit_bias: + items: + type: string + type: array + lora: + items: + type: string + type: array + lora_init_without_apply: + type: boolean + lora_scaled: + items: + type: string + type: array + main_gpu: + type: integer + max_restarts: + type: integer + metrics: + type: boolean + min_p: + type: number + mirostat: + type: integer + mirostat_ent: + type: number + mirostat_lr: + type: number + mlock: + type: boolean + mmproj: + type: string + mmproj_url: + type: string + model: + type: string + model_draft: + type: string + model_url: + type: string + model_vocoder: + description: Audio/TTS params + type: string + no_cont_batching: + type: boolean + no_context_shift: + description: Server/Example-specific params + type: boolean + no_escape: + type: boolean + no_kv_offload: + type: boolean + no_mmap: + type: boolean + no_mmproj: + type: boolean + no_mmproj_offload: + type: boolean + no_perf: + type: boolean + no_prefill_assistant: + type: boolean + no_slots: + type: boolean + no_warmup: + type: boolean + no_webui: + type: boolean + numa: + type: string + override_kv: + items: + type: string + type: array + override_tensor: + items: + type: string + type: array + parallel: + type: integer + path: + type: string + poll: + type: integer + poll_batch: + type: integer + pooling: + type: string + port: + type: integer + predict: + type: integer + presence_penalty: + type: number + priority: + type: integer + priority_batch: + type: integer + props: + type: boolean + reasoning_budget: + type: integer + reasoning_format: + type: string + repeat_last_n: + type: integer + repeat_penalty: + type: number + reranking: + type: boolean + restart_delay_seconds: + description: RestartDelay duration in seconds + type: integer + rope_freq_base: + type: number + rope_freq_scale: + type: number + rope_scale: + type: number + rope_scaling: + type: string + samplers: + description: Sampling params + type: string + sampling_seq: + type: string + seed: + type: integer + slot_prompt_similarity: + type: number + slot_save_path: + type: string + slots: + type: boolean + special: + type: boolean + split_mode: + type: string + spm_infill: + type: boolean + ssl_cert_file: + type: string + ssl_key_file: + type: string + temperature: + type: number + tensor_split: + type: string + threads: + type: integer + threads_batch: + type: integer + threads_http: + type: integer + timeout: + type: integer + top_k: + type: integer + top_p: + type: number + tts_use_guide_tokens: + type: boolean + typical: + type: number + ubatch_size: + type: integer + verbose: + type: boolean + verbose_prompt: + description: Common params + type: boolean + verbosity: + type: integer + xtc_probability: + type: number + xtc_threshold: + type: number + yarn_attn_factor: + type: number + yarn_beta_fast: + type: number + yarn_beta_slow: + type: number + yarn_ext_factor: + type: number + yarn_orig_ctx: + type: integer + type: object llamactl.Instance: properties: name: @@ -7,14 +352,6 @@ definitions: running: description: Status type: boolean - stdErrChan: - description: Channel for sending error messages - type: object - stdOutChan: - description: Output channels - type: object - type: object - llamactl.InstanceOptions: type: object info: contact: {} @@ -28,8 +365,6 @@ paths: /instances: get: description: Returns a list of all instances managed by the server - produces: - - application/json responses: "200": description: List of instances @@ -44,35 +379,6 @@ paths: summary: List all instances tags: - instances - post: - consumes: - - application/json - description: Creates a new instance with the provided configuration options - parameters: - - description: Instance configuration options - in: body - name: options - required: true - schema: - $ref: '#/definitions/llamactl.InstanceOptions' - produces: - - application/json - responses: - "201": - description: Created instance details - schema: - $ref: '#/definitions/llamactl.Instance' - "400": - description: Invalid request body - schema: - type: string - "500": - description: Internal Server Error - schema: - type: string - summary: Create and start a new instance - tags: - - instances /instances/{name}: delete: description: Stops and removes a specific instance by name @@ -82,8 +388,6 @@ paths: name: name required: true type: string - produces: - - application/json responses: "204": description: No Content @@ -122,6 +426,38 @@ paths: summary: Get details of a specific instance tags: - instances + post: + consumes: + - application/json + description: Creates a new instance with the provided configuration options + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + - description: Instance configuration options + in: body + name: options + required: true + schema: + $ref: '#/definitions/llamactl.CreateInstanceOptions' + responses: + "201": + description: Created instance details + schema: + $ref: '#/definitions/llamactl.Instance' + "400": + description: Invalid request body + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + summary: Create and start a new instance + tags: + - instances put: consumes: - application/json @@ -137,9 +473,7 @@ paths: name: options required: true schema: - $ref: '#/definitions/llamactl.InstanceOptions' - produces: - - application/json + $ref: '#/definitions/llamactl.CreateInstanceOptions' responses: "200": description: Updated instance details @@ -156,6 +490,64 @@ paths: summary: Update an instance's configuration tags: - instances + /instances/{name}/logs: + get: + description: Returns the logs from a specific instance by name with optional + line limit + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + - description: 'Number of lines to retrieve (default: all lines)' + in: query + name: lines + type: string + responses: + "200": + description: Instance logs + schema: + type: string + "400": + description: Invalid name format or lines parameter + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + summary: Get logs from a specific instance + tags: + - instances + /instances/{name}/proxy: + get: + description: Forwards HTTP requests to the llama-server instance running on + a specific port + parameters: + - description: Instance Name + in: path + name: name + required: true + type: string + responses: + "200": + description: Request successfully proxied to instance + "400": + description: Invalid name format + schema: + type: string + "500": + description: Internal Server Error + schema: + type: string + "503": + description: Instance is not running + schema: + type: string + summary: Proxy requests to a specific instance + tags: + - instances /instances/{name}/restart: post: description: Restarts a specific instance by name @@ -165,8 +557,6 @@ paths: name: name required: true type: string - produces: - - application/json responses: "200": description: Restarted instance details @@ -192,8 +582,6 @@ paths: name: name required: true type: string - produces: - - application/json responses: "200": description: Started instance details @@ -219,8 +607,6 @@ paths: name: name required: true type: string - produces: - - application/json responses: "200": description: Stopped instance details diff --git a/server/pkg/config.go b/server/pkg/config.go index 8c4cc3d..2bc8494 100644 --- a/server/pkg/config.go +++ b/server/pkg/config.go @@ -6,7 +6,6 @@ import ( "runtime" "strconv" "strings" - "time" "gopkg.in/yaml.v3" ) @@ -46,8 +45,8 @@ type InstancesConfig struct { // Default max restarts for new instances DefaultMaxRestarts int `yaml:"default_max_restarts"` - // Default restart delay for new instances - DefaultRestartDelay Duration `yaml:"default_restart_delay"` + // Default restart delay for new instances (in seconds) + DefaultRestartDelay int `yaml:"default_restart_delay"` } // LoadConfig loads configuration with the following precedence: @@ -68,7 +67,7 @@ func LoadConfig(configPath string) (Config, error) { LlamaExecutable: "llama-server", DefaultAutoRestart: false, DefaultMaxRestarts: 3, - DefaultRestartDelay: Duration(5 * time.Second), + DefaultRestartDelay: 5, }, } @@ -147,29 +146,12 @@ func loadEnvVars(cfg *Config) { } } if restartDelay := os.Getenv("LLAMACTL_DEFAULT_RESTART_DELAY"); restartDelay != "" { - if d, err := parseDelaySeconds(restartDelay); err == nil { - cfg.Instances.DefaultRestartDelay = Duration(d) + if seconds, err := strconv.Atoi(restartDelay); err == nil { + cfg.Instances.DefaultRestartDelay = seconds } } } -// parseDelaySeconds parses a string as seconds and returns a time.Duration -// Accepts both plain numbers (seconds) and duration strings like "5s", "30s" -func parseDelaySeconds(s string) (time.Duration, error) { - // If it contains letters, try parsing as duration - if strings.ContainsAny(s, "smh") { - return time.ParseDuration(s) - } - - // Otherwise parse as seconds - seconds, err := strconv.ParseFloat(s, 64) - if err != nil { - return 0, err - } - - return time.Duration(seconds * float64(time.Second)), nil -} - // parsePortRange parses port range from string formats like "8000-9000" or "8000,9000" func parsePortRange(s string) [2]int { var parts []string diff --git a/server/pkg/handlers.go b/server/pkg/handlers.go index b8c5bd8..ffdbd0d 100644 --- a/server/pkg/handlers.go +++ b/server/pkg/handlers.go @@ -25,7 +25,7 @@ func NewHandler(im InstanceManager) *Handler { // @Summary Get help for llama server // @Description Returns the help text for the llama server command // @Tags server -// #Produces text/plain +// @Produces text/plain // @Success 200 {string} string "Help text" // @Failure 500 {string} string "Internal Server Error" // @Router /server/help [get] @@ -46,7 +46,7 @@ func (h *Handler) HelpHandler() http.HandlerFunc { // @Summary Get version of llama server // @Description Returns the version of the llama server command // @Tags server -// #Produces text/plain +// @Produces text/plain // @Success 200 {string} string "Version information" // @Failure 500 {string} string "Internal Server Error" // @Router /server/version [get] @@ -67,7 +67,7 @@ func (h *Handler) VersionHandler() http.HandlerFunc { // @Summary List available devices for llama server // @Description Returns a list of available devices for the llama server // @Tags server -// #Produces text/plain +// @Produces text/plain // @Success 200 {string} string "List of devices" // @Failure 500 {string} string "Internal Server Error" // @Router /server/devices [get] @@ -88,7 +88,7 @@ func (h *Handler) ListDevicesHandler() http.HandlerFunc { // @Summary List all instances // @Description Returns a list of all instances managed by the server // @Tags instances -// @Produce json +// @Produces json // @Success 200 {array} Instance "List of instances" // @Failure 500 {string} string "Internal Server Error" // @Router /instances [get] @@ -113,12 +113,13 @@ func (h *Handler) ListInstances() http.HandlerFunc { // @Description Creates a new instance with the provided configuration options // @Tags instances // @Accept json -// @Produce json -// @Param options body InstanceOptions true "Instance configuration options" +// @Produces json +// @Param name path string true "Instance Name" +// @Param options body CreateInstanceOptions true "Instance configuration options" // @Success 201 {object} Instance "Created instance details" // @Failure 400 {string} string "Invalid request body" // @Failure 500 {string} string "Internal Server Error" -// @Router /instances [post] +// @Router /instances/{name} [post] func (h *Handler) CreateInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -152,6 +153,7 @@ func (h *Handler) CreateInstance() http.HandlerFunc { // @Summary Get details of a specific instance // @Description Returns the details of a specific instance by name // @Tags instances +// @Produces json // @Param name path string true "Instance Name" // @Success 200 {object} Instance "Instance details" // @Failure 400 {string} string "Invalid name format" @@ -184,9 +186,9 @@ func (h *Handler) GetInstance() http.HandlerFunc { // @Description Updates the configuration of a specific instance by name // @Tags instances // @Accept json -// @Produce json +// @Produces json // @Param name path string true "Instance Name" -// @Param options body InstanceOptions true "Instance configuration options" +// @Param options body CreateInstanceOptions true "Instance configuration options" // @Success 200 {object} Instance "Updated instance details" // @Failure 400 {string} string "Invalid name format" // @Failure 500 {string} string "Internal Server Error" @@ -229,7 +231,7 @@ func (h *Handler) UpdateInstance() http.HandlerFunc { // @Summary Start a stopped instance // @Description Starts a specific instance by name // @Tags instances -// @Produce json +// @Produces json // @Param name path string true "Instance Name" // @Success 200 {object} Instance "Started instance details" // @Failure 400 {string} string "Invalid name format" @@ -261,7 +263,7 @@ func (h *Handler) StartInstance() http.HandlerFunc { // @Summary Stop a running instance // @Description Stops a specific instance by name // @Tags instances -// @Produce json +// @Produces json // @Param name path string true "Instance Name" // @Success 200 {object} Instance "Stopped instance details" // @Failure 400 {string} string "Invalid name format" @@ -293,7 +295,7 @@ func (h *Handler) StopInstance() http.HandlerFunc { // @Summary Restart a running instance // @Description Restarts a specific instance by name // @Tags instances -// @Produce json +// @Produces json // @Param name path string true "Instance Name" // @Success 200 {object} Instance "Restarted instance details" // @Failure 400 {string} string "Invalid name format" @@ -325,7 +327,6 @@ func (h *Handler) RestartInstance() http.HandlerFunc { // @Summary Delete an instance // @Description Stops and removes a specific instance by name // @Tags instances -// @Produce json // @Param name path string true "Instance Name" // @Success 204 "No Content" // @Failure 400 {string} string "Invalid name format" @@ -348,6 +349,17 @@ func (h *Handler) DeleteInstance() http.HandlerFunc { } } +// GetInstanceLogs godoc +// @Summary Get logs from a specific instance +// @Description Returns the logs from a specific instance by name with optional line limit +// @Tags instances +// @Param name path string true "Instance Name" +// @Param lines query string false "Number of lines to retrieve (default: all lines)" +// @Produces text/plain +// @Success 200 {string} string "Instance logs" +// @Failure 400 {string} string "Invalid name format or lines parameter" +// @Failure 500 {string} string "Internal Server Error" +// @Router /instances/{name}/logs [get] func (h *Handler) GetInstanceLogs() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") @@ -384,6 +396,16 @@ func (h *Handler) GetInstanceLogs() http.HandlerFunc { } } +// ProxyToInstance godoc +// @Summary Proxy requests to a specific instance +// @Description Forwards HTTP requests to the llama-server instance running on a specific port +// @Tags instances +// @Param name path string true "Instance Name" +// @Success 200 "Request successfully proxied to instance" +// @Failure 400 {string} string "Invalid name format" +// @Failure 500 {string} string "Internal Server Error" +// @Failure 503 {string} string "Instance is not running" +// @Router /instances/{name}/proxy [get] func (h *Handler) ProxyToInstance() http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { name := chi.URLParam(r, "name") diff --git a/server/pkg/instance.go b/server/pkg/instance.go index 9260d2c..13bfe20 100644 --- a/server/pkg/instance.go +++ b/server/pkg/instance.go @@ -18,35 +18,12 @@ import ( "time" ) -// Duration is a custom type that wraps time.Duration for better JSON/Swagger support -// @description Duration in seconds -type Duration time.Duration - -// MarshalJSON implements json.Marshaler for Duration -func (d Duration) MarshalJSON() ([]byte, error) { - return json.Marshal(time.Duration(d).Seconds()) -} - -// UnmarshalJSON implements json.Unmarshaler for Duration -func (d *Duration) UnmarshalJSON(data []byte) error { - var seconds float64 - if err := json.Unmarshal(data, &seconds); err != nil { - return err - } - *d = Duration(time.Duration(seconds * float64(time.Second))) - return nil -} - -// ToDuration converts Duration to time.Duration -func (d Duration) ToDuration() time.Duration { - return time.Duration(d) -} - type CreateInstanceOptions struct { // Auto restart - AutoRestart *bool `json:"auto_restart,omitempty"` - MaxRestarts *int `json:"max_restarts,omitempty"` - RestartDelay *Duration `json:"restart_delay,omitempty"` // Duration in seconds + AutoRestart *bool `json:"auto_restart,omitempty"` + MaxRestarts *int `json:"max_restarts,omitempty"` + // RestartDelay duration in seconds + RestartDelay *int `json:"restart_delay_seconds,omitempty"` LlamaServerOptions `json:",inline"` } @@ -332,12 +309,13 @@ func (i *Instance) monitorProcess() { // Handle restart if process crashed and auto-restart is enabled if err != nil && *i.options.AutoRestart && i.restarts < *i.options.MaxRestarts { i.restarts++ + delayDuration := time.Duration(*i.options.RestartDelay) * time.Second log.Printf("Auto-restarting instance %s (attempt %d/%d) in %v", - i.Name, i.restarts, i.options.MaxRestarts, i.options.RestartDelay.ToDuration()) + i.Name, i.restarts, i.options.MaxRestarts, delayDuration) // Unlock mutex during sleep to avoid blocking other operations i.mu.Unlock() - time.Sleep(i.options.RestartDelay.ToDuration()) + time.Sleep(delayDuration) i.mu.Lock() // Attempt restart