From 785915943bc43d89151fea1491db32180674a9be Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sun, 21 Sep 2025 22:03:07 +0200 Subject: [PATCH] Update api docs --- apidocs/docs.go | 675 ++++++++++++------------------------------- apidocs/swagger.json | 675 ++++++++++++------------------------------- apidocs/swagger.yaml | 450 ++++++++--------------------- 3 files changed, 477 insertions(+), 1323 deletions(-) diff --git a/apidocs/docs.go b/apidocs/docs.go index 7ea502e..93edced 100644 --- a/apidocs/docs.go +++ b/apidocs/docs.go @@ -19,6 +19,159 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/backends/llama-cpp/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses a llama-server command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse llama-server command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/backends/mlx/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses MLX-LM server command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse mlx_lm.server command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/backends/vllm/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses a vLLM serve command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse vllm serve command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, "/instances": { "get": { "security": [ @@ -681,522 +834,46 @@ const docTemplate = `{ } }, "definitions": { + "backends.BackendType": { + "type": "string", + "enum": [ + "llama_cpp", + "mlx_lm", + "vllm" + ], + "x-enum-varnames": [ + "BackendTypeLlamaCpp", + "BackendTypeMlxLm", + "BackendTypeVllm" + ] + }, "instance.CreateInstanceOptions": { "type": "object", "properties": { - "alias": { - "type": "string" - }, - "api_key": { - "type": "string" - }, - "api_key_file": { - "type": "string" - }, "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "batch_size": { - "type": "integer" + "backend_options": { + "type": "object", + "additionalProperties": {} }, - "cache_reuse": { - "type": "integer" - }, - "cache_type_k": { - "type": "string" - }, - "cache_type_k_draft": { - "type": "string" - }, - "cache_type_v": { - "type": "string" - }, - "cache_type_v_draft": { - "type": "string" - }, - "chat_template": { - "type": "string" - }, - "chat_template_file": { - "type": "string" - }, - "chat_template_kwargs": { - "type": "string" - }, - "check_tensors": { - "type": "boolean" - }, - "cont_batching": { - "type": "boolean" - }, - "control_vector": { - "type": "array", - "items": { - "type": "string" - } - }, - "control_vector_layer_range": { - "type": "string" - }, - "control_vector_scaled": { - "type": "array", - "items": { - "type": "string" - } - }, - "cpu_mask": { - "type": "string" - }, - "cpu_mask_batch": { - "type": "string" - }, - "cpu_range": { - "type": "string" - }, - "cpu_range_batch": { - "type": "string" - }, - "cpu_strict": { - "type": "integer" - }, - "cpu_strict_batch": { - "type": "integer" - }, - "ctx_size": { - "type": "integer" - }, - "ctx_size_draft": { - "type": "integer" - }, - "defrag_thold": { - "type": "number" - }, - "device": { - "type": "string" - }, - "device_draft": { - "type": "string" - }, - "draft_max": { - "type": "integer" - }, - "draft_min": { - "type": "integer" - }, - "draft_p_min": { - "type": "number" - }, - "dry_allowed_length": { - "type": "integer" - }, - "dry_base": { - "type": "number" - }, - "dry_multiplier": { - "type": "number" - }, - "dry_penalty_last_n": { - "type": "integer" - }, - "dry_sequence_breaker": { - "type": "array", - "items": { - "type": "string" - } - }, - "dump_kv_cache": { - "type": "boolean" - }, - "dynatemp_exp": { - "type": "number" - }, - "dynatemp_range": { - "type": "number" - }, - "embd_bge_small_en_default": { - "description": "Default model params", - "type": "boolean" - }, - "embd_e5_small_en_default": { - "type": "boolean" - }, - "embd_gte_small_default": { - "type": "boolean" - }, - "embedding": { - "type": "boolean" - }, - "escape": { - "type": "boolean" - }, - "fim_qwen_14b_spec": { - "type": "boolean" - }, - "fim_qwen_1_5b_default": { - "type": "boolean" - }, - "fim_qwen_3b_default": { - "type": "boolean" - }, - "fim_qwen_7b_default": { - "type": "boolean" - }, - "fim_qwen_7b_spec": { - "type": "boolean" - }, - "flash_attn": { - "type": "boolean" - }, - "frequency_penalty": { - "type": "number" - }, - "gpu_layers": { - "type": "integer" - }, - "gpu_layers_draft": { - "type": "integer" - }, - "grammar": { - "type": "string" - }, - "grammar_file": { - "type": "string" - }, - "hf_file": { - "type": "string" - }, - "hf_file_v": { - "type": "string" - }, - "hf_repo": { - "type": "string" - }, - "hf_repo_draft": { - "type": "string" - }, - "hf_repo_v": { - "type": "string" - }, - "hf_token": { - "type": "string" - }, - "host": { - "type": "string" + "backend_type": { + "$ref": "#/definitions/backends.BackendType" }, "idle_timeout": { "description": "Idle timeout", "type": "integer" }, - "ignore_eos": { - "type": "boolean" - }, - "jinja": { - "type": "boolean" - }, - "json_schema": { - "type": "string" - }, - "json_schema_file": { - "type": "string" - }, - "keep": { - "type": "integer" - }, - "log_colors": { - "type": "boolean" - }, - "log_disable": { - "type": "boolean" - }, - "log_file": { - "type": "string" - }, - "log_prefix": { - "type": "boolean" - }, - "log_timestamps": { - "type": "boolean" - }, - "logit_bias": { - "type": "array", - "items": { - "type": "string" - } - }, - "lora": { - "type": "array", - "items": { - "type": "string" - } - }, - "lora_init_without_apply": { - "type": "boolean" - }, - "lora_scaled": { - "type": "array", - "items": { - "type": "string" - } - }, - "main_gpu": { - "type": "integer" - }, "max_restarts": { "type": "integer" }, - "metrics": { - "type": "boolean" - }, - "min_p": { - "type": "number" - }, - "mirostat": { - "type": "integer" - }, - "mirostat_ent": { - "type": "number" - }, - "mirostat_lr": { - "type": "number" - }, - "mlock": { - "type": "boolean" - }, - "mmproj": { - "type": "string" - }, - "mmproj_url": { - "type": "string" - }, - "model": { - "type": "string" - }, - "model_draft": { - "type": "string" - }, - "model_url": { - "type": "string" - }, - "model_vocoder": { - "description": "Audio/TTS params", - "type": "string" - }, - "no_cont_batching": { - "type": "boolean" - }, - "no_context_shift": { - "description": "Example-specific params", - "type": "boolean" - }, - "no_escape": { - "type": "boolean" - }, - "no_kv_offload": { - "type": "boolean" - }, - "no_mmap": { - "type": "boolean" - }, - "no_mmproj": { - "type": "boolean" - }, - "no_mmproj_offload": { - "type": "boolean" - }, - "no_perf": { - "type": "boolean" - }, - "no_prefill_assistant": { - "type": "boolean" - }, - "no_slots": { - "type": "boolean" - }, - "no_warmup": { - "type": "boolean" - }, - "no_webui": { - "type": "boolean" - }, - "numa": { - "type": "string" - }, "on_demand_start": { "description": "On demand start", "type": "boolean" }, - "override_kv": { - "type": "array", - "items": { - "type": "string" - } - }, - "override_tensor": { - "type": "array", - "items": { - "type": "string" - } - }, - "parallel": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "poll": { - "type": "integer" - }, - "poll_batch": { - "type": "integer" - }, - "pooling": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "predict": { - "type": "integer" - }, - "presence_penalty": { - "type": "number" - }, - "prio": { - "type": "integer" - }, - "prio_batch": { - "type": "integer" - }, - "props": { - "type": "boolean" - }, - "reasoning_budget": { - "type": "integer" - }, - "reasoning_format": { - "type": "string" - }, - "repeat_last_n": { - "type": "integer" - }, - "repeat_penalty": { - "type": "number" - }, - "reranking": { - "type": "boolean" - }, "restart_delay": { - "type": "integer" - }, - "rope_freq_base": { - "type": "number" - }, - "rope_freq_scale": { - "type": "number" - }, - "rope_scale": { - "type": "number" - }, - "rope_scaling": { - "type": "string" - }, - "samplers": { - "description": "Sampling params", - "type": "string" - }, - "sampling_seq": { - "type": "string" - }, - "seed": { - "type": "integer" - }, - "slot_prompt_similarity": { - "type": "number" - }, - "slot_save_path": { - "type": "string" - }, - "slots": { - "type": "boolean" - }, - "special": { - "type": "boolean" - }, - "split_mode": { - "type": "string" - }, - "spm_infill": { - "type": "boolean" - }, - "ssl_cert_file": { - "type": "string" - }, - "ssl_key_file": { - "type": "string" - }, - "temp": { - "type": "number" - }, - "tensor_split": { - "type": "string" - }, - "threads": { - "type": "integer" - }, - "threads_batch": { - "type": "integer" - }, - "threads_http": { - "type": "integer" - }, - "timeout": { - "type": "integer" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - }, - "tts_use_guide_tokens": { - "type": "boolean" - }, - "typical": { - "type": "number" - }, - "ubatch_size": { - "type": "integer" - }, - "verbose": { - "type": "boolean" - }, - "verbose_prompt": { - "description": "Common params", - "type": "boolean" - }, - "verbosity": { - "type": "integer" - }, - "xtc_probability": { - "type": "number" - }, - "xtc_threshold": { - "type": "number" - }, - "yarn_attn_factor": { - "type": "number" - }, - "yarn_beta_fast": { - "type": "number" - }, - "yarn_beta_slow": { - "type": "number" - }, - "yarn_ext_factor": { - "type": "number" - }, - "yarn_orig_ctx": { + "description": "seconds", "type": "integer" } } @@ -1264,6 +941,14 @@ const docTemplate = `{ "type": "string" } } + }, + "server.ParseCommandRequest": { + "type": "object", + "properties": { + "command": { + "type": "string" + } + } } } }` diff --git a/apidocs/swagger.json b/apidocs/swagger.json index be8d193..dc7f1c8 100644 --- a/apidocs/swagger.json +++ b/apidocs/swagger.json @@ -12,6 +12,159 @@ }, "basePath": "/api/v1", "paths": { + "/backends/llama-cpp/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses a llama-server command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse llama-server command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + }, + "500": { + "description": "Internal Server Error", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/backends/mlx/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses MLX-LM server command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse mlx_lm.server command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, + "/backends/vllm/parse-command": { + "post": { + "security": [ + { + "ApiKeyAuth": [] + } + ], + "description": "Parses a vLLM serve command string into instance options", + "consumes": [ + "application/json" + ], + "produces": [ + "application/json" + ], + "tags": [ + "backends" + ], + "summary": "Parse vllm serve command", + "parameters": [ + { + "description": "Command to parse", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/server.ParseCommandRequest" + } + } + ], + "responses": { + "200": { + "description": "Parsed options", + "schema": { + "$ref": "#/definitions/instance.CreateInstanceOptions" + } + }, + "400": { + "description": "Invalid request or command", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, "/instances": { "get": { "security": [ @@ -674,522 +827,46 @@ } }, "definitions": { + "backends.BackendType": { + "type": "string", + "enum": [ + "llama_cpp", + "mlx_lm", + "vllm" + ], + "x-enum-varnames": [ + "BackendTypeLlamaCpp", + "BackendTypeMlxLm", + "BackendTypeVllm" + ] + }, "instance.CreateInstanceOptions": { "type": "object", "properties": { - "alias": { - "type": "string" - }, - "api_key": { - "type": "string" - }, - "api_key_file": { - "type": "string" - }, "auto_restart": { "description": "Auto restart", "type": "boolean" }, - "batch_size": { - "type": "integer" + "backend_options": { + "type": "object", + "additionalProperties": {} }, - "cache_reuse": { - "type": "integer" - }, - "cache_type_k": { - "type": "string" - }, - "cache_type_k_draft": { - "type": "string" - }, - "cache_type_v": { - "type": "string" - }, - "cache_type_v_draft": { - "type": "string" - }, - "chat_template": { - "type": "string" - }, - "chat_template_file": { - "type": "string" - }, - "chat_template_kwargs": { - "type": "string" - }, - "check_tensors": { - "type": "boolean" - }, - "cont_batching": { - "type": "boolean" - }, - "control_vector": { - "type": "array", - "items": { - "type": "string" - } - }, - "control_vector_layer_range": { - "type": "string" - }, - "control_vector_scaled": { - "type": "array", - "items": { - "type": "string" - } - }, - "cpu_mask": { - "type": "string" - }, - "cpu_mask_batch": { - "type": "string" - }, - "cpu_range": { - "type": "string" - }, - "cpu_range_batch": { - "type": "string" - }, - "cpu_strict": { - "type": "integer" - }, - "cpu_strict_batch": { - "type": "integer" - }, - "ctx_size": { - "type": "integer" - }, - "ctx_size_draft": { - "type": "integer" - }, - "defrag_thold": { - "type": "number" - }, - "device": { - "type": "string" - }, - "device_draft": { - "type": "string" - }, - "draft_max": { - "type": "integer" - }, - "draft_min": { - "type": "integer" - }, - "draft_p_min": { - "type": "number" - }, - "dry_allowed_length": { - "type": "integer" - }, - "dry_base": { - "type": "number" - }, - "dry_multiplier": { - "type": "number" - }, - "dry_penalty_last_n": { - "type": "integer" - }, - "dry_sequence_breaker": { - "type": "array", - "items": { - "type": "string" - } - }, - "dump_kv_cache": { - "type": "boolean" - }, - "dynatemp_exp": { - "type": "number" - }, - "dynatemp_range": { - "type": "number" - }, - "embd_bge_small_en_default": { - "description": "Default model params", - "type": "boolean" - }, - "embd_e5_small_en_default": { - "type": "boolean" - }, - "embd_gte_small_default": { - "type": "boolean" - }, - "embedding": { - "type": "boolean" - }, - "escape": { - "type": "boolean" - }, - "fim_qwen_14b_spec": { - "type": "boolean" - }, - "fim_qwen_1_5b_default": { - "type": "boolean" - }, - "fim_qwen_3b_default": { - "type": "boolean" - }, - "fim_qwen_7b_default": { - "type": "boolean" - }, - "fim_qwen_7b_spec": { - "type": "boolean" - }, - "flash_attn": { - "type": "boolean" - }, - "frequency_penalty": { - "type": "number" - }, - "gpu_layers": { - "type": "integer" - }, - "gpu_layers_draft": { - "type": "integer" - }, - "grammar": { - "type": "string" - }, - "grammar_file": { - "type": "string" - }, - "hf_file": { - "type": "string" - }, - "hf_file_v": { - "type": "string" - }, - "hf_repo": { - "type": "string" - }, - "hf_repo_draft": { - "type": "string" - }, - "hf_repo_v": { - "type": "string" - }, - "hf_token": { - "type": "string" - }, - "host": { - "type": "string" + "backend_type": { + "$ref": "#/definitions/backends.BackendType" }, "idle_timeout": { "description": "Idle timeout", "type": "integer" }, - "ignore_eos": { - "type": "boolean" - }, - "jinja": { - "type": "boolean" - }, - "json_schema": { - "type": "string" - }, - "json_schema_file": { - "type": "string" - }, - "keep": { - "type": "integer" - }, - "log_colors": { - "type": "boolean" - }, - "log_disable": { - "type": "boolean" - }, - "log_file": { - "type": "string" - }, - "log_prefix": { - "type": "boolean" - }, - "log_timestamps": { - "type": "boolean" - }, - "logit_bias": { - "type": "array", - "items": { - "type": "string" - } - }, - "lora": { - "type": "array", - "items": { - "type": "string" - } - }, - "lora_init_without_apply": { - "type": "boolean" - }, - "lora_scaled": { - "type": "array", - "items": { - "type": "string" - } - }, - "main_gpu": { - "type": "integer" - }, "max_restarts": { "type": "integer" }, - "metrics": { - "type": "boolean" - }, - "min_p": { - "type": "number" - }, - "mirostat": { - "type": "integer" - }, - "mirostat_ent": { - "type": "number" - }, - "mirostat_lr": { - "type": "number" - }, - "mlock": { - "type": "boolean" - }, - "mmproj": { - "type": "string" - }, - "mmproj_url": { - "type": "string" - }, - "model": { - "type": "string" - }, - "model_draft": { - "type": "string" - }, - "model_url": { - "type": "string" - }, - "model_vocoder": { - "description": "Audio/TTS params", - "type": "string" - }, - "no_cont_batching": { - "type": "boolean" - }, - "no_context_shift": { - "description": "Example-specific params", - "type": "boolean" - }, - "no_escape": { - "type": "boolean" - }, - "no_kv_offload": { - "type": "boolean" - }, - "no_mmap": { - "type": "boolean" - }, - "no_mmproj": { - "type": "boolean" - }, - "no_mmproj_offload": { - "type": "boolean" - }, - "no_perf": { - "type": "boolean" - }, - "no_prefill_assistant": { - "type": "boolean" - }, - "no_slots": { - "type": "boolean" - }, - "no_warmup": { - "type": "boolean" - }, - "no_webui": { - "type": "boolean" - }, - "numa": { - "type": "string" - }, "on_demand_start": { "description": "On demand start", "type": "boolean" }, - "override_kv": { - "type": "array", - "items": { - "type": "string" - } - }, - "override_tensor": { - "type": "array", - "items": { - "type": "string" - } - }, - "parallel": { - "type": "integer" - }, - "path": { - "type": "string" - }, - "poll": { - "type": "integer" - }, - "poll_batch": { - "type": "integer" - }, - "pooling": { - "type": "string" - }, - "port": { - "type": "integer" - }, - "predict": { - "type": "integer" - }, - "presence_penalty": { - "type": "number" - }, - "prio": { - "type": "integer" - }, - "prio_batch": { - "type": "integer" - }, - "props": { - "type": "boolean" - }, - "reasoning_budget": { - "type": "integer" - }, - "reasoning_format": { - "type": "string" - }, - "repeat_last_n": { - "type": "integer" - }, - "repeat_penalty": { - "type": "number" - }, - "reranking": { - "type": "boolean" - }, "restart_delay": { - "type": "integer" - }, - "rope_freq_base": { - "type": "number" - }, - "rope_freq_scale": { - "type": "number" - }, - "rope_scale": { - "type": "number" - }, - "rope_scaling": { - "type": "string" - }, - "samplers": { - "description": "Sampling params", - "type": "string" - }, - "sampling_seq": { - "type": "string" - }, - "seed": { - "type": "integer" - }, - "slot_prompt_similarity": { - "type": "number" - }, - "slot_save_path": { - "type": "string" - }, - "slots": { - "type": "boolean" - }, - "special": { - "type": "boolean" - }, - "split_mode": { - "type": "string" - }, - "spm_infill": { - "type": "boolean" - }, - "ssl_cert_file": { - "type": "string" - }, - "ssl_key_file": { - "type": "string" - }, - "temp": { - "type": "number" - }, - "tensor_split": { - "type": "string" - }, - "threads": { - "type": "integer" - }, - "threads_batch": { - "type": "integer" - }, - "threads_http": { - "type": "integer" - }, - "timeout": { - "type": "integer" - }, - "top_k": { - "type": "integer" - }, - "top_p": { - "type": "number" - }, - "tts_use_guide_tokens": { - "type": "boolean" - }, - "typical": { - "type": "number" - }, - "ubatch_size": { - "type": "integer" - }, - "verbose": { - "type": "boolean" - }, - "verbose_prompt": { - "description": "Common params", - "type": "boolean" - }, - "verbosity": { - "type": "integer" - }, - "xtc_probability": { - "type": "number" - }, - "xtc_threshold": { - "type": "number" - }, - "yarn_attn_factor": { - "type": "number" - }, - "yarn_beta_fast": { - "type": "number" - }, - "yarn_beta_slow": { - "type": "number" - }, - "yarn_ext_factor": { - "type": "number" - }, - "yarn_orig_ctx": { + "description": "seconds", "type": "integer" } } @@ -1257,6 +934,14 @@ "type": "string" } } + }, + "server.ParseCommandRequest": { + "type": "object", + "properties": { + "command": { + "type": "string" + } + } } } } \ No newline at end of file diff --git a/apidocs/swagger.yaml b/apidocs/swagger.yaml index bc6e4ec..89b53fd 100644 --- a/apidocs/swagger.yaml +++ b/apidocs/swagger.yaml @@ -1,352 +1,35 @@ basePath: /api/v1 definitions: + backends.BackendType: + enum: + - llama_cpp + - mlx_lm + - vllm + type: string + x-enum-varnames: + - BackendTypeLlamaCpp + - BackendTypeMlxLm + - BackendTypeVllm instance.CreateInstanceOptions: properties: - alias: - type: string - api_key: - type: string - api_key_file: - type: string auto_restart: description: Auto restart type: boolean - batch_size: - type: integer - cache_reuse: - type: integer - cache_type_k: - type: string - cache_type_k_draft: - type: string - cache_type_v: - type: string - cache_type_v_draft: - type: string - chat_template: - type: string - chat_template_file: - type: string - chat_template_kwargs: - type: string - check_tensors: - type: boolean - cont_batching: - type: boolean - control_vector: - items: - type: string - type: array - control_vector_layer_range: - type: string - control_vector_scaled: - items: - type: string - type: array - cpu_mask: - type: string - cpu_mask_batch: - type: string - cpu_range: - type: string - cpu_range_batch: - type: string - cpu_strict: - type: integer - cpu_strict_batch: - type: integer - ctx_size: - type: integer - ctx_size_draft: - type: integer - defrag_thold: - type: number - device: - type: string - device_draft: - type: string - draft_max: - type: integer - draft_min: - type: integer - draft_p_min: - type: number - dry_allowed_length: - type: integer - dry_base: - type: number - dry_multiplier: - type: number - dry_penalty_last_n: - type: integer - dry_sequence_breaker: - items: - type: string - type: array - dump_kv_cache: - type: boolean - dynatemp_exp: - type: number - dynatemp_range: - type: number - embd_bge_small_en_default: - description: Default model params - type: boolean - embd_e5_small_en_default: - type: boolean - embd_gte_small_default: - type: boolean - embedding: - type: boolean - escape: - type: boolean - fim_qwen_1_5b_default: - type: boolean - fim_qwen_3b_default: - type: boolean - fim_qwen_7b_default: - type: boolean - fim_qwen_7b_spec: - type: boolean - fim_qwen_14b_spec: - type: boolean - flash_attn: - type: boolean - frequency_penalty: - type: number - gpu_layers: - type: integer - gpu_layers_draft: - type: integer - grammar: - type: string - grammar_file: - type: string - hf_file: - type: string - hf_file_v: - type: string - hf_repo: - type: string - hf_repo_draft: - type: string - hf_repo_v: - type: string - hf_token: - type: string - host: - type: string + backend_options: + additionalProperties: {} + type: object + backend_type: + $ref: '#/definitions/backends.BackendType' idle_timeout: description: Idle timeout type: integer - ignore_eos: - type: boolean - jinja: - type: boolean - json_schema: - type: string - json_schema_file: - type: string - keep: - type: integer - log_colors: - type: boolean - log_disable: - type: boolean - log_file: - type: string - log_prefix: - type: boolean - log_timestamps: - type: boolean - logit_bias: - items: - type: string - type: array - lora: - items: - type: string - type: array - lora_init_without_apply: - type: boolean - lora_scaled: - items: - type: string - type: array - main_gpu: - type: integer max_restarts: type: integer - metrics: - type: boolean - min_p: - type: number - mirostat: - type: integer - mirostat_ent: - type: number - mirostat_lr: - type: number - mlock: - type: boolean - mmproj: - type: string - mmproj_url: - type: string - model: - type: string - model_draft: - type: string - model_url: - type: string - model_vocoder: - description: Audio/TTS params - type: string - no_cont_batching: - type: boolean - no_context_shift: - description: Example-specific params - type: boolean - no_escape: - type: boolean - no_kv_offload: - type: boolean - no_mmap: - type: boolean - no_mmproj: - type: boolean - no_mmproj_offload: - type: boolean - no_perf: - type: boolean - no_prefill_assistant: - type: boolean - no_slots: - type: boolean - no_warmup: - type: boolean - no_webui: - type: boolean - numa: - type: string on_demand_start: description: On demand start type: boolean - override_kv: - items: - type: string - type: array - override_tensor: - items: - type: string - type: array - parallel: - type: integer - path: - type: string - poll: - type: integer - poll_batch: - type: integer - pooling: - type: string - port: - type: integer - predict: - type: integer - presence_penalty: - type: number - prio: - type: integer - prio_batch: - type: integer - props: - type: boolean - reasoning_budget: - type: integer - reasoning_format: - type: string - repeat_last_n: - type: integer - repeat_penalty: - type: number - reranking: - type: boolean restart_delay: - type: integer - rope_freq_base: - type: number - rope_freq_scale: - type: number - rope_scale: - type: number - rope_scaling: - type: string - samplers: - description: Sampling params - type: string - sampling_seq: - type: string - seed: - type: integer - slot_prompt_similarity: - type: number - slot_save_path: - type: string - slots: - type: boolean - special: - type: boolean - split_mode: - type: string - spm_infill: - type: boolean - ssl_cert_file: - type: string - ssl_key_file: - type: string - temp: - type: number - tensor_split: - type: string - threads: - type: integer - threads_batch: - type: integer - threads_http: - type: integer - timeout: - type: integer - top_k: - type: integer - top_p: - type: number - tts_use_guide_tokens: - type: boolean - typical: - type: number - ubatch_size: - type: integer - verbose: - type: boolean - verbose_prompt: - description: Common params - type: boolean - verbosity: - type: integer - xtc_probability: - type: number - xtc_threshold: - type: number - yarn_attn_factor: - type: number - yarn_beta_fast: - type: number - yarn_beta_slow: - type: number - yarn_ext_factor: - type: number - yarn_orig_ctx: + description: seconds type: integer type: object instance.InstanceStatus: @@ -391,6 +74,11 @@ definitions: object: type: string type: object + server.ParseCommandRequest: + properties: + command: + type: string + type: object info: contact: {} description: llamactl is a control server for managing Llama Server instances. @@ -400,6 +88,102 @@ info: title: llamactl API version: "1.0" paths: + /backends/llama-cpp/parse-command: + post: + consumes: + - application/json + description: Parses a llama-server command string into instance options + parameters: + - description: Command to parse + in: body + name: request + required: true + schema: + $ref: '#/definitions/server.ParseCommandRequest' + produces: + - application/json + responses: + "200": + description: Parsed options + schema: + $ref: '#/definitions/instance.CreateInstanceOptions' + "400": + description: Invalid request or command + schema: + additionalProperties: + type: string + type: object + "500": + description: Internal Server Error + schema: + additionalProperties: + type: string + type: object + security: + - ApiKeyAuth: [] + summary: Parse llama-server command + tags: + - backends + /backends/mlx/parse-command: + post: + consumes: + - application/json + description: Parses MLX-LM server command string into instance options + parameters: + - description: Command to parse + in: body + name: request + required: true + schema: + $ref: '#/definitions/server.ParseCommandRequest' + produces: + - application/json + responses: + "200": + description: Parsed options + schema: + $ref: '#/definitions/instance.CreateInstanceOptions' + "400": + description: Invalid request or command + schema: + additionalProperties: + type: string + type: object + security: + - ApiKeyAuth: [] + summary: Parse mlx_lm.server command + tags: + - backends + /backends/vllm/parse-command: + post: + consumes: + - application/json + description: Parses a vLLM serve command string into instance options + parameters: + - description: Command to parse + in: body + name: request + required: true + schema: + $ref: '#/definitions/server.ParseCommandRequest' + produces: + - application/json + responses: + "200": + description: Parsed options + schema: + $ref: '#/definitions/instance.CreateInstanceOptions' + "400": + description: Invalid request or command + schema: + additionalProperties: + type: string + type: object + security: + - ApiKeyAuth: [] + summary: Parse vllm serve command + tags: + - backends /instances: get: description: Returns a list of all instances managed by the server