{
  "swagger": "2.0",
  "info": {
    "description": "llamactl is a control server for managing Llama Server instances.",
    "title": "llamactl API",
    "contact": {},
    "license": {
      "name": "MIT License",
      "url": "https://opensource.org/license/mit/"
    },
    "version": "1.0"
  },
  "basePath": "/api/v1",
  "paths": {
    "/instances": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns a list of all instances managed by the server",
        "tags": [ "instances" ],
        "summary": "List all instances",
        "responses": {
          "200": {
            "description": "List of instances",
            "schema": {
              "type": "array",
              "items": { "$ref": "#/definitions/instance.Process" }
            }
          },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns the details of a specific instance by name",
        "tags": [ "instances" ],
        "summary": "Get details of a specific instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": {
            "description": "Instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      },
      "put": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Updates the configuration of a specific instance by name",
        "consumes": [ "application/json" ],
        "tags": [ "instances" ],
        "summary": "Update an instance's configuration",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true },
          {
            "description": "Instance configuration options",
            "name": "options",
            "in": "body",
            "required": true,
            "schema": { "$ref": "#/definitions/instance.CreateInstanceOptions" }
          }
        ],
        "responses": {
          "200": {
            "description": "Updated instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      },
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Creates a new instance with the provided configuration options",
        "consumes": [ "application/json" ],
        "tags": [ "instances" ],
        "summary": "Create and start a new instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true },
          {
            "description": "Instance configuration options",
            "name": "options",
            "in": "body",
            "required": true,
            "schema": { "$ref": "#/definitions/instance.CreateInstanceOptions" }
          }
        ],
        "responses": {
          "201": {
            "description": "Created instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid request body", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      },
      "delete": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Stops and removes a specific instance by name",
        "tags": [ "instances" ],
        "summary": "Delete an instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "204": { "description": "No Content" },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}/logs": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns the logs from a specific instance by name with optional line limit",
        "tags": [ "instances" ],
        "summary": "Get logs from a specific instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true },
          { "type": "string", "description": "Number of lines to retrieve (default: all lines)", "name": "lines", "in": "query" }
        ],
        "responses": {
          "200": { "description": "Instance logs", "schema": { "type": "string" } },
          "400": { "description": "Invalid name format or lines parameter", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}/proxy": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
        "tags": [ "instances" ],
        "summary": "Proxy requests to a specific instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": { "description": "Request successfully proxied to instance" },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } },
          "503": { "description": "Instance is not running", "schema": { "type": "string" } }
        }
      },
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Forwards HTTP requests to the llama-server instance running on a specific port",
        "tags": [ "instances" ],
        "summary": "Proxy requests to a specific instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": { "description": "Request successfully proxied to instance" },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } },
          "503": { "description": "Instance is not running", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}/restart": {
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Restarts a specific instance by name",
        "tags": [ "instances" ],
        "summary": "Restart a running instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": {
            "description": "Restarted instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}/start": {
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Starts a specific instance by name",
        "tags": [ "instances" ],
        "summary": "Start a stopped instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": {
            "description": "Started instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/instances/{name}/stop": {
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Stops a specific instance by name",
        "tags": [ "instances" ],
        "summary": "Stop a running instance",
        "parameters": [
          { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }
        ],
        "responses": {
          "200": {
            "description": "Stopped instance details",
            "schema": { "$ref": "#/definitions/instance.Process" }
          },
          "400": { "description": "Invalid name format", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/server/devices": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns a list of available devices for the llama server",
        "tags": [ "server" ],
        "summary": "List available devices for llama server",
        "responses": {
          "200": { "description": "List of devices", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/server/help": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns the help text for the llama server command",
        "tags": [ "server" ],
        "summary": "Get help for llama server",
        "responses": {
          "200": { "description": "Help text", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/server/version": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns the version of the llama server command",
        "tags": [ "server" ],
        "summary": "Get version of llama server",
        "responses": {
          "200": { "description": "Version information", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/v1/": {
      "post": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.",
        "consumes": [ "application/json" ],
        "tags": [ "openai" ],
        "summary": "OpenAI-compatible proxy endpoint",
        "responses": {
          "200": { "description": "OpenAI response" },
          "400": { "description": "Invalid request body or model name", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/v1/models": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns a list of instances in a format compatible with OpenAI API",
        "tags": [ "openai" ],
        "summary": "List instances in OpenAI-compatible format",
        "responses": {
          "200": {
            "description": "List of OpenAI-compatible instances",
            "schema": { "$ref": "#/definitions/server.OpenAIListInstancesResponse" }
          },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    },
    "/version": {
      "get": {
        "security": [ { "ApiKeyAuth": [] } ],
        "description": "Returns the version of the llamactl command",
        "tags": [ "version" ],
        "summary": "Get llamactl version",
        "responses": {
          "200": { "description": "Version information", "schema": { "type": "string" } },
          "500": { "description": "Internal Server Error", "schema": { "type": "string" } }
        }
      }
    }
  },
  "definitions": {
    "instance.CreateInstanceOptions": {
      "type": "object",
      "properties": {
        "alias": { "type": "string" },
        "api_key": { "type": "string" },
        "api_key_file": { "type": "string" },
        "auto_restart": { "description": "Auto restart", "type": "boolean" },
        "batch_size": { "type": "integer" },
        "cache_reuse": { "type": "integer" },
        "cache_type_k": { "type": "string" },
        "cache_type_k_draft": { "type": "string" },
        "cache_type_v": { "type": "string" },
        "cache_type_v_draft": { "type": "string" },
        "chat_template": { "type": "string" },
        "chat_template_file": { "type": "string" },
        "chat_template_kwargs": { "type": "string" },
        "check_tensors": { "type": "boolean" },
        "cont_batching": { "type": "boolean" },
        "control_vector": { "type": "array", "items": { "type": "string" } },
        "control_vector_layer_range": { "type": "string" },
        "control_vector_scaled": { "type": "array", "items": { "type": "string" } },
        "cpu_mask": { "type": "string" },
        "cpu_mask_batch": { "type": "string" },
        "cpu_range": { "type": "string" },
        "cpu_range_batch": { "type": "string" },
        "cpu_strict": { "type": "integer" },
        "cpu_strict_batch": { "type": "integer" },
        "ctx_size": { "type": "integer" },
        "ctx_size_draft": { "type": "integer" },
        "defrag_thold": { "type": "number" },
        "device": { "type": "string" },
        "device_draft": { "type": "string" },
        "draft_max": { "type": "integer" },
        "draft_min": { "type": "integer" },
        "draft_p_min": { "type": "number" },
        "dry_allowed_length": { "type": "integer" },
        "dry_base": { "type": "number" },
        "dry_multiplier": { "type": "number" },
        "dry_penalty_last_n": { "type": "integer" },
        "dry_sequence_breaker": { "type": "array", "items": { "type": "string" } },
        "dump_kv_cache": { "type": "boolean" },
        "dynatemp_exp": { "type": "number" },
        "dynatemp_range": { "type": "number" },
        "embd_bge_small_en_default": { "description": "Default model params", "type": "boolean" },
        "embd_e5_small_en_default": { "type": "boolean" },
        "embd_gte_small_default": { "type": "boolean" },
        "embedding": { "type": "boolean" },
        "escape": { "type": "boolean" },
        "fim_qwen_14b_spec": { "type": "boolean" },
        "fim_qwen_1_5b_default": { "type": "boolean" },
        "fim_qwen_3b_default": { "type": "boolean" },
        "fim_qwen_7b_default": { "type": "boolean" },
        "fim_qwen_7b_spec": { "type": "boolean" },
        "flash_attn": { "type": "boolean" },
        "frequency_penalty": { "type": "number" },
        "gpu_layers": { "type": "integer" },
        "gpu_layers_draft": { "type": "integer" },
        "grammar": { "type": "string" },
        "grammar_file": { "type": "string" },
        "hf_file": { "type": "string" },
        "hf_file_v": { "type": "string" },
        "hf_repo": { "type": "string" },
        "hf_repo_draft": { "type": "string" },
        "hf_repo_v": { "type": "string" },
        "hf_token": { "type": "string" },
        "host": { "type": "string" },
        "idle_timeout": { "description": "Idle timeout", "type": "integer" },
        "ignore_eos": { "type": "boolean" },
        "jinja": { "type": "boolean" },
        "json_schema": { "type": "string" },
        "json_schema_file": { "type": "string" },
        "keep": { "type": "integer" },
        "log_colors": { "type": "boolean" },
        "log_disable": { "type": "boolean" },
        "log_file": { "type": "string" },
        "log_prefix": { "type": "boolean" },
        "log_timestamps": { "type": "boolean" },
        "logit_bias": { "type": "array", "items": { "type": "string" } },
        "lora": { "type": "array", "items": { "type": "string" } },
        "lora_init_without_apply": { "type": "boolean" },
        "lora_scaled": { "type": "array", "items": { "type": "string" } },
        "main_gpu": { "type": "integer" },
        "max_restarts": { "type": "integer" },
        "metrics": { "type": "boolean" },
        "min_p": { "type": "number" },
        "mirostat": { "type": "integer" },
        "mirostat_ent": { "type": "number" },
        "mirostat_lr": { "type": "number" },
        "mlock": { "type": "boolean" },
        "mmproj": { "type": "string" },
        "mmproj_url": { "type": "string" },
        "model": { "type": "string" },
        "model_draft": { "type": "string" },
        "model_url": { "type": "string" },
        "model_vocoder": { "description": "Audio/TTS params", "type": "string" },
        "no_cont_batching": { "type": "boolean" },
        "no_context_shift": { "description": "Example-specific params", "type": "boolean" },
        "no_escape": { "type": "boolean" },
        "no_kv_offload": { "type": "boolean" },
        "no_mmap": { "type": "boolean" },
        "no_mmproj": { "type": "boolean" },
        "no_mmproj_offload": { "type": "boolean" },
        "no_perf": { "type": "boolean" },
        "no_prefill_assistant": { "type": "boolean" },
        "no_slots": { "type": "boolean" },
        "no_warmup": { "type": "boolean" },
        "no_webui": { "type": "boolean" },
        "numa": { "type": "string" },
        "on_demand_start": { "description": "On demand start", "type": "boolean" },
        "override_kv": { "type": "array", "items": { "type": "string" } },
        "override_tensor": { "type": "array", "items": { "type": "string" } },
        "parallel": { "type": "integer" },
        "path": { "type": "string" },
        "poll": { "type": "integer" },
        "poll_batch": { "type": "integer" },
        "pooling": { "type": "string" },
        "port": { "type": "integer" },
        "predict": { "type": "integer" },
        "presence_penalty": { "type": "number" },
        "prio": { "type": "integer" },
        "prio_batch": { "type": "integer" },
        "props": { "type": "boolean" },
        "reasoning_budget": { "type": "integer" },
        "reasoning_format": { "type": "string" },
        "repeat_last_n": { "type": "integer" },
        "repeat_penalty": { "type": "number" },
        "reranking": { "type": "boolean" },
        "restart_delay": { "type": "integer" },
        "rope_freq_base": { "type": "number" },
        "rope_freq_scale": { "type": "number" },
        "rope_scale": { "type": "number" },
        "rope_scaling": { "type": "string" },
        "samplers": { "description": "Sampling params", "type": "string" },
        "sampling_seq": { "type": "string" },
        "seed": { "type": "integer" },
        "slot_prompt_similarity": { "type": "number" },
        "slot_save_path": { "type": "string" },
        "slots": { "type": "boolean" },
        "special": { "type": "boolean" },
        "split_mode": { "type": "string" },
        "spm_infill": { "type": "boolean" },
        "ssl_cert_file": { "type": "string" },
        "ssl_key_file": { "type": "string" },
        "temp": { "type": "number" },
        "tensor_split": { "type": "string" },
        "threads": { "type": "integer" },
        "threads_batch": { "type": "integer" },
        "threads_http": { "type": "integer" },
        "timeout": { "type": "integer" },
        "top_k": { "type": "integer" },
        "top_p": { "type": "number" },
        "tts_use_guide_tokens": { "type": "boolean" },
        "typical": { "type": "number" },
        "ubatch_size": { "type": "integer" },
        "verbose": { "type": "boolean" },
        "verbose_prompt": { "description": "Common params", "type": "boolean" },
        "verbosity": { "type": "integer" },
        "xtc_probability": { "type": "number" },
        "xtc_threshold": { "type": "number" },
        "yarn_attn_factor": { "type": "number" },
        "yarn_beta_fast": { "type": "number" },
        "yarn_beta_slow": { "type": "number" },
        "yarn_ext_factor": { "type": "number" },
        "yarn_orig_ctx": { "type": "integer" }
      }
    },
    "instance.InstanceStatus": {
      "type": "integer",
      "enum": [ 0, 1, 2 ],
      "x-enum-varnames": [ "Stopped", "Running", "Failed" ]
    },
    "instance.Process": {
      "type": "object",
      "properties": {
        "created": { "description": "Creation time", "type": "integer" },
        "name": { "type": "string" },
        "status": {
          "description": "Status",
          "allOf": [ { "$ref": "#/definitions/instance.InstanceStatus" } ]
        }
      }
    },
    "server.OpenAIInstance": {
      "type": "object",
      "properties": {
        "created": { "type": "integer" },
        "id": { "type": "string" },
        "object": { "type": "string" },
        "owned_by": { "type": "string" }
      }
    },
    "server.OpenAIListInstancesResponse": {
      "type": "object",
      "properties": {
        "data": {
          "type": "array",
          "items": { "$ref": "#/definitions/server.OpenAIInstance" }
        },
        "object": { "type": "string" }
      }
    }
  },
  "securityDefinitions": {
    "ApiKeyAuth": {
      "description": "API key supplied in the Authorization request header.",
      "type": "apiKey",
      "name": "Authorization",
      "in": "header"
    }
  }
}