{ "swagger": "2.0", "info": { "description": "llamactl is a control server for managing Llama Server instances.", "title": "llamactl API", "contact": {}, "license": { "name": "MIT License", "url": "https://opensource.org/license/mit/" }, "version": "1.0" }, "basePath": "/api/v1", "paths": { "/backends/llama-cpp/devices": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns a list of available devices for the llama server", "tags": [ "backends" ], "summary": "List available devices for llama server", "responses": { "200": { "description": "List of devices", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/backends/llama-cpp/help": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the help text for the llama server command", "tags": [ "backends" ], "summary": "Get help for llama server", "responses": { "200": { "description": "Help text", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/backends/llama-cpp/parse-command": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Parses a llama-server command string into instance options", "consumes": [ "application/json" ], "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Parse llama-server command", "parameters": [ { "description": "Command to parse", "name": "request", "in": "body", "required": true, "schema": { "$ref": "#/definitions/server.ParseCommandRequest" } } ], "responses": { "200": { "description": "Parsed options", "schema": { "$ref": "#/definitions/instance.Options" } }, "400": { "description": "Invalid request or command", "schema": { "type": "object", "additionalProperties": { "type": "string" } } }, "500": { "description": "Internal Server Error", "schema": { "type": "object", "additionalProperties": { "type": "string" } } } } } }, "/backends/llama-cpp/version": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the version of the llama server command", "tags": [ "backends" ], "summary": "Get version of llama server", "responses": { "200": { "description": "Version information", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/backends/mlx/parse-command": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Parses MLX-LM server command string into instance options", "consumes": [ "application/json" ], "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Parse mlx_lm.server command", "parameters": [ { "description": "Command to parse", "name": "request", "in": "body", "required": true, "schema": { "$ref": "#/definitions/server.ParseCommandRequest" } } ], "responses": { "200": { "description": "Parsed options", "schema": { "$ref": "#/definitions/instance.Options" } }, "400": { "description": "Invalid request or command", "schema": { "type": "object", "additionalProperties": { "type": "string" } } } } } }, "/backends/vllm/parse-command": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Parses a vLLM serve command string into instance options", "consumes": [ "application/json" ], "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Parse vllm serve command", "parameters": [ { "description": "Command to parse", "name": "request", "in": "body", "required": true, "schema": { "$ref": "#/definitions/server.ParseCommandRequest" } } ], "responses": { "200": { "description": "Parsed options", "schema": { "$ref": "#/definitions/instance.Options" } }, "400": { "description": "Invalid request or command", "schema": { "type": "object", "additionalProperties": { "type": "string" } } } } } }, "/instances": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns a list of all instances managed by the server", "tags": [ "instances" ], "summary": "List all instances", "responses": { "200": { "description": "List of instances", "schema": { "type": "array", "items": { "$ref": "#/definitions/instance.Instance" } } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/instances/{name}": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the details of a specific instance by name", "tags": [ "instances" ], "summary": "Get details of a specific instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } }, "put": { "security": [ { "ApiKeyAuth": [] } ], "description": "Updates the configuration of a specific instance by name", "consumes": [ "application/json" ], "tags": [ "instances" ], "summary": "Update an instance's configuration", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }, { "description": "Instance configuration options", "name": "options", "in": "body", "required": true, "schema": { "$ref": "#/definitions/instance.Options" } } ], "responses": { "200": { "description": "Updated instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } }, "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Creates a new instance with the provided configuration options", "consumes": [ "application/json" ], "tags": [ "instances" ], "summary": "Create and start a new instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }, { "description": "Instance configuration options", "name": "options", "in": "body", "required": true, "schema": { "$ref": "#/definitions/instance.Options" } } ], "responses": { "201": { "description": "Created instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid request body", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } }, "delete": { "security": [ { "ApiKeyAuth": [] } ], "description": "Stops and removes a specific instance by name", "tags": [ "instances" ], "summary": "Delete an instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "204": { "description": "No Content" }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/instances/{name}/logs": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the logs from a specific instance by name with optional line limit", "tags": [ "instances" ], "summary": "Get logs from a specific instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true }, { "type": "string", "description": "Number of lines to retrieve (default: all lines)", "name": "lines", "in": "query" } ], "responses": { "200": { "description": "Instance logs", "schema": { "type": "string" } }, "400": { "description": "Invalid name format or lines parameter", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/instances/{name}/proxy": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ "instances" ], "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Request successfully proxied to instance" }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } }, "503": { "description": "Instance is not running", "schema": { "type": "string" } } } }, "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Forwards HTTP requests to the llama-server instance running on a specific port", "tags": [ "instances" ], "summary": "Proxy requests to a specific instance, does not autostart instance if stopped", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Request successfully proxied to instance" }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } }, "503": { "description": "Instance is not running", "schema": { "type": "string" } } } } }, "/instances/{name}/restart": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Restarts a specific instance by name", "tags": [ "instances" ], "summary": "Restart a running instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Restarted instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/instances/{name}/start": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Starts a specific instance by name", "tags": [ "instances" ], "summary": "Start a stopped instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Started instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/instances/{name}/stop": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Stops a specific instance by name", "tags": [ "instances" ], "summary": "Stop a running instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Stopped instance details", "schema": { "$ref": "#/definitions/instance.Instance" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the llama.cpp UI for the specified instance", "produces": [ "text/html" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp UI for the instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "query", "required": true } ], "responses": { "200": { "description": "Proxied HTML response", "schema": { "type": "string" } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/apply-template": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/completion": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/detokenize": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/embeddings": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/infill": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/metrics": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/props": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } }, "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/reranking": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/slots": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/llama-cpp/{name}/tokenize": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Proxies requests to the specified llama.cpp server instance, starting it on-demand if configured", "produces": [ "application/json" ], "tags": [ "backends" ], "summary": "Proxy requests to llama.cpp server instance", "parameters": [ { "type": "string", "description": "Instance Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Proxied response", "schema": { "type": "object", "additionalProperties": true } }, "400": { "description": "Invalid instance", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/nodes": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns a map of all nodes configured in the server (node name -\u003e node config)", "tags": [ "nodes" ], "summary": "List all configured nodes", "responses": { "200": { "description": "Map of nodes", "schema": { "type": "object", "additionalProperties": { "$ref": "#/definitions/server.NodeResponse" } } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/nodes/{name}": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the details of a specific node by name", "tags": [ "nodes" ], "summary": "Get details of a specific node", "parameters": [ { "type": "string", "description": "Node Name", "name": "name", "in": "path", "required": true } ], "responses": { "200": { "description": "Node details", "schema": { "$ref": "#/definitions/server.NodeResponse" } }, "400": { "description": "Invalid name format", "schema": { "type": "string" } }, "404": { "description": "Node not found", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/v1/": { "post": { "security": [ { "ApiKeyAuth": [] } ], "description": "Handles all POST requests to /v1/*, routing to the appropriate instance based on the request body. Requires API key authentication via the `Authorization` header.", "consumes": [ "application/json" ], "tags": [ "openai" ], "summary": "OpenAI-compatible proxy endpoint", "responses": { "200": { "description": "OpenAI response" }, "400": { "description": "Invalid request body or instance name", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/v1/models": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns a list of instances in a format compatible with OpenAI API", "tags": [ "openai" ], "summary": "List instances in OpenAI-compatible format", "responses": { "200": { "description": "List of OpenAI-compatible instances", "schema": { "$ref": "#/definitions/server.OpenAIListInstancesResponse" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } }, "/version": { "get": { "security": [ { "ApiKeyAuth": [] } ], "description": "Returns the version of the llamactl command", "tags": [ "version" ], "summary": "Get llamactl version", "responses": { "200": { "description": "Version information", "schema": { "type": "string" } }, "500": { "description": "Internal Server Error", "schema": { "type": "string" } } } } } }, "definitions": { "instance.Instance": { "type": "object", "properties": { "created": { "description": "Unix timestamp when the instance was created", "type": "integer" }, "name": { "type": "string" } } }, "instance.Options": { "type": "object", "properties": { "auto_restart": { "description": "Auto restart", "type": "boolean" }, "environment": { "description": "Environment variables", "type": "object", "additionalProperties": { "type": "string" } }, "idle_timeout": { "description": "Idle timeout", "type": "integer" }, "max_restarts": { "type": "integer" }, "on_demand_start": { "description": "On demand start", "type": "boolean" }, "restart_delay": { "description": "seconds", "type": "integer" } } }, "server.NodeResponse": { "type": "object", "properties": { "address": { "type": "string" } } }, "server.OpenAIInstance": { "type": "object", "properties": { "created": { "type": "integer" }, "id": { "type": "string" }, "object": { "type": "string" }, "owned_by": { "type": "string" } } }, "server.OpenAIListInstancesResponse": { "type": "object", "properties": { "data": { "type": "array", "items": { "$ref": "#/definitions/server.OpenAIInstance" } }, "object": { "type": "string" } } }, "server.ParseCommandRequest": { "type": "object", "properties": { "command": { "type": "string" } } } } }