---
# Swagger 2.0 description of the llamactl HTTP API.
#
# Layout:
#   definitions          - shared request/response schemas
#   info                 - API title, version and license
#   paths                - one entry per route, keyed by full server-relative path
#   securityDefinitions  - the ApiKeyAuth scheme referenced by every operation

# Every key under `paths` is already a full server-relative path
# (/api/v1/..., /llama-cpp/..., /v1/...). Swagger 2.0 prepends basePath to each
# path, so it has to be the root: a basePath of /api/v1 would document
# /api/v1/api/v1/instances and /api/v1/v1/models.
basePath: /

definitions:
  instance.Instance:
    properties:
      created:
        description: Unix timestamp when the instance was created
        type: integer
      name:
        type: string
    type: object
  instance.Options:
    properties:
      auto_restart:
        description: Auto restart
        type: boolean
      environment:
        additionalProperties:
          type: string
        description: Environment variables
        type: object
      idle_timeout:
        description: Idle timeout
        type: integer
      max_restarts:
        type: integer
      on_demand_start:
        description: On demand start
        type: boolean
      restart_delay:
        description: seconds
        type: integer
    type: object
  server.NodeResponse:
    properties:
      address:
        type: string
    type: object
  server.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object
  server.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/server.OpenAIInstance'
        type: array
      object:
        type: string
    type: object
  server.ParseCommandRequest:
    properties:
      command:
        type: string
    type: object

info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  # Quoted so the version stays a string rather than the float 1.0.
  version: "1.0"

paths:
  # --- Backend helper endpoints -------------------------------------------
  /api/v1/backends/llama-cpp/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List available devices for llama server
      tags:
        - backends
  /api/v1/backends/llama-cpp/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get help for llama server
      tags:
        - backends
  /api/v1/backends/llama-cpp/parse-command:
    post:
      consumes:
        - application/json
      description: Parses a llama-server command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
        "500":
          description: Internal Server Error
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse llama-server command
      tags:
        - backends
  /api/v1/backends/llama-cpp/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get version of llama server
      tags:
        - backends
  /api/v1/backends/mlx/parse-command:
    post:
      consumes:
        - application/json
      description: Parses MLX-LM server command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse mlx_lm.server command
      tags:
        - backends
  /api/v1/backends/vllm/parse-command:
    post:
      consumes:
        - application/json
      description: Parses a vLLM serve command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.Options'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse vllm serve command
      tags:
        - backends

  # --- Instance management -------------------------------------------------
  /api/v1/instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/instance.Instance'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List all instances
      tags:
        - instances
  /api/v1/instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Delete an instance
      tags:
        - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get details of a specific instance
      tags:
        - instances
    post:
      consumes:
        - application/json
      description: Creates a new instance with the provided configuration options
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/instance.Options'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Create and start a new instance
      tags:
        - instances
    put:
      consumes:
        - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/instance.Options'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Update an instance's configuration
      tags:
        - instances
  /api/v1/instances/{name}/logs:
    get:
      description: Returns the logs from a specific instance by name with optional
        line limit
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        # Quoted because the text contains ": ", which would otherwise start a
        # nested mapping.
        - description: 'Number of lines to retrieve (default: all lines)'
          in: query
          name: lines
          type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get logs from a specific instance
      tags:
        - instances
  /api/v1/instances/{name}/proxy:
    get:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance, does not autostart instance
        if stopped
      tags:
        - instances
    post:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance, does not autostart instance
        if stopped
      tags:
        - instances
  /api/v1/instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Restart a running instance
      tags:
        - instances
  /api/v1/instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Start a stopped instance
      tags:
        - instances
  /api/v1/instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/instance.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Stop a running instance
      tags:
        - instances

  # --- Nodes and server version -------------------------------------------
  /api/v1/nodes:
    get:
      description: Returns a map of all nodes configured in the server
        (node name -> node config)
      responses:
        "200":
          description: Map of nodes
          schema:
            additionalProperties:
              $ref: '#/definitions/server.NodeResponse'
            type: object
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List all configured nodes
      tags:
        - nodes
  /api/v1/nodes/{name}:
    get:
      description: Returns the details of a specific node by name
      parameters:
        - description: Node Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Node details
          schema:
            $ref: '#/definitions/server.NodeResponse'
        "400":
          description: Invalid name format
          schema:
            type: string
        "404":
          description: Node not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get details of a specific node
      tags:
        - nodes
  /api/v1/version:
    get:
      description: Returns the version of the llamactl command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get llamactl version
      tags:
        - version

  # --- llama.cpp per-instance proxy routes --------------------------------
  /llama-cpp/{name}/:
    get:
      description: Proxies requests to the llama.cpp UI for the specified instance
      parameters:
        # `name` is the {name} segment of the path template, so it must be
        # declared `in: path` (it was `in: query`); this also matches every
        # other /llama-cpp/{name}/... operation in this file.
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - text/html
      responses:
        "200":
          description: Proxied HTML response
          schema:
            type: string
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp UI for the instance
      tags:
        - backends
  /llama-cpp/{name}/apply-template:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/completion:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/detokenize:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/embeddings:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/infill:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/metrics:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/props:
    get:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/reranking:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/slots:
    get:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends
  /llama-cpp/{name}/tokenize:
    post:
      description: Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      produces:
        - application/json
      responses:
        "200":
          description: Proxied response
          schema:
            additionalProperties: true
            type: object
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp server instance
      tags:
        - backends

  # --- OpenAI-compatible routes -------------------------------------------
  /v1/:
    post:
      consumes:
        - application/json
      # NOTE(review): this description names the `Authorization` header, while
      # securityDefinitions.ApiKeyAuth below declares `X-API-Key` - confirm
      # which header the server actually accepts and align the two.
      description: Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body. Requires API key authentication via
        the `Authorization` header.
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or instance name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: OpenAI-compatible proxy endpoint
      tags:
        - openai
  /v1/models:
    get:
      description: Returns a list of instances in a format compatible with OpenAI
        API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/server.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List instances in OpenAI-compatible format
      tags:
        - openai

# API-key scheme referenced by the `security` entry of every operation above.
securityDefinitions:
  ApiKeyAuth:
    in: header
    name: X-API-Key
    type: apiKey

# Quoted so the spec version stays the string "2.0", not the float 2.0.
swagger: "2.0"