---
# Swagger 2.0 description of the llamactl HTTP API.
#
# Layout:
#   definitions          - reusable schemas referenced via $ref
#   info                 - API title, version and license
#   paths                - operations, grouped by the tags backends, instances,
#                          openai and version
#   securityDefinitions  - the ApiKeyAuth scheme required by every operation
#
# All paths below are relative to basePath.
basePath: /api/v1

definitions:
  # Backend kinds an instance can be created with.
  backends.BackendType:
    enum:
      - llama_cpp
      - mlx_lm
      - vllm
    type: string
    # presumably paired positionally with the enum values above
    x-enum-varnames:
      - BackendTypeLlamaCpp
      - BackendTypeMlxLm
      - BackendTypeVllm

  # Request body for creating/updating an instance; also the result of the
  # parse-command operations.
  instance.CreateInstanceOptions:
    properties:
      auto_restart:
        description: Auto restart
        type: boolean
      backend_options:
        # Free-form object: any property name with any value type is accepted.
        additionalProperties: {}
        type: object
      backend_type:
        $ref: '#/definitions/backends.BackendType'
      idle_timeout:
        description: Idle timeout
        type: integer
      max_restarts:
        type: integer
      on_demand_start:
        description: On demand start
        type: boolean
      restart_delay:
        description: seconds
        type: integer
    type: object

  # Integer-coded instance state.
  instance.InstanceStatus:
    enum:
      - 0
      - 1
      - 2
    type: integer
    # presumably paired positionally: 0 = Stopped, 1 = Running, 2 = Failed
    x-enum-varnames:
      - Stopped
      - Running
      - Failed

  # Instance representation returned by the /instances operations.
  instance.Process:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
      status:
        allOf:
          - $ref: '#/definitions/instance.InstanceStatus'
        description: Status
    type: object

  # Single entry of the /v1/models listing.
  server.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object

  # Envelope returned by /v1/models.
  server.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/server.OpenAIInstance'
        type: array
      object:
        type: string
    type: object

  # Request body for the parse-command operations.
  server.ParseCommandRequest:
    properties:
      command:
        type: string
    type: object

info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  version: "1.0"

paths:
  # ---- backends -----------------------------------------------------------
  /backends/llama-cpp/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List available devices for llama server
      tags:
        - backends

  /backends/llama-cpp/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get help for llama server
      tags:
        - backends

  /backends/llama-cpp/parse-command:
    post:
      consumes:
        - application/json
      description: Parses a llama-server command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
        "500":
          description: Internal Server Error
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse llama-server command
      tags:
        - backends

  /backends/llama-cpp/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get version of llama server
      tags:
        - backends

  # NOTE(review): unlike llama-cpp/parse-command, the MLX and vLLM variants
  # declare no "500" response - confirm whether that is intentional.
  /backends/mlx/parse-command:
    post:
      consumes:
        - application/json
      # NOTE(review): wording lacks the article used by the sibling
      # parse-command descriptions ("Parses a ..."); left unchanged.
      description: Parses MLX-LM server command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse mlx_lm.server command
      tags:
        - backends

  /backends/vllm/parse-command:
    post:
      consumes:
        - application/json
      description: Parses a vLLM serve command string into instance options
      parameters:
        - description: Command to parse
          in: body
          name: request
          required: true
          schema:
            $ref: '#/definitions/server.ParseCommandRequest'
      produces:
        - application/json
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
        "400":
          description: Invalid request or command
          schema:
            additionalProperties:
              type: string
            type: object
      security:
        - ApiKeyAuth: []
      summary: Parse vllm serve command
      tags:
        - backends

  # ---- instances ----------------------------------------------------------
  /instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/instance.Process'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List all instances
      tags:
        - instances

  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Delete an instance
      tags:
        - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get details of a specific instance
      tags:
        - instances
    post:
      consumes:
        - application/json
      description: >-
        Creates a new instance with the provided configuration options
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Create and start a new instance
      tags:
        - instances
    put:
      consumes:
        - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/instance.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Update an instance's configuration
      tags:
        - instances

  /instances/{name}/logs:
    get:
      description: >-
        Returns the logs from a specific instance by name with optional line
        limit
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        # NOTE(review): typed as string although it is described as a number
        # of lines - confirm with the handler before changing to integer.
        - description: 'Number of lines to retrieve (default: all lines)'
          in: query
          name: lines
          type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get logs from a specific instance
      tags:
        - instances

  # GET and POST are declared with identical parameters and responses.
  /instances/{name}/proxy:
    get:
      description: >-
        Forwards HTTP requests to the llama-server instance running on a
        specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances
    post:
      description: >-
        Forwards HTTP requests to the llama-server instance running on a
        specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances

  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Restart a running instance
      tags:
        - instances

  /instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Start a stopped instance
      tags:
        - instances

  /instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/instance.Process'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Stop a running instance
      tags:
        - instances

  # ---- openai -------------------------------------------------------------
  /v1/:
    post:
      consumes:
        - application/json
      description: >-
        Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body. Requires API key authentication
        via the `Authorization` header.
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or instance name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: OpenAI-compatible proxy endpoint
      tags:
        - openai

  /v1/models:
    get:
      description: >-
        Returns a list of instances in a format compatible with OpenAI API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/server.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List instances in OpenAI-compatible format
      tags:
        - openai

  # ---- version ------------------------------------------------------------
  /version:
    get:
      description: Returns the version of the llamactl command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get llamactl version
      tags:
        - version

# Declares the scheme that every operation's `security: - ApiKeyAuth: []`
# refers to; without this section those references are dangling.
# NOTE(review): the header name is taken from the /v1/ operation description
# ("via the `Authorization` header") - confirm against the server's auth
# handling before publishing.
securityDefinitions:
  ApiKeyAuth:
    in: header
    name: Authorization
    type: apiKey

swagger: "2.0"