# llamactl/docs/swagger.yaml

# The path keys in this document are written as complete routes: the management
# routes already start with /api/v1, and the /llama-cpp/... routes sit outside
# that prefix. A base path of /api/v1 would therefore resolve them to
# /api/v1/api/v1/... and /api/v1/llama-cpp/..., so the base path is the root.
# NOTE(review): confirm against the router before regenerating the docs.
basePath: /
definitions:
  # String enum referenced by the permission_mode field of the API key schemas.
  auth.PermissionMode:
    type: string
    enum:
    - allow_all
    - per_instance
    # Names for the enum values above, listed in the same order.
    x-enum-varnames:
    - PermissionModeAllowAll
    - PermissionModePerInstance
  # Top-level configuration object; used as the 200 response of /api/v1/config.
  config.AppConfig:
    type: object
    properties:
      auth:
        $ref: "#/definitions/config.AuthConfig"
      backends:
        $ref: "#/definitions/config.BackendConfig"
      build_time:
        type: string
      commit_hash:
        type: string
      data_dir:
        type: string
        description: >-
          Directory where all llamactl data will be stored (database,
          instances, logs, etc.)
      database:
        $ref: "#/definitions/config.DatabaseConfig"
      instances:
        $ref: "#/definitions/config.InstancesConfig"
      local_node:
        type: string
      # Map whose values are config.NodeConfig objects.
      nodes:
        type: object
        additionalProperties:
          $ref: "#/definitions/config.NodeConfig"
      server:
        $ref: "#/definitions/config.ServerConfig"
      version:
        type: string
  # Authentication settings: key lists and on/off switches for the inference
  # and management endpoint groups.
  config.AuthConfig:
    type: object
    properties:
      inference_keys:
        type: array
        description: List of keys for OpenAI compatible inference endpoints
        items:
          type: string
      management_keys:
        type: array
        description: List of keys for management endpoints
        items:
          type: string
      require_inference_auth:
        type: boolean
        description: Require authentication for OpenAI compatible inference endpoints
      require_management_auth:
        type: boolean
        description: Require authentication for management endpoints
  # One config.BackendSettings entry per backend key (llama-cpp, mlx, vllm).
  config.BackendConfig:
    type: object
    properties:
      llama-cpp:
        $ref: "#/definitions/config.BackendSettings"
      mlx:
        $ref: "#/definitions/config.BackendSettings"
      vllm:
        $ref: "#/definitions/config.BackendSettings"
  # Settings shared by every backend: command, args, optional docker settings,
  # and two string-to-string maps (environment, response_headers).
  config.BackendSettings:
    type: object
    properties:
      args:
        type: array
        items:
          type: string
      command:
        type: string
      docker:
        $ref: "#/definitions/config.DockerSettings"
      environment:
        type: object
        additionalProperties:
          type: string
      response_headers:
        type: object
        additionalProperties:
          type: string
  # Database settings: file path plus connection limits.
  config.DatabaseConfig:
    type: object
    properties:
      connection_max_lifetime:
        type: string
        example: 1h
      max_idle_connections:
        type: integer
      # NOTE(review): this description reads like a section heading rather
      # than a description of the field itself - confirm against the source.
      max_open_connections:
        type: integer
        description: Connection settings
      path:
        type: string
        description: Database file path (relative to the top-level data_dir or absolute)
  # Docker settings nested under config.BackendSettings.docker.
  config.DockerSettings:
    type: object
    properties:
      args:
        type: array
        items:
          type: string
      enabled:
        type: boolean
      environment:
        type: object
        additionalProperties:
          type: string
      image:
        type: string
  # Instance settings: directories, defaults applied to new instances, limits,
  # port range and timing values.
  config.InstancesConfig:
    type: object
    properties:
      auto_create_dirs:
        type: boolean
        description: Automatically create the data directory if it doesn't exist
      configs_dir:
        type: string
        description: >-
          Instance config directory override (relative to data_dir if not
          absolute)
      default_auto_restart:
        type: boolean
        description: Default auto-restart setting for new instances
      default_max_restarts:
        type: integer
        description: Default max restarts for new instances
      default_on_demand_start:
        type: boolean
        description: Default on-demand start setting for new instances
      default_restart_delay:
        type: integer
        description: Default restart delay for new instances (in seconds)
      enable_lru_eviction:
        type: boolean
        description: Enable LRU eviction for instance logs
      logs_dir:
        type: string
        description: Logs directory override (relative to data_dir if not absolute)
      max_instances:
        type: integer
        description: Maximum number of instances that can be created
      max_running_instances:
        type: integer
        description: Maximum number of instances that can be running at the same time
      on_demand_start_timeout:
        type: integer
        description: How long to wait for an instance to start on demand (in seconds)
      port_range:
        type: array
        description: Port range for instances (e.g., 8000,9000)
        items:
          type: integer
      timeout_check_interval:
        type: integer
        description: Interval for checking instance timeouts (in minutes)
  # Node entry: an address and an API key, both strings.
  config.NodeConfig:
    type: object
    properties:
      address:
        type: string
      api_key:
        type: string
  # HTTP server settings: bind address, CORS lists, Swagger UI switch and
  # extra response headers.
  config.ServerConfig:
    type: object
    properties:
      allowed_headers:
        type: array
        description: >-
          Allowed headers for CORS (e.g., "Accept", "Authorization",
          "Content-Type", "X-CSRF-Token")
        items:
          type: string
      allowed_origins:
        type: array
        description: Allowed origins for CORS (e.g., "http://localhost:3000")
        items:
          type: string
      enable_swagger:
        type: boolean
        description: Enable Swagger UI for API documentation
      host:
        type: string
        description: Server host to bind to
      port:
        type: integer
        description: Server port to bind to
      response_headers:
        type: object
        description: Response headers to send with responses
        additionalProperties:
          type: string
  # Instance as returned by the /api/v1/instances endpoints.
  instance.Instance:
    type: object
    properties:
      created:
        type: integer
        description: Unix timestamp when instance was created
      id:
        type: integer
      name:
        type: string
  # Instance options: request body for creating/updating an instance and the
  # result of the parse-command endpoints.
  instance.Options:
    type: object
    properties:
      auto_restart:
        type: boolean
        description: Auto restart
      command_override:
        type: string
      # NOTE(review): this description reads like a section heading rather
      # than a description of the field itself - confirm against the source.
      docker_enabled:
        type: boolean
        description: Execution context overrides
      environment:
        type: object
        description: Environment variables
        additionalProperties:
          type: string
      idle_timeout:
        type: integer
        description: Idle timeout
      max_restarts:
        type: integer
      on_demand_start:
        type: boolean
        description: On demand start
      # NOTE(review): the description only gives a unit; presumably the delay
      # between restarts - confirm against the source.
      restart_delay:
        type: integer
        description: seconds
  # Request body of POST /api/v1/auth/keys.
  server.CreateKeyRequest:
    type: object
    properties:
      expires_at:
        type: integer
      instance_ids:
        type: array
        items:
          type: integer
      name:
        type: string
      permission_mode:
        $ref: "#/definitions/auth.PermissionMode"
  # 201 response of POST /api/v1/auth/keys; unlike server.KeyResponse it
  # carries a `key` string field.
  server.CreateKeyResponse:
    type: object
    properties:
      created_at:
        type: integer
      expires_at:
        type: integer
      id:
        type: integer
      key:
        type: string
      last_used_at:
        type: integer
      name:
        type: string
      permission_mode:
        $ref: "#/definitions/auth.PermissionMode"
      updated_at:
        type: integer
      user_id:
        type: string
  # Item type of the list returned by /api/v1/auth/keys/{id}/permissions.
  server.KeyPermissionResponse:
    type: object
    properties:
      instance_id:
        type: integer
      instance_name:
        type: string
  # API key details as returned by the key list/detail endpoints.
  server.KeyResponse:
    type: object
    properties:
      created_at:
        type: integer
      expires_at:
        type: integer
      id:
        type: integer
      last_used_at:
        type: integer
      name:
        type: string
      permission_mode:
        $ref: "#/definitions/auth.PermissionMode"
      updated_at:
        type: integer
      user_id:
        type: string
  # Node as returned by the /api/v1/nodes endpoints (address only).
  server.NodeResponse:
    type: object
    properties:
      address:
        type: string
  # Item type of server.OpenAIListInstancesResponse.data.
  server.OpenAIInstance:
    type: object
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
  # List wrapper: an array of server.OpenAIInstance plus an `object` string.
  server.OpenAIListInstancesResponse:
    type: object
    properties:
      data:
        type: array
        items:
          $ref: "#/definitions/server.OpenAIInstance"
      object:
        type: string
  # Request body of the backend parse-command endpoints.
  server.ParseCommandRequest:
    type: object
    properties:
      command:
        type: string
# API metadata: title, description, version and license.
info:
  title: llamactl API
  description: llamactl is a control server for managing Llama Server instances.
  # Quoted so the version stays a string instead of being read as a float.
  version: "1.0"
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  contact: {}
paths:
  # API key collection: GET lists the existing keys, POST creates a new key.
  /api/v1/auth/keys:
    get:
      description: Returns a list of all API keys for the system user (excludes key
        hash and plain-text key)
      produces:
      - application/json
      responses:
        "200":
          description: List of API keys
          schema:
            items:
              $ref: '#/definitions/server.KeyResponse'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: List all API keys
      tags:
      - Keys
    post:
      consumes:
      - application/json
      description: Creates a new API key with the specified permissions and returns
        the plain-text key (only shown once)
      parameters:
      - description: API key configuration
        in: body
        name: key
        required: true
        schema:
          $ref: '#/definitions/server.CreateKeyRequest'
      produces:
      - application/json
      responses:
        "201":
          description: Created API key with plain-text key
          schema:
            $ref: '#/definitions/server.CreateKeyResponse'
        "400":
          description: Invalid request body or validation error
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      # Declared for consistency with the sibling key operations (list, get,
      # delete, permissions), which all require ApiKeyAuth; without it the
      # key-creation endpoint is documented as unauthenticated.
      security:
      - ApiKeyAuth: []
      summary: Create a new API key
      tags:
      - Keys
  # Single API key addressed by its integer ID: DELETE removes it, GET returns
  # its details.
  /api/v1/auth/keys/{id}:
    delete:
      summary: Delete an API key
      description: Deletes an API key by ID
      tags:
      - Keys
      security:
      - ApiKeyAuth: []
      parameters:
      - name: id
        in: path
        description: Key ID
        required: true
        type: integer
      responses:
        "204":
          description: API key deleted successfully
        "400":
          description: Invalid key ID
          schema:
            type: string
        "404":
          description: API key not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
    get:
      summary: Get details of a specific API key
      description: >-
        Returns details for a specific API key by ID (excludes key hash
        and plain-text key)
      tags:
      - Keys
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: id
        in: path
        description: Key ID
        required: true
        type: integer
      responses:
        "200":
          description: API key details
          schema:
            $ref: "#/definitions/server.KeyResponse"
        "400":
          description: Invalid key ID
          schema:
            type: string
        "404":
          description: API key not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Instance-level permissions attached to one API key.
  /api/v1/auth/keys/{id}/permissions:
    get:
      summary: Get API key permissions
      description: >-
        Returns the instance-level permissions for a specific API key
        (includes instance names)
      tags:
      - Keys
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: id
        in: path
        description: Key ID
        required: true
        type: integer
      responses:
        "200":
          description: List of key permissions
          schema:
            type: array
            items:
              $ref: "#/definitions/server.KeyPermissionResponse"
        "400":
          description: Invalid key ID
          schema:
            type: string
        "404":
          description: API key not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Device listing for the llama server backend; the response is a string.
  /api/v1/backends/llama-cpp/devices:
    get:
      summary: List available devices for llama server
      description: Returns a list of available devices for the llama server
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Help text of the llama server command; the response is a string.
  /api/v1/backends/llama-cpp/help:
    get:
      summary: Get help for llama server
      description: Returns the help text for the llama server command
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Turns a llama-server command string into instance.Options; error responses
  # are string-to-string maps.
  /api/v1/backends/llama-cpp/parse-command:
    post:
      summary: Parse llama-server command
      description: Parses a llama-server command string into instance options
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      consumes:
      - application/json
      produces:
      - application/json
      parameters:
      - name: request
        in: body
        description: Command to parse
        required: true
        schema:
          $ref: "#/definitions/server.ParseCommandRequest"
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: "#/definitions/instance.Options"
        "400":
          description: Invalid request or command
          schema:
            type: object
            additionalProperties:
              type: string
        "500":
          description: Internal Server Error
          schema:
            type: object
            additionalProperties:
              type: string
  # Version of the llama server command; the response is a string.
  /api/v1/backends/llama-cpp/version:
    get:
      summary: Get version of llama server
      description: Returns the version of the llama server command
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Turns an MLX-LM server command string into instance.Options.
  /api/v1/backends/mlx/parse-command:
    post:
      summary: Parse mlx_lm.server command
      description: Parses MLX-LM server command string into instance options
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      consumes:
      - application/json
      produces:
      - application/json
      parameters:
      - name: request
        in: body
        description: Command to parse
        required: true
        schema:
          $ref: "#/definitions/server.ParseCommandRequest"
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: "#/definitions/instance.Options"
        "400":
          description: Invalid request or command
          schema:
            type: object
            additionalProperties:
              type: string
  # Turns a vLLM serve command string into instance.Options.
  /api/v1/backends/vllm/parse-command:
    post:
      summary: Parse vllm serve command
      description: Parses a vLLM serve command string into instance options
      tags:
      - Backends
      security:
      - ApiKeyAuth: []
      consumes:
      - application/json
      produces:
      - application/json
      parameters:
      - name: request
        in: body
        description: Command to parse
        required: true
        schema:
          $ref: "#/definitions/server.ParseCommandRequest"
      responses:
        "200":
          description: Parsed options
          schema:
            $ref: "#/definitions/instance.Options"
        "400":
          description: Invalid request or command
          schema:
            type: object
            additionalProperties:
              type: string
  # Current server configuration, returned as config.AppConfig.
  /api/v1/config:
    get:
      summary: Get server configuration
      description: Returns the current server configuration (sanitized)
      tags:
      - System
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: Sanitized configuration
          schema:
            $ref: "#/definitions/config.AppConfig"
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Instance collection: returns an array of instance.Instance.
  /api/v1/instances:
    get:
      summary: List all instances
      description: Returns a list of all instances managed by the server
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: List of instances
          schema:
            type: array
            items:
              $ref: "#/definitions/instance.Instance"
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Single instance addressed by name: DELETE removes it, GET returns it,
  # POST creates it, PUT updates its configuration.
  /api/v1/instances/{name}:
    delete:
      summary: Delete an instance
      description: Stops and removes a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
    get:
      summary: Get details of a specific instance
      description: Returns the details of a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
    post:
      summary: Create and start a new instance
      description: Creates a new instance with the provided configuration options
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      consumes:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      - name: options
        in: body
        description: Instance configuration options
        required: true
        schema:
          $ref: "#/definitions/instance.Options"
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
    put:
      summary: Update an instance's configuration
      description: Updates the configuration of a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      consumes:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      - name: options
        in: body
        description: Instance configuration options
        required: true
        schema:
          $ref: "#/definitions/instance.Options"
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Logs of one instance; the optional `lines` query parameter limits output.
  /api/v1/instances/{name}/logs:
    get:
      summary: Get logs from a specific instance
      description: >-
        Returns the logs from a specific instance by name with optional
        line limit
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      # Declared as a string here even though it describes a count.
      - name: lines
        in: query
        description: 'Number of lines to retrieve (default: all lines)'
        type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Proxy to a named instance, documented for GET and POST with identical
  # parameters and responses; 503 is returned when the instance is not running.
  /api/v1/instances/{name}/proxy:
    get:
      summary: >-
        Proxy requests to a specific instance, does not autostart instance
        if stopped
      description: >-
        Forwards HTTP requests to the llama-server instance running on
        a specific port
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
    post:
      summary: >-
        Proxy requests to a specific instance, does not autostart instance
        if stopped
      description: >-
        Forwards HTTP requests to the llama-server instance running on
        a specific port
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
  # Restart action for a named instance; responds with instance.Instance.
  /api/v1/instances/{name}/restart:
    post:
      summary: Restart a running instance
      description: Restarts a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Start action for a named instance; responds with instance.Instance.
  /api/v1/instances/{name}/start:
    post:
      summary: Start a stopped instance
      description: Starts a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Stop action for a named instance; responds with instance.Instance.
  /api/v1/instances/{name}/stop:
    post:
      summary: Stop a running instance
      description: Stops a specific instance by name
      tags:
      - Instances
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: "#/definitions/instance.Instance"
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Node collection: an object whose values are server.NodeResponse.
  /api/v1/nodes:
    get:
      summary: List all configured nodes
      description: >-
        Returns a map of all nodes configured in the server
        (node name -> node config)
      tags:
      - Nodes
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: Map of nodes
          schema:
            type: object
            additionalProperties:
              $ref: "#/definitions/server.NodeResponse"
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Single node addressed by name; responds with server.NodeResponse.
  /api/v1/nodes/{name}:
    get:
      summary: Get details of a specific node
      description: Returns the details of a specific node by name
      tags:
      - Nodes
      security:
      - ApiKeyAuth: []
      parameters:
      - name: name
        in: path
        description: Node Name
        required: true
        type: string
      responses:
        "200":
          description: Node details
          schema:
            $ref: "#/definitions/server.NodeResponse"
        "400":
          description: Invalid name format
          schema:
            type: string
        "404":
          description: Node not found
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Version of llamactl itself; the response is a string.
  /api/v1/version:
    get:
      summary: Get llamactl version
      description: Returns the version of the llamactl command
      tags:
      - System
      security:
      - ApiKeyAuth: []
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # UI proxy for a named llama.cpp instance; responds with HTML.
  /llama-cpp/{name}/:
    get:
      description: Proxies requests to the llama.cpp UI for the specified instance
      parameters:
      # `name` fills the {name} segment of the path template, so it has to be
      # declared as a path parameter. Swagger 2.0 requires every template
      # variable to have a matching `in: path` parameter with required: true;
      # declaring it `in: query` left {name} undefined.
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      produces:
      - text/html
      responses:
        "200":
          description: Proxied HTML response
          schema:
            type: string
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
      - ApiKeyAuth: []
      summary: Proxy requests to llama.cpp UI for the instance
      tags:
      - Llama.cpp
  # POST proxy to the named llama.cpp instance (apply-template route); the
  # 200 response is a free-form object.
  /llama-cpp/{name}/apply-template:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy to the named llama.cpp instance (completion route); the 200
  # response is a free-form object.
  /llama-cpp/{name}/completion:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy to the named llama.cpp instance (detokenize route); the 200
  # response is a free-form object.
  /llama-cpp/{name}/detokenize:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy to the named llama.cpp instance (embeddings route); the 200
  # response is a free-form object.
  /llama-cpp/{name}/embeddings:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy to the named llama.cpp instance (infill route); the 200
  # response is a free-form object.
  /llama-cpp/{name}/infill:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy to the named llama.cpp instance (metrics route); the 200
  # response is a free-form object.
  /llama-cpp/{name}/metrics:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      security:
      - ApiKeyAuth: []
      produces:
      - application/json
      parameters:
      - name: name
        in: path
        description: Instance Name
        required: true
        type: string
      responses:
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Proxy for the props route of a named llama.cpp instance.
  # Both GET and POST are declared with the same parameters and responses.
  /llama-cpp/{name}/props:
    get:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      produces:
      - application/json
      security:
      - ApiKeyAuth: []
      parameters:
      # Path segment that selects the target instance.
      - name: name
        in: path
        required: true
        type: string
        description: Instance Name
      responses:
        # Success body is an unconstrained JSON object.
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      produces:
      - application/json
      security:
      - ApiKeyAuth: []
      parameters:
      # Path segment that selects the target instance.
      - name: name
        in: path
        required: true
        type: string
        description: Instance Name
      responses:
        # Success body is an unconstrained JSON object.
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy for the reranking route of a named llama.cpp instance.
  /llama-cpp/{name}/reranking:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      produces:
      - application/json
      security:
      - ApiKeyAuth: []
      parameters:
      # Path segment that selects the target instance.
      - name: name
        in: path
        required: true
        type: string
        description: Instance Name
      responses:
        # Success body is an unconstrained JSON object.
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # GET proxy for the slots route of a named llama.cpp instance.
  /llama-cpp/{name}/slots:
    get:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      produces:
      - application/json
      security:
      - ApiKeyAuth: []
      parameters:
      # Path segment that selects the target instance.
      - name: name
        in: path
        required: true
        type: string
        description: Instance Name
      responses:
        # Success body is an unconstrained JSON object.
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # POST proxy for the tokenize route of a named llama.cpp instance.
  /llama-cpp/{name}/tokenize:
    post:
      summary: Proxy requests to llama.cpp server instance
      description: >-
        Proxies requests to the specified llama.cpp server instance,
        starting it on-demand if configured
      tags:
      - Llama.cpp
      produces:
      - application/json
      security:
      - ApiKeyAuth: []
      parameters:
      # Path segment that selects the target instance.
      - name: name
        in: path
        required: true
        type: string
        description: Instance Name
      responses:
        # Success body is an unconstrained JSON object.
        "200":
          description: Proxied response
          schema:
            type: object
            additionalProperties: true
        "400":
          description: Invalid instance
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Catch-all POST entry point for OpenAI-style requests; per the description,
  # the target instance is selected from the request body.
  /v1/:
    post:
      summary: OpenAI-compatible proxy endpoint
      description: >-
        Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body. Requires API key authentication
        via the `Authorization` header.
      tags:
      - OpenAI
      consumes:
      - application/json
      security:
      - ApiKeyAuth: []
      responses:
        # No schema is declared for the success response.
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or instance name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
  # Instance listing in OpenAI-compatible form; takes no declared parameters.
  /v1/models:
    get:
      summary: List instances in OpenAI-compatible format
      description: >-
        Returns a list of instances in a format compatible with OpenAI API
      tags:
      - OpenAI
      security:
      - ApiKeyAuth: []
      responses:
        # Success body follows the server.OpenAIListInstancesResponse definition.
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/server.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
# Security schemes referenced by the `security` entries of the operations.
securityDefinitions:
  # API key sent in the X-API-Key request header.
  # NOTE(review): the /v1/ operation description mentions the `Authorization`
  # header instead - confirm which header(s) the server actually accepts.
  ApiKeyAuth:
    type: apiKey
    name: X-API-Key
    in: header
# Specification version of this document; quoted so it stays a string.
swagger: "2.0"