basePath: /api/v1
definitions:
  llamactl.CreateInstanceOptions:
    properties:
      alias:
        type: string
      api_key:
        type: string
      api_key_file:
        type: string
      auto_restart:
        description: Auto restart
        type: boolean
      batch_size:
        type: integer
      cache_reuse:
        type: integer
      cache_type_k:
        type: string
      cache_type_k_draft:
        type: string
      cache_type_v:
        type: string
      cache_type_v_draft:
        type: string
      chat_template:
        type: string
      chat_template_file:
        type: string
      chat_template_kwargs:
        type: string
      check_tensors:
        type: boolean
      cont_batching:
        type: boolean
      control_vector:
        items:
          type: string
        type: array
      control_vector_layer_range:
        type: string
      control_vector_scaled:
        items:
          type: string
        type: array
      cpu_mask:
        type: string
      cpu_mask_batch:
        type: string
      cpu_range:
        type: string
      cpu_range_batch:
        type: string
      cpu_strict:
        type: integer
      cpu_strict_batch:
        type: integer
      ctx_size:
        type: integer
      ctx_size_draft:
        type: integer
      defrag_thold:
        type: number
      device:
        type: string
      device_draft:
        type: string
      draft_max:
        description: Speculative decoding params
        type: integer
      draft_min:
        type: integer
      draft_p_min:
        type: number
      dry_allowed_length:
        type: integer
      dry_base:
        type: number
      dry_multiplier:
        type: number
      dry_penalty_last_n:
        type: integer
      dry_sequence_breaker:
        items:
          type: string
        type: array
      dump_kv_cache:
        type: boolean
      dynatemp_exp:
        type: number
      dynatemp_range:
        type: number
      embd_bge_small_en_default:
        description: Default model params
        type: boolean
      embd_e5_small_en_default:
        type: boolean
      embd_gte_small_default:
        type: boolean
      embedding:
        type: boolean
      escape:
        type: boolean
      fim_qwen_1_5b_default:
        type: boolean
      fim_qwen_3b_default:
        type: boolean
      fim_qwen_7b_default:
        type: boolean
      fim_qwen_7b_spec:
        type: boolean
      fim_qwen_14b_spec:
        type: boolean
      flash_attn:
        type: boolean
      frequency_penalty:
        type: number
      gpu_layers:
        type: integer
      gpu_layers_draft:
        type: integer
      grammar:
        type: string
      grammar_file:
        type: string
      hf_file:
        type: string
      hf_file_v:
        type: string
      hf_repo:
        type: string
      hf_repo_draft:
        type: string
      hf_repo_v:
        type: string
      hf_token:
        type: string
      host:
        type: string
      ignore_eos:
        type: boolean
      jinja:
        type: boolean
      json_schema:
        type: string
      json_schema_file:
        type: string
      keep:
        type: integer
      log_colors:
        type: boolean
      log_disable:
        type: boolean
      log_file:
        type: string
      log_prefix:
        type: boolean
      log_timestamps:
        type: boolean
      logit_bias:
        items:
          type: string
        type: array
      lora:
        items:
          type: string
        type: array
      lora_init_without_apply:
        type: boolean
      lora_scaled:
        items:
          type: string
        type: array
      main_gpu:
        type: integer
      max_restarts:
        type: integer
      metrics:
        type: boolean
      min_p:
        type: number
      mirostat:
        type: integer
      mirostat_ent:
        type: number
      mirostat_lr:
        type: number
      mlock:
        type: boolean
      mmproj:
        type: string
      mmproj_url:
        type: string
      model:
        type: string
      model_draft:
        type: string
      model_url:
        type: string
      model_vocoder:
        description: Audio/TTS params
        type: string
      no_cont_batching:
        type: boolean
      no_context_shift:
        description: Server/Example-specific params
        type: boolean
      no_escape:
        type: boolean
      no_kv_offload:
        type: boolean
      no_mmap:
        type: boolean
      no_mmproj:
        type: boolean
      no_mmproj_offload:
        type: boolean
      no_perf:
        type: boolean
      no_prefill_assistant:
        type: boolean
      no_slots:
        type: boolean
      no_warmup:
        type: boolean
      no_webui:
        type: boolean
      numa:
        type: string
      override_kv:
        items:
          type: string
        type: array
      override_tensor:
        items:
          type: string
        type: array
      parallel:
        type: integer
      path:
        type: string
      poll:
        type: integer
      poll_batch:
        type: integer
      pooling:
        type: string
      port:
        type: integer
      predict:
        type: integer
      presence_penalty:
        type: number
      priority:
        type: integer
      priority_batch:
        type: integer
      props:
        type: boolean
      reasoning_budget:
        type: integer
      reasoning_format:
        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
        type: number
      reranking:
        type: boolean
      restart_delay_seconds:
        description: RestartDelay duration in seconds
        type: integer
      rope_freq_base:
        type: number
      rope_freq_scale:
        type: number
      rope_scale:
        type: number
      rope_scaling:
        type: string
      samplers:
        description: Sampling params
        type: string
      sampling_seq:
        type: string
      seed:
        type: integer
      slot_prompt_similarity:
        type: number
      slot_save_path:
        type: string
      slots:
        type: boolean
      special:
        type: boolean
      split_mode:
        type: string
      spm_infill:
        type: boolean
      ssl_cert_file:
        type: string
      ssl_key_file:
        type: string
      temperature:
        type: number
      tensor_split:
        type: string
      threads:
        type: integer
      threads_batch:
        type: integer
      threads_http:
        type: integer
      timeout:
        type: integer
      top_k:
        type: integer
      top_p:
        type: number
      tts_use_guide_tokens:
        type: boolean
      typical:
        type: number
      ubatch_size:
        type: integer
      verbose:
        type: boolean
      verbose_prompt:
        description: Common params
        type: boolean
      verbosity:
        type: integer
      xtc_probability:
        type: number
      xtc_threshold:
        type: number
      yarn_attn_factor:
        type: number
      yarn_beta_fast:
        type: number
      yarn_beta_slow:
        type: number
      yarn_ext_factor:
        type: number
      yarn_orig_ctx:
        type: integer
    type: object
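  # Illustrative sketch (not part of the generated spec): a minimal JSON body
  # matching the CreateInstanceOptions schema above, as it might be sent to
  # POST /api/v1/instances/{name}. The model path and option values are
  # hypothetical.
  #
  #   {
  #     "model": "/models/example-7b.Q4_K_M.gguf",
  #     "gpu_layers": 32,
  #     "ctx_size": 8192,
  #     "auto_restart": true,
  #     "max_restarts": 3
  #   }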
  llamactl.Instance:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
      running:
        description: Status
        type: boolean
    type: object
  llamactl.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object
  llamactl.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/llamactl.OpenAIInstance'
        type: array
      object:
        type: string
    type: object
info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  version: "1.0"
paths:
  /instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/llamactl.Instance'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List all instances
      tags:
      - instances
  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Delete an instance
      tags:
      - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get details of a specific instance
      tags:
      - instances
    post:
      consumes:
      - application/json
      description: Creates a new instance with the provided configuration options
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: Instance configuration options
        in: body
        name: options
        required: true
        schema:
          $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Create and start a new instance
      tags:
      - instances
    put:
      consumes:
      - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: Instance configuration options
        in: body
        name: options
        required: true
        schema:
          $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Update an instance's configuration
      tags:
      - instances
  /instances/{name}/logs:
    get:
      description: Returns the logs from a specific instance by name with optional
        line limit
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: 'Number of lines to retrieve (default: all lines)'
        in: query
        name: lines
        type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get logs from a specific instance
      tags:
      - instances
  /instances/{name}/proxy:
    get:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      summary: Proxy requests to a specific instance
      tags:
      - instances
    post:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      summary: Proxy requests to a specific instance
      tags:
      - instances
  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Restart a running instance
      tags:
      - instances
  /instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Start a stopped instance
      tags:
      - instances
  /instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Stop a running instance
      tags:
      - instances
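  # Illustrative sketch (not part of the generated spec): a typical instance
  # lifecycle using the endpoints above. The host and port are assumptions for
  # the example; the model path is hypothetical.
  #
  #   curl -X POST localhost:8080/api/v1/instances/my-model \
  #     -H 'Content-Type: application/json' \
  #     -d '{"model": "/models/example-7b.Q4_K_M.gguf"}'   # create and start
  #   curl localhost:8080/api/v1/instances/my-model/logs   # inspect logs
  #   curl -X POST localhost:8080/api/v1/instances/my-model/stop
  #   curl -X DELETE localhost:8080/api/v1/instances/my-model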
  /server/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List available devices for llama server
      tags:
      - server
  /server/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get help for llama server
      tags:
      - server
  /server/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get version of llama server
      tags:
      - server
  /v1/:
    post:
      consumes:
      - application/json
      description: Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or model name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: OpenAI-compatible proxy endpoint
      tags:
      - openai
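  # Illustrative sketch (not part of the generated spec): the OpenAI-compatible
  # proxy above routes by the model name in the request body, which presumably
  # matches an instance name. The host, port, exact mount point, and message
  # content are assumptions for the example.
  #
  #   curl -X POST localhost:8080/v1/chat/completions \
  #     -H 'Content-Type: application/json' \
  #     -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}'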
  /v1/models:
    get:
      description: Returns a list of instances in a format compatible with OpenAI
        API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List instances in OpenAI-compatible format
      tags:
      - openai
swagger: "2.0"