---
# Swagger 2.0 description of the llamactl HTTP API.
#
# Layout: shared schemas live under `definitions`, operations under `paths`,
# and the API-key scheme that every operation requires is declared under
# `securityDefinitions`.
#
# NOTE(review): the `llamactl.*` schema names suggest this file is generator
# output. If so, mirror the `securityDefinitions` addition in the generator's
# source annotations so it is not lost on regeneration.
basePath: /api/v1

definitions:
  # Request body accepted by POST and PUT /instances/{name}.
  # NOTE(review): several property descriptions (e.g. "Common params" on
  # `verbose_prompt`, "Sampling params" on `samplers`) read like section
  # headings rather than per-field documentation; confirm at the source.
  llamactl.CreateInstanceOptions:
    properties:
      alias:
        type: string
      api_key:
        type: string
      api_key_file:
        type: string
      auto_restart:
        description: Auto restart
        type: boolean
      batch_size:
        type: integer
      cache_reuse:
        type: integer
      cache_type_k:
        type: string
      cache_type_k_draft:
        type: string
      cache_type_v:
        type: string
      cache_type_v_draft:
        type: string
      chat_template:
        type: string
      chat_template_file:
        type: string
      chat_template_kwargs:
        type: string
      check_tensors:
        type: boolean
      cont_batching:
        type: boolean
      control_vector:
        items:
          type: string
        type: array
      control_vector_layer_range:
        type: string
      control_vector_scaled:
        items:
          type: string
        type: array
      cpu_mask:
        type: string
      cpu_mask_batch:
        type: string
      cpu_range:
        type: string
      cpu_range_batch:
        type: string
      cpu_strict:
        type: integer
      cpu_strict_batch:
        type: integer
      ctx_size:
        type: integer
      ctx_size_draft:
        type: integer
      defrag_thold:
        type: number
      device:
        type: string
      device_draft:
        type: string
      draft_max:
        description: Speculative decoding params
        type: integer
      draft_min:
        type: integer
      draft_p_min:
        type: number
      dry_allowed_length:
        type: integer
      dry_base:
        type: number
      dry_multiplier:
        type: number
      dry_penalty_last_n:
        type: integer
      dry_sequence_breaker:
        items:
          type: string
        type: array
      dump_kv_cache:
        type: boolean
      dynatemp_exp:
        type: number
      dynatemp_range:
        type: number
      embd_bge_small_en_default:
        description: Default model params
        type: boolean
      embd_e5_small_en_default:
        type: boolean
      embd_gte_small_default:
        type: boolean
      embedding:
        type: boolean
      escape:
        type: boolean
      fim_qwen_1_5b_default:
        type: boolean
      fim_qwen_3b_default:
        type: boolean
      fim_qwen_7b_default:
        type: boolean
      fim_qwen_7b_spec:
        type: boolean
      fim_qwen_14b_spec:
        type: boolean
      flash_attn:
        type: boolean
      frequency_penalty:
        type: number
      gpu_layers:
        type: integer
      gpu_layers_draft:
        type: integer
      grammar:
        type: string
      grammar_file:
        type: string
      hf_file:
        type: string
      hf_file_v:
        type: string
      hf_repo:
        type: string
      hf_repo_draft:
        type: string
      hf_repo_v:
        type: string
      hf_token:
        type: string
      host:
        type: string
      ignore_eos:
        type: boolean
      jinja:
        type: boolean
      json_schema:
        type: string
      json_schema_file:
        type: string
      keep:
        type: integer
      log_colors:
        type: boolean
      log_disable:
        type: boolean
      log_file:
        type: string
      log_prefix:
        type: boolean
      log_timestamps:
        type: boolean
      logit_bias:
        items:
          type: string
        type: array
      lora:
        items:
          type: string
        type: array
      lora_init_without_apply:
        type: boolean
      lora_scaled:
        items:
          type: string
        type: array
      main_gpu:
        type: integer
      max_restarts:
        type: integer
      metrics:
        type: boolean
      min_p:
        type: number
      mirostat:
        type: integer
      mirostat_ent:
        type: number
      mirostat_lr:
        type: number
      mlock:
        type: boolean
      mmproj:
        type: string
      mmproj_url:
        type: string
      model:
        type: string
      model_draft:
        type: string
      model_url:
        type: string
      model_vocoder:
        description: Audio/TTS params
        type: string
      no_cont_batching:
        type: boolean
      no_context_shift:
        description: Server/Example-specific params
        type: boolean
      no_escape:
        type: boolean
      no_kv_offload:
        type: boolean
      no_mmap:
        type: boolean
      no_mmproj:
        type: boolean
      no_mmproj_offload:
        type: boolean
      no_perf:
        type: boolean
      no_prefill_assistant:
        type: boolean
      no_slots:
        type: boolean
      no_warmup:
        type: boolean
      no_webui:
        type: boolean
      numa:
        type: string
      override_kv:
        items:
          type: string
        type: array
      override_tensor:
        items:
          type: string
        type: array
      parallel:
        type: integer
      path:
        type: string
      poll:
        type: integer
      poll_batch:
        type: integer
      pooling:
        type: string
      port:
        type: integer
      predict:
        type: integer
      presence_penalty:
        type: number
      priority:
        type: integer
      priority_batch:
        type: integer
      props:
        type: boolean
      reasoning_budget:
        type: integer
      reasoning_format:
        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
        type: number
      reranking:
        type: boolean
      restart_delay_seconds:
        description: RestartDelay duration in seconds
        type: integer
      rope_freq_base:
        type: number
      rope_freq_scale:
        type: number
      rope_scale:
        type: number
      rope_scaling:
        type: string
      samplers:
        description: Sampling params
        type: string
      sampling_seq:
        type: string
      seed:
        type: integer
      slot_prompt_similarity:
        type: number
      slot_save_path:
        type: string
      slots:
        type: boolean
      special:
        type: boolean
      split_mode:
        type: string
      spm_infill:
        type: boolean
      ssl_cert_file:
        type: string
      ssl_key_file:
        type: string
      temperature:
        type: number
      tensor_split:
        type: string
      threads:
        type: integer
      threads_batch:
        type: integer
      threads_http:
        type: integer
      timeout:
        type: integer
      top_k:
        type: integer
      top_p:
        type: number
      tts_use_guide_tokens:
        type: boolean
      typical:
        type: number
      ubatch_size:
        type: integer
      verbose:
        type: boolean
      verbose_prompt:
        description: Common params
        type: boolean
      verbosity:
        type: integer
      xtc_probability:
        type: number
      xtc_threshold:
        type: number
      yarn_attn_factor:
        type: number
      yarn_beta_fast:
        type: number
      yarn_beta_slow:
        type: number
      yarn_ext_factor:
        type: number
      yarn_orig_ctx:
        type: integer
    type: object

  # Instance summary returned by the /instances operations.
  llamactl.Instance:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
      running:
        description: Status
        type: boolean
    type: object

  # One element of the `data` array in llamactl.OpenAIListInstancesResponse.
  llamactl.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object

  # Response body of GET /v1/models.
  llamactl.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/llamactl.OpenAIInstance'
        type: array
      object:
        type: string
    type: object

info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  version: "1.0"

paths:
  # ---- Instance collection ----
  /instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/llamactl.Instance'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List all instances
      tags:
        - instances

  # ---- Single instance: delete / read / create / update ----
  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Delete an instance
      tags:
        - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get details of a specific instance
      tags:
        - instances
    post:
      consumes:
        - application/json
      description: Creates a new instance with the provided configuration options
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Create and start a new instance
      tags:
        - instances
    put:
      consumes:
        - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Update an instance's configuration
      tags:
        - instances

  # ---- Instance logs ----
  /instances/{name}/logs:
    get:
      description: Returns the logs from a specific instance by name with optional line limit
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        # Quoted because the text contains ": ", which would otherwise be
        # parsed as a nested mapping.
        - description: 'Number of lines to retrieve (default: all lines)'
          in: query
          name: lines
          type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get logs from a specific instance
      tags:
        - instances

  # ---- Pass-through proxy to an instance (GET and POST are documented) ----
  /instances/{name}/proxy:
    get:
      description: Forwards HTTP requests to the llama-server instance running on a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances
    post:
      description: Forwards HTTP requests to the llama-server instance running on a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances

  # ---- Instance lifecycle: restart / start / stop ----
  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Restart a running instance
      tags:
        - instances
  /instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Start a stopped instance
      tags:
        - instances
  /instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Stop a running instance
      tags:
        - instances

  # ---- llama server command information ----
  /server/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List available devices for llama server
      tags:
        - server
  /server/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get help for llama server
      tags:
        - server
  /server/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get version of llama server
      tags:
        - server

  # ---- OpenAI-compatible endpoints ----
  /v1/:
    post:
      consumes:
        - application/json
      # Folded scalar: the lines below join with single spaces into one
      # sentence pair, with no trailing newline.
      description: >-
        Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body. Requires API key authentication
        via the `Authorization` header.
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or model name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: OpenAI-compatible proxy endpoint
      tags:
        - openai
  /v1/models:
    get:
      description: Returns a list of instances in a format compatible with OpenAI API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List instances in OpenAI-compatible format
      tags:
        - openai

# Declares the `ApiKeyAuth` scheme that every operation above lists under
# `security`. Swagger 2.0 requires each security requirement name to match a
# scheme declared here; without it the requirement is a dangling reference.
# NOTE(review): the header name is taken from the /v1/ operation description
# ("via the `Authorization` header"); confirm it against the server's actual
# authentication handling before publishing.
securityDefinitions:
  ApiKeyAuth:
    in: header
    name: Authorization
    type: apiKey

swagger: "2.0"