Files
llamactl/docs/swagger.yaml
2025-07-30 21:34:46 +02:00

791 lines
18 KiB
YAML

# URL prefix that every key under `paths` is resolved against.
# NOTE(review): combined with this prefix, the `/v1/` and `/v1/models` paths
# document /api/v1/v1/... - confirm that this matches the real routes.
basePath: '/api/v1'
# Reusable schemas; operations under `paths` point at them with `$ref`.
definitions:
  # Request-body schema of POST and PUT /instances/{name}.
  # No property is listed as required.
  # NOTE(review): several `description` values below (e.g. "Common params",
  # "Sampling params") read like group headings rather than per-field docs -
  # presumably carried over from comments in the generating source; confirm
  # before relying on them.
  llamactl.CreateInstanceOptions:
    properties:
      alias:
        type: string
      api_key:
        type: string
      api_key_file:
        type: string
      auto_restart:
        description: Auto restart
        type: boolean
      batch_size:
        type: integer
      cache_reuse:
        type: integer
      cache_type_k:
        type: string
      cache_type_k_draft:
        type: string
      cache_type_v:
        type: string
      cache_type_v_draft:
        type: string
      chat_template:
        type: string
      chat_template_file:
        type: string
      chat_template_kwargs:
        type: string
      check_tensors:
        type: boolean
      cont_batching:
        type: boolean
      control_vector:
        items:
          type: string
        type: array
      control_vector_layer_range:
        type: string
      control_vector_scaled:
        items:
          type: string
        type: array
      cpu_mask:
        type: string
      cpu_mask_batch:
        type: string
      cpu_range:
        type: string
      cpu_range_batch:
        type: string
      cpu_strict:
        type: integer
      cpu_strict_batch:
        type: integer
      ctx_size:
        type: integer
      ctx_size_draft:
        type: integer
      defrag_thold:
        type: number
      device:
        type: string
      device_draft:
        type: string
      draft_max:
        description: Speculative decoding params
        type: integer
      draft_min:
        type: integer
      draft_p_min:
        type: number
      dry_allowed_length:
        type: integer
      dry_base:
        type: number
      dry_multiplier:
        type: number
      dry_penalty_last_n:
        type: integer
      dry_sequence_breaker:
        items:
          type: string
        type: array
      dump_kv_cache:
        type: boolean
      dynatemp_exp:
        type: number
      dynatemp_range:
        type: number
      embd_bge_small_en_default:
        description: Default model params
        type: boolean
      embd_e5_small_en_default:
        type: boolean
      embd_gte_small_default:
        type: boolean
      embedding:
        type: boolean
      escape:
        type: boolean
      fim_qwen_1_5b_default:
        type: boolean
      fim_qwen_3b_default:
        type: boolean
      fim_qwen_7b_default:
        type: boolean
      fim_qwen_7b_spec:
        type: boolean
      fim_qwen_14b_spec:
        type: boolean
      flash_attn:
        type: boolean
      frequency_penalty:
        type: number
      gpu_layers:
        type: integer
      gpu_layers_draft:
        type: integer
      grammar:
        type: string
      grammar_file:
        type: string
      hf_file:
        type: string
      hf_file_v:
        type: string
      hf_repo:
        type: string
      hf_repo_draft:
        type: string
      hf_repo_v:
        type: string
      hf_token:
        type: string
      host:
        type: string
      ignore_eos:
        type: boolean
      jinja:
        type: boolean
      json_schema:
        type: string
      json_schema_file:
        type: string
      keep:
        type: integer
      log_colors:
        type: boolean
      log_disable:
        type: boolean
      log_file:
        type: string
      log_prefix:
        type: boolean
      log_timestamps:
        type: boolean
      logit_bias:
        items:
          type: string
        type: array
      lora:
        items:
          type: string
        type: array
      lora_init_without_apply:
        type: boolean
      lora_scaled:
        items:
          type: string
        type: array
      main_gpu:
        type: integer
      max_restarts:
        type: integer
      metrics:
        type: boolean
      min_p:
        type: number
      mirostat:
        type: integer
      mirostat_ent:
        type: number
      mirostat_lr:
        type: number
      mlock:
        type: boolean
      mmproj:
        type: string
      mmproj_url:
        type: string
      model:
        type: string
      model_draft:
        type: string
      model_url:
        type: string
      model_vocoder:
        description: Audio/TTS params
        type: string
      no_cont_batching:
        type: boolean
      no_context_shift:
        description: Server/Example-specific params
        type: boolean
      no_escape:
        type: boolean
      no_kv_offload:
        type: boolean
      no_mmap:
        type: boolean
      no_mmproj:
        type: boolean
      no_mmproj_offload:
        type: boolean
      no_perf:
        type: boolean
      no_prefill_assistant:
        type: boolean
      no_slots:
        type: boolean
      no_warmup:
        type: boolean
      no_webui:
        type: boolean
      numa:
        type: string
      override_kv:
        items:
          type: string
        type: array
      override_tensor:
        items:
          type: string
        type: array
      parallel:
        type: integer
      path:
        type: string
      poll:
        type: integer
      poll_batch:
        type: integer
      pooling:
        type: string
      port:
        type: integer
      predict:
        type: integer
      presence_penalty:
        type: number
      priority:
        type: integer
      priority_batch:
        type: integer
      props:
        type: boolean
      reasoning_budget:
        type: integer
      reasoning_format:
        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
        type: number
      reranking:
        type: boolean
      restart_delay_seconds:
        description: RestartDelay duration in seconds
        type: integer
      rope_freq_base:
        type: number
      rope_freq_scale:
        type: number
      rope_scale:
        type: number
      rope_scaling:
        type: string
      samplers:
        description: Sampling params
        type: string
      sampling_seq:
        type: string
      seed:
        type: integer
      slot_prompt_similarity:
        type: number
      slot_save_path:
        type: string
      slots:
        type: boolean
      special:
        type: boolean
      split_mode:
        type: string
      spm_infill:
        type: boolean
      ssl_cert_file:
        type: string
      ssl_key_file:
        type: string
      temperature:
        type: number
      tensor_split:
        type: string
      threads:
        type: integer
      threads_batch:
        type: integer
      threads_http:
        type: integer
      timeout:
        type: integer
      top_k:
        type: integer
      top_p:
        type: number
      tts_use_guide_tokens:
        type: boolean
      typical:
        type: number
      ubatch_size:
        type: integer
      verbose:
        type: boolean
      verbose_prompt:
        description: Common params
        type: boolean
      verbosity:
        type: integer
      xtc_probability:
        type: number
      xtc_threshold:
        type: number
      yarn_attn_factor:
        type: number
      yarn_beta_fast:
        type: number
      yarn_beta_slow:
        type: number
      yarn_ext_factor:
        type: number
      yarn_orig_ctx:
        type: integer
    type: object

  # Instance summary returned by the /instances operations
  # (list, get, create, update, start, stop, restart).
  llamactl.Instance:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
      running:
        description: Status
        type: boolean
    type: object

  # One element of `llamactl.OpenAIListInstancesResponse.data`.
  llamactl.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object

  # 200 response body of GET /v1/models.
  llamactl.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/llamactl.OpenAIInstance'
        type: array
      object:
        type: string
    type: object
# API metadata: title, version, description and license of this document.
info:
  # No contact details are published (explicit empty mapping, not null).
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  # Quoted so the version stays the string "1.0" instead of the float 1.0.
  version: "1.0"
# Operations exposed by the API. Keys are relative to `basePath`; every
# operation declares the `ApiKeyAuth` security requirement, and error
# responses (400/500/503) carry a plain string body.
paths:
  # Collection of managed instances.
  /instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/llamactl.Instance'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List all instances
      tags:
        - instances

  # Single instance addressed by name: delete, read, create, update.
  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Delete an instance
      tags:
        - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get details of a specific instance
      tags:
        - instances
    post:
      consumes:
        - application/json
      description: Creates a new instance with the provided configuration options
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Create and start a new instance
      tags:
        - instances
    put:
      consumes:
        - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        - description: Instance configuration options
          in: body
          name: options
          required: true
          schema:
            $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Update an instance's configuration
      tags:
        - instances

  # Log retrieval for one instance.
  /instances/{name}/logs:
    get:
      description: >-
        Returns the logs from a specific instance by name with optional
        line limit
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
        # Optional (no `required` flag).
        # NOTE(review): described as a number but typed `string` - confirm
        # whether `integer` is the intended type.
        - description: 'Number of lines to retrieve (default: all lines)'
          in: query
          name: lines
          type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get logs from a specific instance
      tags:
        - instances

  # Pass-through to the instance's own server; GET and POST are documented
  # identically and the 200 response declares no schema.
  /instances/{name}/proxy:
    get:
      description: >-
        Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances
    post:
      description: >-
        Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Proxy requests to a specific instance
      tags:
        - instances

  # Lifecycle actions; each returns the instance's details on success.
  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Restart a running instance
      tags:
        - instances
  /instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Start a stopped instance
      tags:
        - instances
  /instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
        - description: Instance Name
          in: path
          name: name
          required: true
          type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Stop a running instance
      tags:
        - instances

  # Informational endpoints about the llama server command; each returns
  # a plain string.
  /server/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List available devices for llama server
      tags:
        - server
  /server/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get help for llama server
      tags:
        - server
  /server/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: Get version of llama server
      tags:
        - server

  # OpenAI-compatible endpoints.
  # NOTE(review): paths are relative to `basePath`, so these two entries
  # document URLs under <basePath>/v1/ - confirm this matches the real routes.
  /v1/:
    post:
      consumes:
        - application/json
      description: >-
        Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body. Requires API key authentication via the
        `Authorization` header.
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or model name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: OpenAI-compatible proxy endpoint
      tags:
        - openai
  /v1/models:
    get:
      description: >-
        Returns a list of instances in a format compatible with OpenAI
        API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      security:
        - ApiKeyAuth: []
      summary: List instances in OpenAI-compatible format
      tags:
        - openai
# Declares the scheme named by every operation's `security: ApiKeyAuth`
# requirement; Swagger 2.0 requires each such name to be defined here.
# NOTE(review): the header name is taken from the `/v1/` operation description
# ("via the `Authorization` header") - confirm against the server's auth code.
securityDefinitions:
  ApiKeyAuth:
    in: header
    name: Authorization
    type: apiKey
# Specification version of this document; quoted so it stays the string "2.0".
swagger: "2.0"