mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 09:04:27 +00:00
Update API documentation and instance configuration
This commit is contained in:
@@ -1,5 +1,350 @@
|
||||
basePath: /api/v1
|
||||
definitions:
|
||||
llamactl.CreateInstanceOptions:
|
||||
properties:
|
||||
alias:
|
||||
type: string
|
||||
api_key:
|
||||
type: string
|
||||
api_key_file:
|
||||
type: string
|
||||
auto_restart:
|
||||
description: Auto restart
|
||||
type: boolean
|
||||
batch_size:
|
||||
type: integer
|
||||
cache_reuse:
|
||||
type: integer
|
||||
cache_type_k:
|
||||
type: string
|
||||
cache_type_k_draft:
|
||||
type: string
|
||||
cache_type_v:
|
||||
type: string
|
||||
cache_type_v_draft:
|
||||
type: string
|
||||
chat_template:
|
||||
type: string
|
||||
chat_template_file:
|
||||
type: string
|
||||
chat_template_kwargs:
|
||||
type: string
|
||||
check_tensors:
|
||||
type: boolean
|
||||
cont_batching:
|
||||
type: boolean
|
||||
control_vector:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
control_vector_layer_range:
|
||||
type: string
|
||||
control_vector_scaled:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
cpu_mask:
|
||||
type: string
|
||||
cpu_mask_batch:
|
||||
type: string
|
||||
cpu_range:
|
||||
type: string
|
||||
cpu_range_batch:
|
||||
type: string
|
||||
cpu_strict:
|
||||
type: integer
|
||||
cpu_strict_batch:
|
||||
type: integer
|
||||
ctx_size:
|
||||
type: integer
|
||||
ctx_size_draft:
|
||||
type: integer
|
||||
defrag_thold:
|
||||
type: number
|
||||
device:
|
||||
type: string
|
||||
device_draft:
|
||||
type: string
|
||||
draft_max:
|
||||
description: Speculative decoding params
|
||||
type: integer
|
||||
draft_min:
|
||||
type: integer
|
||||
draft_p_min:
|
||||
type: number
|
||||
dry_allowed_length:
|
||||
type: integer
|
||||
dry_base:
|
||||
type: number
|
||||
dry_multiplier:
|
||||
type: number
|
||||
dry_penalty_last_n:
|
||||
type: integer
|
||||
dry_sequence_breaker:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
dump_kv_cache:
|
||||
type: boolean
|
||||
dynatemp_exp:
|
||||
type: number
|
||||
dynatemp_range:
|
||||
type: number
|
||||
embd_bge_small_en_default:
|
||||
description: Default model params
|
||||
type: boolean
|
||||
embd_e5_small_en_default:
|
||||
type: boolean
|
||||
embd_gte_small_default:
|
||||
type: boolean
|
||||
embedding:
|
||||
type: boolean
|
||||
escape:
|
||||
type: boolean
|
||||
fim_qwen_1_5b_default:
|
||||
type: boolean
|
||||
fim_qwen_3b_default:
|
||||
type: boolean
|
||||
fim_qwen_7b_default:
|
||||
type: boolean
|
||||
fim_qwen_7b_spec:
|
||||
type: boolean
|
||||
fim_qwen_14b_spec:
|
||||
type: boolean
|
||||
flash_attn:
|
||||
type: boolean
|
||||
frequency_penalty:
|
||||
type: number
|
||||
gpu_layers:
|
||||
type: integer
|
||||
gpu_layers_draft:
|
||||
type: integer
|
||||
grammar:
|
||||
type: string
|
||||
grammar_file:
|
||||
type: string
|
||||
hf_file:
|
||||
type: string
|
||||
hf_file_v:
|
||||
type: string
|
||||
hf_repo:
|
||||
type: string
|
||||
hf_repo_draft:
|
||||
type: string
|
||||
hf_repo_v:
|
||||
type: string
|
||||
hf_token:
|
||||
type: string
|
||||
host:
|
||||
type: string
|
||||
ignore_eos:
|
||||
type: boolean
|
||||
jinja:
|
||||
type: boolean
|
||||
json_schema:
|
||||
type: string
|
||||
json_schema_file:
|
||||
type: string
|
||||
keep:
|
||||
type: integer
|
||||
log_colors:
|
||||
type: boolean
|
||||
log_disable:
|
||||
type: boolean
|
||||
log_file:
|
||||
type: string
|
||||
log_prefix:
|
||||
type: boolean
|
||||
log_timestamps:
|
||||
type: boolean
|
||||
logit_bias:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
lora:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
lora_init_without_apply:
|
||||
type: boolean
|
||||
lora_scaled:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
main_gpu:
|
||||
type: integer
|
||||
max_restarts:
|
||||
type: integer
|
||||
metrics:
|
||||
type: boolean
|
||||
min_p:
|
||||
type: number
|
||||
mirostat:
|
||||
type: integer
|
||||
mirostat_ent:
|
||||
type: number
|
||||
mirostat_lr:
|
||||
type: number
|
||||
mlock:
|
||||
type: boolean
|
||||
mmproj:
|
||||
type: string
|
||||
mmproj_url:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
model_draft:
|
||||
type: string
|
||||
model_url:
|
||||
type: string
|
||||
model_vocoder:
|
||||
description: Audio/TTS params
|
||||
type: string
|
||||
no_cont_batching:
|
||||
type: boolean
|
||||
no_context_shift:
|
||||
description: Server/Example-specific params
|
||||
type: boolean
|
||||
no_escape:
|
||||
type: boolean
|
||||
no_kv_offload:
|
||||
type: boolean
|
||||
no_mmap:
|
||||
type: boolean
|
||||
no_mmproj:
|
||||
type: boolean
|
||||
no_mmproj_offload:
|
||||
type: boolean
|
||||
no_perf:
|
||||
type: boolean
|
||||
no_prefill_assistant:
|
||||
type: boolean
|
||||
no_slots:
|
||||
type: boolean
|
||||
no_warmup:
|
||||
type: boolean
|
||||
no_webui:
|
||||
type: boolean
|
||||
numa:
|
||||
type: string
|
||||
override_kv:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
override_tensor:
|
||||
items:
|
||||
type: string
|
||||
type: array
|
||||
parallel:
|
||||
type: integer
|
||||
path:
|
||||
type: string
|
||||
poll:
|
||||
type: integer
|
||||
poll_batch:
|
||||
type: integer
|
||||
pooling:
|
||||
type: string
|
||||
port:
|
||||
type: integer
|
||||
predict:
|
||||
type: integer
|
||||
presence_penalty:
|
||||
type: number
|
||||
priority:
|
||||
type: integer
|
||||
priority_batch:
|
||||
type: integer
|
||||
props:
|
||||
type: boolean
|
||||
reasoning_budget:
|
||||
type: integer
|
||||
reasoning_format:
|
||||
type: string
|
||||
repeat_last_n:
|
||||
type: integer
|
||||
repeat_penalty:
|
||||
type: number
|
||||
reranking:
|
||||
type: boolean
|
||||
restart_delay_seconds:
|
||||
description: RestartDelay duration in seconds
|
||||
type: integer
|
||||
rope_freq_base:
|
||||
type: number
|
||||
rope_freq_scale:
|
||||
type: number
|
||||
rope_scale:
|
||||
type: number
|
||||
rope_scaling:
|
||||
type: string
|
||||
samplers:
|
||||
description: Sampling params
|
||||
type: string
|
||||
sampling_seq:
|
||||
type: string
|
||||
seed:
|
||||
type: integer
|
||||
slot_prompt_similarity:
|
||||
type: number
|
||||
slot_save_path:
|
||||
type: string
|
||||
slots:
|
||||
type: boolean
|
||||
special:
|
||||
type: boolean
|
||||
split_mode:
|
||||
type: string
|
||||
spm_infill:
|
||||
type: boolean
|
||||
ssl_cert_file:
|
||||
type: string
|
||||
ssl_key_file:
|
||||
type: string
|
||||
temperature:
|
||||
type: number
|
||||
tensor_split:
|
||||
type: string
|
||||
threads:
|
||||
type: integer
|
||||
threads_batch:
|
||||
type: integer
|
||||
threads_http:
|
||||
type: integer
|
||||
timeout:
|
||||
type: integer
|
||||
top_k:
|
||||
type: integer
|
||||
top_p:
|
||||
type: number
|
||||
tts_use_guide_tokens:
|
||||
type: boolean
|
||||
typical:
|
||||
type: number
|
||||
ubatch_size:
|
||||
type: integer
|
||||
verbose:
|
||||
type: boolean
|
||||
verbose_prompt:
|
||||
description: Common params
|
||||
type: boolean
|
||||
verbosity:
|
||||
type: integer
|
||||
xtc_probability:
|
||||
type: number
|
||||
xtc_threshold:
|
||||
type: number
|
||||
yarn_attn_factor:
|
||||
type: number
|
||||
yarn_beta_fast:
|
||||
type: number
|
||||
yarn_beta_slow:
|
||||
type: number
|
||||
yarn_ext_factor:
|
||||
type: number
|
||||
yarn_orig_ctx:
|
||||
type: integer
|
||||
type: object
|
||||
llamactl.Instance:
|
||||
properties:
|
||||
name:
|
||||
@@ -7,14 +352,6 @@ definitions:
|
||||
running:
|
||||
description: Status
|
||||
type: boolean
|
||||
stdErrChan:
|
||||
description: Channel for sending error messages
|
||||
type: object
|
||||
stdOutChan:
|
||||
description: Output channels
|
||||
type: object
|
||||
type: object
|
||||
llamactl.InstanceOptions:
|
||||
type: object
|
||||
info:
|
||||
contact: {}
|
||||
@@ -28,8 +365,6 @@ paths:
|
||||
/instances:
|
||||
get:
|
||||
description: Returns a list of all instances managed by the server
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: List of instances
|
||||
@@ -44,35 +379,6 @@ paths:
|
||||
summary: List all instances
|
||||
tags:
|
||||
- instances
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: Creates a new instance with the provided configuration options
|
||||
parameters:
|
||||
- description: Instance configuration options
|
||||
in: body
|
||||
name: options
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/llamactl.InstanceOptions'
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"201":
|
||||
description: Created instance details
|
||||
schema:
|
||||
$ref: '#/definitions/llamactl.Instance'
|
||||
"400":
|
||||
description: Invalid request body
|
||||
schema:
|
||||
type: string
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
type: string
|
||||
summary: Create and start a new instance
|
||||
tags:
|
||||
- instances
|
||||
/instances/{name}:
|
||||
delete:
|
||||
description: Stops and removes a specific instance by name
|
||||
@@ -82,8 +388,6 @@ paths:
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"204":
|
||||
description: No Content
|
||||
@@ -122,6 +426,38 @@ paths:
|
||||
summary: Get details of a specific instance
|
||||
tags:
|
||||
- instances
|
||||
post:
|
||||
consumes:
|
||||
- application/json
|
||||
description: Creates a new instance with the provided configuration options
|
||||
parameters:
|
||||
- description: Instance Name
|
||||
in: path
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
- description: Instance configuration options
|
||||
in: body
|
||||
name: options
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/llamactl.CreateInstanceOptions'
|
||||
responses:
|
||||
"201":
|
||||
description: Created instance details
|
||||
schema:
|
||||
$ref: '#/definitions/llamactl.Instance'
|
||||
"400":
|
||||
description: Invalid request body
|
||||
schema:
|
||||
type: string
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
type: string
|
||||
summary: Create and start a new instance
|
||||
tags:
|
||||
- instances
|
||||
put:
|
||||
consumes:
|
||||
- application/json
|
||||
@@ -137,9 +473,7 @@ paths:
|
||||
name: options
|
||||
required: true
|
||||
schema:
|
||||
$ref: '#/definitions/llamactl.InstanceOptions'
|
||||
produces:
|
||||
- application/json
|
||||
$ref: '#/definitions/llamactl.CreateInstanceOptions'
|
||||
responses:
|
||||
"200":
|
||||
description: Updated instance details
|
||||
@@ -156,6 +490,64 @@ paths:
|
||||
summary: Update an instance's configuration
|
||||
tags:
|
||||
- instances
|
||||
/instances/{name}/logs:
|
||||
get:
|
||||
description: Returns the logs from a specific instance by name with optional
|
||||
line limit
|
||||
parameters:
|
||||
- description: Instance Name
|
||||
in: path
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
- description: 'Number of lines to retrieve (default: all lines)'
|
||||
in: query
|
||||
name: lines
|
||||
type: string
|
||||
responses:
|
||||
"200":
|
||||
description: Instance logs
|
||||
schema:
|
||||
type: string
|
||||
"400":
|
||||
description: Invalid name format or lines parameter
|
||||
schema:
|
||||
type: string
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
type: string
|
||||
summary: Get logs from a specific instance
|
||||
tags:
|
||||
- instances
|
||||
/instances/{name}/proxy:
|
||||
get:
|
||||
description: Forwards HTTP requests to the llama-server instance running on
|
||||
a specific port
|
||||
parameters:
|
||||
- description: Instance Name
|
||||
in: path
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
responses:
|
||||
"200":
|
||||
description: Request successfully proxied to instance
|
||||
"400":
|
||||
description: Invalid name format
|
||||
schema:
|
||||
type: string
|
||||
"500":
|
||||
description: Internal Server Error
|
||||
schema:
|
||||
type: string
|
||||
"503":
|
||||
description: Instance is not running
|
||||
schema:
|
||||
type: string
|
||||
summary: Proxy requests to a specific instance
|
||||
tags:
|
||||
- instances
|
||||
/instances/{name}/restart:
|
||||
post:
|
||||
description: Restarts a specific instance by name
|
||||
@@ -165,8 +557,6 @@ paths:
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Restarted instance details
|
||||
@@ -192,8 +582,6 @@ paths:
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Started instance details
|
||||
@@ -219,8 +607,6 @@ paths:
|
||||
name: name
|
||||
required: true
|
||||
type: string
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: Stopped instance details
|
||||
|
||||
Reference in New Issue
Block a user