Update API documentation and instance configuration

This commit is contained in:
2025-07-19 22:04:13 +02:00
parent 154e1f36f9
commit 80a9971652
6 changed files with 1779 additions and 263 deletions

View File

@@ -22,9 +22,6 @@ const docTemplate = `{
"/instances": {
"get": {
"description": "Returns a list of all instances managed by the server",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -46,50 +43,6 @@ const docTemplate = `{
}
}
}
},
"post": {
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.InstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}": {
@@ -134,9 +87,6 @@ const docTemplate = `{
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -155,7 +105,7 @@ const docTemplate = `{
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.InstanceOptions"
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
}
}
],
@@ -180,14 +130,59 @@ const docTemplate = `{
}
}
},
"delete": {
"description": "Stops and removes a specific instance by name",
"produces": [
"post": {
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
],
"summary": "Delete an instance",
"parameters": [
{
@@ -217,12 +212,94 @@ const docTemplate = `{
}
}
},
"/instances/{name}/logs": {
"get": {
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
],
"summary": "Get logs from a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Number of lines to retrieve (default: all lines)",
"name": "lines",
"in": "query"
}
],
"responses": {
"200": {
"description": "Instance logs",
"schema": {
"type": "string"
}
},
"400": {
"description": "Invalid name format or lines parameter",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/proxy": {
"get": {
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/restart": {
"post": {
"description": "Restarts a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -261,9 +338,6 @@ const docTemplate = `{
"/instances/{name}/start": {
"post": {
"description": "Starts a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -302,9 +376,6 @@ const docTemplate = `{
"/instances/{name}/stop": {
"post": {
"description": "Stops a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -411,6 +482,520 @@ const docTemplate = `{
}
},
"definitions": {
"llamactl.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
},
"cache_reuse": {
"type": "integer"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"description": "Speculative decoding params",
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Server/Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"priority": {
"type": "integer"
},
"priority_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay_seconds": {
"description": "RestartDelay duration in seconds",
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temperature": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer"
}
}
},
"llamactl.Instance": {
"type": "object",
"properties": {
@@ -420,19 +1005,8 @@ const docTemplate = `{
"running": {
"description": "Status",
"type": "boolean"
},
"stdErrChan": {
"description": "Channel for sending error messages",
"type": "object"
},
"stdOutChan": {
"description": "Output channels",
"type": "object"
}
}
},
"llamactl.InstanceOptions": {
"type": "object"
}
}
}`

View File

@@ -15,9 +15,6 @@
"/instances": {
"get": {
"description": "Returns a list of all instances managed by the server",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -39,50 +36,6 @@
}
}
}
},
"post": {
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.InstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}": {
@@ -127,9 +80,6 @@
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -148,7 +98,7 @@
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.InstanceOptions"
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
}
}
],
@@ -173,14 +123,59 @@
}
}
},
"delete": {
"description": "Stops and removes a specific instance by name",
"produces": [
"post": {
"description": "Creates a new instance with the provided configuration options",
"consumes": [
"application/json"
],
"tags": [
"instances"
],
"summary": "Create and start a new instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"description": "Instance configuration options",
"name": "options",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/llamactl.CreateInstanceOptions"
}
}
],
"responses": {
"201": {
"description": "Created instance details",
"schema": {
"$ref": "#/definitions/llamactl.Instance"
}
},
"400": {
"description": "Invalid request body",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
},
"delete": {
"description": "Stops and removes a specific instance by name",
"tags": [
"instances"
],
"summary": "Delete an instance",
"parameters": [
{
@@ -210,12 +205,94 @@
}
}
},
"/instances/{name}/logs": {
"get": {
"description": "Returns the logs from a specific instance by name with optional line limit",
"tags": [
"instances"
],
"summary": "Get logs from a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
},
{
"type": "string",
"description": "Number of lines to retrieve (default: all lines)",
"name": "lines",
"in": "query"
}
],
"responses": {
"200": {
"description": "Instance logs",
"schema": {
"type": "string"
}
},
"400": {
"description": "Invalid name format or lines parameter",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/proxy": {
"get": {
"description": "Forwards HTTP requests to the llama-server instance running on a specific port",
"tags": [
"instances"
],
"summary": "Proxy requests to a specific instance",
"parameters": [
{
"type": "string",
"description": "Instance Name",
"name": "name",
"in": "path",
"required": true
}
],
"responses": {
"200": {
"description": "Request successfully proxied to instance"
},
"400": {
"description": "Invalid name format",
"schema": {
"type": "string"
}
},
"500": {
"description": "Internal Server Error",
"schema": {
"type": "string"
}
},
"503": {
"description": "Instance is not running",
"schema": {
"type": "string"
}
}
}
}
},
"/instances/{name}/restart": {
"post": {
"description": "Restarts a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -254,9 +331,6 @@
"/instances/{name}/start": {
"post": {
"description": "Starts a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -295,9 +369,6 @@
"/instances/{name}/stop": {
"post": {
"description": "Stops a specific instance by name",
"produces": [
"application/json"
],
"tags": [
"instances"
],
@@ -404,6 +475,520 @@
}
},
"definitions": {
"llamactl.CreateInstanceOptions": {
"type": "object",
"properties": {
"alias": {
"type": "string"
},
"api_key": {
"type": "string"
},
"api_key_file": {
"type": "string"
},
"auto_restart": {
"description": "Auto restart",
"type": "boolean"
},
"batch_size": {
"type": "integer"
},
"cache_reuse": {
"type": "integer"
},
"cache_type_k": {
"type": "string"
},
"cache_type_k_draft": {
"type": "string"
},
"cache_type_v": {
"type": "string"
},
"cache_type_v_draft": {
"type": "string"
},
"chat_template": {
"type": "string"
},
"chat_template_file": {
"type": "string"
},
"chat_template_kwargs": {
"type": "string"
},
"check_tensors": {
"type": "boolean"
},
"cont_batching": {
"type": "boolean"
},
"control_vector": {
"type": "array",
"items": {
"type": "string"
}
},
"control_vector_layer_range": {
"type": "string"
},
"control_vector_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"cpu_mask": {
"type": "string"
},
"cpu_mask_batch": {
"type": "string"
},
"cpu_range": {
"type": "string"
},
"cpu_range_batch": {
"type": "string"
},
"cpu_strict": {
"type": "integer"
},
"cpu_strict_batch": {
"type": "integer"
},
"ctx_size": {
"type": "integer"
},
"ctx_size_draft": {
"type": "integer"
},
"defrag_thold": {
"type": "number"
},
"device": {
"type": "string"
},
"device_draft": {
"type": "string"
},
"draft_max": {
"description": "Speculative decoding params",
"type": "integer"
},
"draft_min": {
"type": "integer"
},
"draft_p_min": {
"type": "number"
},
"dry_allowed_length": {
"type": "integer"
},
"dry_base": {
"type": "number"
},
"dry_multiplier": {
"type": "number"
},
"dry_penalty_last_n": {
"type": "integer"
},
"dry_sequence_breaker": {
"type": "array",
"items": {
"type": "string"
}
},
"dump_kv_cache": {
"type": "boolean"
},
"dynatemp_exp": {
"type": "number"
},
"dynatemp_range": {
"type": "number"
},
"embd_bge_small_en_default": {
"description": "Default model params",
"type": "boolean"
},
"embd_e5_small_en_default": {
"type": "boolean"
},
"embd_gte_small_default": {
"type": "boolean"
},
"embedding": {
"type": "boolean"
},
"escape": {
"type": "boolean"
},
"fim_qwen_14b_spec": {
"type": "boolean"
},
"fim_qwen_1_5b_default": {
"type": "boolean"
},
"fim_qwen_3b_default": {
"type": "boolean"
},
"fim_qwen_7b_default": {
"type": "boolean"
},
"fim_qwen_7b_spec": {
"type": "boolean"
},
"flash_attn": {
"type": "boolean"
},
"frequency_penalty": {
"type": "number"
},
"gpu_layers": {
"type": "integer"
},
"gpu_layers_draft": {
"type": "integer"
},
"grammar": {
"type": "string"
},
"grammar_file": {
"type": "string"
},
"hf_file": {
"type": "string"
},
"hf_file_v": {
"type": "string"
},
"hf_repo": {
"type": "string"
},
"hf_repo_draft": {
"type": "string"
},
"hf_repo_v": {
"type": "string"
},
"hf_token": {
"type": "string"
},
"host": {
"type": "string"
},
"ignore_eos": {
"type": "boolean"
},
"jinja": {
"type": "boolean"
},
"json_schema": {
"type": "string"
},
"json_schema_file": {
"type": "string"
},
"keep": {
"type": "integer"
},
"log_colors": {
"type": "boolean"
},
"log_disable": {
"type": "boolean"
},
"log_file": {
"type": "string"
},
"log_prefix": {
"type": "boolean"
},
"log_timestamps": {
"type": "boolean"
},
"logit_bias": {
"type": "array",
"items": {
"type": "string"
}
},
"lora": {
"type": "array",
"items": {
"type": "string"
}
},
"lora_init_without_apply": {
"type": "boolean"
},
"lora_scaled": {
"type": "array",
"items": {
"type": "string"
}
},
"main_gpu": {
"type": "integer"
},
"max_restarts": {
"type": "integer"
},
"metrics": {
"type": "boolean"
},
"min_p": {
"type": "number"
},
"mirostat": {
"type": "integer"
},
"mirostat_ent": {
"type": "number"
},
"mirostat_lr": {
"type": "number"
},
"mlock": {
"type": "boolean"
},
"mmproj": {
"type": "string"
},
"mmproj_url": {
"type": "string"
},
"model": {
"type": "string"
},
"model_draft": {
"type": "string"
},
"model_url": {
"type": "string"
},
"model_vocoder": {
"description": "Audio/TTS params",
"type": "string"
},
"no_cont_batching": {
"type": "boolean"
},
"no_context_shift": {
"description": "Server/Example-specific params",
"type": "boolean"
},
"no_escape": {
"type": "boolean"
},
"no_kv_offload": {
"type": "boolean"
},
"no_mmap": {
"type": "boolean"
},
"no_mmproj": {
"type": "boolean"
},
"no_mmproj_offload": {
"type": "boolean"
},
"no_perf": {
"type": "boolean"
},
"no_prefill_assistant": {
"type": "boolean"
},
"no_slots": {
"type": "boolean"
},
"no_warmup": {
"type": "boolean"
},
"no_webui": {
"type": "boolean"
},
"numa": {
"type": "string"
},
"override_kv": {
"type": "array",
"items": {
"type": "string"
}
},
"override_tensor": {
"type": "array",
"items": {
"type": "string"
}
},
"parallel": {
"type": "integer"
},
"path": {
"type": "string"
},
"poll": {
"type": "integer"
},
"poll_batch": {
"type": "integer"
},
"pooling": {
"type": "string"
},
"port": {
"type": "integer"
},
"predict": {
"type": "integer"
},
"presence_penalty": {
"type": "number"
},
"priority": {
"type": "integer"
},
"priority_batch": {
"type": "integer"
},
"props": {
"type": "boolean"
},
"reasoning_budget": {
"type": "integer"
},
"reasoning_format": {
"type": "string"
},
"repeat_last_n": {
"type": "integer"
},
"repeat_penalty": {
"type": "number"
},
"reranking": {
"type": "boolean"
},
"restart_delay_seconds": {
"description": "RestartDelay duration in seconds",
"type": "integer"
},
"rope_freq_base": {
"type": "number"
},
"rope_freq_scale": {
"type": "number"
},
"rope_scale": {
"type": "number"
},
"rope_scaling": {
"type": "string"
},
"samplers": {
"description": "Sampling params",
"type": "string"
},
"sampling_seq": {
"type": "string"
},
"seed": {
"type": "integer"
},
"slot_prompt_similarity": {
"type": "number"
},
"slot_save_path": {
"type": "string"
},
"slots": {
"type": "boolean"
},
"special": {
"type": "boolean"
},
"split_mode": {
"type": "string"
},
"spm_infill": {
"type": "boolean"
},
"ssl_cert_file": {
"type": "string"
},
"ssl_key_file": {
"type": "string"
},
"temperature": {
"type": "number"
},
"tensor_split": {
"type": "string"
},
"threads": {
"type": "integer"
},
"threads_batch": {
"type": "integer"
},
"threads_http": {
"type": "integer"
},
"timeout": {
"type": "integer"
},
"top_k": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"tts_use_guide_tokens": {
"type": "boolean"
},
"typical": {
"type": "number"
},
"ubatch_size": {
"type": "integer"
},
"verbose": {
"type": "boolean"
},
"verbose_prompt": {
"description": "Common params",
"type": "boolean"
},
"verbosity": {
"type": "integer"
},
"xtc_probability": {
"type": "number"
},
"xtc_threshold": {
"type": "number"
},
"yarn_attn_factor": {
"type": "number"
},
"yarn_beta_fast": {
"type": "number"
},
"yarn_beta_slow": {
"type": "number"
},
"yarn_ext_factor": {
"type": "number"
},
"yarn_orig_ctx": {
"type": "integer"
}
}
},
"llamactl.Instance": {
"type": "object",
"properties": {
@@ -413,19 +998,8 @@
"running": {
"description": "Status",
"type": "boolean"
},
"stdErrChan": {
"description": "Channel for sending error messages",
"type": "object"
},
"stdOutChan": {
"description": "Output channels",
"type": "object"
}
}
},
"llamactl.InstanceOptions": {
"type": "object"
}
}
}

View File

@@ -1,5 +1,350 @@
basePath: /api/v1
definitions:
llamactl.CreateInstanceOptions:
properties:
alias:
type: string
api_key:
type: string
api_key_file:
type: string
auto_restart:
description: Auto restart
type: boolean
batch_size:
type: integer
cache_reuse:
type: integer
cache_type_k:
type: string
cache_type_k_draft:
type: string
cache_type_v:
type: string
cache_type_v_draft:
type: string
chat_template:
type: string
chat_template_file:
type: string
chat_template_kwargs:
type: string
check_tensors:
type: boolean
cont_batching:
type: boolean
control_vector:
items:
type: string
type: array
control_vector_layer_range:
type: string
control_vector_scaled:
items:
type: string
type: array
cpu_mask:
type: string
cpu_mask_batch:
type: string
cpu_range:
type: string
cpu_range_batch:
type: string
cpu_strict:
type: integer
cpu_strict_batch:
type: integer
ctx_size:
type: integer
ctx_size_draft:
type: integer
defrag_thold:
type: number
device:
type: string
device_draft:
type: string
draft_max:
description: Speculative decoding params
type: integer
draft_min:
type: integer
draft_p_min:
type: number
dry_allowed_length:
type: integer
dry_base:
type: number
dry_multiplier:
type: number
dry_penalty_last_n:
type: integer
dry_sequence_breaker:
items:
type: string
type: array
dump_kv_cache:
type: boolean
dynatemp_exp:
type: number
dynatemp_range:
type: number
embd_bge_small_en_default:
description: Default model params
type: boolean
embd_e5_small_en_default:
type: boolean
embd_gte_small_default:
type: boolean
embedding:
type: boolean
escape:
type: boolean
fim_qwen_1_5b_default:
type: boolean
fim_qwen_3b_default:
type: boolean
fim_qwen_7b_default:
type: boolean
fim_qwen_7b_spec:
type: boolean
fim_qwen_14b_spec:
type: boolean
flash_attn:
type: boolean
frequency_penalty:
type: number
gpu_layers:
type: integer
gpu_layers_draft:
type: integer
grammar:
type: string
grammar_file:
type: string
hf_file:
type: string
hf_file_v:
type: string
hf_repo:
type: string
hf_repo_draft:
type: string
hf_repo_v:
type: string
hf_token:
type: string
host:
type: string
ignore_eos:
type: boolean
jinja:
type: boolean
json_schema:
type: string
json_schema_file:
type: string
keep:
type: integer
log_colors:
type: boolean
log_disable:
type: boolean
log_file:
type: string
log_prefix:
type: boolean
log_timestamps:
type: boolean
logit_bias:
items:
type: string
type: array
lora:
items:
type: string
type: array
lora_init_without_apply:
type: boolean
lora_scaled:
items:
type: string
type: array
main_gpu:
type: integer
max_restarts:
type: integer
metrics:
type: boolean
min_p:
type: number
mirostat:
type: integer
mirostat_ent:
type: number
mirostat_lr:
type: number
mlock:
type: boolean
mmproj:
type: string
mmproj_url:
type: string
model:
type: string
model_draft:
type: string
model_url:
type: string
model_vocoder:
description: Audio/TTS params
type: string
no_cont_batching:
type: boolean
no_context_shift:
description: Server/Example-specific params
type: boolean
no_escape:
type: boolean
no_kv_offload:
type: boolean
no_mmap:
type: boolean
no_mmproj:
type: boolean
no_mmproj_offload:
type: boolean
no_perf:
type: boolean
no_prefill_assistant:
type: boolean
no_slots:
type: boolean
no_warmup:
type: boolean
no_webui:
type: boolean
numa:
type: string
override_kv:
items:
type: string
type: array
override_tensor:
items:
type: string
type: array
parallel:
type: integer
path:
type: string
poll:
type: integer
poll_batch:
type: integer
pooling:
type: string
port:
type: integer
predict:
type: integer
presence_penalty:
type: number
priority:
type: integer
priority_batch:
type: integer
props:
type: boolean
reasoning_budget:
type: integer
reasoning_format:
type: string
repeat_last_n:
type: integer
repeat_penalty:
type: number
reranking:
type: boolean
restart_delay_seconds:
description: RestartDelay duration in seconds
type: integer
rope_freq_base:
type: number
rope_freq_scale:
type: number
rope_scale:
type: number
rope_scaling:
type: string
samplers:
description: Sampling params
type: string
sampling_seq:
type: string
seed:
type: integer
slot_prompt_similarity:
type: number
slot_save_path:
type: string
slots:
type: boolean
special:
type: boolean
split_mode:
type: string
spm_infill:
type: boolean
ssl_cert_file:
type: string
ssl_key_file:
type: string
temperature:
type: number
tensor_split:
type: string
threads:
type: integer
threads_batch:
type: integer
threads_http:
type: integer
timeout:
type: integer
top_k:
type: integer
top_p:
type: number
tts_use_guide_tokens:
type: boolean
typical:
type: number
ubatch_size:
type: integer
verbose:
type: boolean
verbose_prompt:
description: Common params
type: boolean
verbosity:
type: integer
xtc_probability:
type: number
xtc_threshold:
type: number
yarn_attn_factor:
type: number
yarn_beta_fast:
type: number
yarn_beta_slow:
type: number
yarn_ext_factor:
type: number
yarn_orig_ctx:
type: integer
type: object
llamactl.Instance:
properties:
name:
@@ -7,14 +352,6 @@ definitions:
running:
description: Status
type: boolean
stdErrChan:
description: Channel for sending error messages
type: object
stdOutChan:
description: Output channels
type: object
type: object
llamactl.InstanceOptions:
type: object
info:
contact: {}
@@ -28,8 +365,6 @@ paths:
/instances:
get:
description: Returns a list of all instances managed by the server
produces:
- application/json
responses:
"200":
description: List of instances
@@ -44,35 +379,6 @@ paths:
summary: List all instances
tags:
- instances
post:
consumes:
- application/json
description: Creates a new instance with the provided configuration options
parameters:
- description: Instance configuration options
in: body
name: options
required: true
schema:
$ref: '#/definitions/llamactl.InstanceOptions'
produces:
- application/json
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/llamactl.Instance'
"400":
description: Invalid request body
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
summary: Create and start a new instance
tags:
- instances
/instances/{name}:
delete:
description: Stops and removes a specific instance by name
@@ -82,8 +388,6 @@ paths:
name: name
required: true
type: string
produces:
- application/json
responses:
"204":
description: No Content
@@ -122,6 +426,38 @@ paths:
summary: Get details of a specific instance
tags:
- instances
post:
consumes:
- application/json
description: Creates a new instance with the provided configuration options
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: Instance configuration options
in: body
name: options
required: true
schema:
$ref: '#/definitions/llamactl.CreateInstanceOptions'
responses:
"201":
description: Created instance details
schema:
$ref: '#/definitions/llamactl.Instance'
"400":
description: Invalid request body
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
summary: Create and start a new instance
tags:
- instances
put:
consumes:
- application/json
@@ -137,9 +473,7 @@ paths:
name: options
required: true
schema:
$ref: '#/definitions/llamactl.InstanceOptions'
produces:
- application/json
$ref: '#/definitions/llamactl.CreateInstanceOptions'
responses:
"200":
description: Updated instance details
@@ -156,6 +490,64 @@ paths:
summary: Update an instance's configuration
tags:
- instances
/instances/{name}/logs:
get:
description: Returns the logs from a specific instance by name with optional
line limit
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
- description: 'Number of lines to retrieve (default: all lines)'
in: query
name: lines
type: string
responses:
"200":
description: Instance logs
schema:
type: string
"400":
description: Invalid name format or lines parameter
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
summary: Get logs from a specific instance
tags:
- instances
/instances/{name}/proxy:
get:
description: Forwards HTTP requests to the llama-server instance running on
a specific port
parameters:
- description: Instance Name
in: path
name: name
required: true
type: string
responses:
"200":
description: Request successfully proxied to instance
"400":
description: Invalid name format
schema:
type: string
"500":
description: Internal Server Error
schema:
type: string
"503":
description: Instance is not running
schema:
type: string
summary: Proxy requests to a specific instance
tags:
- instances
/instances/{name}/restart:
post:
description: Restarts a specific instance by name
@@ -165,8 +557,6 @@ paths:
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Restarted instance details
@@ -192,8 +582,6 @@ paths:
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Started instance details
@@ -219,8 +607,6 @@ paths:
name: name
required: true
type: string
produces:
- application/json
responses:
"200":
description: Stopped instance details