basePath: /api/v1
definitions:
  llamactl.CreateInstanceOptions:
    properties:
      alias:
        type: string
      api_key:
        type: string
      api_key_file:
        type: string
      auto_restart:
        description: Auto restart
        type: boolean
      batch_size:
        type: integer
      cache_reuse:
        type: integer
      cache_type_k:
        type: string
      cache_type_k_draft:
        type: string
      cache_type_v:
        type: string
      cache_type_v_draft:
        type: string
      chat_template:
        type: string
      chat_template_file:
        type: string
      chat_template_kwargs:
        type: string
      check_tensors:
        type: boolean
      cont_batching:
        type: boolean
      control_vector:
        items:
          type: string
        type: array
      control_vector_layer_range:
        type: string
      control_vector_scaled:
        items:
          type: string
        type: array
      cpu_mask:
        type: string
      cpu_mask_batch:
        type: string
      cpu_range:
        type: string
      cpu_range_batch:
        type: string
      cpu_strict:
        type: integer
      cpu_strict_batch:
        type: integer
      ctx_size:
        type: integer
      ctx_size_draft:
        type: integer
      defrag_thold:
        type: number
      device:
        type: string
      device_draft:
        type: string
      draft_max:
        description: Speculative decoding params
        type: integer
      draft_min:
        type: integer
      draft_p_min:
        type: number
      dry_allowed_length:
        type: integer
      dry_base:
        type: number
      dry_multiplier:
        type: number
      dry_penalty_last_n:
        type: integer
      dry_sequence_breaker:
        items:
          type: string
        type: array
      dump_kv_cache:
        type: boolean
      dynatemp_exp:
        type: number
      dynatemp_range:
        type: number
      embd_bge_small_en_default:
        description: Default model params
        type: boolean
      embd_e5_small_en_default:
        type: boolean
      embd_gte_small_default:
        type: boolean
      embedding:
        type: boolean
      escape:
        type: boolean
      fim_qwen_1_5b_default:
        type: boolean
      fim_qwen_3b_default:
        type: boolean
      fim_qwen_7b_default:
        type: boolean
      fim_qwen_7b_spec:
        type: boolean
      fim_qwen_14b_spec:
        type: boolean
      flash_attn:
        type: boolean
      frequency_penalty:
        type: number
      gpu_layers:
        type: integer
      gpu_layers_draft:
        type: integer
      grammar:
        type: string
      grammar_file:
        type: string
      hf_file:
        type: string
      hf_file_v:
        type: string
      hf_repo:
        type: string
      hf_repo_draft:
        type: string
      hf_repo_v:
        type: string
      hf_token:
        type: string
      host:
        type: string
      ignore_eos:
        type: boolean
      jinja:
        type: boolean
      json_schema:
        type: string
      json_schema_file:
        type: string
      keep:
        type: integer
      log_colors:
        type: boolean
      log_disable:
        type: boolean
      log_file:
        type: string
      log_prefix:
        type: boolean
      log_timestamps:
        type: boolean
      logit_bias:
        items:
          type: string
        type: array
      lora:
        items:
          type: string
        type: array
      lora_init_without_apply:
        type: boolean
      lora_scaled:
        items:
          type: string
        type: array
      main_gpu:
        type: integer
      max_restarts:
        type: integer
      metrics:
        type: boolean
      min_p:
        type: number
      mirostat:
        type: integer
      mirostat_ent:
        type: number
      mirostat_lr:
        type: number
      mlock:
        type: boolean
      mmproj:
        type: string
      mmproj_url:
        type: string
      model:
        type: string
      model_draft:
        type: string
      model_url:
        type: string
      model_vocoder:
        description: Audio/TTS params
        type: string
      no_cont_batching:
        type: boolean
      no_context_shift:
        description: Server/Example-specific params
        type: boolean
      no_escape:
        type: boolean
      no_kv_offload:
        type: boolean
      no_mmap:
        type: boolean
      no_mmproj:
        type: boolean
      no_mmproj_offload:
        type: boolean
      no_perf:
        type: boolean
      no_prefill_assistant:
        type: boolean
      no_slots:
        type: boolean
      no_warmup:
        type: boolean
      no_webui:
        type: boolean
      numa:
        type: string
      override_kv:
        items:
          type: string
        type: array
      override_tensor:
        items:
          type: string
        type: array
      parallel:
        type: integer
      path:
        type: string
      poll:
        type: integer
      poll_batch:
        type: integer
      pooling:
        type: string
      port:
        type: integer
      predict:
        type: integer
      presence_penalty:
        type: number
      priority:
        type: integer
      priority_batch:
        type: integer
      props:
        type: boolean
      reasoning_budget:
        type: integer
      reasoning_format:
        type: string
      repeat_last_n:
        type: integer
      repeat_penalty:
        type: number
      reranking:
        type: boolean
      restart_delay_seconds:
        description: RestartDelay duration in seconds
        type: integer
      rope_freq_base:
        type: number
      rope_freq_scale:
        type: number
      rope_scale:
        type: number
      rope_scaling:
        type: string
      samplers:
        description: Sampling params
        type: string
      sampling_seq:
        type: string
      seed:
        type: integer
      slot_prompt_similarity:
        type: number
      slot_save_path:
        type: string
      slots:
        type: boolean
      special:
        type: boolean
      split_mode:
        type: string
      spm_infill:
        type: boolean
      ssl_cert_file:
        type: string
      ssl_key_file:
        type: string
      temperature:
        type: number
      tensor_split:
        type: string
      threads:
        type: integer
      threads_batch:
        type: integer
      threads_http:
        type: integer
      timeout:
        type: integer
      top_k:
        type: integer
      top_p:
        type: number
      tts_use_guide_tokens:
        type: boolean
      typical:
        type: number
      ubatch_size:
        type: integer
      verbose:
        type: boolean
      verbose_prompt:
        description: Common params
        type: boolean
      verbosity:
        type: integer
      xtc_probability:
        type: number
      xtc_threshold:
        type: number
      yarn_attn_factor:
        type: number
      yarn_beta_fast:
        type: number
      yarn_beta_slow:
        type: number
      yarn_ext_factor:
        type: number
      yarn_orig_ctx:
        type: integer
    type: object
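  # Illustrative sketch (not part of the generated spec): a minimal JSON body
  # matching the CreateInstanceOptions schema above, as it might be sent to
  # POST /api/v1/instances/{name}. The model path and option values are
  # hypothetical.
  #
  #   {
  #     "model": "/models/example-7b.Q4_K_M.gguf",
  #     "gpu_layers": 32,
  #     "ctx_size": 8192,
  #     "auto_restart": true,
  #     "max_restarts": 3
  #   }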
  llamactl.Instance:
    properties:
      created:
        description: Creation time
        type: integer
      name:
        type: string
      running:
        description: Status
        type: boolean
    type: object
  llamactl.OpenAIInstance:
    properties:
      created:
        type: integer
      id:
        type: string
      object:
        type: string
      owned_by:
        type: string
    type: object
  llamactl.OpenAIListInstancesResponse:
    properties:
      data:
        items:
          $ref: '#/definitions/llamactl.OpenAIInstance'
        type: array
      object:
        type: string
    type: object
info:
  contact: {}
  description: llamactl is a control server for managing Llama Server instances.
  license:
    name: MIT License
    url: https://opensource.org/license/mit/
  title: llamactl API
  version: "1.0"
paths:
  /instances:
    get:
      description: Returns a list of all instances managed by the server
      responses:
        "200":
          description: List of instances
          schema:
            items:
              $ref: '#/definitions/llamactl.Instance'
            type: array
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List all instances
      tags:
      - instances
  /instances/{name}:
    delete:
      description: Stops and removes a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "204":
          description: No Content
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Delete an instance
      tags:
      - instances
    get:
      description: Returns the details of a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get details of a specific instance
      tags:
      - instances
    post:
      consumes:
      - application/json
      description: Creates a new instance with the provided configuration options
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: Instance configuration options
        in: body
        name: options
        required: true
        schema:
          $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "201":
          description: Created instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid request body
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Create and start a new instance
      tags:
      - instances
    put:
      consumes:
      - application/json
      description: Updates the configuration of a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: Instance configuration options
        in: body
        name: options
        required: true
        schema:
          $ref: '#/definitions/llamactl.CreateInstanceOptions'
      responses:
        "200":
          description: Updated instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Update an instance's configuration
      tags:
      - instances
  /instances/{name}/logs:
    get:
      description: Returns the logs from a specific instance by name with optional
        line limit
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      - description: 'Number of lines to retrieve (default: all lines)'
        in: query
        name: lines
        type: string
      responses:
        "200":
          description: Instance logs
          schema:
            type: string
        "400":
          description: Invalid name format or lines parameter
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get logs from a specific instance
      tags:
      - instances
  /instances/{name}/proxy:
    get:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      summary: Proxy requests to a specific instance
      tags:
      - instances
    post:
      description: Forwards HTTP requests to the llama-server instance running on
        a specific port
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Request successfully proxied to instance
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
        "503":
          description: Instance is not running
          schema:
            type: string
      summary: Proxy requests to a specific instance
      tags:
      - instances
  /instances/{name}/restart:
    post:
      description: Restarts a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Restarted instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Restart a running instance
      tags:
      - instances
  /instances/{name}/start:
    post:
      description: Starts a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Started instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Start a stopped instance
      tags:
      - instances
  /instances/{name}/stop:
    post:
      description: Stops a specific instance by name
      parameters:
      - description: Instance Name
        in: path
        name: name
        required: true
        type: string
      responses:
        "200":
          description: Stopped instance details
          schema:
            $ref: '#/definitions/llamactl.Instance'
        "400":
          description: Invalid name format
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Stop a running instance
      tags:
      - instances
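  # Illustrative sketch (not part of the generated spec): a typical instance
  # lifecycle using the endpoints above. The host and port are assumptions for
  # the example; the model path is hypothetical.
  #
  #   curl -X POST localhost:8080/api/v1/instances/my-model \
  #     -H 'Content-Type: application/json' \
  #     -d '{"model": "/models/example-7b.Q4_K_M.gguf"}'   # create and start
  #   curl localhost:8080/api/v1/instances/my-model/logs   # inspect logs
  #   curl -X POST localhost:8080/api/v1/instances/my-model/stop
  #   curl -X DELETE localhost:8080/api/v1/instances/my-model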
  /server/devices:
    get:
      description: Returns a list of available devices for the llama server
      responses:
        "200":
          description: List of devices
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List available devices for llama server
      tags:
      - server
  /server/help:
    get:
      description: Returns the help text for the llama server command
      responses:
        "200":
          description: Help text
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get help for llama server
      tags:
      - server
  /server/version:
    get:
      description: Returns the version of the llama server command
      responses:
        "200":
          description: Version information
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: Get version of llama server
      tags:
      - server
  /v1/:
    post:
      consumes:
      - application/json
      description: Handles all POST requests to /v1/*, routing to the appropriate
        instance based on the request body
      responses:
        "200":
          description: OpenAI response
        "400":
          description: Invalid request body or model name
          schema:
            type: string
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: OpenAI-compatible proxy endpoint
      tags:
      - openai
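  # Illustrative sketch (not part of the generated spec): the OpenAI-compatible
  # proxy above routes by the model name in the request body, which presumably
  # matches an instance name. The host, port, exact mount point, and message
  # content are assumptions for the example.
  #
  #   curl -X POST localhost:8080/v1/chat/completions \
  #     -H 'Content-Type: application/json' \
  #     -d '{"model": "my-model", "messages": [{"role": "user", "content": "Hello"}]}'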
  /v1/models:
    get:
      description: Returns a list of instances in a format compatible with OpenAI
        API
      responses:
        "200":
          description: List of OpenAI-compatible instances
          schema:
            $ref: '#/definitions/llamactl.OpenAIListInstancesResponse'
        "500":
          description: Internal Server Error
          schema:
            type: string
      summary: List instances in OpenAI-compatible format
      tags:
      - openai
swagger: "2.0"