// Files
// llamactl/server/pkg/options.go
//
// 370 lines
// 14 KiB
// Go

package llamactl
import (
"encoding/json"
"reflect"
"strconv"
"strings"
)
// InstanceOptions couples an instance name with the llama server options,
// which are embedded by pointer so their fields and methods are promoted.
//
// NOTE(review): *LlamaServerOptions defines UnmarshalJSON, and Go promotes
// that method to InstanceOptions. Confirm that decoding JSON directly into an
// InstanceOptions still fills Name and copes with a nil embedded pointer.
type InstanceOptions struct {
Name string `json:"name,omitempty"`
*LlamaServerOptions
}
// LlamaServerOptions collects the settings that can be passed to a llama
// server process.
//
// Every field carries a snake_case JSON tag with omitempty. BuildCommandArgs
// walks the fields in declaration order and derives a command-line flag from
// each tag by replacing "_" with "-". A flag is emitted only for non-zero
// values, so an option explicitly set to 0, "" or false cannot be told apart
// from an unset one. []string fields are repeatable: the flag is emitted once
// per element.
//
// UnmarshalJSON additionally accepts a set of short and dashed aliases for
// some of the JSON keys below.
type LlamaServerOptions struct {
// Common params
VerbosePrompt bool `json:"verbose_prompt,omitempty"`
Threads int `json:"threads,omitempty"`
ThreadsBatch int `json:"threads_batch,omitempty"`
CPUMask string `json:"cpu_mask,omitempty"`
CPURange string `json:"cpu_range,omitempty"`
CPUStrict int `json:"cpu_strict,omitempty"`
Priority int `json:"priority,omitempty"`
Poll int `json:"poll,omitempty"`
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
PriorityBatch int `json:"priority_batch,omitempty"`
PollBatch int `json:"poll_batch,omitempty"`
CtxSize int `json:"ctx_size,omitempty"`
Predict int `json:"predict,omitempty"`
BatchSize int `json:"batch_size,omitempty"`
UBatchSize int `json:"ubatch_size,omitempty"`
Keep int `json:"keep,omitempty"`
FlashAttn bool `json:"flash_attn,omitempty"`
NoPerf bool `json:"no_perf,omitempty"`
Escape bool `json:"escape,omitempty"`
NoEscape bool `json:"no_escape,omitempty"`
RopeScaling string `json:"rope_scaling,omitempty"`
RopeScale float64 `json:"rope_scale,omitempty"`
RopeFreqBase float64 `json:"rope_freq_base,omitempty"`
RopeFreqScale float64 `json:"rope_freq_scale,omitempty"`
YarnOrigCtx int `json:"yarn_orig_ctx,omitempty"`
YarnExtFactor float64 `json:"yarn_ext_factor,omitempty"`
YarnAttnFactor float64 `json:"yarn_attn_factor,omitempty"`
YarnBetaSlow float64 `json:"yarn_beta_slow,omitempty"`
YarnBetaFast float64 `json:"yarn_beta_fast,omitempty"`
DumpKVCache bool `json:"dump_kv_cache,omitempty"`
NoKVOffload bool `json:"no_kv_offload,omitempty"`
CacheTypeK string `json:"cache_type_k,omitempty"`
CacheTypeV string `json:"cache_type_v,omitempty"`
DefragThold float64 `json:"defrag_thold,omitempty"`
Parallel int `json:"parallel,omitempty"`
Mlock bool `json:"mlock,omitempty"`
NoMmap bool `json:"no_mmap,omitempty"`
Numa string `json:"numa,omitempty"`
Device string `json:"device,omitempty"`
OverrideTensor []string `json:"override_tensor,omitempty"`
GPULayers int `json:"gpu_layers,omitempty"`
SplitMode string `json:"split_mode,omitempty"`
TensorSplit string `json:"tensor_split,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
CheckTensors bool `json:"check_tensors,omitempty"`
OverrideKV []string `json:"override_kv,omitempty"`
Lora []string `json:"lora,omitempty"`
LoraScaled []string `json:"lora_scaled,omitempty"`
ControlVector []string `json:"control_vector,omitempty"`
ControlVectorScaled []string `json:"control_vector_scaled,omitempty"`
ControlVectorLayerRange string `json:"control_vector_layer_range,omitempty"`
Model string `json:"model,omitempty"`
ModelURL string `json:"model_url,omitempty"`
HFRepo string `json:"hf_repo,omitempty"`
HFRepoDraft string `json:"hf_repo_draft,omitempty"`
HFFile string `json:"hf_file,omitempty"`
HFRepoV string `json:"hf_repo_v,omitempty"`
HFFileV string `json:"hf_file_v,omitempty"`
HFToken string `json:"hf_token,omitempty"`
LogDisable bool `json:"log_disable,omitempty"`
LogFile string `json:"log_file,omitempty"`
LogColors bool `json:"log_colors,omitempty"`
Verbose bool `json:"verbose,omitempty"`
Verbosity int `json:"verbosity,omitempty"`
LogPrefix bool `json:"log_prefix,omitempty"`
LogTimestamps bool `json:"log_timestamps,omitempty"`
// Sampling params
Samplers string `json:"samplers,omitempty"`
Seed int `json:"seed,omitempty"`
SamplingSeq string `json:"sampling_seq,omitempty"`
IgnoreEOS bool `json:"ignore_eos,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float64 `json:"top_p,omitempty"`
MinP float64 `json:"min_p,omitempty"`
XTCProbability float64 `json:"xtc_probability,omitempty"`
XTCThreshold float64 `json:"xtc_threshold,omitempty"`
Typical float64 `json:"typical,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
RepeatPenalty float64 `json:"repeat_penalty,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
DryMultiplier float64 `json:"dry_multiplier,omitempty"`
DryBase float64 `json:"dry_base,omitempty"`
DryAllowedLength int `json:"dry_allowed_length,omitempty"`
DryPenaltyLastN int `json:"dry_penalty_last_n,omitempty"`
DrySequenceBreaker []string `json:"dry_sequence_breaker,omitempty"`
DynatempRange float64 `json:"dynatemp_range,omitempty"`
DynatempExp float64 `json:"dynatemp_exp,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatLR float64 `json:"mirostat_lr,omitempty"`
MirostatEnt float64 `json:"mirostat_ent,omitempty"`
LogitBias []string `json:"logit_bias,omitempty"`
Grammar string `json:"grammar,omitempty"`
GrammarFile string `json:"grammar_file,omitempty"`
JSONSchema string `json:"json_schema,omitempty"`
JSONSchemaFile string `json:"json_schema_file,omitempty"`
// Server/Example-specific params
NoContextShift bool `json:"no_context_shift,omitempty"`
Special bool `json:"special,omitempty"`
NoWarmup bool `json:"no_warmup,omitempty"`
SPMInfill bool `json:"spm_infill,omitempty"`
Pooling string `json:"pooling,omitempty"`
ContBatching bool `json:"cont_batching,omitempty"`
NoContBatching bool `json:"no_cont_batching,omitempty"`
MMProj string `json:"mmproj,omitempty"`
MMProjURL string `json:"mmproj_url,omitempty"`
NoMMProj bool `json:"no_mmproj,omitempty"`
NoMMProjOffload bool `json:"no_mmproj_offload,omitempty"`
Alias string `json:"alias,omitempty"`
Host string `json:"host,omitempty"`
Port int `json:"port,omitempty"`
Path string `json:"path,omitempty"`
NoWebUI bool `json:"no_webui,omitempty"`
Embedding bool `json:"embedding,omitempty"`
Reranking bool `json:"reranking,omitempty"`
APIKey string `json:"api_key,omitempty"`
APIKeyFile string `json:"api_key_file,omitempty"`
SSLKeyFile string `json:"ssl_key_file,omitempty"`
SSLCertFile string `json:"ssl_cert_file,omitempty"`
ChatTemplateKwargs string `json:"chat_template_kwargs,omitempty"`
Timeout int `json:"timeout,omitempty"`
ThreadsHTTP int `json:"threads_http,omitempty"`
CacheReuse int `json:"cache_reuse,omitempty"`
Metrics bool `json:"metrics,omitempty"`
Slots bool `json:"slots,omitempty"`
Props bool `json:"props,omitempty"`
NoSlots bool `json:"no_slots,omitempty"`
SlotSavePath string `json:"slot_save_path,omitempty"`
Jinja bool `json:"jinja,omitempty"`
ReasoningFormat string `json:"reasoning_format,omitempty"`
ReasoningBudget int `json:"reasoning_budget,omitempty"`
ChatTemplate string `json:"chat_template,omitempty"`
ChatTemplateFile string `json:"chat_template_file,omitempty"`
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
// Speculative decoding params
DraftMax int `json:"draft_max,omitempty"`
DraftMin int `json:"draft_min,omitempty"`
DraftPMin float64 `json:"draft_p_min,omitempty"`
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
DeviceDraft string `json:"device_draft,omitempty"`
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
ModelDraft string `json:"model_draft,omitempty"`
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
// Audio/TTS params
ModelVocoder string `json:"model_vocoder,omitempty"`
TTSUseGuideTokens bool `json:"tts_use_guide_tokens,omitempty"`
// Default model params
EmbdBGESmallEnDefault bool `json:"embd_bge_small_en_default,omitempty"`
EmbdE5SmallEnDefault bool `json:"embd_e5_small_en_default,omitempty"`
EmbdGTESmallDefault bool `json:"embd_gte_small_default,omitempty"`
FIMQwen1_5BDefault bool `json:"fim_qwen_1_5b_default,omitempty"`
FIMQwen3BDefault bool `json:"fim_qwen_3b_default,omitempty"`
FIMQwen7BDefault bool `json:"fim_qwen_7b_default,omitempty"`
FIMQwen7BSpec bool `json:"fim_qwen_7b_spec,omitempty"`
FIMQwen14BSpec bool `json:"fim_qwen_14b_spec,omitempty"`
}
// UnmarshalJSON decodes a JSON object into o. Besides the canonical
// snake_case keys declared in the struct tags it accepts a fixed set of
// aliases: short flags such as "t" or "ngl" and dashed spellings such as
// "ctx-size".
//
// Decoding rules:
//   - Canonical keys are decoded by encoding/json as usual.
//   - An alias overrides the canonical key when both are present. Aliases are
//     applied in the order of the table below, so when several aliases of the
//     same option appear the last listed one wins; the outcome does not depend
//     on map iteration order.
//   - Alias values for numeric options may be JSON numbers or numeric strings.
//     A JSON null leaves the option untouched.
//   - An alias value that cannot be converted to the option's type yields a
//     *json.UnmarshalTypeError instead of being dropped silently.
//
// o is replaced as a whole (options absent from data end up at their zero
// value) and is written only after decoding succeeded. Calling the method on
// a nil receiver returns a *json.InvalidUnmarshalError.
func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
	if o == nil {
		return &json.InvalidUnmarshalError{Type: reflect.TypeOf(o)}
	}

	// Decode into a generic map as well so alias keys can be looked up.
	var raw map[string]interface{}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// tempOptions has the same fields but none of the methods, so decoding
	// into it does not recurse into this UnmarshalJSON.
	type tempOptions LlamaServerOptions
	var temp tempOptions
	if err := json.Unmarshal(data, &temp); err != nil {
		return err
	}

	// Alias -> canonical JSON key. A slice rather than a map keeps the
	// application order fixed.
	aliases := []struct{ alt, canonical string }{
		// Threads alternatives
		{"t", "threads"},
		{"tb", "threads_batch"},
		{"threads-batch", "threads_batch"},
		// Context size alternatives
		{"c", "ctx_size"},
		{"ctx-size", "ctx_size"},
		// Predict alternatives
		{"n", "predict"},
		{"n-predict", "predict"},
		{"n_predict", "predict"},
		// Batch size alternatives
		{"b", "batch_size"},
		{"batch-size", "batch_size"},
		// GPU layers alternatives
		{"ngl", "gpu_layers"},
		{"gpu-layers", "gpu_layers"},
		{"n-gpu-layers", "gpu_layers"},
		{"n_gpu_layers", "gpu_layers"},
		// Model alternatives
		{"m", "model"},
		// Seed alternatives
		{"s", "seed"},
		// Flash attention alternatives
		{"fa", "flash_attn"},
		{"flash-attn", "flash_attn"},
		// Verbose alternatives
		{"v", "verbose"},
		{"log-verbose", "verbose"},
		// Verbosity alternatives
		{"lv", "verbosity"},
		{"log-verbosity", "verbosity"},
		// Temperature alternatives
		{"temp", "temperature"},
		// Top-k alternatives
		{"top-k", "top_k"},
		// Top-p alternatives
		{"top-p", "top_p"},
		// Min-p alternatives
		{"min-p", "min_p"},
		// Additional mappings can be added here
	}

	fields := reflect.ValueOf(&temp).Elem()
	var byTag map[string]int // built lazily: most payloads carry no alias
	for _, a := range aliases {
		value, present := raw[a.alt]
		if !present {
			continue
		}
		if byTag == nil {
			byTag = jsonTagIndex(fields.Type())
		}
		idx, known := byTag[a.canonical]
		if !known {
			continue
		}
		if err := assignAliasValue(fields.Field(idx), a.alt, value); err != nil {
			return err
		}
	}

	*o = LlamaServerOptions(temp)
	return nil
}

// jsonTagIndex maps the JSON key of every exported, tagged field of the struct
// type t to the field's index.
func jsonTagIndex(t reflect.Type) map[string]int {
	index := make(map[string]int, t.NumField())
	for i := 0; i < t.NumField(); i++ {
		sf := t.Field(i)
		if sf.PkgPath != "" {
			continue // unexported, cannot be set
		}
		name := sf.Tag.Get("json")
		if comma := strings.Index(name, ","); comma != -1 {
			name = name[:comma]
		}
		if name == "" || name == "-" {
			continue
		}
		index[name] = i
	}
	return index
}

// assignAliasValue stores value, as produced by decoding JSON into
// interface{}, in field. alias is only used for the error message.
func assignAliasValue(field reflect.Value, alias string, value interface{}) error {
	if value == nil {
		// JSON null: keep the field as it is, like encoding/json does.
		return nil
	}
	mismatch := func(got string) error {
		return &json.UnmarshalTypeError{
			Value:  got,
			Type:   field.Type(),
			Struct: "LlamaServerOptions",
			Field:  alias,
		}
	}

	switch field.Kind() {
	case reflect.Int:
		switch v := value.(type) {
		case float64:
			n := int64(v)
			// Reject fractions and values outside the integer range
			// instead of truncating them.
			if float64(n) != v || field.OverflowInt(n) {
				return mismatch("number " + strconv.FormatFloat(v, 'g', -1, 64))
			}
			field.SetInt(n)
			return nil
		case string:
			n, err := strconv.Atoi(v)
			if err != nil {
				return mismatch("string " + strconv.Quote(v))
			}
			field.SetInt(int64(n))
			return nil
		}
	case reflect.Float64:
		switch v := value.(type) {
		case float64:
			field.SetFloat(v)
			return nil
		case string:
			f, err := strconv.ParseFloat(v, 64)
			if err != nil {
				return mismatch("string " + strconv.Quote(v))
			}
			field.SetFloat(f)
			return nil
		}
	case reflect.String:
		if s, ok := value.(string); ok {
			field.SetString(s)
			return nil
		}
	case reflect.Bool:
		if b, ok := value.(bool); ok {
			field.SetBool(b)
			return nil
		}
	default:
		// No alias targets other kinds; leave such fields alone.
		return nil
	}
	return mismatch(jsonValueKind(value))
}

// jsonValueKind names the JSON type of a value decoded into interface{}.
func jsonValueKind(value interface{}) string {
	switch value.(type) {
	case bool:
		return "bool"
	case float64:
		return "number"
	case string:
		return "string"
	case []interface{}:
		return "array"
	case map[string]interface{}:
		return "object"
	}
	return "value"
}
// BuildCommandArgs converts the options into command line arguments.
//
// Fields are visited in declaration order. The flag name is the field's JSON
// tag name with "_" replaced by "-", prefixed with "--", except for the few
// flags listed in flagOverrides whose spelling cannot be derived that way.
// Only non-zero values produce output:
//   - bool: the bare flag when true
//   - int, float64, string: the flag followed by the formatted value
//   - []string: the flag followed by the element, once per element
//
// Fields without a JSON tag, unexported fields and other kinds are skipped.
// A nil receiver yields no arguments.
func (o *LlamaServerOptions) BuildCommandArgs() []string {
	if o == nil {
		return nil
	}

	// The llama.cpp flag contains a dot, which the tag
	// "fim_qwen_1_5b_default" cannot express.
	flagOverrides := map[string]string{
		"fim-qwen-1-5b-default": "fim-qwen-1.5b-default",
	}

	var args []string
	v := reflect.ValueOf(o).Elem()
	t := v.Type()
	for i := 0; i < v.NumField(); i++ {
		field := v.Field(i)
		fieldType := t.Field(i)

		// Skip unexported fields
		if !field.CanInterface() {
			continue
		}

		// Get the JSON tag to determine the flag name
		jsonTag := fieldType.Tag.Get("json")
		if jsonTag == "" || jsonTag == "-" {
			continue
		}

		// Remove ",omitempty" from the tag
		flagName := jsonTag
		if commaIndex := strings.Index(jsonTag, ","); commaIndex != -1 {
			flagName = jsonTag[:commaIndex]
		}

		// Convert snake_case to kebab-case for CLI flags
		flagName = strings.ReplaceAll(flagName, "_", "-")
		if override, ok := flagOverrides[flagName]; ok {
			flagName = override
		}

		// Add the appropriate arguments based on field type and value
		switch field.Kind() {
		case reflect.Bool:
			if field.Bool() {
				args = append(args, "--"+flagName)
			}
		case reflect.Int:
			if field.Int() != 0 {
				args = append(args, "--"+flagName, strconv.FormatInt(field.Int(), 10))
			}
		case reflect.Float64:
			if field.Float() != 0 {
				args = append(args, "--"+flagName, strconv.FormatFloat(field.Float(), 'f', -1, 64))
			}
		case reflect.String:
			if field.String() != "" {
				args = append(args, "--"+flagName, field.String())
			}
		case reflect.Slice:
			if field.Type().Elem().Kind() == reflect.String {
				// Handle []string fields
				for j := 0; j < field.Len(); j++ {
					args = append(args, "--"+flagName, field.Index(j).String())
				}
			}
		}
	}
	return args
}