Mirror of
https://github.com/lordmathis/llamactl.git
Synced 2025-11-06 09:04:27 +00:00
Compare commits
6 Commits
| Author | SHA1 | Date |
|---|---|---|
| | 5aed01b68f | |
| | 3f9caff33b | |
| | 169254c61a | |
| | 8154b8d0ab | |
| | a26d853ad5 | |
| | 6203b64045 | |
@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
|
||||
CPUMask string `json:"cpu_mask,omitempty"`
|
||||
CPURange string `json:"cpu_range,omitempty"`
|
||||
CPUStrict int `json:"cpu_strict,omitempty"`
|
||||
Priority int `json:"priority,omitempty"`
|
||||
Prio int `json:"prio,omitempty"`
|
||||
Poll int `json:"poll,omitempty"`
|
||||
CPUMaskBatch string `json:"cpu_mask_batch,omitempty"`
|
||||
CPURangeBatch string `json:"cpu_range_batch,omitempty"`
|
||||
CPUStrictBatch int `json:"cpu_strict_batch,omitempty"`
|
||||
PriorityBatch int `json:"priority_batch,omitempty"`
|
||||
PrioBatch int `json:"prio_batch,omitempty"`
|
||||
PollBatch int `json:"poll_batch,omitempty"`
|
||||
CtxSize int `json:"ctx_size,omitempty"`
|
||||
Predict int `json:"predict,omitempty"`
|
||||
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
|
||||
Seed int `json:"seed,omitempty"`
|
||||
SamplingSeq string `json:"sampling_seq,omitempty"`
|
||||
IgnoreEOS bool `json:"ignore_eos,omitempty"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
Temperature float64 `json:"temp,omitempty"`
|
||||
TopK int `json:"top_k,omitempty"`
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
MinP float64 `json:"min_p,omitempty"`
|
||||
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
|
||||
JSONSchema string `json:"json_schema,omitempty"`
|
||||
JSONSchemaFile string `json:"json_schema_file,omitempty"`
|
||||
|
||||
// Server/Example-specific params
|
||||
// Example-specific params
|
||||
NoContextShift bool `json:"no_context_shift,omitempty"`
|
||||
Special bool `json:"special,omitempty"`
|
||||
NoWarmup bool `json:"no_warmup,omitempty"`
|
||||
@@ -150,17 +150,15 @@ type LlamaServerOptions struct {
|
||||
NoPrefillAssistant bool `json:"no_prefill_assistant,omitempty"`
|
||||
SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
|
||||
LoraInitWithoutApply bool `json:"lora_init_without_apply,omitempty"`
|
||||
|
||||
// Speculative decoding params
|
||||
DraftMax int `json:"draft_max,omitempty"`
|
||||
DraftMin int `json:"draft_min,omitempty"`
|
||||
DraftPMin float64 `json:"draft_p_min,omitempty"`
|
||||
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
|
||||
DeviceDraft string `json:"device_draft,omitempty"`
|
||||
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
|
||||
ModelDraft string `json:"model_draft,omitempty"`
|
||||
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
|
||||
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
|
||||
DraftMax int `json:"draft_max,omitempty"`
|
||||
DraftMin int `json:"draft_min,omitempty"`
|
||||
DraftPMin float64 `json:"draft_p_min,omitempty"`
|
||||
CtxSizeDraft int `json:"ctx_size_draft,omitempty"`
|
||||
DeviceDraft string `json:"device_draft,omitempty"`
|
||||
GPULayersDraft int `json:"gpu_layers_draft,omitempty"`
|
||||
ModelDraft string `json:"model_draft,omitempty"`
|
||||
CacheTypeKDraft string `json:"cache_type_k_draft,omitempty"`
|
||||
CacheTypeVDraft string `json:"cache_type_v_draft,omitempty"`
|
||||
|
||||
// Audio/TTS params
|
||||
ModelVocoder string `json:"model_vocoder,omitempty"`
|
||||
@@ -199,62 +197,75 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
|
||||
|
||||
// Handle alternative field names
|
||||
fieldMappings := map[string]string{
|
||||
// Official llama-server short forms from the documentation
|
||||
"t": "threads", // -t, --threads N
|
||||
"tb": "threads_batch", // -tb, --threads-batch N
|
||||
"C": "cpu_mask", // -C, --cpu-mask M
|
||||
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
|
||||
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
|
||||
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
|
||||
"c": "ctx_size", // -c, --ctx-size N
|
||||
"n": "predict", // -n, --predict, --n-predict N
|
||||
"b": "batch_size", // -b, --batch-size N
|
||||
"ub": "ubatch_size", // -ub, --ubatch-size N
|
||||
"fa": "flash_attn", // -fa, --flash-attn
|
||||
"e": "escape", // -e, --escape
|
||||
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
|
||||
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
|
||||
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
|
||||
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
|
||||
"dt": "defrag_thold", // -dt, --defrag-thold N
|
||||
"np": "parallel", // -np, --parallel N
|
||||
"dev": "device", // -dev, --device <dev1,dev2,..>
|
||||
"ot": "override_tensor", // --override-tensor, -ot
|
||||
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
|
||||
"sm": "split_mode", // -sm, --split-mode
|
||||
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
|
||||
"mg": "main_gpu", // -mg, --main-gpu INDEX
|
||||
"m": "model", // -m, --model FNAME
|
||||
"mu": "model_url", // -mu, --model-url MODEL_URL
|
||||
"hf": "hf_repo", // -hf, -hfr, --hf-repo
|
||||
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
|
||||
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||
"hff": "hf_file", // -hff, --hf-file FILE
|
||||
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
|
||||
"hft": "hf_token", // -hft, --hf-token TOKEN
|
||||
"v": "verbose", // -v, --verbose, --log-verbose
|
||||
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
|
||||
"s": "seed", // -s, --seed SEED
|
||||
"temp": "temperature", // --temp N
|
||||
"l": "logit_bias", // -l, --logit-bias
|
||||
"j": "json_schema", // -j, --json-schema SCHEMA
|
||||
"jf": "json_schema_file", // -jf, --json-schema-file FILE
|
||||
"sp": "special", // -sp, --special
|
||||
"cb": "cont_batching", // -cb, --cont-batching
|
||||
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
|
||||
"a": "alias", // -a, --alias STRING
|
||||
"to": "timeout", // -to, --timeout N
|
||||
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
|
||||
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
|
||||
"devd": "device_draft", // -devd, --device-draft
|
||||
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
|
||||
"md": "model_draft", // -md, --model-draft FNAME
|
||||
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
|
||||
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
|
||||
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
|
||||
// Common params
|
||||
"t": "threads", // -t, --threads N
|
||||
"tb": "threads_batch", // -tb, --threads-batch N
|
||||
"C": "cpu_mask", // -C, --cpu-mask M
|
||||
"Cr": "cpu_range", // -Cr, --cpu-range lo-hi
|
||||
"Cb": "cpu_mask_batch", // -Cb, --cpu-mask-batch M
|
||||
"Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
|
||||
"c": "ctx_size", // -c, --ctx-size N
|
||||
"n": "predict", // -n, --predict N
|
||||
"n-predict": "predict", // --n-predict N
|
||||
"b": "batch_size", // -b, --batch-size N
|
||||
"ub": "ubatch_size", // -ub, --ubatch-size N
|
||||
"fa": "flash_attn", // -fa, --flash-attn
|
||||
"e": "escape", // -e, --escape
|
||||
"dkvc": "dump_kv_cache", // -dkvc, --dump-kv-cache
|
||||
"nkvo": "no_kv_offload", // -nkvo, --no-kv-offload
|
||||
"ctk": "cache_type_k", // -ctk, --cache-type-k TYPE
|
||||
"ctv": "cache_type_v", // -ctv, --cache-type-v TYPE
|
||||
"dt": "defrag_thold", // -dt, --defrag-thold N
|
||||
"np": "parallel", // -np, --parallel N
|
||||
"dev": "device", // -dev, --device <dev1,dev2,..>
|
||||
"ot": "override_tensor", // --override-tensor, -ot
|
||||
"ngl": "gpu_layers", // -ngl, --gpu-layers, --n-gpu-layers N
|
||||
"n-gpu-layers": "gpu_layers", // --n-gpu-layers N
|
||||
"sm": "split_mode", // -sm, --split-mode
|
||||
"ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
|
||||
"mg": "main_gpu", // -mg, --main-gpu INDEX
|
||||
"m": "model", // -m, --model FNAME
|
||||
"mu": "model_url", // -mu, --model-url MODEL_URL
|
||||
"hf": "hf_repo", // -hf, -hfr, --hf-repo
|
||||
"hfr": "hf_repo", // -hf, -hfr, --hf-repo
|
||||
"hfd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||
"hfrd": "hf_repo_draft", // -hfd, -hfrd, --hf-repo-draft
|
||||
"hff": "hf_file", // -hff, --hf-file FILE
|
||||
"hfv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||
"hfrv": "hf_repo_v", // -hfv, -hfrv, --hf-repo-v
|
||||
"hffv": "hf_file_v", // -hffv, --hf-file-v FILE
|
||||
"hft": "hf_token", // -hft, --hf-token TOKEN
|
||||
"v": "verbose", // -v, --verbose, --log-verbose
|
||||
"log-verbose": "verbose", // --log-verbose
|
||||
"lv": "verbosity", // -lv, --verbosity, --log-verbosity N
|
||||
"log-verbosity": "verbosity", // --log-verbosity N
|
||||
|
||||
// Sampling params
|
||||
"s": "seed", // -s, --seed SEED
|
||||
"l": "logit_bias", // -l, --logit-bias
|
||||
"j": "json_schema", // -j, --json-schema SCHEMA
|
||||
"jf": "json_schema_file", // -jf, --json-schema-file FILE
|
||||
|
||||
// Example-specific params
|
||||
"sp": "special", // -sp, --special
|
||||
"cb": "cont_batching", // -cb, --cont-batching
|
||||
"nocb": "no_cont_batching", // -nocb, --no-cont-batching
|
||||
"a": "alias", // -a, --alias STRING
|
||||
"embeddings": "embedding", // --embeddings
|
||||
"rerank": "reranking", // --reranking
|
||||
"to": "timeout", // -to, --timeout N
|
||||
"sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
|
||||
"draft": "draft_max", // -draft, --draft-max N
|
||||
"draft-n": "draft_max", // --draft-n N
|
||||
"draft-n-min": "draft_min", // --draft-n-min N
|
||||
"cd": "ctx_size_draft", // -cd, --ctx-size-draft N
|
||||
"devd": "device_draft", // -devd, --device-draft
|
||||
"ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
|
||||
"n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
|
||||
"md": "model_draft", // -md, --model-draft FNAME
|
||||
"ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
|
||||
"ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE
|
||||
"mv": "model_vocoder", // -mv, --model-vocoder FNAME
|
||||
}
|
||||
|
||||
// Process alternative field names
|
||||
|
||||
@@ -109,13 +109,13 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
|
||||
args := options.BuildCommandArgs()
|
||||
|
||||
expectedPairs := map[string]string{
|
||||
"--port": "8080",
|
||||
"--threads": "4",
|
||||
"--ctx-size": "2048",
|
||||
"--gpu-layers": "16",
|
||||
"--temperature": "0.7",
|
||||
"--top-k": "40",
|
||||
"--top-p": "0.9",
|
||||
"--port": "8080",
|
||||
"--threads": "4",
|
||||
"--ctx-size": "2048",
|
||||
"--gpu-layers": "16",
|
||||
"--temp": "0.7",
|
||||
"--top-k": "40",
|
||||
"--top-p": "0.9",
|
||||
}
|
||||
|
||||
for flag, expectedValue := range expectedPairs {
|
||||
@@ -231,7 +231,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
|
||||
"verbose": true,
|
||||
"ctx_size": 4096,
|
||||
"gpu_layers": 32,
|
||||
"temperature": 0.7
|
||||
"temp": 0.7
|
||||
}`
|
||||
|
||||
var options llamacpp.LlamaServerOptions
|
||||
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
DialogTitle,
|
||||
} from '@/components/ui/dialog'
|
||||
import { Badge } from '@/components/ui/badge'
|
||||
import { instancesApi } from '@/lib/api'
|
||||
import {
|
||||
RefreshCw,
|
||||
Download,
|
||||
@@ -46,48 +47,44 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
|
||||
const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
|
||||
|
||||
// Fetch logs function
|
||||
const fetchLogs = async (lines?: number) => {
|
||||
if (!instanceName) return
|
||||
const fetchLogs = React.useCallback(
|
||||
async (lines?: number) => {
|
||||
if (!instanceName) return
|
||||
|
||||
setLoading(true)
|
||||
setError(null)
|
||||
setLoading(true)
|
||||
setError(null)
|
||||
|
||||
try {
|
||||
const params = lines ? `?lines=${lines}` : ''
|
||||
const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
|
||||
try {
|
||||
const logText = await instancesApi.getLogs(instanceName, lines)
|
||||
setLogs(logText)
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to fetch logs: ${response.status}`)
|
||||
// Auto-scroll to bottom
|
||||
setTimeout(() => {
|
||||
if (logContainerRef.current) {
|
||||
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
|
||||
}
|
||||
}, 100)
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
|
||||
const logText = await response.text()
|
||||
setLogs(logText)
|
||||
|
||||
// Auto-scroll to bottom
|
||||
setTimeout(() => {
|
||||
if (logContainerRef.current) {
|
||||
logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight
|
||||
}
|
||||
}, 100)
|
||||
} catch (err) {
|
||||
setError(err instanceof Error ? err.message : 'Failed to fetch logs')
|
||||
} finally {
|
||||
setLoading(false)
|
||||
}
|
||||
}
|
||||
},
|
||||
[instanceName]
|
||||
)
|
||||
|
||||
// Initial load when dialog opens
|
||||
useEffect(() => {
|
||||
if (open && instanceName) {
|
||||
fetchLogs(lineCount)
|
||||
void fetchLogs(lineCount)
|
||||
}
|
||||
}, [open, instanceName])
|
||||
}, [open, instanceName, fetchLogs, lineCount])
|
||||
|
||||
// Auto-refresh effect
|
||||
useEffect(() => {
|
||||
if (autoRefresh && isRunning && open) {
|
||||
refreshIntervalRef.current = setInterval(() => {
|
||||
fetchLogs(lineCount)
|
||||
void fetchLogs(lineCount)
|
||||
}, 2000) // Refresh every 2 seconds
|
||||
} else {
|
||||
if (refreshIntervalRef.current) {
|
||||
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
|
||||
clearInterval(refreshIntervalRef.current)
|
||||
}
|
||||
}
|
||||
}, [autoRefresh, isRunning, open, lineCount])
|
||||
}, [autoRefresh, isRunning, open, lineCount, fetchLogs])
|
||||
|
||||
// Copy logs to clipboard
|
||||
const copyLogs = async () => {
|
||||
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
|
||||
|
||||
// Apply new line count
|
||||
const applyLineCount = () => {
|
||||
fetchLogs(lineCount)
|
||||
void fetchLogs(lineCount)
|
||||
setShowSettings(false)
|
||||
}
|
||||
|
||||
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={() => fetchLogs(lineCount)}
|
||||
onClick={() => void fetchLogs(lineCount)}
|
||||
disabled={loading}
|
||||
>
|
||||
{loading ? (
|
||||
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
|
||||
<div className="flex items-center gap-2 w-full">
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={copyLogs}
|
||||
onClick={() => void copyLogs()}
|
||||
disabled={!logs}
|
||||
>
|
||||
{copied ? (
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
|
||||
import { getAllFieldKeys } from '@/schemas/instanceOptions'
|
||||
import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
|
||||
|
||||
// Only define the basic fields we want to show by default
|
||||
export const basicFieldsConfig: Record<string, {
|
||||
|
||||
@@ -14,12 +14,12 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
cpu_mask: z.string().optional(),
|
||||
cpu_range: z.string().optional(),
|
||||
cpu_strict: z.number().optional(),
|
||||
priority: z.number().optional(),
|
||||
prio: z.number().optional(),
|
||||
poll: z.number().optional(),
|
||||
cpu_mask_batch: z.string().optional(),
|
||||
cpu_range_batch: z.string().optional(),
|
||||
cpu_strict_batch: z.number().optional(),
|
||||
priority_batch: z.number().optional(),
|
||||
prio_batch: z.number().optional(),
|
||||
poll_batch: z.number().optional(),
|
||||
ctx_size: z.number().optional(),
|
||||
predict: z.number().optional(),
|
||||
@@ -82,7 +82,7 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
seed: z.number().optional(),
|
||||
sampling_seq: z.string().optional(),
|
||||
ignore_eos: z.boolean().optional(),
|
||||
temperature: z.number().optional(),
|
||||
temp: z.number().optional(),
|
||||
top_k: z.number().optional(),
|
||||
top_p: z.number().optional(),
|
||||
min_p: z.number().optional(),
|
||||
@@ -109,7 +109,7 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
json_schema: z.string().optional(),
|
||||
json_schema_file: z.string().optional(),
|
||||
|
||||
// Server/Example-specific params
|
||||
// Example-specific params
|
||||
no_context_shift: z.boolean().optional(),
|
||||
special: z.boolean().optional(),
|
||||
no_warmup: z.boolean().optional(),
|
||||
@@ -149,8 +149,6 @@ export const CreateInstanceOptionsSchema = z.object({
|
||||
no_prefill_assistant: z.boolean().optional(),
|
||||
slot_prompt_similarity: z.number().optional(),
|
||||
lora_init_without_apply: z.boolean().optional(),
|
||||
|
||||
// Speculative decoding params
|
||||
draft_max: z.number().optional(),
|
||||
draft_min: z.number().optional(),
|
||||
draft_p_min: z.number().optional(),
|
||||
|
||||
Reference in New Issue
Block a user