6 Commits

SHA1        Message                                                           Date
5aed01b68f  Merge pull request #17 from lordmathis/fix/forbidden-logs         2025-08-06 19:12:34 +02:00
            fix: Refactor log fetching to use instancesApi
3f9caff33b  Refactor log fetching to use instancesApi                         2025-08-06 19:07:25 +02:00
169254c61a  Merge pull request #16 from lordmathis/fix/llama-server-options   2025-08-06 18:51:18 +02:00
            fix: Missing or wrong llama server options
8154b8d0ab  Fix temp in tests                                                 2025-08-06 18:49:36 +02:00
a26d853ad5  Fix missing or wrong llama server options on frontend             2025-08-06 18:40:05 +02:00
6203b64045  Fix missing or wrong llama server options                         2025-08-06 18:31:17 +02:00
5 changed files with 126 additions and 121 deletions

File 1 of 5 (Go): LlamaServerOptions struct and UnmarshalJSON field mappings

@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
     CPUMask        string `json:"cpu_mask,omitempty"`
     CPURange       string `json:"cpu_range,omitempty"`
     CPUStrict      int    `json:"cpu_strict,omitempty"`
-    Priority       int    `json:"priority,omitempty"`
+    Prio           int    `json:"prio,omitempty"`
     Poll           int    `json:"poll,omitempty"`
     CPUMaskBatch   string `json:"cpu_mask_batch,omitempty"`
     CPURangeBatch  string `json:"cpu_range_batch,omitempty"`
     CPUStrictBatch int    `json:"cpu_strict_batch,omitempty"`
-    PriorityBatch  int    `json:"priority_batch,omitempty"`
+    PrioBatch      int    `json:"prio_batch,omitempty"`
     PollBatch      int    `json:"poll_batch,omitempty"`
     CtxSize        int    `json:"ctx_size,omitempty"`
     Predict        int    `json:"predict,omitempty"`
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
     Seed        int     `json:"seed,omitempty"`
     SamplingSeq string  `json:"sampling_seq,omitempty"`
     IgnoreEOS   bool    `json:"ignore_eos,omitempty"`
-    Temperature float64 `json:"temperature,omitempty"`
+    Temperature float64 `json:"temp,omitempty"`
     TopK        int     `json:"top_k,omitempty"`
     TopP        float64 `json:"top_p,omitempty"`
     MinP        float64 `json:"min_p,omitempty"`
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
     JSONSchema     string `json:"json_schema,omitempty"`
     JSONSchemaFile string `json:"json_schema_file,omitempty"`
 
-    // Server/Example-specific params
+    // Example-specific params
     NoContextShift bool `json:"no_context_shift,omitempty"`
     Special        bool `json:"special,omitempty"`
     NoWarmup       bool `json:"no_warmup,omitempty"`
@@ -150,8 +150,6 @@ type LlamaServerOptions struct {
     NoPrefillAssistant   bool    `json:"no_prefill_assistant,omitempty"`
     SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
     LoraInitWithoutApply bool    `json:"lora_init_without_apply,omitempty"`
-
-    // Speculative decoding params
     DraftMax  int     `json:"draft_max,omitempty"`
     DraftMin  int     `json:"draft_min,omitempty"`
     DraftPMin float64 `json:"draft_p_min,omitempty"`
@@ -199,7 +197,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 
     // Handle alternative field names
     fieldMappings := map[string]string{
-        // Official llama-server short forms from the documentation
+        // Common params
         "t":  "threads",       // -t, --threads N
        "tb": "threads_batch", // -tb, --threads-batch N
        "C":  "cpu_mask",      // -C, --cpu-mask M
@@ -207,7 +205,8 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "Cb":  "cpu_mask_batch",  // -Cb, --cpu-mask-batch M
        "Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
        "c":   "ctx_size",        // -c, --ctx-size N
-       "n":   "predict",         // -n, --predict, --n-predict N
+       "n":         "predict",   // -n, --predict N
+       "n-predict": "predict",   // --n-predict N
        "b":   "batch_size",      // -b, --batch-size N
        "ub":  "ubatch_size",     // -ub, --ubatch-size N
        "fa":  "flash_attn",      // -fa, --flash-attn
@@ -221,6 +220,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "dev": "device",          // -dev, --device <dev1,dev2,..>
        "ot":  "override_tensor", // --override-tensor, -ot
        "ngl": "gpu_layers",      // -ngl, --gpu-layers, --n-gpu-layers N
+       "n-gpu-layers": "gpu_layers", // --n-gpu-layers N
        "sm": "split_mode",   // -sm, --split-mode
        "ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
        "mg": "main_gpu",     // -mg, --main-gpu INDEX
@@ -236,21 +236,32 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "hffv": "hf_file_v", // -hffv, --hf-file-v FILE
        "hft":  "hf_token",  // -hft, --hf-token TOKEN
        "v":    "verbose",   // -v, --verbose, --log-verbose
+       "log-verbose": "verbose", // --log-verbose
        "lv": "verbosity", // -lv, --verbosity, --log-verbosity N
+       "log-verbosity": "verbosity", // --log-verbosity N
 
+       // Sampling params
        "s": "seed", // -s, --seed SEED
+       "temp": "temperature", // --temp N
        "l":  "logit_bias",       // -l, --logit-bias
        "j":  "json_schema",      // -j, --json-schema SCHEMA
        "jf": "json_schema_file", // -jf, --json-schema-file FILE
+       // Example-specific params
        "sp":   "special",          // -sp, --special
        "cb":   "cont_batching",    // -cb, --cont-batching
        "nocb": "no_cont_batching", // -nocb, --no-cont-batching
        "a":    "alias",            // -a, --alias STRING
+       "embeddings": "embedding", // --embeddings
+       "rerank":     "reranking", // --reranking
        "to":  "timeout",                // -to, --timeout N
        "sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
+       "draft":       "draft-max", // -draft, --draft-max N
+       "draft-n":     "draft-max", // --draft-n-max N
+       "draft-n-min": "draft_min", // --draft-n-min N
        "cd":   "ctx_size_draft",   // -cd, --ctx-size-draft N
        "devd": "device_draft",     // -devd, --device-draft
        "ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
+       "n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
        "md":   "model_draft",        // -md, --model-draft FNAME
        "ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
        "ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE

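The table above maps alternative spellings (llama-server short forms and long-form flag names) to the struct's canonical JSON tags, and UnmarshalJSON rewrites incoming keys through it before decoding. As an illustration of that normalization step, here is a minimal TypeScript sketch of the same idea — the mapping entries mirror the Go table above, but none of this code is from the repository:

import { strict as assert } from 'node:assert'

// Subset of the alias table above; keys are accepted spellings,
// values are the canonical JSON field names.
const fieldMappings: Record<string, string> = {
  'n-predict': 'predict',       // --n-predict N
  'n-gpu-layers': 'gpu_layers', // --n-gpu-layers N
  'log-verbose': 'verbose',     // --log-verbose
}

// Rewrite known alias keys to their canonical names; pass everything else through.
function normalizeOptions(raw: Record<string, unknown>): Record<string, unknown> {
  const out: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(raw)) {
    out[fieldMappings[key] ?? key] = value
  }
  return out
}

assert.deepEqual(
  normalizeOptions({ 'n-gpu-layers': 32, ctx_size: 4096 }),
  { gpu_layers: 32, ctx_size: 4096 },
)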
File 2 of 5 (Go): llamacpp option tests

@@ -113,7 +113,7 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
        "--threads":    "4",
        "--ctx-size":   "2048",
        "--gpu-layers": "16",
-       "--temperature": "0.7",
+       "--temp":  "0.7",
        "--top-k": "40",
        "--top-p": "0.9",
    }
@@ -231,7 +231,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
        "verbose": true,
        "ctx_size": 4096,
        "gpu_layers": 32,
-       "temperature": 0.7
+       "temp": 0.7
    }`
 
    var options llamacpp.LlamaServerOptions

File 3 of 5 (TypeScript): LogsDialog component

@@ -11,6 +11,7 @@ import {
   DialogTitle,
 } from '@/components/ui/dialog'
 import { Badge } from '@/components/ui/badge'
+import { instancesApi } from '@/lib/api'
 import {
   RefreshCw,
   Download,
@@ -46,21 +47,15 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
   const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
 
   // Fetch logs function
-  const fetchLogs = async (lines?: number) => {
-    if (!instanceName) return
-
-    setLoading(true)
-    setError(null)
-
-    try {
-      const params = lines ? `?lines=${lines}` : ''
-      const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
-
-      if (!response.ok) {
-        throw new Error(`Failed to fetch logs: ${response.status}`)
-      }
-
-      const logText = await response.text()
-      setLogs(logText)
-
-      // Auto-scroll to bottom
+  const fetchLogs = React.useCallback(
+    async (lines?: number) => {
+      if (!instanceName) return
+
+      setLoading(true)
+      setError(null)
+
+      try {
+        const logText = await instancesApi.getLogs(instanceName, lines)
+        setLogs(logText)
+
+        // Auto-scroll to bottom
@@ -74,20 +69,22 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
-    } finally {
-      setLoading(false)
-    }
-  }
+      } finally {
+        setLoading(false)
+      }
+    },
+    [instanceName]
+  )
 
   // Initial load when dialog opens
   useEffect(() => {
     if (open && instanceName) {
-      fetchLogs(lineCount)
+      void fetchLogs(lineCount)
     }
-  }, [open, instanceName])
+  }, [open, instanceName, fetchLogs, lineCount])
 
   // Auto-refresh effect
   useEffect(() => {
     if (autoRefresh && isRunning && open) {
       refreshIntervalRef.current = setInterval(() => {
-        fetchLogs(lineCount)
+        void fetchLogs(lineCount)
       }, 2000) // Refresh every 2 seconds
     } else {
       if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
         clearInterval(refreshIntervalRef.current)
       }
     }
-  }, [autoRefresh, isRunning, open, lineCount])
+  }, [autoRefresh, isRunning, open, lineCount, fetchLogs])
 
   // Copy logs to clipboard
   const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
 
   // Apply new line count
   const applyLineCount = () => {
-    fetchLogs(lineCount)
+    void fetchLogs(lineCount)
     setShowSettings(false)
   }
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
             <Button
               variant="outline"
               size="sm"
-              onClick={() => fetchLogs(lineCount)}
+              onClick={() => void fetchLogs(lineCount)}
               disabled={loading}
             >
               {loading ? (
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
           <div className="flex items-center gap-2 w-full">
             <Button
               variant="outline"
-              onClick={copyLogs}
+              onClick={() => void copyLogs()}
               disabled={!logs}
             >
               {copied ? (

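The refactor above replaces the component's raw fetch with instancesApi.getLogs, and wraps fetchLogs in React.useCallback so it keeps a stable identity and can safely appear in the effects' dependency arrays. Only the call site is visible in this diff; the following is a hedged sketch of what the helper in '@/lib/api' plausibly looks like, reconstructed from the inline code it replaces. The real implementation may differ — the fix/forbidden-logs branch name suggests it also routes the request through a shared client that attaches credentials, which the raw fetch lacked:

// Sketch only: reconstructed from the inline fetch removed above, not from '@/lib/api'.
export const instancesApi = {
  async getLogs(instanceName: string, lines?: number): Promise<string> {
    const params = lines ? `?lines=${lines}` : ''
    const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
    if (!response.ok) {
      // Surface non-2xx responses (401/403 included) as errors in one place
      throw new Error(`Failed to fetch logs: ${response.status}`)
    }
    return response.text()
  },
}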
File 4 of 5 (TypeScript): basic fields config

@@ -1,5 +1,4 @@
-import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
-import { getAllFieldKeys } from '@/schemas/instanceOptions'
+import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
 
 // Only define the basic fields we want to show by default
 export const basicFieldsConfig: Record<string, {

File 5 of 5 (TypeScript): CreateInstanceOptionsSchema (Zod)

@@ -14,12 +14,12 @@ export const CreateInstanceOptionsSchema = z.object({
   cpu_mask: z.string().optional(),
   cpu_range: z.string().optional(),
   cpu_strict: z.number().optional(),
-  priority: z.number().optional(),
+  prio: z.number().optional(),
   poll: z.number().optional(),
   cpu_mask_batch: z.string().optional(),
   cpu_range_batch: z.string().optional(),
   cpu_strict_batch: z.number().optional(),
-  priority_batch: z.number().optional(),
+  prio_batch: z.number().optional(),
   poll_batch: z.number().optional(),
   ctx_size: z.number().optional(),
   predict: z.number().optional(),
@@ -82,7 +82,7 @@ export const CreateInstanceOptionsSchema = z.object({
   seed: z.number().optional(),
   sampling_seq: z.string().optional(),
   ignore_eos: z.boolean().optional(),
-  temperature: z.number().optional(),
+  temp: z.number().optional(),
   top_k: z.number().optional(),
   top_p: z.number().optional(),
   min_p: z.number().optional(),
@@ -109,7 +109,7 @@ export const CreateInstanceOptionsSchema = z.object({
   json_schema: z.string().optional(),
   json_schema_file: z.string().optional(),
 
-  // Server/Example-specific params
+  // Example-specific params
   no_context_shift: z.boolean().optional(),
   special: z.boolean().optional(),
   no_warmup: z.boolean().optional(),
@@ -149,8 +149,6 @@ export const CreateInstanceOptionsSchema = z.object({
   no_prefill_assistant: z.boolean().optional(),
   slot_prompt_similarity: z.number().optional(),
   lora_init_without_apply: z.boolean().optional(),
-
-  // Speculative decoding params
   draft_max: z.number().optional(),
   draft_min: z.number().optional(),
   draft_p_min: z.number().optional(),
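With the schema keys renamed to llama-server's native spellings (temp, prio), a payload validated on the frontend matches the Go struct's JSON tags directly. A short usage sketch, using only fields visible in this diff and Zod's standard safeParse API:

import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'

// Keys use the renamed fields: `temp` (was `temperature`) and `prio` (was `priority`).
const result = CreateInstanceOptionsSchema.safeParse({
  ctx_size: 4096,
  temp: 0.7,
  prio: 2,
  top_k: 40,
})

if (result.success) {
  console.log('valid options:', result.data)
} else {
  console.error(result.error.issues)
}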