6 Commits

SHA1        Message                                                           Date
5aed01b68f  Merge pull request #17 from lordmathis/fix/forbidden-logs         2025-08-06 19:12:34 +02:00
            fix: Refactor log fetching to use instancesApi
3f9caff33b  Refactor log fetching to use instancesApi                         2025-08-06 19:07:25 +02:00
169254c61a  Merge pull request #16 from lordmathis/fix/llama-server-options   2025-08-06 18:51:18 +02:00
            fix: Missing or wrong llama server options
8154b8d0ab  Fix temp in tests                                                 2025-08-06 18:49:36 +02:00
a26d853ad5  Fix missing or wrong llama server options on frontend             2025-08-06 18:40:05 +02:00
6203b64045  Fix missing or wrong llama server options                         2025-08-06 18:31:17 +02:00
5 changed files with 126 additions and 121 deletions

File 1 of 5 (Go): LlamaServerOptions struct and UnmarshalJSON field mappings

@@ -15,12 +15,12 @@ type LlamaServerOptions struct {
     CPUMask        string `json:"cpu_mask,omitempty"`
     CPURange       string `json:"cpu_range,omitempty"`
     CPUStrict      int    `json:"cpu_strict,omitempty"`
-    Priority       int    `json:"priority,omitempty"`
+    Prio           int    `json:"prio,omitempty"`
     Poll           int    `json:"poll,omitempty"`
     CPUMaskBatch   string `json:"cpu_mask_batch,omitempty"`
     CPURangeBatch  string `json:"cpu_range_batch,omitempty"`
     CPUStrictBatch int    `json:"cpu_strict_batch,omitempty"`
-    PriorityBatch  int    `json:"priority_batch,omitempty"`
+    PrioBatch      int    `json:"prio_batch,omitempty"`
     PollBatch      int    `json:"poll_batch,omitempty"`
     CtxSize        int    `json:"ctx_size,omitempty"`
     Predict        int    `json:"predict,omitempty"`
@@ -83,7 +83,7 @@ type LlamaServerOptions struct {
     Seed        int     `json:"seed,omitempty"`
     SamplingSeq string  `json:"sampling_seq,omitempty"`
     IgnoreEOS   bool    `json:"ignore_eos,omitempty"`
-    Temperature float64 `json:"temperature,omitempty"`
+    Temperature float64 `json:"temp,omitempty"`
     TopK        int     `json:"top_k,omitempty"`
     TopP        float64 `json:"top_p,omitempty"`
     MinP        float64 `json:"min_p,omitempty"`
@@ -110,7 +110,7 @@ type LlamaServerOptions struct {
     JSONSchema     string `json:"json_schema,omitempty"`
     JSONSchemaFile string `json:"json_schema_file,omitempty"`
 
-    // Server/Example-specific params
+    // Example-specific params
     NoContextShift bool `json:"no_context_shift,omitempty"`
     Special        bool `json:"special,omitempty"`
     NoWarmup       bool `json:"no_warmup,omitempty"`
@@ -150,8 +150,6 @@ type LlamaServerOptions struct {
     NoPrefillAssistant   bool    `json:"no_prefill_assistant,omitempty"`
     SlotPromptSimilarity float64 `json:"slot_prompt_similarity,omitempty"`
     LoraInitWithoutApply bool    `json:"lora_init_without_apply,omitempty"`
-
-    // Speculative decoding params
     DraftMax  int     `json:"draft_max,omitempty"`
     DraftMin  int     `json:"draft_min,omitempty"`
     DraftPMin float64 `json:"draft_p_min,omitempty"`
@@ -199,7 +197,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
 
     // Handle alternative field names
     fieldMappings := map[string]string{
-        // Official llama-server short forms from the documentation
+        // Common params
         "t":  "threads",       // -t, --threads N
        "tb": "threads_batch", // -tb, --threads-batch N
        "C":  "cpu_mask",      // -C, --cpu-mask M
@@ -207,7 +205,8 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "Cb":  "cpu_mask_batch",  // -Cb, --cpu-mask-batch M
        "Crb": "cpu_range_batch", // -Crb, --cpu-range-batch lo-hi
        "c":   "ctx_size",        // -c, --ctx-size N
-       "n":   "predict",         // -n, --predict, --n-predict N
+       "n":         "predict",   // -n, --predict N
+       "n-predict": "predict",   // --n-predict N
        "b":   "batch_size",      // -b, --batch-size N
        "ub":  "ubatch_size",     // -ub, --ubatch-size N
        "fa":  "flash_attn",      // -fa, --flash-attn
@@ -221,6 +220,7 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "dev": "device",          // -dev, --device <dev1,dev2,..>
        "ot":  "override_tensor", // --override-tensor, -ot
        "ngl": "gpu_layers",      // -ngl, --gpu-layers, --n-gpu-layers N
+       "n-gpu-layers": "gpu_layers", // --n-gpu-layers N
        "sm": "split_mode",   // -sm, --split-mode
        "ts": "tensor_split", // -ts, --tensor-split N0,N1,N2,...
        "mg": "main_gpu",     // -mg, --main-gpu INDEX
@@ -236,21 +236,32 @@ func (o *LlamaServerOptions) UnmarshalJSON(data []byte) error {
        "hffv": "hf_file_v", // -hffv, --hf-file-v FILE
        "hft":  "hf_token",  // -hft, --hf-token TOKEN
        "v":    "verbose",   // -v, --verbose, --log-verbose
+       "log-verbose": "verbose", // --log-verbose
        "lv": "verbosity", // -lv, --verbosity, --log-verbosity N
+       "log-verbosity": "verbosity", // --log-verbosity N
 
+       // Sampling params
        "s": "seed", // -s, --seed SEED
+       "temp": "temperature", // --temp N
        "l":  "logit_bias",       // -l, --logit-bias
        "j":  "json_schema",      // -j, --json-schema SCHEMA
        "jf": "json_schema_file", // -jf, --json-schema-file FILE
+       // Example-specific params
        "sp":   "special",          // -sp, --special
        "cb":   "cont_batching",    // -cb, --cont-batching
        "nocb": "no_cont_batching", // -nocb, --no-cont-batching
        "a":    "alias",            // -a, --alias STRING
+       "embeddings": "embedding", // --embeddings
+       "rerank":     "reranking", // --reranking
        "to":  "timeout",                // -to, --timeout N
        "sps": "slot_prompt_similarity", // -sps, --slot-prompt-similarity
+       "draft":       "draft-max", // -draft, --draft-max N
+       "draft-n":     "draft-max", // --draft-n-max N
+       "draft-n-min": "draft_min", // --draft-n-min N
        "cd":   "ctx_size_draft",   // -cd, --ctx-size-draft N
        "devd": "device_draft",     // -devd, --device-draft
        "ngld": "gpu_layers_draft", // -ngld, --gpu-layers-draft
+       "n-gpu-layers-draft": "gpu_layers_draft", // --n-gpu-layers-draft N
        "md":   "model_draft",        // -md, --model-draft FNAME
        "ctkd": "cache_type_k_draft", // -ctkd, --cache-type-k-draft TYPE
        "ctvd": "cache_type_v_draft", // -ctvd, --cache-type-v-draft TYPE

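The table above maps alternative spellings (llama-server short forms and long-form flag names) to the struct's canonical JSON tags, and UnmarshalJSON rewrites incoming keys through it before decoding. As an illustration of that normalization step, here is a minimal TypeScript sketch of the same idea — the mapping entries mirror the Go table above, but none of this code is from the repository:

import { strict as assert } from 'node:assert'

// Subset of the alias table above; keys are accepted spellings,
// values are the canonical JSON field names.
const fieldMappings: Record<string, string> = {
  'n-predict': 'predict',       // --n-predict N
  'n-gpu-layers': 'gpu_layers', // --n-gpu-layers N
  'log-verbose': 'verbose',     // --log-verbose
}

// Rewrite known alias keys to their canonical names; pass everything else through.
function normalizeOptions(raw: Record<string, unknown>): Record<string, unknown> {
  const out: Record<string, unknown> = {}
  for (const [key, value] of Object.entries(raw)) {
    out[fieldMappings[key] ?? key] = value
  }
  return out
}

assert.deepEqual(
  normalizeOptions({ 'n-gpu-layers': 32, ctx_size: 4096 }),
  { gpu_layers: 32, ctx_size: 4096 },
)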
File 2 of 5 (Go): llamacpp option tests

@@ -113,7 +113,7 @@ func TestBuildCommandArgs_NumericFields(t *testing.T) {
        "--threads":    "4",
        "--ctx-size":   "2048",
        "--gpu-layers": "16",
-       "--temperature": "0.7",
+       "--temp":  "0.7",
        "--top-k": "40",
        "--top-p": "0.9",
    }
@@ -231,7 +231,7 @@ func TestUnmarshalJSON_StandardFields(t *testing.T) {
        "verbose": true,
        "ctx_size": 4096,
        "gpu_layers": 32,
-       "temperature": 0.7
+       "temp": 0.7
    }`
 
    var options llamacpp.LlamaServerOptions

File 3 of 5 (TypeScript): LogsDialog component

@@ -11,6 +11,7 @@ import {
   DialogTitle,
 } from '@/components/ui/dialog'
 import { Badge } from '@/components/ui/badge'
+import { instancesApi } from '@/lib/api'
 import {
   RefreshCw,
   Download,
@@ -46,21 +47,15 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
   const refreshIntervalRef = useRef<NodeJS.Timeout | null>(null)
 
   // Fetch logs function
-  const fetchLogs = async (lines?: number) => {
-    if (!instanceName) return
-
-    setLoading(true)
-    setError(null)
-
-    try {
-      const params = lines ? `?lines=${lines}` : ''
-      const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
-
-      if (!response.ok) {
-        throw new Error(`Failed to fetch logs: ${response.status}`)
-      }
-
-      const logText = await response.text()
-      setLogs(logText)
-
-      // Auto-scroll to bottom
+  const fetchLogs = React.useCallback(
+    async (lines?: number) => {
+      if (!instanceName) return
+
+      setLoading(true)
+      setError(null)
+
+      try {
+        const logText = await instancesApi.getLogs(instanceName, lines)
+        setLogs(logText)
+
+        // Auto-scroll to bottom
@@ -74,20 +69,22 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
-    } finally {
-      setLoading(false)
-    }
-  }
+      } finally {
+        setLoading(false)
+      }
+    },
+    [instanceName]
+  )
 
   // Initial load when dialog opens
   useEffect(() => {
     if (open && instanceName) {
-      fetchLogs(lineCount)
+      void fetchLogs(lineCount)
     }
-  }, [open, instanceName])
+  }, [open, instanceName, fetchLogs, lineCount])
 
   // Auto-refresh effect
   useEffect(() => {
     if (autoRefresh && isRunning && open) {
       refreshIntervalRef.current = setInterval(() => {
-        fetchLogs(lineCount)
+        void fetchLogs(lineCount)
       }, 2000) // Refresh every 2 seconds
     } else {
       if (refreshIntervalRef.current) {
@@ -101,7 +98,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
         clearInterval(refreshIntervalRef.current)
       }
     }
-  }, [autoRefresh, isRunning, open, lineCount])
+  }, [autoRefresh, isRunning, open, lineCount, fetchLogs])
 
   // Copy logs to clipboard
   const copyLogs = async () => {
@@ -135,7 +132,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
 
   // Apply new line count
   const applyLineCount = () => {
-    fetchLogs(lineCount)
+    void fetchLogs(lineCount)
     setShowSettings(false)
   }
@@ -198,7 +195,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
             <Button
               variant="outline"
               size="sm"
-              onClick={() => fetchLogs(lineCount)}
+              onClick={() => void fetchLogs(lineCount)}
               disabled={loading}
             >
               {loading ? (
@@ -290,7 +287,7 @@ const LogsDialog: React.FC<LogsDialogProps> = ({
           <div className="flex items-center gap-2 w-full">
             <Button
               variant="outline"
-              onClick={copyLogs}
+              onClick={() => void copyLogs()}
               disabled={!logs}
             >
               {copied ? (

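The refactor above replaces the component's raw fetch with instancesApi.getLogs, and wraps fetchLogs in React.useCallback so it keeps a stable identity and can safely appear in the effects' dependency arrays. Only the call site is visible in this diff; the following is a hedged sketch of what the helper in '@/lib/api' plausibly looks like, reconstructed from the inline code it replaces. The real implementation may differ — the fix/forbidden-logs branch name suggests it also routes the request through a shared client that attaches credentials, which the raw fetch lacked:

// Sketch only: reconstructed from the inline fetch removed above, not from '@/lib/api'.
export const instancesApi = {
  async getLogs(instanceName: string, lines?: number): Promise<string> {
    const params = lines ? `?lines=${lines}` : ''
    const response = await fetch(`/api/v1/instances/${instanceName}/logs${params}`)
    if (!response.ok) {
      // Surface non-2xx responses (401/403 included) as errors in one place
      throw new Error(`Failed to fetch logs: ${response.status}`)
    }
    return response.text()
  },
}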
File 4 of 5 (TypeScript): basic fields config

@@ -1,5 +1,4 @@
-import type { CreateInstanceOptions} from '@/schemas/instanceOptions';
-import { getAllFieldKeys } from '@/schemas/instanceOptions'
+import { type CreateInstanceOptions, getAllFieldKeys } from '@/schemas/instanceOptions'
 
 // Only define the basic fields we want to show by default
 export const basicFieldsConfig: Record<string, {

File 5 of 5 (TypeScript): CreateInstanceOptionsSchema (Zod)

@@ -14,12 +14,12 @@ export const CreateInstanceOptionsSchema = z.object({
   cpu_mask: z.string().optional(),
   cpu_range: z.string().optional(),
   cpu_strict: z.number().optional(),
-  priority: z.number().optional(),
+  prio: z.number().optional(),
   poll: z.number().optional(),
   cpu_mask_batch: z.string().optional(),
   cpu_range_batch: z.string().optional(),
   cpu_strict_batch: z.number().optional(),
-  priority_batch: z.number().optional(),
+  prio_batch: z.number().optional(),
   poll_batch: z.number().optional(),
   ctx_size: z.number().optional(),
   predict: z.number().optional(),
@@ -82,7 +82,7 @@ export const CreateInstanceOptionsSchema = z.object({
   seed: z.number().optional(),
   sampling_seq: z.string().optional(),
   ignore_eos: z.boolean().optional(),
-  temperature: z.number().optional(),
+  temp: z.number().optional(),
   top_k: z.number().optional(),
   top_p: z.number().optional(),
   min_p: z.number().optional(),
@@ -109,7 +109,7 @@ export const CreateInstanceOptionsSchema = z.object({
   json_schema: z.string().optional(),
   json_schema_file: z.string().optional(),
 
-  // Server/Example-specific params
+  // Example-specific params
   no_context_shift: z.boolean().optional(),
   special: z.boolean().optional(),
   no_warmup: z.boolean().optional(),
@@ -149,8 +149,6 @@ export const CreateInstanceOptionsSchema = z.object({
   no_prefill_assistant: z.boolean().optional(),
   slot_prompt_similarity: z.number().optional(),
   lora_init_without_apply: z.boolean().optional(),
-
-  // Speculative decoding params
   draft_max: z.number().optional(),
   draft_min: z.number().optional(),
   draft_p_min: z.number().optional(),
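With the schema keys renamed to llama-server's native spellings (temp, prio), a payload validated on the frontend matches the Go struct's JSON tags directly. A short usage sketch, using only fields visible in this diff and Zod's standard safeParse API:

import { CreateInstanceOptionsSchema } from '@/schemas/instanceOptions'

// Keys use the renamed fields: `temp` (was `temperature`) and `prio` (was `priority`).
const result = CreateInstanceOptionsSchema.safeParse({
  ctx_size: 4096,
  temp: 0.7,
  prio: 2,
  top_k: 40,
})

if (result.success) {
  console.log('valid options:', result.data)
} else {
  console.error(result.error.issues)
}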