mirror of
https://github.com/lordmathis/llamactl.git
synced 2025-11-06 09:04:27 +00:00
Add vLLM backend support to webui
This commit is contained in:
@@ -9,7 +9,7 @@ import {
|
|||||||
DialogHeader,
|
DialogHeader,
|
||||||
DialogTitle,
|
DialogTitle,
|
||||||
} from "@/components/ui/dialog";
|
} from "@/components/ui/dialog";
|
||||||
import { type CreateInstanceOptions } from "@/types/instance";
|
import { BackendType, type BackendTypeValue, type CreateInstanceOptions } from "@/types/instance";
|
||||||
import { backendsApi } from "@/lib/api";
|
import { backendsApi } from "@/lib/api";
|
||||||
import { toast } from "sonner";
|
import { toast } from "sonner";
|
||||||
|
|
||||||
@@ -25,6 +25,7 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
|
|||||||
onParsed,
|
onParsed,
|
||||||
}) => {
|
}) => {
|
||||||
const [command, setCommand] = useState('');
|
const [command, setCommand] = useState('');
|
||||||
|
const [backendType, setBackendType] = useState<BackendTypeValue>(BackendType.LLAMA_CPP);
|
||||||
const [loading, setLoading] = useState(false);
|
const [loading, setLoading] = useState(false);
|
||||||
const [error, setError] = useState<string | null>(null);
|
const [error, setError] = useState<string | null>(null);
|
||||||
|
|
||||||
@@ -38,18 +39,31 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
|
|||||||
setError(null);
|
setError(null);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const options = await backendsApi.llamaCpp.parseCommand(command);
|
let options: CreateInstanceOptions;
|
||||||
|
|
||||||
|
// Parse based on selected backend type
|
||||||
|
switch (backendType) {
|
||||||
|
case BackendType.LLAMA_CPP:
|
||||||
|
options = await backendsApi.llamaCpp.parseCommand(command);
|
||||||
|
break;
|
||||||
|
case BackendType.MLX_LM:
|
||||||
|
options = await backendsApi.mlx.parseCommand(command);
|
||||||
|
break;
|
||||||
|
case BackendType.VLLM:
|
||||||
|
options = await backendsApi.vllm.parseCommand(command);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new Error(`Unsupported backend type: ${backendType}`);
|
||||||
|
}
|
||||||
|
|
||||||
onParsed(options);
|
onParsed(options);
|
||||||
onOpenChange(false);
|
onOpenChange(false);
|
||||||
// Reset form
|
|
||||||
setCommand('');
|
setCommand('');
|
||||||
setError(null);
|
setError(null);
|
||||||
// Show success toast
|
|
||||||
toast.success('Command parsed successfully');
|
toast.success('Command parsed successfully');
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
|
const errorMessage = err instanceof Error ? err.message : 'Failed to parse command';
|
||||||
setError(errorMessage);
|
setError(errorMessage);
|
||||||
// Show error toast
|
|
||||||
toast.error('Failed to parse command', {
|
toast.error('Failed to parse command', {
|
||||||
description: errorMessage
|
description: errorMessage
|
||||||
});
|
});
|
||||||
@@ -60,31 +74,58 @@ const ParseCommandDialog: React.FC<ParseCommandDialogProps> = ({
|
|||||||
|
|
||||||
const handleOpenChange = (open: boolean) => {
|
const handleOpenChange = (open: boolean) => {
|
||||||
if (!open) {
|
if (!open) {
|
||||||
// Reset form when closing
|
|
||||||
setCommand('');
|
setCommand('');
|
||||||
|
setBackendType(BackendType.LLAMA_CPP);
|
||||||
setError(null);
|
setError(null);
|
||||||
}
|
}
|
||||||
onOpenChange(open);
|
onOpenChange(open);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Returns the example command line shown as the textarea placeholder for the
// given backend type. Unrecognised backend values fall back to a generic prompt.
const getPlaceholderForBackend = (backendType: BackendTypeValue): string => {
  if (backendType === BackendType.LLAMA_CPP) {
    return "llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096";
  }
  if (backendType === BackendType.MLX_LM) {
    return "mlx_lm.server --model mlx-community/Mistral-7B-Instruct-v0.3-4bit --host 0.0.0.0 --port 8080";
  }
  if (backendType === BackendType.VLLM) {
    return "vllm serve --model microsoft/DialoGPT-medium --tensor-parallel-size 2 --gpu-memory-utilization 0.9";
  }
  return "Enter your command here...";
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<Dialog open={open} onOpenChange={handleOpenChange}>
|
<Dialog open={open} onOpenChange={handleOpenChange}>
|
||||||
<DialogContent className="sm:max-w-[600px]">
|
<DialogContent className="sm:max-w-[600px]">
|
||||||
<DialogHeader>
|
<DialogHeader>
|
||||||
<DialogTitle>Parse Llama Server Command</DialogTitle>
|
<DialogTitle>Parse Backend Command</DialogTitle>
|
||||||
<DialogDescription>
|
<DialogDescription>
|
||||||
Paste your llama-server command to automatically populate the form fields
|
Select your backend type and paste the command to automatically populate the form fields
|
||||||
</DialogDescription>
|
</DialogDescription>
|
||||||
</DialogHeader>
|
</DialogHeader>
|
||||||
|
|
||||||
<div className="space-y-4">
|
<div className="space-y-4">
|
||||||
|
<div>
|
||||||
|
<Label htmlFor="backend-type">Backend Type</Label>
|
||||||
|
<select
|
||||||
|
id="backend-type"
|
||||||
|
value={backendType}
|
||||||
|
onChange={(e) => setBackendType(e.target.value as BackendTypeValue)}
|
||||||
|
className="flex h-10 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50"
|
||||||
|
>
|
||||||
|
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
|
||||||
|
<option value={BackendType.MLX_LM}>MLX LM</option>
|
||||||
|
<option value={BackendType.VLLM}>vLLM</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<Label htmlFor="command">Command</Label>
|
<Label htmlFor="command">Command</Label>
|
||||||
<textarea
|
<textarea
|
||||||
id="command"
|
id="command"
|
||||||
value={command}
|
value={command}
|
||||||
onChange={(e) => setCommand(e.target.value)}
|
onChange={(e) => setCommand(e.target.value)}
|
||||||
placeholder="llama-server --model /path/to/model.gguf --gpu-layers 32 --ctx-size 4096"
|
placeholder={getPlaceholderForBackend(backendType)}
|
||||||
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
|
className="w-full h-32 p-3 mt-2 border border-input rounded-md font-mono text-sm resize-vertical focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2"
|
||||||
/>
|
/>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -39,6 +39,7 @@ const ZodFormField: React.FC<ZodFormFieldProps> = ({ fieldKey, value, onChange }
|
|||||||
>
|
>
|
||||||
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
|
<option value={BackendType.LLAMA_CPP}>Llama Server</option>
|
||||||
<option value={BackendType.MLX_LM}>MLX LM</option>
|
<option value={BackendType.MLX_LM}>MLX LM</option>
|
||||||
|
<option value={BackendType.VLLM}>vLLM</option>
|
||||||
</select>
|
</select>
|
||||||
{config.description && (
|
{config.description && (
|
||||||
<p className="text-sm text-muted-foreground">{config.description}</p>
|
<p className="text-sm text-muted-foreground">{config.description}</p>
|
||||||
|
|||||||
@@ -101,6 +101,14 @@ export const backendsApi = {
|
|||||||
body: JSON.stringify({ command }),
|
body: JSON.stringify({ command }),
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
|
vllm: {
|
||||||
|
// POST /backends/vllm/parse-command
|
||||||
|
parseCommand: (command: string) =>
|
||||||
|
apiCall<CreateInstanceOptions>('/backends/vllm/parse-command', {
|
||||||
|
method: 'POST',
|
||||||
|
body: JSON.stringify({ command }),
|
||||||
|
}),
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
// Instance API functions
|
// Instance API functions
|
||||||
|
|||||||
@@ -2,13 +2,17 @@ import {
|
|||||||
type CreateInstanceOptions,
|
type CreateInstanceOptions,
|
||||||
type LlamaCppBackendOptions,
|
type LlamaCppBackendOptions,
|
||||||
type MlxBackendOptions,
|
type MlxBackendOptions,
|
||||||
|
type VllmBackendOptions,
|
||||||
LlamaCppBackendOptionsSchema,
|
LlamaCppBackendOptionsSchema,
|
||||||
MlxBackendOptionsSchema,
|
MlxBackendOptionsSchema,
|
||||||
|
VllmBackendOptionsSchema,
|
||||||
getAllFieldKeys,
|
getAllFieldKeys,
|
||||||
getAllLlamaCppFieldKeys,
|
getAllLlamaCppFieldKeys,
|
||||||
getAllMlxFieldKeys,
|
getAllMlxFieldKeys,
|
||||||
|
getAllVllmFieldKeys,
|
||||||
getLlamaCppFieldType,
|
getLlamaCppFieldType,
|
||||||
getMlxFieldType
|
getMlxFieldType,
|
||||||
|
getVllmFieldType
|
||||||
} from '@/schemas/instanceOptions'
|
} from '@/schemas/instanceOptions'
|
||||||
|
|
||||||
// Instance-level basic fields (not backend-specific)
|
// Instance-level basic fields (not backend-specific)
|
||||||
@@ -117,6 +121,31 @@ const basicMlxFieldsConfig: Record<string, {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// vLLM backend-specific basic fields.
// The keys of this object are what getBasicBackendFields('vllm') returns, and
// any vLLM schema key listed here is filtered out of getAdvancedBackendFields('vllm').
const basicVllmFieldsConfig: Record<
  string,
  {
    label: string
    description?: string
    placeholder?: string
    required?: boolean
  }
> = {
  // The only entry flagged as required
  model: {
    label: 'Model',
    placeholder: 'microsoft/DialoGPT-medium',
    description: 'The name or path of the Hugging Face model to use',
    required: true,
  },
  tensor_parallel_size: {
    label: 'Tensor Parallel Size',
    placeholder: '1',
    description: 'Number of GPUs to use for distributed serving',
  },
  gpu_memory_utilization: {
    label: 'GPU Memory Utilization',
    placeholder: '0.9',
    description: 'The fraction of GPU memory to be used for the model executor',
  },
}
|
||||||
|
|
||||||
function isBasicField(key: keyof CreateInstanceOptions): boolean {
|
function isBasicField(key: keyof CreateInstanceOptions): boolean {
|
||||||
return key in basicFieldsConfig
|
return key in basicFieldsConfig
|
||||||
}
|
}
|
||||||
@@ -134,6 +163,8 @@ export function getAdvancedFields(): (keyof CreateInstanceOptions)[] {
|
|||||||
export function getBasicBackendFields(backendType?: string): string[] {
|
export function getBasicBackendFields(backendType?: string): string[] {
|
||||||
if (backendType === 'mlx_lm') {
|
if (backendType === 'mlx_lm') {
|
||||||
return Object.keys(basicMlxFieldsConfig)
|
return Object.keys(basicMlxFieldsConfig)
|
||||||
|
} else if (backendType === 'vllm') {
|
||||||
|
return Object.keys(basicVllmFieldsConfig)
|
||||||
} else if (backendType === 'llama_cpp') {
|
} else if (backendType === 'llama_cpp') {
|
||||||
return Object.keys(basicLlamaCppFieldsConfig)
|
return Object.keys(basicLlamaCppFieldsConfig)
|
||||||
}
|
}
|
||||||
@@ -144,6 +175,8 @@ export function getBasicBackendFields(backendType?: string): string[] {
|
|||||||
export function getAdvancedBackendFields(backendType?: string): string[] {
|
export function getAdvancedBackendFields(backendType?: string): string[] {
|
||||||
if (backendType === 'mlx_lm') {
|
if (backendType === 'mlx_lm') {
|
||||||
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
|
return getAllMlxFieldKeys().filter(key => !(key in basicMlxFieldsConfig))
|
||||||
|
} else if (backendType === 'vllm') {
|
||||||
|
return getAllVllmFieldKeys().filter(key => !(key in basicVllmFieldsConfig))
|
||||||
} else if (backendType === 'llama_cpp') {
|
} else if (backendType === 'llama_cpp') {
|
||||||
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
|
return getAllLlamaCppFieldKeys().filter(key => !(key in basicLlamaCppFieldsConfig))
|
||||||
}
|
}
|
||||||
@@ -159,7 +192,8 @@ export const basicBackendFieldsConfig: Record<string, {
|
|||||||
required?: boolean
|
required?: boolean
|
||||||
}> = {
|
}> = {
|
||||||
...basicLlamaCppFieldsConfig,
|
...basicLlamaCppFieldsConfig,
|
||||||
...basicMlxFieldsConfig
|
...basicMlxFieldsConfig,
|
||||||
|
...basicVllmFieldsConfig
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get field type for any backend option (union type)
|
// Get field type for any backend option (union type)
|
||||||
@@ -182,6 +216,15 @@ export function getBackendFieldType(key: string): 'text' | 'number' | 'boolean'
|
|||||||
// Schema might not be available
|
// Schema might not be available
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Try vLLM schema
|
||||||
|
try {
|
||||||
|
if (VllmBackendOptionsSchema.shape && key in VllmBackendOptionsSchema.shape) {
|
||||||
|
return getVllmFieldType(key as keyof VllmBackendOptions)
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Schema might not be available
|
||||||
|
}
|
||||||
|
|
||||||
// Default fallback
|
// Default fallback
|
||||||
return 'text'
|
return 'text'
|
||||||
}
|
}
|
||||||
|
|||||||
4
webui/src/schemas/backends/index.ts
Normal file
4
webui/src/schemas/backends/index.ts
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
// Re-export all backend schemas from one place
|
||||||
|
export * from './llamacpp'
|
||||||
|
export * from './mlx'
|
||||||
|
export * from './vllm'
|
||||||
192
webui/src/schemas/backends/llamacpp.ts
Normal file
192
webui/src/schemas/backends/llamacpp.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import { z } from 'zod'
|
||||||
|
|
||||||
|
// Zod schema for the LlamaCpp backend options.
// Field types are declared bare and the whole object is passed through
// `.partial()`, which wraps every field in ZodOptional: no option is required.
// Key order is preserved because getAllLlamaCppFieldKeys() returns the keys in
// declaration order.
export const LlamaCppBackendOptionsSchema = z
  .object({
    // Common params
    verbose_prompt: z.boolean(),
    threads: z.number(),
    threads_batch: z.number(),
    cpu_mask: z.string(),
    cpu_range: z.string(),
    cpu_strict: z.number(),
    prio: z.number(),
    poll: z.number(),
    cpu_mask_batch: z.string(),
    cpu_range_batch: z.string(),
    cpu_strict_batch: z.number(),
    prio_batch: z.number(),
    poll_batch: z.number(),
    ctx_size: z.number(),
    predict: z.number(),
    batch_size: z.number(),
    ubatch_size: z.number(),
    keep: z.number(),
    flash_attn: z.boolean(),
    no_perf: z.boolean(),
    escape: z.boolean(),
    no_escape: z.boolean(),
    rope_scaling: z.string(),
    rope_scale: z.number(),
    rope_freq_base: z.number(),
    rope_freq_scale: z.number(),
    yarn_orig_ctx: z.number(),
    yarn_ext_factor: z.number(),
    yarn_attn_factor: z.number(),
    yarn_beta_slow: z.number(),
    yarn_beta_fast: z.number(),
    dump_kv_cache: z.boolean(),
    no_kv_offload: z.boolean(),
    cache_type_k: z.string(),
    cache_type_v: z.string(),
    defrag_thold: z.number(),
    parallel: z.number(),
    mlock: z.boolean(),
    no_mmap: z.boolean(),
    numa: z.string(),
    device: z.string(),
    override_tensor: z.array(z.string()),
    gpu_layers: z.number(),
    split_mode: z.string(),
    tensor_split: z.string(),
    main_gpu: z.number(),
    check_tensors: z.boolean(),
    override_kv: z.array(z.string()),
    lora: z.array(z.string()),
    lora_scaled: z.array(z.string()),
    control_vector: z.array(z.string()),
    control_vector_scaled: z.array(z.string()),
    control_vector_layer_range: z.string(),
    model: z.string(),
    model_url: z.string(),
    hf_repo: z.string(),
    hf_repo_draft: z.string(),
    hf_file: z.string(),
    hf_repo_v: z.string(),
    hf_file_v: z.string(),
    hf_token: z.string(),
    log_disable: z.boolean(),
    log_file: z.string(),
    log_colors: z.boolean(),
    verbose: z.boolean(),
    verbosity: z.number(),
    log_prefix: z.boolean(),
    log_timestamps: z.boolean(),

    // Sampling params
    samplers: z.string(),
    seed: z.number(),
    sampling_seq: z.string(),
    ignore_eos: z.boolean(),
    temp: z.number(),
    top_k: z.number(),
    top_p: z.number(),
    min_p: z.number(),
    xtc_probability: z.number(),
    xtc_threshold: z.number(),
    typical: z.number(),
    repeat_last_n: z.number(),
    repeat_penalty: z.number(),
    presence_penalty: z.number(),
    frequency_penalty: z.number(),
    dry_multiplier: z.number(),
    dry_base: z.number(),
    dry_allowed_length: z.number(),
    dry_penalty_last_n: z.number(),
    dry_sequence_breaker: z.array(z.string()),
    dynatemp_range: z.number(),
    dynatemp_exp: z.number(),
    mirostat: z.number(),
    mirostat_lr: z.number(),
    mirostat_ent: z.number(),
    logit_bias: z.array(z.string()),
    grammar: z.string(),
    grammar_file: z.string(),
    json_schema: z.string(),
    json_schema_file: z.string(),

    // Example-specific params
    no_context_shift: z.boolean(),
    special: z.boolean(),
    no_warmup: z.boolean(),
    spm_infill: z.boolean(),
    pooling: z.string(),
    cont_batching: z.boolean(),
    no_cont_batching: z.boolean(),
    mmproj: z.string(),
    mmproj_url: z.string(),
    no_mmproj: z.boolean(),
    no_mmproj_offload: z.boolean(),
    alias: z.string(),
    host: z.string(),
    port: z.number(),
    path: z.string(),
    no_webui: z.boolean(),
    embedding: z.boolean(),
    reranking: z.boolean(),
    api_key: z.string(),
    api_key_file: z.string(),
    ssl_key_file: z.string(),
    ssl_cert_file: z.string(),
    chat_template_kwargs: z.string(),
    timeout: z.number(),
    threads_http: z.number(),
    cache_reuse: z.number(),
    metrics: z.boolean(),
    slots: z.boolean(),
    props: z.boolean(),
    no_slots: z.boolean(),
    slot_save_path: z.string(),
    jinja: z.boolean(),
    reasoning_format: z.string(),
    reasoning_budget: z.number(),
    chat_template: z.string(),
    chat_template_file: z.string(),
    no_prefill_assistant: z.boolean(),
    slot_prompt_similarity: z.number(),
    lora_init_without_apply: z.boolean(),
    draft_max: z.number(),
    draft_min: z.number(),
    draft_p_min: z.number(),
    ctx_size_draft: z.number(),
    device_draft: z.string(),
    gpu_layers_draft: z.number(),
    model_draft: z.string(),
    cache_type_k_draft: z.string(),
    cache_type_v_draft: z.string(),

    // Audio/TTS params
    model_vocoder: z.string(),
    tts_use_guide_tokens: z.boolean(),

    // Default model params
    embd_bge_small_en_default: z.boolean(),
    embd_e5_small_en_default: z.boolean(),
    embd_gte_small_default: z.boolean(),
    fim_qwen_1_5b_default: z.boolean(),
    fim_qwen_3b_default: z.boolean(),
    fim_qwen_7b_default: z.boolean(),
    fim_qwen_7b_spec: z.boolean(),
    fim_qwen_14b_spec: z.boolean(),
  })
  .partial()
|
||||||
|
|
||||||
|
// Infer the TypeScript type from the schema
|
||||||
|
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
|
||||||
|
|
||||||
|
/**
 * Lists every option name declared on LlamaCppBackendOptionsSchema,
 * in the order the fields appear in the schema definition.
 */
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
  const { shape } = LlamaCppBackendOptionsSchema
  return Object.keys(shape) as (keyof LlamaCppBackendOptions)[]
}
|
||||||
|
|
||||||
|
/**
 * Maps a LlamaCpp backend option to the kind of form input it needs.
 *
 * @param key - option name from LlamaCppBackendOptionsSchema
 * @returns 'boolean', 'number' or 'array' when the (unwrapped) field schema is
 *   a ZodBoolean, ZodNumber or ZodArray; 'text' for everything else, including
 *   keys that have no schema entry.
 */
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const declared = LlamaCppBackendOptionsSchema.shape[key]
  if (!declared) {
    return 'text'
  }

  // Look through the ZodOptional wrapper to the underlying field schema
  const base = declared instanceof z.ZodOptional ? declared.unwrap() : declared

  const inputKinds = [
    [z.ZodBoolean, 'boolean'],
    [z.ZodNumber, 'number'],
    [z.ZodArray, 'array'],
  ] as const

  for (const [schemaClass, inputKind] of inputKinds) {
    if (base instanceof schemaClass) {
      return inputKind
    }
  }

  // ZodString and anything unrecognised is edited as plain text
  return 'text'
}
|
||||||
51
webui/src/schemas/backends/mlx.ts
Normal file
51
webui/src/schemas/backends/mlx.ts
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
import { z } from 'zod'
|
||||||
|
|
||||||
|
// Zod schema for the MLX backend options.
// Field types are declared bare and the whole object is passed through
// `.partial()`, which wraps every field in ZodOptional: no option is required.
// Key order is preserved because getAllMlxFieldKeys() returns the keys in
// declaration order.
export const MlxBackendOptionsSchema = z
  .object({
    // Basic connection options
    model: z.string(),
    host: z.string(),
    port: z.number(),

    // Model and adapter options
    adapter_path: z.string(),
    draft_model: z.string(),
    num_draft_tokens: z.number(),
    trust_remote_code: z.boolean(),

    // Logging and templates
    log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']),
    chat_template: z.string(),
    use_default_chat_template: z.boolean(),
    chat_template_args: z.string(), // JSON string

    // Sampling defaults
    temp: z.number(), // Note: MLX uses "temp" not "temperature"
    top_p: z.number(),
    top_k: z.number(),
    min_p: z.number(),
    max_tokens: z.number(),
  })
  .partial()
|
||||||
|
|
||||||
|
// Infer the TypeScript type from the schema
|
||||||
|
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
|
||||||
|
|
||||||
|
/**
 * Lists every option name declared on MlxBackendOptionsSchema,
 * in the order the fields appear in the schema definition.
 */
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
  const { shape } = MlxBackendOptionsSchema
  return Object.keys(shape) as (keyof MlxBackendOptions)[]
}
|
||||||
|
|
||||||
|
/**
 * Maps an MLX backend option to the kind of form input it needs.
 *
 * @param key - option name from MlxBackendOptionsSchema
 * @returns 'boolean', 'number' or 'array' when the (unwrapped) field schema is
 *   a ZodBoolean, ZodNumber or ZodArray; 'text' for everything else, including
 *   enum fields and keys that have no schema entry.
 */
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const declared = MlxBackendOptionsSchema.shape[key]
  if (!declared) {
    return 'text'
  }

  // Look through the ZodOptional wrapper to the underlying field schema
  const base = declared instanceof z.ZodOptional ? declared.unwrap() : declared

  const inputKinds = [
    [z.ZodBoolean, 'boolean'],
    [z.ZodNumber, 'number'],
    [z.ZodArray, 'array'],
  ] as const

  for (const [schemaClass, inputKind] of inputKinds) {
    if (base instanceof schemaClass) {
      return inputKind
    }
  }

  // ZodEnum (treated as text/select), ZodString and anything else map to text
  return 'text'
}
|
||||||
150
webui/src/schemas/backends/vllm.ts
Normal file
150
webui/src/schemas/backends/vllm.ts
Normal file
@@ -0,0 +1,150 @@
|
|||||||
|
import { z } from 'zod'
|
||||||
|
|
||||||
|
// Zod schema for the vLLM backend options.
// Field types are declared bare and the whole object is passed through
// `.partial()`, which wraps every field in ZodOptional: no option is required.
// Key order is preserved because getAllVllmFieldKeys() returns the keys in
// declaration order.
export const VllmBackendOptionsSchema = z
  .object({
    // Basic connection options (auto-assigned by llamactl)
    host: z.string(),
    port: z.number(),

    // Model and engine configuration
    model: z.string(),
    tokenizer: z.string(),
    skip_tokenizer_init: z.boolean(),
    revision: z.string(),
    code_revision: z.string(),
    tokenizer_revision: z.string(),
    tokenizer_mode: z.string(),
    trust_remote_code: z.boolean(),
    download_dir: z.string(),
    load_format: z.string(),
    config_format: z.string(),
    dtype: z.string(),
    kv_cache_dtype: z.string(),
    quantization_param_path: z.string(),
    seed: z.number(),
    max_model_len: z.number(),
    guided_decoding_backend: z.string(),
    distributed_executor_backend: z.string(),
    worker_use_ray: z.boolean(),
    ray_workers_use_nsight: z.boolean(),

    // Performance and serving configuration
    block_size: z.number(),
    enable_prefix_caching: z.boolean(),
    disable_sliding_window: z.boolean(),
    use_v2_block_manager: z.boolean(),
    num_lookahead_slots: z.number(),
    swap_space: z.number(),
    cpu_offload_gb: z.number(),
    gpu_memory_utilization: z.number(),
    num_gpu_blocks_override: z.number(),
    max_num_batched_tokens: z.number(),
    max_num_seqs: z.number(),
    max_logprobs: z.number(),
    disable_log_stats: z.boolean(),
    quantization: z.string(),
    rope_scaling: z.string(),
    rope_theta: z.number(),
    enforce_eager: z.boolean(),
    max_context_len_to_capture: z.number(),
    max_seq_len_to_capture: z.number(),
    disable_custom_all_reduce: z.boolean(),
    tokenizer_pool_size: z.number(),
    tokenizer_pool_type: z.string(),
    tokenizer_pool_extra_config: z.string(),
    enable_lora_bias: z.boolean(),
    lora_extra_vocab_size: z.number(),
    lora_rank: z.number(),
    prompt_lookback_distance: z.number(),
    preemption_mode: z.string(),

    // Distributed and parallel processing
    tensor_parallel_size: z.number(),
    pipeline_parallel_size: z.number(),
    max_parallel_loading_workers: z.number(),
    disable_async_output_proc: z.boolean(),
    worker_class: z.string(),
    enabled_lora_modules: z.string(),
    max_lora_rank: z.number(),
    fully_sharded_loras: z.boolean(),
    lora_modules: z.string(),
    prompt_adapters: z.string(),
    max_prompt_adapter_token: z.number(),
    device: z.string(),
    scheduler_delay: z.number(),
    enable_chunked_prefill: z.boolean(),
    speculative_model: z.string(),
    speculative_model_quantization: z.string(),
    speculative_revision: z.string(),
    speculative_max_model_len: z.number(),
    speculative_disable_by_batch_size: z.number(),
    ngpt_speculative_length: z.number(),
    speculative_disable_mqa: z.boolean(),
    model_loader_extra_config: z.string(),
    ignore_patterns: z.string(),
    preloaded_lora_modules: z.string(),

    // OpenAI server specific options
    uds: z.string(),
    uvicorn_log_level: z.string(),
    response_role: z.string(),
    ssl_keyfile: z.string(),
    ssl_certfile: z.string(),
    ssl_ca_certs: z.string(),
    ssl_cert_reqs: z.number(),
    root_path: z.string(),
    middleware: z.array(z.string()),
    return_tokens_as_token_ids: z.boolean(),
    disable_frontend_multiprocessing: z.boolean(),
    enable_auto_tool_choice: z.boolean(),
    tool_call_parser: z.string(),
    tool_server: z.string(),
    chat_template: z.string(),
    chat_template_content_format: z.string(),
    allow_credentials: z.boolean(),
    allowed_origins: z.array(z.string()),
    allowed_methods: z.array(z.string()),
    allowed_headers: z.array(z.string()),
    api_key: z.array(z.string()),
    enable_log_outputs: z.boolean(),
    enable_token_usage: z.boolean(),
    enable_async_engine_debug: z.boolean(),
    engine_use_ray: z.boolean(),
    disable_log_requests: z.boolean(),
    max_log_len: z.number(),

    // Additional engine configuration
    task: z.string(),
    multi_modal_config: z.string(),
    limit_mm_per_prompt: z.string(),
    enable_sleep_mode: z.boolean(),
    enable_chunking_request: z.boolean(),
    compilation_config: z.string(),
    disable_sliding_window_mask: z.boolean(),
    enable_trtllm_engine_latency: z.boolean(),
    override_pooling_config: z.string(),
    override_neuron_config: z.string(),
    override_kv_cache_align_size: z.number(),
  })
  .partial()
|
||||||
|
|
||||||
|
// Infer the TypeScript type from the schema
|
||||||
|
export type VllmBackendOptions = z.infer<typeof VllmBackendOptionsSchema>
|
||||||
|
|
||||||
|
/**
 * Lists every option name declared on VllmBackendOptionsSchema,
 * in the order the fields appear in the schema definition.
 */
export function getAllVllmFieldKeys(): (keyof VllmBackendOptions)[] {
  const { shape } = VllmBackendOptionsSchema
  return Object.keys(shape) as (keyof VllmBackendOptions)[]
}
|
||||||
|
|
||||||
|
/**
 * Classifies a vLLM backend option by the kind of value its schema accepts.
 *
 * @param key - Name of a field declared on `VllmBackendOptionsSchema`.
 * @returns `'boolean'`, `'number'` or `'array'` when the (unwrapped) field
 *          schema is a Zod boolean, number or array; `'text'` for every other
 *          schema and for keys with no schema entry.
 */
export function getVllmFieldType(key: keyof VllmBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const declared = VllmBackendOptionsSchema.shape[key]
  if (!declared) {
    return 'text'
  }

  // Look through a ZodOptional wrapper so the underlying schema is inspected.
  const base = declared instanceof z.ZodOptional ? declared.unwrap() : declared

  // Anything that is not boolean/number/array (e.g. ZodString) is treated as text.
  return base instanceof z.ZodBoolean
    ? 'boolean'
    : base instanceof z.ZodNumber
      ? 'number'
      : base instanceof z.ZodArray
        ? 'array'
        : 'text'
}
|
||||||
@@ -1,206 +1,27 @@
|
|||||||
import { BackendType } from '@/types/instance'
|
import { BackendType } from '@/types/instance'
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
|
|
||||||
// Define the LlamaCpp backend options schema
|
// Import backend schemas from separate files
|
||||||
export const LlamaCppBackendOptionsSchema = z.object({
|
import {
|
||||||
// Common params
|
LlamaCppBackendOptionsSchema,
|
||||||
verbose_prompt: z.boolean().optional(),
|
type LlamaCppBackendOptions,
|
||||||
threads: z.number().optional(),
|
getAllLlamaCppFieldKeys,
|
||||||
threads_batch: z.number().optional(),
|
getLlamaCppFieldType,
|
||||||
cpu_mask: z.string().optional(),
|
MlxBackendOptionsSchema,
|
||||||
cpu_range: z.string().optional(),
|
type MlxBackendOptions,
|
||||||
cpu_strict: z.number().optional(),
|
getAllMlxFieldKeys,
|
||||||
prio: z.number().optional(),
|
getMlxFieldType,
|
||||||
poll: z.number().optional(),
|
VllmBackendOptionsSchema,
|
||||||
cpu_mask_batch: z.string().optional(),
|
type VllmBackendOptions,
|
||||||
cpu_range_batch: z.string().optional(),
|
getAllVllmFieldKeys,
|
||||||
cpu_strict_batch: z.number().optional(),
|
getVllmFieldType
|
||||||
prio_batch: z.number().optional(),
|
} from './backends'
|
||||||
poll_batch: z.number().optional(),
|
|
||||||
ctx_size: z.number().optional(),
|
|
||||||
predict: z.number().optional(),
|
|
||||||
batch_size: z.number().optional(),
|
|
||||||
ubatch_size: z.number().optional(),
|
|
||||||
keep: z.number().optional(),
|
|
||||||
flash_attn: z.boolean().optional(),
|
|
||||||
no_perf: z.boolean().optional(),
|
|
||||||
escape: z.boolean().optional(),
|
|
||||||
no_escape: z.boolean().optional(),
|
|
||||||
rope_scaling: z.string().optional(),
|
|
||||||
rope_scale: z.number().optional(),
|
|
||||||
rope_freq_base: z.number().optional(),
|
|
||||||
rope_freq_scale: z.number().optional(),
|
|
||||||
yarn_orig_ctx: z.number().optional(),
|
|
||||||
yarn_ext_factor: z.number().optional(),
|
|
||||||
yarn_attn_factor: z.number().optional(),
|
|
||||||
yarn_beta_slow: z.number().optional(),
|
|
||||||
yarn_beta_fast: z.number().optional(),
|
|
||||||
dump_kv_cache: z.boolean().optional(),
|
|
||||||
no_kv_offload: z.boolean().optional(),
|
|
||||||
cache_type_k: z.string().optional(),
|
|
||||||
cache_type_v: z.string().optional(),
|
|
||||||
defrag_thold: z.number().optional(),
|
|
||||||
parallel: z.number().optional(),
|
|
||||||
mlock: z.boolean().optional(),
|
|
||||||
no_mmap: z.boolean().optional(),
|
|
||||||
numa: z.string().optional(),
|
|
||||||
device: z.string().optional(),
|
|
||||||
override_tensor: z.array(z.string()).optional(),
|
|
||||||
gpu_layers: z.number().optional(),
|
|
||||||
split_mode: z.string().optional(),
|
|
||||||
tensor_split: z.string().optional(),
|
|
||||||
main_gpu: z.number().optional(),
|
|
||||||
check_tensors: z.boolean().optional(),
|
|
||||||
override_kv: z.array(z.string()).optional(),
|
|
||||||
lora: z.array(z.string()).optional(),
|
|
||||||
lora_scaled: z.array(z.string()).optional(),
|
|
||||||
control_vector: z.array(z.string()).optional(),
|
|
||||||
control_vector_scaled: z.array(z.string()).optional(),
|
|
||||||
control_vector_layer_range: z.string().optional(),
|
|
||||||
model: z.string().optional(),
|
|
||||||
model_url: z.string().optional(),
|
|
||||||
hf_repo: z.string().optional(),
|
|
||||||
hf_repo_draft: z.string().optional(),
|
|
||||||
hf_file: z.string().optional(),
|
|
||||||
hf_repo_v: z.string().optional(),
|
|
||||||
hf_file_v: z.string().optional(),
|
|
||||||
hf_token: z.string().optional(),
|
|
||||||
log_disable: z.boolean().optional(),
|
|
||||||
log_file: z.string().optional(),
|
|
||||||
log_colors: z.boolean().optional(),
|
|
||||||
verbose: z.boolean().optional(),
|
|
||||||
verbosity: z.number().optional(),
|
|
||||||
log_prefix: z.boolean().optional(),
|
|
||||||
log_timestamps: z.boolean().optional(),
|
|
||||||
|
|
||||||
// Sampling params
|
|
||||||
samplers: z.string().optional(),
|
|
||||||
seed: z.number().optional(),
|
|
||||||
sampling_seq: z.string().optional(),
|
|
||||||
ignore_eos: z.boolean().optional(),
|
|
||||||
temp: z.number().optional(),
|
|
||||||
top_k: z.number().optional(),
|
|
||||||
top_p: z.number().optional(),
|
|
||||||
min_p: z.number().optional(),
|
|
||||||
xtc_probability: z.number().optional(),
|
|
||||||
xtc_threshold: z.number().optional(),
|
|
||||||
typical: z.number().optional(),
|
|
||||||
repeat_last_n: z.number().optional(),
|
|
||||||
repeat_penalty: z.number().optional(),
|
|
||||||
presence_penalty: z.number().optional(),
|
|
||||||
frequency_penalty: z.number().optional(),
|
|
||||||
dry_multiplier: z.number().optional(),
|
|
||||||
dry_base: z.number().optional(),
|
|
||||||
dry_allowed_length: z.number().optional(),
|
|
||||||
dry_penalty_last_n: z.number().optional(),
|
|
||||||
dry_sequence_breaker: z.array(z.string()).optional(),
|
|
||||||
dynatemp_range: z.number().optional(),
|
|
||||||
dynatemp_exp: z.number().optional(),
|
|
||||||
mirostat: z.number().optional(),
|
|
||||||
mirostat_lr: z.number().optional(),
|
|
||||||
mirostat_ent: z.number().optional(),
|
|
||||||
logit_bias: z.array(z.string()).optional(),
|
|
||||||
grammar: z.string().optional(),
|
|
||||||
grammar_file: z.string().optional(),
|
|
||||||
json_schema: z.string().optional(),
|
|
||||||
json_schema_file: z.string().optional(),
|
|
||||||
|
|
||||||
// Example-specific params
|
|
||||||
no_context_shift: z.boolean().optional(),
|
|
||||||
special: z.boolean().optional(),
|
|
||||||
no_warmup: z.boolean().optional(),
|
|
||||||
spm_infill: z.boolean().optional(),
|
|
||||||
pooling: z.string().optional(),
|
|
||||||
cont_batching: z.boolean().optional(),
|
|
||||||
no_cont_batching: z.boolean().optional(),
|
|
||||||
mmproj: z.string().optional(),
|
|
||||||
mmproj_url: z.string().optional(),
|
|
||||||
no_mmproj: z.boolean().optional(),
|
|
||||||
no_mmproj_offload: z.boolean().optional(),
|
|
||||||
alias: z.string().optional(),
|
|
||||||
host: z.string().optional(),
|
|
||||||
port: z.number().optional(),
|
|
||||||
path: z.string().optional(),
|
|
||||||
no_webui: z.boolean().optional(),
|
|
||||||
embedding: z.boolean().optional(),
|
|
||||||
reranking: z.boolean().optional(),
|
|
||||||
api_key: z.string().optional(),
|
|
||||||
api_key_file: z.string().optional(),
|
|
||||||
ssl_key_file: z.string().optional(),
|
|
||||||
ssl_cert_file: z.string().optional(),
|
|
||||||
chat_template_kwargs: z.string().optional(),
|
|
||||||
timeout: z.number().optional(),
|
|
||||||
threads_http: z.number().optional(),
|
|
||||||
cache_reuse: z.number().optional(),
|
|
||||||
metrics: z.boolean().optional(),
|
|
||||||
slots: z.boolean().optional(),
|
|
||||||
props: z.boolean().optional(),
|
|
||||||
no_slots: z.boolean().optional(),
|
|
||||||
slot_save_path: z.string().optional(),
|
|
||||||
jinja: z.boolean().optional(),
|
|
||||||
reasoning_format: z.string().optional(),
|
|
||||||
reasoning_budget: z.number().optional(),
|
|
||||||
chat_template: z.string().optional(),
|
|
||||||
chat_template_file: z.string().optional(),
|
|
||||||
no_prefill_assistant: z.boolean().optional(),
|
|
||||||
slot_prompt_similarity: z.number().optional(),
|
|
||||||
lora_init_without_apply: z.boolean().optional(),
|
|
||||||
draft_max: z.number().optional(),
|
|
||||||
draft_min: z.number().optional(),
|
|
||||||
draft_p_min: z.number().optional(),
|
|
||||||
ctx_size_draft: z.number().optional(),
|
|
||||||
device_draft: z.string().optional(),
|
|
||||||
gpu_layers_draft: z.number().optional(),
|
|
||||||
model_draft: z.string().optional(),
|
|
||||||
cache_type_k_draft: z.string().optional(),
|
|
||||||
cache_type_v_draft: z.string().optional(),
|
|
||||||
|
|
||||||
// Audio/TTS params
|
|
||||||
model_vocoder: z.string().optional(),
|
|
||||||
tts_use_guide_tokens: z.boolean().optional(),
|
|
||||||
|
|
||||||
// Default model params
|
|
||||||
embd_bge_small_en_default: z.boolean().optional(),
|
|
||||||
embd_e5_small_en_default: z.boolean().optional(),
|
|
||||||
embd_gte_small_default: z.boolean().optional(),
|
|
||||||
fim_qwen_1_5b_default: z.boolean().optional(),
|
|
||||||
fim_qwen_3b_default: z.boolean().optional(),
|
|
||||||
fim_qwen_7b_default: z.boolean().optional(),
|
|
||||||
fim_qwen_7b_spec: z.boolean().optional(),
|
|
||||||
fim_qwen_14b_spec: z.boolean().optional(),
|
|
||||||
})
|
|
||||||
|
|
||||||
/**
 * Zod schema for the options accepted by the MLX backend.
 * Every field is optional; key order is kept as declared because helpers
 * that enumerate `.shape` return the keys in this order.
 */
export const MlxBackendOptionsSchema = z.object({
  // --- Connection ---
  model: z.string().optional(),
  host: z.string().optional(),
  port: z.number().optional(),

  // --- Model and adapters ---
  adapter_path: z.string().optional(),
  draft_model: z.string().optional(),
  num_draft_tokens: z.number().optional(),
  trust_remote_code: z.boolean().optional(),

  // --- Logging and chat templates ---
  log_level: z.enum(['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']).optional(),
  chat_template: z.string().optional(),
  use_default_chat_template: z.boolean().optional(),
  chat_template_args: z.string().optional(), // carried as a JSON string

  // --- Sampling defaults ---
  temp: z.number().optional(), // MLX names this "temp", not "temperature"
  top_p: z.number().optional(),
  top_k: z.number().optional(),
  min_p: z.number().optional(),
  max_tokens: z.number().optional(),
})
|
|
||||||
|
|
||||||
// Backend options union
|
// Backend options union
|
||||||
export const BackendOptionsSchema = z.union([
|
export const BackendOptionsSchema = z.union([
|
||||||
LlamaCppBackendOptionsSchema,
|
LlamaCppBackendOptionsSchema,
|
||||||
MlxBackendOptionsSchema,
|
MlxBackendOptionsSchema,
|
||||||
|
VllmBackendOptionsSchema,
|
||||||
])
|
])
|
||||||
|
|
||||||
// Define the main create instance options schema
|
// Define the main create instance options schema
|
||||||
@@ -213,13 +34,27 @@ export const CreateInstanceOptionsSchema = z.object({
|
|||||||
on_demand_start: z.boolean().optional(),
|
on_demand_start: z.boolean().optional(),
|
||||||
|
|
||||||
// Backend configuration
|
// Backend configuration
|
||||||
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM]).optional(),
|
backend_type: z.enum([BackendType.LLAMA_CPP, BackendType.MLX_LM, BackendType.VLLM]).optional(),
|
||||||
backend_options: BackendOptionsSchema.optional(),
|
backend_options: BackendOptionsSchema.optional(),
|
||||||
})
|
})
|
||||||
|
|
||||||
|
// Re-export types and schemas from backend files
|
||||||
|
export {
|
||||||
|
LlamaCppBackendOptionsSchema,
|
||||||
|
MlxBackendOptionsSchema,
|
||||||
|
VllmBackendOptionsSchema,
|
||||||
|
type LlamaCppBackendOptions,
|
||||||
|
type MlxBackendOptions,
|
||||||
|
type VllmBackendOptions,
|
||||||
|
getAllLlamaCppFieldKeys,
|
||||||
|
getAllMlxFieldKeys,
|
||||||
|
getAllVllmFieldKeys,
|
||||||
|
getLlamaCppFieldType,
|
||||||
|
getMlxFieldType,
|
||||||
|
getVllmFieldType
|
||||||
|
}
|
||||||
|
|
||||||
// Infer the TypeScript types from the schemas
|
// Infer the TypeScript types from the schemas
|
||||||
export type LlamaCppBackendOptions = z.infer<typeof LlamaCppBackendOptionsSchema>
|
|
||||||
export type MlxBackendOptions = z.infer<typeof MlxBackendOptionsSchema>
|
|
||||||
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
|
export type BackendOptions = z.infer<typeof BackendOptionsSchema>
|
||||||
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
|
export type CreateInstanceOptions = z.infer<typeof CreateInstanceOptionsSchema>
|
||||||
|
|
||||||
@@ -228,16 +63,6 @@ export function getAllFieldKeys(): (keyof CreateInstanceOptions)[] {
|
|||||||
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
|
return Object.keys(CreateInstanceOptionsSchema.shape) as (keyof CreateInstanceOptions)[]
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Lists the option names declared on the LlamaCpp backend options schema.
 *
 * @returns The keys of `LlamaCppBackendOptionsSchema.shape`, in the order
 *          `Object.keys` reports them.
 */
export function getAllLlamaCppFieldKeys(): (keyof LlamaCppBackendOptions)[] {
  const schemaShape = LlamaCppBackendOptionsSchema.shape
  const fieldNames = Object.keys(schemaShape)
  // Object.keys is typed as string[]; the shape's keys are exactly the option names.
  return fieldNames as (keyof LlamaCppBackendOptions)[]
}
|
|
||||||
|
|
||||||
/**
 * Lists the option names declared on the MLX backend options schema.
 *
 * @returns The keys of `MlxBackendOptionsSchema.shape`, in the order
 *          `Object.keys` reports them.
 */
export function getAllMlxFieldKeys(): (keyof MlxBackendOptions)[] {
  const schemaShape = MlxBackendOptionsSchema.shape
  const fieldNames = Object.keys(schemaShape)
  // Object.keys is typed as string[]; the shape's keys are exactly the option names.
  return fieldNames as (keyof MlxBackendOptions)[]
}
|
|
||||||
|
|
||||||
// Get field type from Zod schema
|
// Get field type from Zod schema
|
||||||
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
|
export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number' | 'boolean' | 'array' | 'object' {
|
||||||
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
|
const fieldSchema = CreateInstanceOptionsSchema.shape[key]
|
||||||
@@ -252,32 +77,3 @@ export function getFieldType(key: keyof CreateInstanceOptions): 'text' | 'number
|
|||||||
if (innerSchema instanceof z.ZodObject) return 'object'
|
if (innerSchema instanceof z.ZodObject) return 'object'
|
||||||
return 'text' // ZodString and others default to text
|
return 'text' // ZodString and others default to text
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Classifies a LlamaCpp backend option by the kind of value its schema accepts.
 *
 * @param key - Name of a field declared on `LlamaCppBackendOptionsSchema`.
 * @returns `'boolean'`, `'number'` or `'array'` when the (unwrapped) field
 *          schema is a Zod boolean, number or array; `'text'` for every other
 *          schema and for keys with no schema entry.
 */
export function getLlamaCppFieldType(key: keyof LlamaCppBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const declared = LlamaCppBackendOptionsSchema.shape[key]
  if (!declared) {
    return 'text'
  }

  // Look through a ZodOptional wrapper so the underlying schema is inspected.
  const base = declared instanceof z.ZodOptional ? declared.unwrap() : declared

  // Anything that is not boolean/number/array (e.g. ZodString) is treated as text.
  return base instanceof z.ZodBoolean
    ? 'boolean'
    : base instanceof z.ZodNumber
      ? 'number'
      : base instanceof z.ZodArray
        ? 'array'
        : 'text'
}
|
|
||||||
|
|
||||||
/**
 * Classifies an MLX backend option by the kind of value its schema accepts.
 *
 * @param key - Name of a field declared on `MlxBackendOptionsSchema`.
 * @returns `'boolean'`, `'number'` or `'array'` when the (unwrapped) field
 *          schema is a Zod boolean, number or array; `'text'` for every other
 *          schema (strings and enums included) and for keys with no schema entry.
 */
export function getMlxFieldType(key: keyof MlxBackendOptions): 'text' | 'number' | 'boolean' | 'array' {
  const declared = MlxBackendOptionsSchema.shape[key]
  if (!declared) {
    return 'text'
  }

  // Look through a ZodOptional wrapper so the underlying schema is inspected.
  const base = declared instanceof z.ZodOptional ? declared.unwrap() : declared

  // Enums (such as log_level) get no dedicated kind: like strings, they
  // resolve to 'text' through the final fallback.
  return base instanceof z.ZodBoolean
    ? 'boolean'
    : base instanceof z.ZodNumber
      ? 'number'
      : base instanceof z.ZodArray
        ? 'array'
        : 'text'
}
|
|
||||||
@@ -5,6 +5,7 @@ export { type CreateInstanceOptions } from '@/schemas/instanceOptions'
|
|||||||
export const BackendType = {
|
export const BackendType = {
|
||||||
LLAMA_CPP: 'llama_cpp',
|
LLAMA_CPP: 'llama_cpp',
|
||||||
MLX_LM: 'mlx_lm',
|
MLX_LM: 'mlx_lm',
|
||||||
|
VLLM: 'vllm',
|
||||||
// MLX_VLM: 'mlx_vlm', // Future expansion
|
// MLX_VLM: 'mlx_vlm', // Future expansion
|
||||||
} as const
|
} as const
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user