Pass the vLLM model as a positional argument instead of a --model flag,
and update TestBuildCommandArgs to expect the positional form.

diff --git a/pkg/backends/vllm/vllm.go b/pkg/backends/vllm/vllm.go
index df080ea..7811c4c 100644
--- a/pkg/backends/vllm/vllm.go
+++ b/pkg/backends/vllm/vllm.go
@@ -132,7 +132,19 @@ type VllmServerOptions struct {
 
 // BuildCommandArgs converts VllmServerOptions to command line arguments
 // Note: This does NOT include the "serve" subcommand, that's handled at the instance level
+// For vLLM, the model parameter is passed as a positional argument, not a --model flag
 func (o *VllmServerOptions) BuildCommandArgs() []string {
+	var args []string
+
+	// Add model as positional argument if specified
+	if o.Model != "" {
+		args = append(args, o.Model)
+	}
+
+	// Create a copy of the options without the Model field to avoid including it as --model flag
+	optionsCopy := *o
+	optionsCopy.Model = "" // Clear model field so it won't be included as a flag
+
 	multipleFlags := map[string]bool{
 		"api-key":         true,
 		"allowed-origins": true,
@@ -140,7 +152,12 @@ func (o *VllmServerOptions) BuildCommandArgs() []string {
 		"allowed-headers": true,
 		"middleware":      true,
 	}
-	return backends.BuildCommandArgs(o, multipleFlags)
+
+	// Build the rest of the arguments as flags
+	flagArgs := backends.BuildCommandArgs(&optionsCopy, multipleFlags)
+	args = append(args, flagArgs...)
+
+	return args
 }
 
 // ParseVllmCommand parses a vLLM serve command string into VllmServerOptions
diff --git a/pkg/backends/vllm/vllm_test.go b/pkg/backends/vllm/vllm_test.go
index 3f01ff9..db5a295 100644
--- a/pkg/backends/vllm/vllm_test.go
+++ b/pkg/backends/vllm/vllm_test.go
@@ -97,10 +97,17 @@ func TestBuildCommandArgs(t *testing.T) {
 
 	args := options.BuildCommandArgs()
 
-	// Check core functionality
-	if !containsFlagWithValue(args, "--model", "microsoft/DialoGPT-medium") {
-		t.Errorf("Expected --model microsoft/DialoGPT-medium not found in %v", args)
+	// Check that model is the first positional argument (not a --model flag)
+	if len(args) == 0 || args[0] != "microsoft/DialoGPT-medium" {
+		t.Errorf("Expected model 'microsoft/DialoGPT-medium' as first positional argument, got args: %v", args)
 	}
+
+	// Check that --model flag is NOT present (since model should be positional)
+	if contains(args, "--model") {
+		t.Errorf("Found --model flag, but model should be positional argument in args: %v", args)
+	}
+
+	// Check other flags
 	if !containsFlagWithValue(args, "--tensor-parallel-size", "2") {
 		t.Errorf("Expected --tensor-parallel-size 2 not found in %v", args)
 	}