diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go
index 73e747e..5b55e7d 100644
--- a/pkg/manager/manager.go
+++ b/pkg/manager/manager.go
@@ -19,13 +19,11 @@ type InstanceManager interface {
     UpdateInstance(name string, options *instance.Options) (*instance.Instance, error)
     DeleteInstance(name string) error
     StartInstance(name string) (*instance.Instance, error)
-    IsMaxRunningInstancesReached() bool
+    AtMaxRunning() bool
     StopInstance(name string) (*instance.Instance, error)
     EvictLRUInstance() error
     RestartInstance(name string) (*instance.Instance, error)
     GetInstanceLogs(name string, numLines int) (string, error)
-    ResolveInstance(modelName string) (string, error)
-    RefreshModelRegistry(inst *instance.Instance) error
     Shutdown()
 }

@@ -36,7 +34,6 @@ type instanceManager struct {
     db        database.InstanceStore
     remote    *remoteManager
     lifecycle *lifecycleManager
-    models    *modelRegistry

     // Configuration
     globalConfig *config.AppConfig
@@ -63,16 +60,12 @@ func New(globalConfig *config.AppConfig, db database.InstanceStore) InstanceMana
     // Initialize remote manager
     remote := newRemoteManager(globalConfig.Nodes, 30*time.Second)

-    // Initialize model registry
-    models := newModelRegistry()
-
     // Create manager instance
     im := &instanceManager{
         registry:     registry,
         ports:        ports,
         db:           db,
         remote:       remote,
-        models:       models,
         globalConfig: globalConfig,
     }

@@ -149,27 +142,9 @@ func (im *instanceManager) loadInstances() error {
     // Auto-start instances that have auto-restart enabled
     go im.autoStartInstances()

-    // Discover models from all running llama.cpp instances
-    go im.discoverAllModels()
-
     return nil
 }

-// discoverAllModels discovers and registers models for all running llama.cpp instances
-func (im *instanceManager) discoverAllModels() {
-    instances := im.registry.listRunning()
-
-    for _, inst := range instances {
-        if !inst.IsLlamaCpp() {
-            continue
-        }
-
-        if err := im.RefreshModelRegistry(inst); err != nil {
-            log.Printf("Failed to discover models for instance %s: %v", inst.Name, err)
-        }
-    }
-}
-
 // loadInstance loads a single persisted instance and adds it to the registry
 func (im *instanceManager) loadInstance(persistedInst *instance.Instance) error {
     name := persistedInst.Name
diff --git a/pkg/manager/model_registry.go b/pkg/manager/model_registry.go
deleted file mode 100644
index 0515c91..0000000
--- a/pkg/manager/model_registry.go
+++ /dev/null
@@ -1,79 +0,0 @@
-package manager
-
-import (
-    "fmt"
-    "llamactl/pkg/instance"
-    "sync"
-)
-
-// modelRegistry maintains a global mapping of model names to instance names
-// for llama.cpp instances. Model names must be globally unique across all instances.
-type modelRegistry struct {
-    mu              sync.RWMutex
-    modelToInstance map[string]string   // model name → instance name
-    instanceModels  map[string][]string // instance name → model names
-}
-
-// newModelRegistry creates a new model registry
-func newModelRegistry() *modelRegistry {
-    return &modelRegistry{
-        modelToInstance: make(map[string]string),
-        instanceModels:  make(map[string][]string),
-    }
-}
-
-// registerModels registers models from an instance to the registry.
-// Skips models that conflict with other instances and returns a list of conflicts.
-func (mr *modelRegistry) registerModels(instanceName string, models []instance.Model) []string {
-    mr.mu.Lock()
-    defer mr.mu.Unlock()
-
-    // Unregister any existing models for this instance first
-    mr.removeModels(instanceName)
-
-    // Register models, skipping conflicts
-    var modelNames []string
-    var conflicts []string
-
-    for _, model := range models {
-        // Check if this model conflicts with another instance
-        if existingInstance, exists := mr.modelToInstance[model.ID]; exists && existingInstance != instanceName {
-            conflicts = append(conflicts, fmt.Sprintf("%s (already in %s)", model.ID, existingInstance))
-            continue // Skip this model
-        }
-
-        // Register the model
-        mr.modelToInstance[model.ID] = instanceName
-        modelNames = append(modelNames, model.ID)
-    }
-
-    mr.instanceModels[instanceName] = modelNames
-
-    return conflicts
-}
-
-// unregisterModels removes all models for an instance
-func (mr *modelRegistry) unregisterModels(instanceName string) {
-    mr.mu.Lock()
-    defer mr.mu.Unlock()
-    mr.removeModels(instanceName)
-}
-
-// removeModels removes all models for an instance (caller must hold lock)
-func (mr *modelRegistry) removeModels(instanceName string) {
-    if models, exists := mr.instanceModels[instanceName]; exists {
-        for _, modelName := range models {
-            delete(mr.modelToInstance, modelName)
-        }
-        delete(mr.instanceModels, instanceName)
-    }
-}
-
-// getModelInstance returns the instance name that hosts the given model
-func (mr *modelRegistry) getModelInstance(modelName string) (string, bool) {
-    mr.mu.RLock()
-    defer mr.mu.RUnlock()
-
-    instanceName, exists := mr.modelToInstance[modelName]
-    return instanceName, exists
-}
diff --git a/pkg/manager/operations.go b/pkg/manager/operations.go
index 6ff50f1..54ee5d4 100644
--- a/pkg/manager/operations.go
+++ b/pkg/manager/operations.go
@@ -337,9 +337,6 @@ func (im *instanceManager) DeleteInstance(name string) error {
     // Release port (use ReleaseByInstance for proper cleanup)
     im.ports.releaseByInstance(name)

-    // Unregister models when instance is deleted
-    im.onInstanceStopped(name)
-
     // Remove from registry
     if err := im.registry.remove(name); err != nil {
         return fmt.Errorf("failed to remove instance from registry: %w", err)
@@ -386,7 +383,7 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
     }

     // Check max running instances limit for local instances only
-    if im.IsMaxRunningInstancesReached() {
+    if im.AtMaxRunning() {
         return nil, MaxRunningInstancesError(fmt.Errorf("maximum number of running instances (%d) reached", im.globalConfig.Instances.MaxRunningInstances))
     }

@@ -399,13 +396,10 @@ func (im *instanceManager) StartInstance(name string) (*instance.Instance, error
         log.Printf("Warning: failed to persist instance %s: %v", name, err)
     }

-    // Discover and register models for llama.cpp instances
-    go im.onInstanceStarted(name)
-
     return inst, nil
 }

-func (im *instanceManager) IsMaxRunningInstancesReached() bool {
+func (im *instanceManager) AtMaxRunning() bool {
     if im.globalConfig.Instances.MaxRunningInstances == -1 {
         return false
     }
@@ -461,9 +455,6 @@ func (im *instanceManager) StopInstance(name string) (*instance.Instance, error)
         log.Printf("Warning: failed to persist instance %s: %v", name, err)
     }

-    // Unregister models when instance stops
-    im.onInstanceStopped(name)
-
     return inst, nil
 }

@@ -544,73 +535,3 @@ func (im *instanceManager) setPortInOptions(options *instance.Options, port int)
 func (im *instanceManager) EvictLRUInstance() error {
     return im.lifecycle.evictLRU()
 }
-
-// ResolveInstance resolves a model name to an instance name.
-// Precedence: instance name > model registry
-func (im *instanceManager) ResolveInstance(modelName string) (string, error) {
-    // Check if it's an instance name first
-    if _, err := im.GetInstance(modelName); err == nil {
-        return modelName, nil
-    }
-
-    // Check if it's a model name in the registry
-    if instanceName, exists := im.models.getModelInstance(modelName); exists {
-        return instanceName, nil
-    }
-
-    return "", fmt.Errorf("model or instance '%s' not found", modelName)
-}
-
-// RefreshModelRegistry refreshes the model registry for the given instance
-func (im *instanceManager) RefreshModelRegistry(inst *instance.Instance) error {
-    if !inst.IsRunning() {
-        return fmt.Errorf("instance %s is not running", inst.Name)
-    }
-
-    // Fetch models from instance and register them
-    models, err := inst.GetModels()
-    if err != nil {
-        return fmt.Errorf("failed to fetch models: %w", err)
-    }
-
-    // Register models, skipping conflicts
-    conflicts := im.models.registerModels(inst.Name, models)
-    if len(conflicts) > 0 {
-        log.Printf("Warning: Model name conflicts for instance %s (skipped): %v", inst.Name, conflicts)
-    }
-
-    // Check if instance name shadows any model names
-    if otherInstance, exists := im.models.getModelInstance(inst.Name); exists && otherInstance != inst.Name {
-        log.Printf("Warning: Instance name '%s' shadows model name from instance '%s'", inst.Name, otherInstance)
-    }
-
-    return nil
-}
-
-// onInstanceStarted is called when an instance successfully starts and becomes healthy
-func (im *instanceManager) onInstanceStarted(name string) {
-    inst, err := im.GetInstance(name)
-    if err != nil {
-        log.Printf("Failed to get instance %s for model discovery: %v", name, err)
-        return
-    }
-
-    // Only discover models for llama.cpp instances
-    if !inst.IsLlamaCpp() {
-        return
-    }
-
-    if err := inst.WaitForHealthy(30); err != nil {
-        log.Printf("Instance %s not healthy, skipping model discovery: %v", name, err)
-        return
-    }
-
-    if err := im.RefreshModelRegistry(inst); err != nil {
-        log.Printf("Failed to discover models for instance %s: %v", name, err)
-    }
-}
-
-// onInstanceStopped is called when an instance stops or is deleted
-func (im *instanceManager) onInstanceStopped(name string) {
-    im.models.unregisterModels(name)
-}
diff --git a/pkg/server/handlers.go b/pkg/server/handlers.go
index 3e232ee..ce72173 100644
--- a/pkg/server/handlers.go
+++ b/pkg/server/handlers.go
@@ -96,7 +96,7 @@ func (h *Handler) ensureInstanceRunning(inst *instance.Instance) error {
         return fmt.Errorf("instance is not running and on-demand start is not enabled")
     }

-    if h.InstanceManager.IsMaxRunningInstancesReached() {
+    if h.InstanceManager.AtMaxRunning() {
         if h.cfg.Instances.EnableLRUEviction {
             err := h.InstanceManager.EvictLRUInstance()
             if err != nil {
diff --git a/pkg/server/handlers_backends.go b/pkg/server/handlers_backends.go
index f912a8a..2cc9304 100644
--- a/pkg/server/handlers_backends.go
+++ b/pkg/server/handlers_backends.go
@@ -5,7 +5,6 @@ import (
     "fmt"
     "llamactl/pkg/backends"
     "llamactl/pkg/instance"
-    "log"
     "net/http"
     "os/exec"
     "strings"
@@ -371,11 +370,6 @@ func (h *Handler) LlamaCppLoadModel() http.HandlerFunc {
         return
     }

-    // Refresh the model registry
-    if err := h.InstanceManager.RefreshModelRegistry(inst); err != nil {
-        log.Printf("Warning: failed to refresh model registry after load: %v", err)
-    }
-
     writeJSON(w, http.StatusOK, map[string]string{
         "status":  "success",
         "message": fmt.Sprintf("Model %s loaded successfully", modelName),
@@ -410,11 +404,6 @@ func (h *Handler) LlamaCppUnloadModel() http.HandlerFunc {
         return
     }

-    // Refresh the model registry
-    if err := h.InstanceManager.RefreshModelRegistry(inst); err != nil {
-        log.Printf("Warning: failed to refresh model registry after unload: %v", err)
-    }
-
     writeJSON(w, http.StatusOK, map[string]string{
         "status":  "success",
         "message": fmt.Sprintf("Model %s unloaded successfully", modelName),
diff --git a/test_llm.py b/test_llm.py
new file mode 100644
index 0000000..944d3e7
--- /dev/null
+++ b/test_llm.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""
+Simple Python script to interact with a local LLM server's OpenAI-compatible API
+"""
+
+import requests
+
+# Local LLM server configuration
+LLM_SERVER_URL = "http://localhost:8080/v1/chat/completions"
+MODEL_NAME = "proxy-test"  # Default model name; change this to match your setup
+
+def send_message(message, model=MODEL_NAME, temperature=0.7, max_tokens=1000):
+    """
+    Send a message to the local LLM server API
+
+    Args:
+        message (str): The message to send
+        model (str): Model name (depends on your LLM server setup)
+        temperature (float): Controls randomness (0.0 to 1.0)
+        max_tokens (int): Maximum tokens in the response
+
+    Returns:
+        str: The AI response
+
+    Raises:
+        requests.RequestException: If the request fails or the server returns an error
+    """
+
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer test-inf"
+    }
+
+    data = {
+        "model": model,
+        "messages": [
+            {
+                "role": "user",
+                "content": message
+            }
+        ],
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        "stream": False
+    }
+
+    response = requests.post(LLM_SERVER_URL, headers=headers, json=data, timeout=60)
+    response.raise_for_status()
+
+    result = response.json()
+    return result["choices"][0]["message"]["content"]
+
+def main():
+    """Run in interactive mode for continuous conversation"""
+    print("Local LLM Chat Client")
+    print("-" * 40)
+
+    while True:
+        try:
+            user_input = input("\nYou: ").strip()
+
+            if not user_input:
+                continue
+
+            print("AI: ", end="", flush=True)
+            response = send_message(user_input)
+            print(response)
+
+        except requests.RequestException as e:
+            print(f"\nRequest failed: {e}")
+        except KeyboardInterrupt:
+            print("\nGoodbye!")
+            break
+        except EOFError:
+            print("\nGoodbye!")
+            break
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
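
Note: with ResolveInstance removed, the "model" field of an OpenAI-compatible request is presumably matched against instance names only (e.g. "proxy-test" above), no longer against model names discovered from llama.cpp. A minimal one-shot smoke test for the new test_llm.py, assuming an instance named "proxy-test" is running behind llamactl on localhost:8080 and the server accepts the "test-inf" bearer token (both values come from the script's defaults; this snippet is a sketch and not part of the diff):

    # smoke_test.py (hypothetical): one-shot check against the endpoint
    from test_llm import send_message

    reply = send_message("Reply with the single word: pong", max_tokens=16)
    print(reply)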