Fix update and stop race conditions

This commit is contained in:
2025-07-23 20:27:55 +02:00
parent cc7ecc5aee
commit ac57bd770f
2 changed files with 73 additions and 17 deletions

View File

@@ -52,6 +52,7 @@ type Instance struct {
// Restart control // Restart control
restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts restartCancel context.CancelFunc `json:"-"` // Cancel function for pending restarts
monitorDone chan struct{} `json:"-"` // Channel to signal monitor goroutine completion
} }
// validateAndCopyOptions validates and creates a deep copy of the provided options // validateAndCopyOptions validates and creates a deep copy of the provided options
@@ -174,8 +175,9 @@ func (i *Instance) SetOptions(options *CreateInstanceOptions) {
return return
} }
// Validate and copy options without applying defaults // Validate and copy options and apply defaults
optionsCopy := validateAndCopyOptions(i.Name, options) optionsCopy := validateAndCopyOptions(i.Name, options)
applyDefaultOptions(optionsCopy, i.globalSettings)
i.options = optionsCopy i.options = optionsCopy
// Clear the proxy so it gets recreated with new options // Clear the proxy so it gets recreated with new options
@@ -216,6 +218,12 @@ func (i *Instance) Start() error {
return fmt.Errorf("instance %s has no options set", i.Name) return fmt.Errorf("instance %s has no options set", i.Name)
} }
// Reset restart counter when manually starting (not during auto-restart)
// We can detect auto-restart by checking if restartCancel is set
if i.restartCancel == nil {
i.restarts = 0
}
// Create log files // Create log files
if err := i.createLogFile(); err != nil { if err := i.createLogFile(); err != nil {
return fmt.Errorf("failed to create log files: %w", err) return fmt.Errorf("failed to create log files: %w", err)
@@ -252,6 +260,9 @@ func (i *Instance) Start() error {
i.Running = true i.Running = true
// Create channel for monitor completion signaling
i.monitorDone = make(chan struct{})
go i.readOutput(i.stdout, i.logFile) go i.readOutput(i.stdout, i.logFile)
go i.readOutput(i.stderr, i.logFile) go i.readOutput(i.stderr, i.logFile)
@@ -263,7 +274,6 @@ func (i *Instance) Start() error {
// Stop terminates the subprocess // Stop terminates the subprocess
func (i *Instance) Stop() error { func (i *Instance) Stop() error {
i.mu.Lock() i.mu.Lock()
defer i.mu.Unlock()
if !i.Running { if !i.Running {
// Even if not running, cancel any pending restart // Even if not running, cancel any pending restart
@@ -272,6 +282,7 @@ func (i *Instance) Stop() error {
i.restartCancel = nil i.restartCancel = nil
log.Printf("Cancelled pending restart for instance %s", i.Name) log.Printf("Cancelled pending restart for instance %s", i.Name)
} }
i.mu.Unlock()
return fmt.Errorf("instance %s is not running", i.Name) return fmt.Errorf("instance %s is not running", i.Name)
} }
@@ -281,29 +292,47 @@ func (i *Instance) Stop() error {
i.restartCancel = nil i.restartCancel = nil
} }
// Cancel the context to signal termination // Set running to false first to signal intentional stop
i.cancel() i.Running = false
// Clean up the proxy // Clean up the proxy
i.proxy = nil i.proxy = nil
// Wait for process to exit (with timeout) // Get the monitor done channel before releasing the lock
done := make(chan error, 1) monitorDone := i.monitorDone
go func() {
done <- i.cmd.Wait()
}()
select { i.mu.Unlock()
case <-done:
// Process exited normally // First, try to gracefully stop with SIGINT
case <-time.After(5 * time.Second): if i.cmd.Process != nil {
// Force kill if it doesn't exit within 5 seconds if err := i.cmd.Process.Signal(syscall.SIGINT); err != nil {
if i.cmd.Process != nil { log.Printf("Failed to send SIGINT to instance %s: %v", i.Name, err)
i.cmd.Process.Kill()
} }
} }
i.Running = false // Don't call cmd.Wait() here - let the monitor goroutine handle it
// Instead, wait for the monitor to complete or timeout
select {
case <-monitorDone:
// Process exited normally
case <-time.After(30 * time.Second):
// Force kill if it doesn't exit within 30 seconds
if i.cmd.Process != nil {
killErr := i.cmd.Process.Kill()
if killErr != nil {
log.Printf("Failed to force kill instance %s: %v", i.Name, killErr)
}
log.Printf("Instance %s did not stop in time, force killed", i.Name)
// Wait a bit more for the monitor to finish after force kill
select {
case <-monitorDone:
// Monitor completed after force kill
case <-time.After(2 * time.Second):
log.Printf("Warning: Monitor goroutine did not complete after force kill for instance %s", i.Name)
}
}
}
i.closeLogFile() // Close log files after stopping i.closeLogFile() // Close log files after stopping
@@ -370,10 +399,17 @@ func (i *Instance) readOutput(reader io.ReadCloser, logFile *os.File) {
} }
func (i *Instance) monitorProcess() { func (i *Instance) monitorProcess() {
defer func() {
if i.monitorDone != nil {
close(i.monitorDone)
}
}()
err := i.cmd.Wait() err := i.cmd.Wait()
i.mu.Lock() i.mu.Lock()
// Check if the instance was intentionally stopped
if !i.Running { if !i.Running {
i.mu.Unlock() i.mu.Unlock()
return return

View File

@@ -99,6 +99,7 @@ func (im *instanceManager) GetInstance(name string) (*Instance, error) {
} }
// UpdateInstance updates the options of an existing instance and returns it. // UpdateInstance updates the options of an existing instance and returns it.
// If the instance is running, it will be restarted to apply the new options.
func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) { func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOptions) (*Instance, error) {
im.mu.RLock() im.mu.RLock()
instance, exists := im.instances[name] instance, exists := im.instances[name]
@@ -117,7 +118,26 @@ func (im *instanceManager) UpdateInstance(name string, options *CreateInstanceOp
return nil, err return nil, err
} }
// Check if instance is running before updating options
wasRunning := instance.Running
// If the instance is running, stop it first
if wasRunning {
if err := instance.Stop(); err != nil {
return nil, fmt.Errorf("failed to stop instance %s for update: %w", name, err)
}
}
// Now update the options while the instance is stopped
instance.SetOptions(options) instance.SetOptions(options)
// If it was running before, start it again with the new options
if wasRunning {
if err := instance.Start(); err != nil {
return nil, fmt.Errorf("failed to start instance %s after update: %w", name, err)
}
}
return instance, nil return instance, nil
} }