diff --git a/docs/installation.md b/docs/installation.md
index 9442877..1e4f4ae 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -42,15 +42,10 @@ Note: MLX backend is only available on macOS with Apple Silicon (M1, M2, M3, etc
 vLLM provides high-throughput distributed serving for LLMs. Install vLLM:
 
 ```bash
-# Install via pip (requires Python 3.8+, GPU required)
-pip install vllm
-
-# Or in a virtual environment (recommended)
+# Install in a virtual environment
 python -m venv vllm-env
 source vllm-env/bin/activate
 pip install vllm
-
-# For production deployments, consider container-based installation
 ```
 
 ## Installation Methods
diff --git a/docs/managing-instances.md b/docs/managing-instances.md
index c298b15..4ac9477 100644
--- a/docs/managing-instances.md
+++ b/docs/managing-instances.md
@@ -78,7 +78,8 @@ curl -X POST http://localhost:8080/api/instances/my-llama-instance \
       "threads": 8,
       "ctx_size": 4096,
       "gpu_layers": 32
-    }
+    },
+    "nodes": ["main"]
   }'
 
 # Create MLX instance (macOS only)
@@ -93,7 +94,8 @@ curl -X POST http://localhost:8080/api/instances/my-mlx-instance \
       "max_tokens": 2048
     },
     "auto_restart": true,
-    "max_restarts": 3
+    "max_restarts": 3,
+    "nodes": ["main"]
   }'
 
 # Create vLLM instance
@@ -112,7 +114,8 @@ curl -X POST http://localhost:8080/api/instances/my-vllm-instance \
       "CUDA_VISIBLE_DEVICES": "0,1",
       "NCCL_DEBUG": "INFO",
       "PYTHONPATH": "/custom/path"
-    }
+    },
+    "nodes": ["main"]
   }'
 
 # Create llama.cpp instance with HuggingFace model
@@ -124,7 +127,8 @@ curl -X POST http://localhost:8080/api/instances/gemma-3-27b \
       "hf_repo": "unsloth/gemma-3-27b-it-GGUF",
       "hf_file": "gemma-3-27b-it-GGUF.gguf",
       "gpu_layers": 32
-    }
+    },
+    "nodes": ["main"]
   }'
 
 # Create instance on specific remote node
@@ -138,6 +142,18 @@ curl -X POST http://localhost:8080/api/instances/remote-llama \
     },
     "nodes": ["worker1"]
   }'
+
+# Create instance on multiple nodes for high availability
+curl -X POST http://localhost:8080/api/instances/multi-node-llama \
+  -H "Content-Type: application/json" \
+  -d '{
+    "backend_type": "llama_cpp",
+    "backend_options": {
+      "model": "/models/llama-7b.gguf",
+      "gpu_layers": 32
+    },
+    "nodes": ["worker1", "worker2", "worker3"]
+  }'
 ```
 
 ## Start Instance
diff --git a/docs/quick-start.md b/docs/quick-start.md
index 3fc562e..15311c0 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -28,13 +28,17 @@ You should see the Llamactl web interface.
 1. Click the "Add Instance" button
 
 2. Fill in the instance configuration:
-   - **Name**: Give your instance a descriptive name
-   - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
-   - **Model**: Model path or huggingface repo
-   - **Additional Options**: Backend-specific parameters
+   - **Name**: Give your instance a descriptive name
+   - **Node**: Select which node to deploy the instance to (defaults to "main" for single-node setups)
+   - **Backend Type**: Choose from llama.cpp, MLX, or vLLM
+   - **Model**: Model path or huggingface repo
+   - **Additional Options**: Backend-specific parameters
 
-!!! tip "Auto-Assignment"
-    Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
+   !!! tip "Auto-Assignment"
+       Llamactl automatically assigns ports from the configured port range (default: 8000-9000) and generates API keys if authentication is enabled. You typically don't need to manually specify these values.
+
+   !!! note "Remote Node Deployment"
+       If you have configured remote nodes in your configuration file, you can select which node to deploy the instance to. This allows you to distribute instances across multiple machines. See the [Configuration](configuration.md#remote-node-configuration) guide for details on setting up remote nodes.
 
 3. Click "Create Instance"
 
@@ -61,7 +65,8 @@ Here are basic example configurations for each backend:
     "threads": 4,
     "ctx_size": 2048,
     "gpu_layers": 32
-  }
+  },
+  "nodes": ["main"]
 }
 ```
 
@@ -74,7 +79,8 @@ Here are basic example configurations for each backend:
     "model": "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
     "temp": 0.7,
     "max_tokens": 2048
-  }
+  },
+  "nodes": ["main"]
 }
 ```
 
@@ -87,7 +93,21 @@ Here are basic example configurations for each backend:
     "model": "microsoft/DialoGPT-medium",
     "tensor_parallel_size": 2,
     "gpu_memory_utilization": 0.9
-  }
+  },
+  "nodes": ["main"]
+}
+```
+
+**Multi-node deployment example:**
+```json
+{
+  "name": "distributed-model",
+  "backend_type": "llama_cpp",
+  "backend_options": {
+    "model": "/path/to/model.gguf",
+    "gpu_layers": 32
+  },
+  "nodes": ["worker1", "worker2"]
 }
 ```
 