diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..8df96b6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,65 @@ +name: Build and Deploy Documentation + +on: + push: + branches: [ main ] + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'docs-requirements.txt' + - '.github/workflows/docs.yml' + pull_request: + branches: [ main ] + paths: + - 'docs/**' + - 'mkdocs.yml' + - 'docs-requirements.txt' + +permissions: + contents: read + pages: write + id-token: write + +concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Needed for git-revision-date-localized plugin + + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install -r docs-requirements.txt + + - name: Build documentation + run: | + mkdocs build --strict + + - name: Upload documentation artifact + if: github.ref == 'refs/heads/main' + uses: actions/upload-pages-artifact@v3 + with: + path: ./site + + deploy: + if: github.ref == 'refs/heads/main' + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@v4 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 78c8613..1f4a50e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -129,6 +129,50 @@ Use this format for pull request titles: - Use meaningful component and variable names - Prefer functional components over class components +## Documentation Development + +This project uses MkDocs for documentation. When working on documentation: + +### Setup Documentation Environment + +```bash +# Install documentation dependencies +pip install -r docs-requirements.txt +``` + +### Development Workflow + +```bash +# Serve documentation locally for development +mkdocs serve +``` +The documentation will be available at http://localhost:8000 + +```bash +# Build static documentation site +mkdocs build +``` +The built site will be in the `site/` directory. + +### Documentation Structure + +- `docs/` - Documentation content (Markdown files) +- `mkdocs.yml` - MkDocs configuration +- `docs-requirements.txt` - Python dependencies for documentation + +### Adding New Documentation + +When adding new documentation: + +1. Create Markdown files in the appropriate `docs/` subdirectory +2. Update the navigation in `mkdocs.yml` +3. Test locally with `mkdocs serve` +4. Submit a pull request + +### Documentation Deployment + +Documentation is automatically built and deployed to GitHub Pages when changes are pushed to the main branch. + ## Getting Help - Check existing [issues](https://github.com/lordmathis/llamactl/issues) diff --git a/docs-requirements.txt b/docs-requirements.txt new file mode 100644 index 0000000..256e652 --- /dev/null +++ b/docs-requirements.txt @@ -0,0 +1,4 @@ +mkdocs-material==9.5.3 +mkdocs==1.5.3 +pymdown-extensions==10.7 +mkdocs-git-revision-date-localized-plugin==1.2.4 diff --git a/docs/advanced/backends.md b/docs/advanced/backends.md new file mode 100644 index 0000000..e2542ea --- /dev/null +++ b/docs/advanced/backends.md @@ -0,0 +1,316 @@ +# Backends + +LlamaCtl supports multiple backends for running large language models. This guide covers the available backends and their configuration. 
+ +## Llama.cpp Backend + +The primary backend for LlamaCtl, providing robust support for GGUF models. + +### Features + +- **GGUF Support**: Native support for GGUF model format +- **GPU Acceleration**: CUDA, OpenCL, and Metal support +- **Memory Optimization**: Efficient memory usage and mapping +- **Multi-threading**: Configurable CPU thread utilization +- **Quantization**: Support for various quantization levels + +### Configuration + +```yaml +backends: + llamacpp: + binary_path: "/usr/local/bin/llama-server" + default_options: + threads: 4 + context_size: 2048 + batch_size: 512 + gpu: + enabled: true + layers: 35 +``` + +### Supported Options + +| Option | Description | Default | +|--------|-------------|---------| +| `threads` | Number of CPU threads | 4 | +| `context_size` | Context window size | 2048 | +| `batch_size` | Batch size for processing | 512 | +| `gpu_layers` | Layers to offload to GPU | 0 | +| `memory_lock` | Lock model in memory | false | +| `no_mmap` | Disable memory mapping | false | +| `rope_freq_base` | RoPE frequency base | 10000 | +| `rope_freq_scale` | RoPE frequency scale | 1.0 | + +### GPU Acceleration + +#### CUDA Setup + +```bash +# Install CUDA toolkit +sudo apt update +sudo apt install nvidia-cuda-toolkit + +# Verify CUDA installation +nvcc --version +nvidia-smi +``` + +#### Configuration for GPU + +```json +{ + "name": "gpu-accelerated", + "model_path": "/models/llama-2-13b.gguf", + "port": 8081, + "options": { + "gpu_layers": 35, + "threads": 2, + "context_size": 4096 + } +} +``` + +### Performance Tuning + +#### Memory Optimization + +```yaml +# For limited memory systems +options: + context_size: 1024 + batch_size: 256 + no_mmap: true + memory_lock: false + +# For high-memory systems +options: + context_size: 8192 + batch_size: 1024 + memory_lock: true + no_mmap: false +``` + +#### CPU Optimization + +```yaml +# Match thread count to CPU cores +# For 8-core CPU: +options: + threads: 6 # Leave 2 cores for system + +# For high-performance CPUs: +options: + threads: 16 + batch_size: 1024 +``` + +## Future Backends + +LlamaCtl is designed to support multiple backends. 
Planned additions: + +### vLLM Backend + +High-performance inference engine optimized for serving: + +- **Features**: Fast inference, batching, streaming +- **Models**: Supports various model formats +- **Scaling**: Horizontal scaling support + +### TensorRT-LLM Backend + +NVIDIA's optimized inference engine: + +- **Features**: Maximum GPU performance +- **Models**: Optimized for NVIDIA GPUs +- **Deployment**: Production-ready inference + +### Ollama Backend + +Integration with Ollama for easy model management: + +- **Features**: Simplified model downloading +- **Models**: Large model library +- **Integration**: Seamless model switching + +## Backend Selection + +### Automatic Detection + +LlamaCtl can automatically detect the best backend: + +```yaml +backends: + auto_detect: true + preference_order: + - "llamacpp" + - "vllm" + - "tensorrt" +``` + +### Manual Selection + +Force a specific backend for an instance: + +```json +{ + "name": "manual-backend", + "backend": "llamacpp", + "model_path": "/models/model.gguf", + "port": 8081 +} +``` + +## Backend-Specific Features + +### Llama.cpp Features + +#### Model Formats + +- **GGUF**: Primary format, best compatibility +- **GGML**: Legacy format (limited support) + +#### Quantization Levels + +- `Q2_K`: Smallest size, lower quality +- `Q4_K_M`: Balanced size and quality +- `Q5_K_M`: Higher quality, larger size +- `Q6_K`: Near-original quality +- `Q8_0`: Minimal loss, largest size + +#### Advanced Options + +```yaml +advanced: + rope_scaling: + type: "linear" + factor: 2.0 + attention: + flash_attention: true + grouped_query: true +``` + +## Monitoring Backend Performance + +### Metrics Collection + +Monitor backend-specific metrics: + +```bash +# Get backend statistics +curl http://localhost:8080/api/instances/my-instance/backend/stats +``` + +**Response:** +```json +{ + "backend": "llamacpp", + "version": "b1234", + "metrics": { + "tokens_per_second": 15.2, + "memory_usage": 4294967296, + "gpu_utilization": 85.5, + "context_usage": 75.0 + } +} +``` + +### Performance Optimization + +#### Benchmark Different Configurations + +```bash +# Test various thread counts +for threads in 2 4 8 16; do + echo "Testing $threads threads" + curl -X PUT http://localhost:8080/api/instances/benchmark \ + -d "{\"options\": {\"threads\": $threads}}" + # Run performance test +done +``` + +#### Memory Usage Optimization + +```bash +# Monitor memory usage +watch -n 1 'curl -s http://localhost:8080/api/instances/my-instance/stats | jq .memory_usage' +``` + +## Troubleshooting Backends + +### Common Llama.cpp Issues + +**Model won't load:** +```bash +# Check model file +file /path/to/model.gguf + +# Verify format +llama-server --model /path/to/model.gguf --dry-run +``` + +**GPU not detected:** +```bash +# Check CUDA installation +nvidia-smi + +# Verify llama.cpp GPU support +llama-server --help | grep -i gpu +``` + +**Performance issues:** +```bash +# Check system resources +htop +nvidia-smi + +# Verify configuration +curl http://localhost:8080/api/instances/my-instance/config +``` + +## Custom Backend Development + +### Backend Interface + +Implement the backend interface for custom backends: + +```go +type Backend interface { + Start(config InstanceConfig) error + Stop(instance *Instance) error + Health(instance *Instance) (*HealthStatus, error) + Stats(instance *Instance) (*Stats, error) +} +``` + +### Registration + +Register your custom backend: + +```go +func init() { + backends.Register("custom", &CustomBackend{}) +} +``` + +## Best Practices + +### 
Production Deployments + +1. **Resource allocation**: Plan for peak usage +2. **Backend selection**: Choose based on requirements +3. **Monitoring**: Set up comprehensive monitoring +4. **Fallback**: Configure backup backends + +### Development + +1. **Rapid iteration**: Use smaller models +2. **Resource monitoring**: Track usage patterns +3. **Configuration testing**: Validate settings +4. **Performance profiling**: Optimize bottlenecks + +## Next Steps + +- Learn about [Monitoring](monitoring.md) backend performance +- Explore [Troubleshooting](troubleshooting.md) guides +- Set up [Production Monitoring](monitoring.md) diff --git a/docs/advanced/monitoring.md b/docs/advanced/monitoring.md new file mode 100644 index 0000000..7c3c76e --- /dev/null +++ b/docs/advanced/monitoring.md @@ -0,0 +1,420 @@ +# Monitoring + +Comprehensive monitoring setup for LlamaCtl in production environments. + +## Overview + +Effective monitoring of LlamaCtl involves tracking: + +- Instance health and performance +- System resource usage +- API response times +- Error rates and alerts + +## Built-in Monitoring + +### Health Checks + +LlamaCtl provides built-in health monitoring: + +```bash +# Check overall system health +curl http://localhost:8080/api/system/health + +# Check specific instance health +curl http://localhost:8080/api/instances/{name}/health +``` + +### Metrics Endpoint + +Access Prometheus-compatible metrics: + +```bash +curl http://localhost:8080/metrics +``` + +**Available Metrics:** +- `llamactl_instances_total`: Total number of instances +- `llamactl_instances_running`: Number of running instances +- `llamactl_instance_memory_bytes`: Instance memory usage +- `llamactl_instance_cpu_percent`: Instance CPU usage +- `llamactl_api_requests_total`: Total API requests +- `llamactl_api_request_duration_seconds`: API response times + +## Prometheus Integration + +### Configuration + +Add LlamaCtl as a Prometheus target: + +```yaml +# prometheus.yml +scrape_configs: + - job_name: 'llamactl' + static_configs: + - targets: ['localhost:8080'] + metrics_path: '/metrics' + scrape_interval: 15s +``` + +### Custom Metrics + +Enable additional metrics in LlamaCtl: + +```yaml +# config.yaml +monitoring: + enabled: true + prometheus: + enabled: true + path: "/metrics" + metrics: + - instance_stats + - api_performance + - system_resources +``` + +## Grafana Dashboards + +### LlamaCtl Dashboard + +Import the official Grafana dashboard: + +1. Download dashboard JSON from releases +2. Import into Grafana +3. 
Configure Prometheus data source + +### Key Panels + +**Instance Overview:** +- Instance count and status +- Resource usage per instance +- Health status indicators + +**Performance Metrics:** +- API response times +- Tokens per second +- Memory usage trends + +**System Resources:** +- CPU and memory utilization +- Disk I/O and network usage +- GPU utilization (if applicable) + +### Custom Queries + +**Instance Uptime:** +```promql +(time() - llamactl_instance_start_time_seconds) / 3600 +``` + +**Memory Usage Percentage:** +```promql +(llamactl_instance_memory_bytes / llamactl_system_memory_total_bytes) * 100 +``` + +**API Error Rate:** +```promql +rate(llamactl_api_requests_total{status=~"4.."}[5m]) / rate(llamactl_api_requests_total[5m]) * 100 +``` + +## Alerting + +### Prometheus Alerts + +Configure alerts for critical conditions: + +```yaml +# alerts.yml +groups: + - name: llamactl + rules: + - alert: InstanceDown + expr: llamactl_instance_up == 0 + for: 1m + labels: + severity: critical + annotations: + summary: "LlamaCtl instance {{ $labels.instance_name }} is down" + + - alert: HighMemoryUsage + expr: llamactl_instance_memory_percent > 90 + for: 5m + labels: + severity: warning + annotations: + summary: "High memory usage on {{ $labels.instance_name }}" + + - alert: APIHighLatency + expr: histogram_quantile(0.95, rate(llamactl_api_request_duration_seconds_bucket[5m])) > 2 + for: 2m + labels: + severity: warning + annotations: + summary: "High API latency detected" +``` + +### Notification Channels + +Configure alert notifications: + +**Slack Integration:** +```yaml +# alertmanager.yml +route: + group_by: ['alertname'] + receiver: 'slack' + +receivers: + - name: 'slack' + slack_configs: + - api_url: 'https://hooks.slack.com/services/...' + channel: '#alerts' + title: 'LlamaCtl Alert' + text: '{{ range .Alerts }}{{ .Annotations.summary }}{{ end }}' +``` + +## Log Management + +### Centralized Logging + +Configure log aggregation: + +```yaml +# config.yaml +logging: + level: "info" + output: "json" + destinations: + - type: "file" + path: "/var/log/llamactl/app.log" + - type: "syslog" + facility: "local0" + - type: "elasticsearch" + url: "http://elasticsearch:9200" +``` + +### Log Analysis + +Use ELK stack for log analysis: + +**Elasticsearch Index Template:** +```json +{ + "index_patterns": ["llamactl-*"], + "mappings": { + "properties": { + "timestamp": {"type": "date"}, + "level": {"type": "keyword"}, + "message": {"type": "text"}, + "instance": {"type": "keyword"}, + "component": {"type": "keyword"} + } + } +} +``` + +**Kibana Visualizations:** +- Log volume over time +- Error rate by instance +- Performance trends +- Resource usage patterns + +## Application Performance Monitoring + +### OpenTelemetry Integration + +Enable distributed tracing: + +```yaml +# config.yaml +telemetry: + enabled: true + otlp: + endpoint: "http://jaeger:14268/api/traces" + sampling_rate: 0.1 +``` + +### Custom Spans + +Add custom tracing to track operations: + +```go +ctx, span := tracer.Start(ctx, "instance.start") +defer span.End() + +// Track instance startup time +span.SetAttributes( + attribute.String("instance.name", name), + attribute.String("model.path", modelPath), +) +``` + +## Health Check Configuration + +### Readiness Probes + +Configure Kubernetes readiness probes: + +```yaml +readinessProbe: + httpGet: + path: /api/health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 +``` + +### Liveness Probes + +Configure liveness probes: + +```yaml +livenessProbe: + httpGet: + path: 
/api/health/live + port: 8080 + initialDelaySeconds: 60 + periodSeconds: 30 +``` + +### Custom Health Checks + +Implement custom health checks: + +```go +func (h *HealthHandler) CustomCheck(ctx context.Context) error { + // Check database connectivity + if err := h.db.Ping(); err != nil { + return fmt.Errorf("database unreachable: %w", err) + } + + // Check instance responsiveness + for _, instance := range h.instances { + if !instance.IsHealthy() { + return fmt.Errorf("instance %s unhealthy", instance.Name) + } + } + + return nil +} +``` + +## Performance Profiling + +### pprof Integration + +Enable Go profiling: + +```yaml +# config.yaml +debug: + pprof_enabled: true + pprof_port: 6060 +``` + +Access profiling endpoints: +```bash +# CPU profile +go tool pprof http://localhost:6060/debug/pprof/profile + +# Memory profile +go tool pprof http://localhost:6060/debug/pprof/heap + +# Goroutine profile +go tool pprof http://localhost:6060/debug/pprof/goroutine +``` + +### Continuous Profiling + +Set up continuous profiling with Pyroscope: + +```yaml +# config.yaml +profiling: + enabled: true + pyroscope: + server_address: "http://pyroscope:4040" + application_name: "llamactl" +``` + +## Security Monitoring + +### Audit Logging + +Enable security audit logs: + +```yaml +# config.yaml +audit: + enabled: true + log_file: "/var/log/llamactl/audit.log" + events: + - "auth.login" + - "auth.logout" + - "instance.create" + - "instance.delete" + - "config.update" +``` + +### Rate Limiting Monitoring + +Track rate limiting metrics: + +```bash +# Monitor rate limit hits +curl http://localhost:8080/metrics | grep rate_limit +``` + +## Troubleshooting Monitoring + +### Common Issues + +**Metrics not appearing:** +1. Check Prometheus configuration +2. Verify network connectivity +3. Review LlamaCtl logs for errors + +**High memory usage:** +1. Check for memory leaks in profiles +2. Monitor garbage collection metrics +3. Review instance configurations + +**Alert fatigue:** +1. Tune alert thresholds +2. Implement alert severity levels +3. Use alert routing and suppression + +### Debug Tools + +**Monitoring health:** +```bash +# Check monitoring endpoints +curl -v http://localhost:8080/metrics +curl -v http://localhost:8080/api/health + +# Review logs +tail -f /var/log/llamactl/app.log +``` + +## Best Practices + +### Production Monitoring + +1. **Comprehensive coverage**: Monitor all critical components +2. **Appropriate alerting**: Balance sensitivity and noise +3. **Regular review**: Analyze trends and patterns +4. **Documentation**: Maintain runbooks for alerts + +### Performance Optimization + +1. **Baseline establishment**: Know normal operating parameters +2. **Trend analysis**: Identify performance degradation early +3. **Capacity planning**: Monitor resource growth trends +4. **Optimization cycles**: Regular performance tuning + +## Next Steps + +- Set up [Troubleshooting](troubleshooting.md) procedures +- Learn about [Backend optimization](backends.md) +- Configure [Production deployment](../development/building.md) diff --git a/docs/advanced/troubleshooting.md b/docs/advanced/troubleshooting.md new file mode 100644 index 0000000..58b85a7 --- /dev/null +++ b/docs/advanced/troubleshooting.md @@ -0,0 +1,560 @@ +# Troubleshooting + +Common issues and solutions for LlamaCtl deployment and operation. + +## Installation Issues + +### Binary Not Found + +**Problem:** `llamactl: command not found` + +**Solutions:** +1. Verify the binary is in your PATH: + ```bash + echo $PATH + which llamactl + ``` + +2. 
Add to PATH or use full path:
+   ```bash
+   export PATH=$PATH:/path/to/llamactl
+   # or
+   /full/path/to/llamactl
+   ```
+
+3. Check binary permissions:
+   ```bash
+   chmod +x llamactl
+   ```
+
+### Permission Denied
+
+**Problem:** Permission errors when starting LlamaCtl
+
+**Solutions:**
+1. Check file permissions:
+   ```bash
+   ls -la llamactl
+   chmod +x llamactl
+   ```
+
+2. Verify directory permissions:
+   ```bash
+   # Check models directory
+   ls -la /path/to/models/
+
+   # Check logs directory
+   sudo mkdir -p /var/log/llamactl
+   sudo chown $USER:$USER /var/log/llamactl
+   ```
+
+3. Run with an appropriate user:
+   ```bash
+   # Don't run as root unless necessary
+   sudo -u llamactl ./llamactl
+   ```
+
+## Startup Issues
+
+### Port Already in Use
+
+**Problem:** `bind: address already in use`
+
+**Solutions:**
+1. Find the process using the port:
+   ```bash
+   sudo netstat -tulpn | grep :8080
+   # or
+   sudo lsof -i :8080
+   ```
+
+2. Kill the conflicting process:
+   ```bash
+   sudo kill -9 <PID>
+   ```
+
+3. Use a different port:
+   ```bash
+   llamactl --port 8081
+   ```
+
+### Configuration Errors
+
+**Problem:** Invalid configuration preventing startup
+
+**Solutions:**
+1. Validate the configuration file:
+   ```bash
+   llamactl --config /path/to/config.yaml --validate
+   ```
+
+2. Check YAML syntax:
+   ```bash
+   yamllint config.yaml
+   ```
+
+3. Use a minimal configuration:
+   ```yaml
+   server:
+     host: "localhost"
+     port: 8080
+   ```
+
+## Instance Management Issues
+
+### Model Loading Failures
+
+**Problem:** Instance fails to start with model loading errors
+
+**Diagnostic Steps:**
+1. Check that the model file exists:
+   ```bash
+   ls -la /path/to/model.gguf
+   file /path/to/model.gguf
+   ```
+
+2. Verify the model format:
+   ```bash
+   # Check if it's a valid GGUF file
+   hexdump -C /path/to/model.gguf | head -5
+   ```
+
+3. Test with llama.cpp directly:
+   ```bash
+   llama-server --model /path/to/model.gguf --port 8081
+   ```
+
+**Common Solutions:**
+- **Corrupted model:** Re-download the model file
+- **Wrong format:** Ensure the model is in GGUF format
+- **Insufficient memory:** Reduce the context size or use a smaller model
+- **Path issues:** Use absolute paths and check file permissions
+
+### Memory Issues
+
+**Problem:** Out-of-memory errors, or the system becomes unresponsive
+
+**Diagnostic Steps:**
+1. Check system memory:
+   ```bash
+   free -h
+   cat /proc/meminfo
+   ```
+
+2. Monitor memory usage:
+   ```bash
+   top -p $(pgrep llamactl)
+   ```
+
+3. Check instance memory requirements:
+   ```bash
+   curl http://localhost:8080/api/instances/{name}/stats
+   ```
+
+**Solutions:**
+1. **Reduce context size:**
+   ```json
+   {
+     "options": {
+       "context_size": 1024
+     }
+   }
+   ```
+
+2. **Enable memory mapping:**
+   ```json
+   {
+     "options": {
+       "no_mmap": false
+     }
+   }
+   ```
+
+3. **Use quantized models:**
+   - Try Q4_K_M instead of higher-precision variants
+   - Use smaller model variants (7B instead of 13B)
+
+### GPU Issues
+
+**Problem:** GPU not detected or not being used
+
+**Diagnostic Steps:**
+1. Check GPU availability:
+   ```bash
+   nvidia-smi
+   ```
+
+2. Verify the CUDA installation:
+   ```bash
+   nvcc --version
+   ```
+
+3. Check llama.cpp GPU support:
+   ```bash
+   llama-server --help | grep -i gpu
+   ```
+
+**Solutions:**
+1. **Install CUDA drivers:**
+   ```bash
+   sudo apt update
+   sudo apt install nvidia-driver-470 nvidia-cuda-toolkit
+   ```
+
+2. **Rebuild llama.cpp with GPU support:**
+   ```bash
+   cmake -DLLAMA_CUBLAS=ON ..
+   make
+   ```
+
+3. 
**Configure GPU layers:** + ```json + { + "options": { + "gpu_layers": 35 + } + } + ``` + +## Performance Issues + +### Slow Response Times + +**Problem:** API responses are slow or timeouts occur + +**Diagnostic Steps:** +1. Check API response times: + ```bash + time curl http://localhost:8080/api/instances + ``` + +2. Monitor system resources: + ```bash + htop + iotop + ``` + +3. Check instance logs: + ```bash + curl http://localhost:8080/api/instances/{name}/logs + ``` + +**Solutions:** +1. **Optimize thread count:** + ```json + { + "options": { + "threads": 6 + } + } + ``` + +2. **Adjust batch size:** + ```json + { + "options": { + "batch_size": 512 + } + } + ``` + +3. **Enable GPU acceleration:** + ```json + { + "options": { + "gpu_layers": 35 + } + } + ``` + +### High CPU Usage + +**Problem:** LlamaCtl consuming excessive CPU + +**Diagnostic Steps:** +1. Identify CPU-intensive processes: + ```bash + top -p $(pgrep -f llamactl) + ``` + +2. Check thread allocation: + ```bash + curl http://localhost:8080/api/instances/{name}/config + ``` + +**Solutions:** +1. **Reduce thread count:** + ```json + { + "options": { + "threads": 4 + } + } + ``` + +2. **Limit concurrent instances:** + ```yaml + limits: + max_instances: 3 + ``` + +## Network Issues + +### Connection Refused + +**Problem:** Cannot connect to LlamaCtl web interface + +**Diagnostic Steps:** +1. Check if service is running: + ```bash + ps aux | grep llamactl + ``` + +2. Verify port binding: + ```bash + netstat -tulpn | grep :8080 + ``` + +3. Test local connectivity: + ```bash + curl http://localhost:8080/api/health + ``` + +**Solutions:** +1. **Check firewall settings:** + ```bash + sudo ufw status + sudo ufw allow 8080 + ``` + +2. **Bind to correct interface:** + ```yaml + server: + host: "0.0.0.0" # Instead of "localhost" + port: 8080 + ``` + +### CORS Errors + +**Problem:** Web UI shows CORS errors in browser console + +**Solutions:** +1. **Enable CORS in configuration:** + ```yaml + server: + cors_enabled: true + cors_origins: + - "http://localhost:3000" + - "https://yourdomain.com" + ``` + +2. **Use reverse proxy:** + ```nginx + server { + listen 80; + location / { + proxy_pass http://localhost:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + } + ``` + +## Database Issues + +### Startup Database Errors + +**Problem:** Database connection failures on startup + +**Diagnostic Steps:** +1. Check database service: + ```bash + systemctl status postgresql + # or + systemctl status mysql + ``` + +2. Test database connectivity: + ```bash + psql -h localhost -U llamactl -d llamactl + ``` + +**Solutions:** +1. **Start database service:** + ```bash + sudo systemctl start postgresql + sudo systemctl enable postgresql + ``` + +2. **Create database and user:** + ```sql + CREATE DATABASE llamactl; + CREATE USER llamactl WITH PASSWORD 'password'; + GRANT ALL PRIVILEGES ON DATABASE llamactl TO llamactl; + ``` + +## Web UI Issues + +### Blank Page or Loading Issues + +**Problem:** Web UI doesn't load or shows blank page + +**Diagnostic Steps:** +1. Check browser console for errors (F12) +2. Verify API connectivity: + ```bash + curl http://localhost:8080/api/system/status + ``` + +3. Check static file serving: + ```bash + curl http://localhost:8080/ + ``` + +**Solutions:** +1. **Clear browser cache** +2. **Try different browser** +3. **Check for JavaScript errors in console** +4. 
**Verify API endpoint accessibility** + +### Authentication Issues + +**Problem:** Unable to login or authentication failures + +**Diagnostic Steps:** +1. Check authentication configuration: + ```bash + curl http://localhost:8080/api/config | jq .auth + ``` + +2. Verify user credentials: + ```bash + # Test login endpoint + curl -X POST http://localhost:8080/api/auth/login \ + -H "Content-Type: application/json" \ + -d '{"username":"admin","password":"password"}' + ``` + +**Solutions:** +1. **Reset admin password:** + ```bash + llamactl --reset-admin-password + ``` + +2. **Disable authentication temporarily:** + ```yaml + auth: + enabled: false + ``` + +## Log Analysis + +### Enable Debug Logging + +For detailed troubleshooting, enable debug logging: + +```yaml +logging: + level: "debug" + output: "/var/log/llamactl/debug.log" +``` + +### Key Log Patterns + +Look for these patterns in logs: + +**Startup issues:** +``` +ERRO Failed to start server +ERRO Database connection failed +ERRO Port binding failed +``` + +**Instance issues:** +``` +ERRO Failed to start instance +ERRO Model loading failed +ERRO Process crashed +``` + +**Performance issues:** +``` +WARN High memory usage detected +WARN Request timeout +WARN Resource limit exceeded +``` + +## Getting Help + +### Collecting Information + +When seeking help, provide: + +1. **System information:** + ```bash + uname -a + llamactl --version + ``` + +2. **Configuration:** + ```bash + llamactl --config-dump + ``` + +3. **Logs:** + ```bash + tail -100 /var/log/llamactl/app.log + ``` + +4. **Error details:** + - Exact error messages + - Steps to reproduce + - Environment details + +### Support Channels + +- **GitHub Issues:** Report bugs and feature requests +- **Documentation:** Check this documentation first +- **Community:** Join discussions in GitHub Discussions + +## Preventive Measures + +### Health Monitoring + +Set up monitoring to catch issues early: + +```bash +# Regular health checks +*/5 * * * * curl -f http://localhost:8080/api/health || alert +``` + +### Resource Monitoring + +Monitor system resources: + +```bash +# Disk space monitoring +df -h /var/log/llamactl/ +df -h /path/to/models/ + +# Memory monitoring +free -h +``` + +### Backup Configuration + +Regular configuration backups: + +```bash +# Backup configuration +cp ~/.llamactl/config.yaml ~/.llamactl/config.yaml.backup + +# Backup instance configurations +curl http://localhost:8080/api/instances > instances-backup.json +``` + +## Next Steps + +- Set up [Monitoring](monitoring.md) to prevent issues +- Learn about [Advanced Configuration](backends.md) +- Review [Best Practices](../development/contributing.md) diff --git a/docs/development/building.md b/docs/development/building.md new file mode 100644 index 0000000..6215854 --- /dev/null +++ b/docs/development/building.md @@ -0,0 +1,464 @@ +# Building from Source + +This guide covers building LlamaCtl from source code for development and production deployment. 
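+
+Before starting, it is worth confirming that the toolchain described below is actually on your `PATH`; every build step in this guide assumes these versions:
+
+```bash
+# Verify the build toolchain (versions per the prerequisites below)
+go version       # expect go1.24 or later
+node --version   # expect v22 or later
+git --version
+make --version   # optional, used for build automation
+```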
+ +## Prerequisites + +### Required Tools + +- **Go 1.24+**: Download from [golang.org](https://golang.org/dl/) +- **Node.js 22+**: Download from [nodejs.org](https://nodejs.org/) +- **Git**: For cloning the repository +- **Make**: For build automation (optional) + +### System Requirements + +- **Memory**: 4GB+ RAM for building +- **Disk**: 2GB+ free space +- **OS**: Linux, macOS, or Windows + +## Quick Build + +### Clone and Build + +```bash +# Clone the repository +git clone https://github.com/lordmathis/llamactl.git +cd llamactl + +# Build the application +go build -o llamactl cmd/server/main.go +``` + +### Run + +```bash +./llamactl +``` + +## Development Build + +### Setup Development Environment + +```bash +# Clone repository +git clone https://github.com/lordmathis/llamactl.git +cd llamactl + +# Install Go dependencies +go mod download + +# Install frontend dependencies +cd webui +npm ci +cd .. +``` + +### Build Components + +```bash +# Build backend only +go build -o llamactl cmd/server/main.go + +# Build frontend only +cd webui +npm run build +cd .. + +# Build everything +make build +``` + +### Development Server + +```bash +# Run backend in development mode +go run cmd/server/main.go --dev + +# Run frontend dev server (separate terminal) +cd webui +npm run dev +``` + +## Production Build + +### Optimized Build + +```bash +# Build with optimizations +go build -ldflags="-s -w" -o llamactl cmd/server/main.go + +# Or use the Makefile +make build-prod +``` + +### Build Flags + +Common build flags for production: + +```bash +go build \ + -ldflags="-s -w -X main.version=1.0.0 -X main.buildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)" \ + -trimpath \ + -o llamactl \ + cmd/server/main.go +``` + +**Flag explanations:** +- `-s`: Strip symbol table +- `-w`: Strip debug information +- `-X`: Set variable values at build time +- `-trimpath`: Remove absolute paths from binary + +## Cross-Platform Building + +### Build for Multiple Platforms + +```bash +# Linux AMD64 +GOOS=linux GOARCH=amd64 go build -o llamactl-linux-amd64 cmd/server/main.go + +# Linux ARM64 +GOOS=linux GOARCH=arm64 go build -o llamactl-linux-arm64 cmd/server/main.go + +# macOS AMD64 +GOOS=darwin GOARCH=amd64 go build -o llamactl-darwin-amd64 cmd/server/main.go + +# macOS ARM64 (Apple Silicon) +GOOS=darwin GOARCH=arm64 go build -o llamactl-darwin-arm64 cmd/server/main.go + +# Windows AMD64 +GOOS=windows GOARCH=amd64 go build -o llamactl-windows-amd64.exe cmd/server/main.go +``` + +### Automated Cross-Building + +Use the provided Makefile: + +```bash +# Build all platforms +make build-all + +# Build specific platform +make build-linux +make build-darwin +make build-windows +``` + +## Build with Docker + +### Development Container + +```dockerfile +# Dockerfile.dev +FROM golang:1.24-alpine AS builder + +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . +RUN go build -o llamactl cmd/server/main.go + +FROM alpine:latest +RUN apk --no-cache add ca-certificates +WORKDIR /root/ +COPY --from=builder /app/llamactl . + +EXPOSE 8080 +CMD ["./llamactl"] +``` + +```bash +# Build development image +docker build -f Dockerfile.dev -t llamactl:dev . + +# Run container +docker run -p 8080:8080 llamactl:dev +``` + +### Production Container + +```dockerfile +# Dockerfile +FROM node:22-alpine AS frontend-builder + +WORKDIR /app/webui +COPY webui/package*.json ./ +RUN npm ci + +COPY webui/ ./ +RUN npm run build + +FROM golang:1.24-alpine AS backend-builder + +WORKDIR /app +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . 
+COPY --from=frontend-builder /app/webui/dist ./webui/dist + +RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags="-s -w" \ + -o llamactl \ + cmd/server/main.go + +FROM alpine:latest + +RUN apk --no-cache add ca-certificates tzdata +RUN adduser -D -s /bin/sh llamactl + +WORKDIR /home/llamactl +COPY --from=backend-builder /app/llamactl . +RUN chown llamactl:llamactl llamactl + +USER llamactl +EXPOSE 8080 + +CMD ["./llamactl"] +``` + +## Advanced Build Options + +### Static Linking + +For deployments without external dependencies: + +```bash +CGO_ENABLED=0 go build \ + -ldflags="-s -w -extldflags '-static'" \ + -o llamactl-static \ + cmd/server/main.go +``` + +### Debug Build + +Build with debug information: + +```bash +go build -gcflags="all=-N -l" -o llamactl-debug cmd/server/main.go +``` + +### Race Detection Build + +Build with race detection (development only): + +```bash +go build -race -o llamactl-race cmd/server/main.go +``` + +## Build Automation + +### Makefile + +```makefile +# Makefile +VERSION := $(shell git describe --tags --always --dirty) +BUILD_TIME := $(shell date -u +%Y-%m-%dT%H:%M:%SZ) +LDFLAGS := -s -w -X main.version=$(VERSION) -X main.buildTime=$(BUILD_TIME) + +.PHONY: build clean test install + +build: + @echo "Building LlamaCtl..." + @cd webui && npm run build + @go build -ldflags="$(LDFLAGS)" -o llamactl cmd/server/main.go + +build-prod: + @echo "Building production binary..." + @cd webui && npm run build + @CGO_ENABLED=0 go build -ldflags="$(LDFLAGS)" -trimpath -o llamactl cmd/server/main.go + +build-all: build-linux build-darwin build-windows + +build-linux: + @GOOS=linux GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o dist/llamactl-linux-amd64 cmd/server/main.go + @GOOS=linux GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o dist/llamactl-linux-arm64 cmd/server/main.go + +build-darwin: + @GOOS=darwin GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o dist/llamactl-darwin-amd64 cmd/server/main.go + @GOOS=darwin GOARCH=arm64 go build -ldflags="$(LDFLAGS)" -o dist/llamactl-darwin-arm64 cmd/server/main.go + +build-windows: + @GOOS=windows GOARCH=amd64 go build -ldflags="$(LDFLAGS)" -o dist/llamactl-windows-amd64.exe cmd/server/main.go + +test: + @go test ./... + +clean: + @rm -f llamactl llamactl-* + @rm -rf dist/ + +install: build + @cp llamactl $(GOPATH)/bin/llamactl +``` + +### GitHub Actions + +```yaml +# .github/workflows/build.yml +name: Build + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.24' + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Install dependencies + run: | + go mod download + cd webui && npm ci + + - name: Run tests + run: | + go test ./... 
+ cd webui && npm test + + - name: Build + run: make build + + build: + needs: test + runs-on: ubuntu-latest + if: github.ref == 'refs/heads/main' + + steps: + - uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: '1.24' + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: '22' + + - name: Build all platforms + run: make build-all + + - name: Upload artifacts + uses: actions/upload-artifact@v4 + with: + name: binaries + path: dist/ +``` + +## Build Troubleshooting + +### Common Issues + +**Go version mismatch:** +```bash +# Check Go version +go version + +# Update Go +# Download from https://golang.org/dl/ +``` + +**Node.js issues:** +```bash +# Clear npm cache +npm cache clean --force + +# Remove node_modules and reinstall +rm -rf webui/node_modules +cd webui && npm ci +``` + +**Build failures:** +```bash +# Clean and rebuild +make clean +go mod tidy +make build +``` + +### Performance Issues + +**Slow builds:** +```bash +# Use build cache +export GOCACHE=$(go env GOCACHE) + +# Parallel builds +export GOMAXPROCS=$(nproc) +``` + +**Large binary size:** +```bash +# Use UPX compression +upx --best llamactl + +# Analyze binary size +go tool nm -size llamactl | head -20 +``` + +## Deployment + +### System Service + +Create a systemd service: + +```ini +# /etc/systemd/system/llamactl.service +[Unit] +Description=LlamaCtl Server +After=network.target + +[Service] +Type=simple +User=llamactl +Group=llamactl +ExecStart=/usr/local/bin/llamactl +Restart=always +RestartSec=5 + +[Install] +WantedBy=multi-user.target +``` + +```bash +# Enable and start service +sudo systemctl enable llamactl +sudo systemctl start llamactl +``` + +### Configuration + +```bash +# Create configuration directory +sudo mkdir -p /etc/llamactl + +# Copy configuration +sudo cp config.yaml /etc/llamactl/ + +# Set permissions +sudo chown -R llamactl:llamactl /etc/llamactl +``` + +## Next Steps + +- Configure [Installation](../getting-started/installation.md) +- Set up [Configuration](../getting-started/configuration.md) +- Learn about [Contributing](contributing.md) diff --git a/docs/development/contributing.md b/docs/development/contributing.md new file mode 100644 index 0000000..c2c146f --- /dev/null +++ b/docs/development/contributing.md @@ -0,0 +1,373 @@ +# Contributing + +Thank you for your interest in contributing to LlamaCtl! This guide will help you get started with development and contribution. + +## Development Setup + +### Prerequisites + +- Go 1.24 or later +- Node.js 22 or later +- `llama-server` executable (from [llama.cpp](https://github.com/ggml-org/llama.cpp)) +- Git + +### Getting Started + +1. **Fork and Clone** + ```bash + # Fork the repository on GitHub, then clone your fork + git clone https://github.com/yourusername/llamactl.git + cd llamactl + + # Add upstream remote + git remote add upstream https://github.com/lordmathis/llamactl.git + ``` + +2. **Install Dependencies** + ```bash + # Go dependencies + go mod download + + # Frontend dependencies + cd webui && npm ci && cd .. + ``` + +3. **Run Development Environment** + ```bash + # Start backend server + go run ./cmd/server + ``` + + In a separate terminal: + ```bash + # Start frontend dev server + cd webui && npm run dev + ``` + +## Development Workflow + +### Setting Up Your Environment + +1. **Configuration** + Create a development configuration file: + ```yaml + # dev-config.yaml + server: + host: "localhost" + port: 8080 + logging: + level: "debug" + ``` + +2. 
**Test Data**
+   Set up test models and instances for development.
+
+### Making Changes
+
+1. **Create a Branch**
+   ```bash
+   git checkout -b feature/your-feature-name
+   ```
+
+2. **Development Commands**
+   ```bash
+   # Backend
+   go test ./... -v              # Run tests
+   go test -race ./... -v        # Run with race detector
+   go fmt ./... && go vet ./...  # Format and vet code
+   go build ./cmd/server         # Build binary
+
+   # Frontend (from webui/ directory)
+   npm run test                  # Run tests
+   npm run lint                  # Lint code
+   npm run type-check            # TypeScript check
+   npm run build                 # Build for production
+   ```
+
+3. **Code Quality**
+   ```bash
+   # Run all checks before committing
+   make lint
+   make test
+   make build
+   ```
+
+## Project Structure
+
+### Backend (Go)
+
+```
+cmd/
+├── server/       # Main application entry point
+pkg/
+├── backends/     # Model backend implementations
+├── config/       # Configuration management
+├── instance/     # Instance lifecycle management
+├── manager/      # Instance manager
+├── server/       # HTTP server and routes
+├── testutil/     # Test utilities
+└── validation/   # Input validation
+```
+
+### Frontend (React/TypeScript)
+
+```
+webui/src/
+├── components/   # React components
+├── contexts/     # React contexts
+├── hooks/        # Custom hooks
+├── lib/          # Utility libraries
+├── schemas/      # Zod schemas
+└── types/        # TypeScript types
+```
+
+## Coding Standards
+
+### Go Code
+
+- Follow standard Go formatting (`gofmt`)
+- Use `go vet` and address all warnings
+- Write comprehensive tests for new functionality
+- Include documentation comments for exported functions
+- Use meaningful variable and function names
+
+Example:
+```go
+// CreateInstance creates a new model instance with the given configuration.
+// It validates the configuration and ensures the instance name is unique.
+func (m *Manager) CreateInstance(ctx context.Context, config InstanceConfig) (*Instance, error) {
+    if err := config.Validate(); err != nil {
+        return nil, fmt.Errorf("invalid configuration: %w", err)
+    }
+
+    // Implementation...
+}
+```
+
+### TypeScript/React Code
+
+- Use TypeScript strict mode
+- Follow React best practices
+- Use functional components with hooks
+- Implement proper error boundaries
+- Write unit tests for components
+
+Example:
+```typescript
+interface InstanceCardProps {
+  instance: Instance;
+  onStart: (name: string) => Promise<void>;
+  onStop: (name: string) => Promise<void>;
+}
+
+export const InstanceCard: React.FC<InstanceCardProps> = ({
+  instance,
+  onStart,
+  onStop,
+}) => {
+  // Implementation...
+};
+```
+
+## Testing
+
+### Backend Tests
+
+```bash
+# Run all tests
+go test ./...
+
+# Run tests with coverage
+go test ./... -coverprofile=coverage.out
+go tool cover -html=coverage.out
+
+# Run specific package tests
+go test ./pkg/manager -v
+
+# Run with race detection
+go test -race ./...
+```
+
+### Frontend Tests
+
+```bash
+cd webui
+
+# Run unit tests
+npm run test
+
+# Run tests with coverage
+npm run test:coverage
+
+# Run E2E tests
+npm run test:e2e
+```
+
+### Integration Tests
+
+```bash
+# Run integration tests (requires llama-server)
+go test ./... -tags=integration
+```
+
+## Pull Request Process
+
+### Before Submitting
+
+1. **Update your branch**
+   ```bash
+   git fetch upstream
+   git rebase upstream/main
+   ```
+
+2. **Run all tests**
+   ```bash
+   make test-all
+   ```
+
+3. **Update documentation** if needed
+
+4. 
**Write clear commit messages** + ``` + feat: add instance health monitoring + + - Implement health check endpoint + - Add periodic health monitoring + - Update API documentation + + Fixes #123 + ``` + +### Submitting a PR + +1. **Push your branch** + ```bash + git push origin feature/your-feature-name + ``` + +2. **Create Pull Request** + - Use the PR template + - Provide clear description + - Link related issues + - Add screenshots for UI changes + +3. **PR Review Process** + - Automated checks must pass + - Code review by maintainers + - Address feedback promptly + - Keep PR scope focused + +## Issue Guidelines + +### Reporting Bugs + +Use the bug report template and include: + +- Steps to reproduce +- Expected vs actual behavior +- Environment details (OS, Go version, etc.) +- Relevant logs or error messages +- Minimal reproduction case + +### Feature Requests + +Use the feature request template and include: + +- Clear description of the problem +- Proposed solution +- Alternative solutions considered +- Implementation complexity estimate + +### Security Issues + +For security vulnerabilities: +- Do NOT create public issues +- Email security@llamactl.dev +- Provide detailed description +- Allow time for fix before disclosure + +## Development Best Practices + +### API Design + +- Follow REST principles +- Use consistent naming conventions +- Provide comprehensive error messages +- Include proper HTTP status codes +- Document all endpoints + +### Error Handling + +```go +// Wrap errors with context +if err := instance.Start(); err != nil { + return fmt.Errorf("failed to start instance %s: %w", instance.Name, err) +} + +// Use structured logging +log.WithFields(log.Fields{ + "instance": instance.Name, + "error": err, +}).Error("Failed to start instance") +``` + +### Configuration + +- Use environment variables for deployment +- Provide sensible defaults +- Validate configuration on startup +- Support configuration file reloading + +### Performance + +- Profile code for bottlenecks +- Use efficient data structures +- Implement proper caching +- Monitor resource usage + +## Release Process + +### Version Management + +- Use semantic versioning (SemVer) +- Tag releases properly +- Maintain CHANGELOG.md +- Create release notes + +### Building Releases + +```bash +# Build all platforms +make build-all + +# Create release package +make package +``` + +## Getting Help + +### Communication Channels + +- **GitHub Issues**: Bug reports and feature requests +- **GitHub Discussions**: General questions and ideas +- **Code Review**: PR comments and feedback + +### Development Questions + +When asking for help: + +1. Check existing documentation +2. Search previous issues +3. Provide minimal reproduction case +4. Include relevant environment details + +## Recognition + +Contributors are recognized in: + +- CONTRIBUTORS.md file +- Release notes +- Documentation credits +- Annual contributor highlights + +Thank you for contributing to LlamaCtl! diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md new file mode 100644 index 0000000..6c8ae7f --- /dev/null +++ b/docs/getting-started/configuration.md @@ -0,0 +1,154 @@ +# Configuration + +LlamaCtl can be configured through various methods to suit your needs. 
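+
+The methods below can be combined. As a quick sketch, using the file path, flag, and environment variable documented in this guide (the exact precedence between sources may vary by version):
+
+```bash
+# Start from an explicit configuration file...
+llamactl --config ~/.llamactl/config.yaml
+
+# ...and override individual settings per environment
+LLAMACTL_PORT=9090 llamactl --config ~/.llamactl/config.yaml
+```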
+ +## Configuration File + +Create a configuration file at `~/.llamactl/config.yaml`: + +```yaml +# Server configuration +server: + host: "0.0.0.0" + port: 8080 + cors_enabled: true + +# Authentication (optional) +auth: + enabled: false + # When enabled, configure your authentication method + # jwt_secret: "your-secret-key" + +# Default instance settings +defaults: + backend: "llamacpp" + timeout: 300 + log_level: "info" + +# Paths +paths: + models_dir: "/path/to/your/models" + logs_dir: "/var/log/llamactl" + data_dir: "/var/lib/llamactl" + +# Instance limits +limits: + max_instances: 10 + max_memory_per_instance: "8GB" +``` + +## Environment Variables + +You can also configure LlamaCtl using environment variables: + +```bash +# Server settings +export LLAMACTL_HOST=0.0.0.0 +export LLAMACTL_PORT=8080 + +# Paths +export LLAMACTL_MODELS_DIR=/path/to/models +export LLAMACTL_LOGS_DIR=/var/log/llamactl + +# Limits +export LLAMACTL_MAX_INSTANCES=5 +``` + +## Command Line Options + +View all available command line options: + +```bash +llamactl --help +``` + +Common options: + +```bash +# Specify config file +llamactl --config /path/to/config.yaml + +# Set log level +llamactl --log-level debug + +# Run on different port +llamactl --port 9090 +``` + +## Instance Configuration + +When creating instances, you can specify various options: + +### Basic Options + +- `name`: Unique identifier for the instance +- `model_path`: Path to the GGUF model file +- `port`: Port for the instance to listen on + +### Advanced Options + +- `threads`: Number of CPU threads to use +- `context_size`: Context window size +- `batch_size`: Batch size for processing +- `gpu_layers`: Number of layers to offload to GPU +- `memory_lock`: Lock model in memory +- `no_mmap`: Disable memory mapping + +### Example Instance Configuration + +```json +{ + "name": "production-model", + "model_path": "/models/llama-2-13b-chat.gguf", + "port": 8081, + "options": { + "threads": 8, + "context_size": 4096, + "batch_size": 512, + "gpu_layers": 35, + "memory_lock": true + } +} +``` + +## Security Configuration + +### Enable Authentication + +To enable authentication, update your config file: + +```yaml +auth: + enabled: true + jwt_secret: "your-very-secure-secret-key" + token_expiry: "24h" +``` + +### HTTPS Configuration + +For production deployments, configure HTTPS: + +```yaml +server: + tls: + enabled: true + cert_file: "/path/to/cert.pem" + key_file: "/path/to/key.pem" +``` + +## Logging Configuration + +Configure logging levels and outputs: + +```yaml +logging: + level: "info" # debug, info, warn, error + format: "json" # json or text + output: "/var/log/llamactl/app.log" +``` + +## Next Steps + +- Learn about [Managing Instances](../user-guide/managing-instances.md) +- Explore [Advanced Configuration](../advanced/monitoring.md) +- Set up [Monitoring](../advanced/monitoring.md) diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md new file mode 100644 index 0000000..7a71629 --- /dev/null +++ b/docs/getting-started/installation.md @@ -0,0 +1,55 @@ +# Installation + +This guide will walk you through installing LlamaCtl on your system. 
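+
+Release binaries are named by operating system and CPU architecture (for example `llamactl-linux-amd64`, used below), so it helps to check your platform first:
+
+```bash
+# Identify your platform to pick the matching release asset
+uname -s -m   # e.g. "Linux x86_64" -> use llamactl-linux-amd64
+```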
+
+## Prerequisites
+
+Before installing LlamaCtl, ensure you have:
+
+- Go 1.24 or later and Git (needed only when building from source)
+- Sufficient disk space for your models
+
+## Installation Methods
+
+### Option 1: Download Binary (Recommended)
+
+Download the latest release from our [GitHub releases page](https://github.com/lordmathis/llamactl/releases):
+
+```bash
+# Download for Linux
+curl -L https://github.com/lordmathis/llamactl/releases/latest/download/llamactl-linux-amd64 -o llamactl
+
+# Make executable
+chmod +x llamactl
+
+# Move to PATH (optional)
+sudo mv llamactl /usr/local/bin/
+```
+
+### Option 2: Build from Source
+
+If you prefer to build from source:
+
+```bash
+# Clone the repository
+git clone https://github.com/lordmathis/llamactl.git
+cd llamactl

+# Build the application
+go build -o llamactl cmd/server/main.go
+```
+
+For detailed build instructions, see the [Building from Source](../development/building.md) guide.
+
+## Verification
+
+Verify your installation by checking the version:
+
+```bash
+llamactl --version
+```
+
+## Next Steps
+
+Now that LlamaCtl is installed, continue to the [Quick Start](quick-start.md) guide to get your first instance running!
diff --git a/docs/getting-started/quick-start.md b/docs/getting-started/quick-start.md
new file mode 100644
index 0000000..2d77e2e
--- /dev/null
+++ b/docs/getting-started/quick-start.md
@@ -0,0 +1,86 @@
+# Quick Start
+
+This guide will help you get LlamaCtl up and running in just a few minutes.
+
+## Step 1: Start LlamaCtl
+
+Start the LlamaCtl server:
+
+```bash
+llamactl
+```
+
+By default, LlamaCtl will start on `http://localhost:8080`.
+
+## Step 2: Access the Web UI
+
+Open your web browser and navigate to:
+
+```
+http://localhost:8080
+```
+
+You should see the LlamaCtl web interface.
+
+## Step 3: Create Your First Instance
+
+1. Click the "Add Instance" button
+2. Fill in the instance configuration:
+   - **Name**: Give your instance a descriptive name
+   - **Model Path**: Path to your Llama.cpp model file
+   - **Port**: Port for the instance to run on
+   - **Additional Options**: Any extra Llama.cpp parameters
+
+3. Click "Create Instance"
+
+## Step 4: Start Your Instance
+
+Once created, you can:
+
+- **Start** the instance by clicking the start button
+- **Monitor** its status in real-time
+- **View logs** by clicking the logs button
+- **Stop** the instance when needed
+
+## Example Configuration
+
+Here's a basic example configuration for a Llama 2 model:
+
+```json
+{
+  "name": "llama2-7b",
+  "model_path": "/path/to/llama-2-7b-chat.gguf",
+  "port": 8081,
+  "options": {
+    "threads": 4,
+    "context_size": 2048
+  }
+}
+```
+
+## Using the API
+
+You can also manage instances via the REST API:
+
+```bash
+# List all instances
+curl http://localhost:8080/api/instances
+
+# Create a new instance
+curl -X POST http://localhost:8080/api/instances \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "my-model",
+    "model_path": "/path/to/model.gguf",
+    "port": 8081
+  }'
+
+# Start an instance
+curl -X POST http://localhost:8080/api/instances/my-model/start
+```
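+
+Checking status, fetching logs, and stopping an instance work the same way; these endpoints are documented in the [API Reference](../user-guide/api-reference.md):
+
+```bash
+# Inspect the instance, view its logs, then stop it
+curl http://localhost:8080/api/instances/my-model
+curl http://localhost:8080/api/instances/my-model/logs
+curl -X POST http://localhost:8080/api/instances/my-model/stop
+```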
+
+## Next Steps
+
+- Learn more about the [Web UI](../user-guide/web-ui.md)
+- Explore the [API Reference](../user-guide/api-reference.md)
+- Configure advanced settings in the [Configuration](configuration.md) guide
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..f1fd69f
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,41 @@
+# LlamaCtl Documentation
+
+Welcome to the LlamaCtl documentation! LlamaCtl is a powerful management tool for Llama.cpp instances that provides both a web interface and a REST API for managing large language models.
+
+## What is LlamaCtl?
+
+LlamaCtl is designed to simplify the deployment and management of Llama.cpp instances. It provides:
+
+- **Instance Management**: Start, stop, and monitor multiple Llama.cpp instances
+- **Web UI**: User-friendly interface for managing your models
+- **REST API**: Programmatic access to all functionality
+- **Health Monitoring**: Real-time status and health checks
+- **Configuration Management**: Easy setup and configuration options
+
+## Key Features
+
+- 🚀 **Easy Setup**: Quick installation and configuration
+- 🌐 **Web Interface**: Intuitive web UI for model management
+- 🔧 **REST API**: Full API access for automation
+- 📊 **Monitoring**: Real-time health and status monitoring
+- 🔒 **Security**: Authentication and access control
+- 📱 **Responsive**: Works on desktop and mobile devices
+
+## Quick Links
+
+- [Installation Guide](getting-started/installation.md) - Get LlamaCtl up and running
+- [Quick Start](getting-started/quick-start.md) - Your first steps with LlamaCtl
+- [Web UI Guide](user-guide/web-ui.md) - Learn to use the web interface
+- [API Reference](user-guide/api-reference.md) - Complete API documentation
+
+## Getting Help
+
+If you need help or have questions:
+
+- Check the [Troubleshooting](advanced/troubleshooting.md) guide
+- Visit our [GitHub repository](https://github.com/lordmathis/llamactl)
+- Read the [Contributing guide](development/contributing.md) to help improve LlamaCtl
+
+---
+
+Ready to get started? Head over to the [Installation Guide](getting-started/installation.md)!
diff --git a/docs/user-guide/api-reference.md b/docs/user-guide/api-reference.md
new file mode 100644
index 0000000..813aa06
--- /dev/null
+++ b/docs/user-guide/api-reference.md
@@ -0,0 +1,470 @@
+# API Reference
+
+Complete reference for the LlamaCtl REST API.
+
+## Base URL
+
+All API endpoints are relative to the base URL:
+
+```
+http://localhost:8080/api
+```
+
+## Authentication
+
+If authentication is enabled, include the JWT token in the Authorization header:
+
+```bash
+curl -H "Authorization: Bearer <token>" \
+  http://localhost:8080/api/instances
+```
+
+## Instances
+
+### List All Instances
+
+Get a list of all instances.
+
+```http
+GET /api/instances
+```
+
+**Response:**
+```json
+{
+  "instances": [
+    {
+      "name": "llama2-7b",
+      "status": "running",
+      "model_path": "/models/llama-2-7b.gguf",
+      "port": 8081,
+      "created_at": "2024-01-15T10:30:00Z",
+      "updated_at": "2024-01-15T12:45:00Z"
+    }
+  ]
+}
+```
+
+### Get Instance Details
+
+Get detailed information about a specific instance.
+
+```http
+GET /api/instances/{name}
+```
+
+**Response:**
+```json
+{
+  "name": "llama2-7b",
+  "status": "running",
+  "model_path": "/models/llama-2-7b.gguf",
+  "port": 8081,
+  "pid": 12345,
+  "options": {
+    "threads": 4,
+    "context_size": 2048,
+    "gpu_layers": 0
+  },
+  "stats": {
+    "memory_usage": 4294967296,
+    "cpu_usage": 25.5,
+    "uptime": 3600
+  },
+  "created_at": "2024-01-15T10:30:00Z",
+  "updated_at": "2024-01-15T12:45:00Z"
+}
+```
+
+### Create Instance
+
+Create a new instance. 
+ +```http +POST /api/instances +``` + +**Request Body:** +```json +{ + "name": "my-instance", + "model_path": "/path/to/model.gguf", + "port": 8081, + "options": { + "threads": 4, + "context_size": 2048, + "gpu_layers": 0 + } +} +``` + +**Response:** +```json +{ + "message": "Instance created successfully", + "instance": { + "name": "my-instance", + "status": "stopped", + "model_path": "/path/to/model.gguf", + "port": 8081, + "created_at": "2024-01-15T14:30:00Z" + } +} +``` + +### Update Instance + +Update an existing instance configuration. + +```http +PUT /api/instances/{name} +``` + +**Request Body:** +```json +{ + "options": { + "threads": 8, + "context_size": 4096 + } +} +``` + +### Delete Instance + +Delete an instance (must be stopped first). + +```http +DELETE /api/instances/{name} +``` + +**Response:** +```json +{ + "message": "Instance deleted successfully" +} +``` + +## Instance Operations + +### Start Instance + +Start a stopped instance. + +```http +POST /api/instances/{name}/start +``` + +**Response:** +```json +{ + "message": "Instance start initiated", + "status": "starting" +} +``` + +### Stop Instance + +Stop a running instance. + +```http +POST /api/instances/{name}/stop +``` + +**Request Body (Optional):** +```json +{ + "force": false, + "timeout": 30 +} +``` + +**Response:** +```json +{ + "message": "Instance stop initiated", + "status": "stopping" +} +``` + +### Restart Instance + +Restart an instance (stop then start). + +```http +POST /api/instances/{name}/restart +``` + +### Get Instance Health + +Check instance health status. + +```http +GET /api/instances/{name}/health +``` + +**Response:** +```json +{ + "status": "healthy", + "checks": { + "process": "running", + "port": "open", + "response": "ok" + }, + "last_check": "2024-01-15T14:30:00Z" +} +``` + +### Get Instance Logs + +Retrieve instance logs. + +```http +GET /api/instances/{name}/logs +``` + +**Query Parameters:** +- `lines`: Number of lines to return (default: 100) +- `follow`: Stream logs (boolean) +- `level`: Filter by log level (debug, info, warn, error) + +**Response:** +```json +{ + "logs": [ + { + "timestamp": "2024-01-15T14:30:00Z", + "level": "info", + "message": "Model loaded successfully" + } + ] +} +``` + +## Batch Operations + +### Start All Instances + +Start all stopped instances. + +```http +POST /api/instances/start-all +``` + +### Stop All Instances + +Stop all running instances. + +```http +POST /api/instances/stop-all +``` + +## System Information + +### Get System Status + +Get overall system status and metrics. + +```http +GET /api/system/status +``` + +**Response:** +```json +{ + "version": "1.0.0", + "uptime": 86400, + "instances": { + "total": 5, + "running": 3, + "stopped": 2 + }, + "resources": { + "cpu_usage": 45.2, + "memory_usage": 8589934592, + "memory_total": 17179869184, + "disk_usage": 75.5 + } +} +``` + +### Get System Information + +Get detailed system information. + +```http +GET /api/system/info +``` + +**Response:** +```json +{ + "hostname": "server-01", + "os": "linux", + "arch": "amd64", + "cpu_count": 8, + "memory_total": 17179869184, + "version": "1.0.0", + "build_time": "2024-01-15T10:00:00Z" +} +``` + +## Configuration + +### Get Configuration + +Get current LlamaCtl configuration. + +```http +GET /api/config +``` + +### Update Configuration + +Update LlamaCtl configuration (requires restart). + +```http +PUT /api/config +``` + +## Authentication + +### Login + +Authenticate and receive a JWT token. 
+ +```http +POST /api/auth/login +``` + +**Request Body:** +```json +{ + "username": "admin", + "password": "password" +} +``` + +**Response:** +```json +{ + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...", + "expires_at": "2024-01-16T14:30:00Z" +} +``` + +### Refresh Token + +Refresh an existing JWT token. + +```http +POST /api/auth/refresh +``` + +## Error Responses + +All endpoints may return error responses in the following format: + +```json +{ + "error": "Error message", + "code": "ERROR_CODE", + "details": "Additional error details" +} +``` + +### Common HTTP Status Codes + +- `200`: Success +- `201`: Created +- `400`: Bad Request +- `401`: Unauthorized +- `403`: Forbidden +- `404`: Not Found +- `409`: Conflict (e.g., instance already exists) +- `500`: Internal Server Error + +## WebSocket API + +### Real-time Updates + +Connect to WebSocket for real-time updates: + +```javascript +const ws = new WebSocket('ws://localhost:8080/api/ws'); + +ws.onmessage = function(event) { + const data = JSON.parse(event.data); + console.log('Update:', data); +}; +``` + +**Message Types:** +- `instance_status_changed`: Instance status updates +- `instance_stats_updated`: Resource usage updates +- `system_alert`: System-level alerts + +## Rate Limiting + +API requests are rate limited to: +- **100 requests per minute** for regular endpoints +- **10 requests per minute** for resource-intensive operations + +Rate limit headers are included in responses: +- `X-RateLimit-Limit`: Request limit +- `X-RateLimit-Remaining`: Remaining requests +- `X-RateLimit-Reset`: Reset time (Unix timestamp) + +## SDKs and Libraries + +### Go Client + +```go +import "github.com/lordmathis/llamactl-go-client" + +client := llamactl.NewClient("http://localhost:8080") +instances, err := client.ListInstances() +``` + +### Python Client + +```python +from llamactl import Client + +client = Client("http://localhost:8080") +instances = client.list_instances() +``` + +## Examples + +### Complete Instance Lifecycle + +```bash +# Create instance +curl -X POST http://localhost:8080/api/instances \ + -H "Content-Type: application/json" \ + -d '{ + "name": "example", + "model_path": "/models/example.gguf", + "port": 8081 + }' + +# Start instance +curl -X POST http://localhost:8080/api/instances/example/start + +# Check status +curl http://localhost:8080/api/instances/example + +# Stop instance +curl -X POST http://localhost:8080/api/instances/example/stop + +# Delete instance +curl -X DELETE http://localhost:8080/api/instances/example +``` + +## Next Steps + +- Learn about [Managing Instances](managing-instances.md) in detail +- Explore [Advanced Configuration](../advanced/backends.md) +- Set up [Monitoring](../advanced/monitoring.md) for production use diff --git a/docs/user-guide/managing-instances.md b/docs/user-guide/managing-instances.md new file mode 100644 index 0000000..fcb3455 --- /dev/null +++ b/docs/user-guide/managing-instances.md @@ -0,0 +1,171 @@ +# Managing Instances + +Learn how to effectively manage your Llama.cpp instances with LlamaCtl. + +## Instance Lifecycle + +### Creating Instances + +Instances can be created through the Web UI or API: + +#### Via Web UI +1. Click "Add Instance" button +2. Fill in the configuration form +3. 
Click "Create"
+
+#### Via API
+```bash
+curl -X POST http://localhost:8080/api/instances \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "my-instance",
+    "model_path": "/path/to/model.gguf",
+    "port": 8081
+  }'
+```
+
+### Starting and Stopping
+
+#### Start an Instance
+```bash
+# Via API
+curl -X POST http://localhost:8080/api/instances/{name}/start
+
+# The instance will begin loading the model
+```
+
+#### Stop an Instance
+```bash
+# Via API
+curl -X POST http://localhost:8080/api/instances/{name}/stop
+
+# Graceful shutdown with configurable timeout
+```
+
+### Monitoring Status
+
+Check instance status in real time:
+
+```bash
+# Get instance details
+curl http://localhost:8080/api/instances/{name}
+
+# Get health status
+curl http://localhost:8080/api/instances/{name}/health
+```
+
+## Instance States
+
+Instances can be in one of several states:
+
+- **Stopped**: Instance is not running
+- **Starting**: Instance is initializing and loading the model
+- **Running**: Instance is active and ready to serve requests
+- **Stopping**: Instance is shutting down gracefully
+- **Error**: Instance encountered an error
+
+## Configuration Management
+
+### Updating Instance Configuration
+
+Modify instance settings:
+
+```bash
+curl -X PUT http://localhost:8080/api/instances/{name} \
+  -H "Content-Type: application/json" \
+  -d '{
+    "options": {
+      "threads": 8,
+      "context_size": 4096
+    }
+  }'
+```
+
+!!! note
+    Configuration changes require restarting the instance to take effect.
+
+### Viewing Configuration
+
+```bash
+# Get current configuration
+curl http://localhost:8080/api/instances/{name}/config
+```
+
+## Resource Management
+
+### Memory Usage
+
+Monitor memory consumption:
+
+```bash
+# Get resource usage
+curl http://localhost:8080/api/instances/{name}/stats
+```
+
+### CPU and GPU Usage
+
+Track performance metrics:
+
+- CPU thread utilization
+- GPU memory usage (if applicable)
+- Request processing times
+
+## Troubleshooting Common Issues
+
+### Instance Won't Start
+
+1. **Check model path**: Ensure the model file exists and is readable
+2. **Port conflicts**: Verify the port isn't already in use
+3. **Resource limits**: Check available memory and CPU
+4. **Permissions**: Ensure proper file system permissions
+
+### Performance Issues
+
+1. **Adjust thread count**: Match to your CPU cores
+2. **Optimize context size**: Balance memory usage and capability
+3. **GPU offloading**: Use `gpu_layers` for GPU acceleration
+4. **Batch size tuning**: Optimize for your workload
+
+### Memory Problems
+
+1. **Reduce context size**: Lower memory requirements
+2. **Disable memory mapping**: Use `no_mmap` option
+3. **Enable memory locking**: Use `memory_lock` for performance
+4. **Monitor system resources**: Check available RAM
+
+## Best Practices
+
+### Production Deployments
+
+1. **Resource allocation**: Plan memory and CPU requirements
+2. **Health monitoring**: Set up regular health checks (see the polling sketch after these lists)
+3. **Graceful shutdowns**: Use proper stop procedures
+4. **Backup configurations**: Save instance configurations
+5. **Log management**: Configure appropriate logging levels
+
+### Development Environments
+
+1. **Resource sharing**: Use smaller models for development
+2. **Quick iterations**: Optimize for fast startup times
+3. **Debug logging**: Enable detailed logging for troubleshooting
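+
+For the health-monitoring practice above, a minimal polling sketch might look like this (it assumes the default port and the `jq` utility, and is a starting point rather than a production monitor):
+
+```bash
+# Log every instance's health status once a minute
+while true; do
+  for name in $(curl -s http://localhost:8080/api/instances | jq -r '.instances[].name'); do
+    status=$(curl -s "http://localhost:8080/api/instances/$name/health" | jq -r '.status')
+    echo "$(date -Is) $name: $status"
+  done
+  sleep 60
+done
+```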
+
+## Batch Operations
+
+### Managing Multiple Instances
+
+```bash
+# Start all instances
+curl -X POST http://localhost:8080/api/instances/start-all
+
+# Stop all instances
+curl -X POST http://localhost:8080/api/instances/stop-all
+
+# Get status of all instances
+curl http://localhost:8080/api/instances
+```
+
+## Next Steps
+
+- Learn about the [Web UI](web-ui.md) interface
+- Explore the complete [API Reference](api-reference.md)
+- Set up [Monitoring](../advanced/monitoring.md) for production use
diff --git a/docs/user-guide/web-ui.md b/docs/user-guide/web-ui.md
new file mode 100644
index 0000000..5207556
--- /dev/null
+++ b/docs/user-guide/web-ui.md
@@ -0,0 +1,216 @@
+# Web UI Guide
+
+The LlamaCtl Web UI provides an intuitive interface for managing your Llama.cpp instances.
+
+## Overview
+
+The web interface is accessible at `http://localhost:8080` (or your configured host/port) and provides:
+
+- Instance management dashboard
+- Real-time status monitoring
+- Configuration management
+- Log viewing
+- System information
+
+## Dashboard
+
+### Instance Cards
+
+Each instance is displayed as a card showing:
+
+- **Instance name** and status indicator
+- **Model information** (name, size)
+- **Current state** (stopped, starting, running, error)
+- **Resource usage** (memory, CPU)
+- **Action buttons** (start, stop, configure, logs)
+
+### Status Indicators
+
+- 🟢 **Green**: Instance is running and healthy
+- 🟡 **Yellow**: Instance is starting or stopping
+- 🔴 **Red**: Instance has encountered an error
+- ⚪ **Gray**: Instance is stopped
+
+## Creating Instances
+
+### Add Instance Dialog
+
+1. Click the **"Add Instance"** button
+2. Fill in the required fields:
+   - **Name**: Unique identifier for your instance
+   - **Model Path**: Full path to your GGUF model file
+   - **Port**: Port number for the instance
+
+3. Configure optional settings:
+   - **Threads**: Number of CPU threads
+   - **Context Size**: Context window size
+   - **GPU Layers**: Layers to offload to GPU
+   - **Additional Options**: Advanced Llama.cpp parameters
+
+4. Click **"Create"** to save the instance
+
+### Model Path Helper
+
+Use the file browser to select model files:
+
+- Navigate to your models directory
+- Select the `.gguf` file
+- Path is automatically filled in the form
+
+## Managing Instances
+
+### Starting Instances
+
+1. Click the **"Start"** button on an instance card
+2. Watch the status change to "Starting"
+3. Monitor progress in the logs
+4. Instance becomes "Running" when ready
+
+### Stopping Instances
+
+1. Click the **"Stop"** button
+2. Instance gracefully shuts down
+3. Status changes to "Stopped"
+
+### Viewing Logs
+
+1. Click the **"Logs"** button on any instance
+2. Real-time log viewer opens
+3. Filter by log level (Debug, Info, Warning, Error)
+4. Search through log entries
+5. Download logs for offline analysis
+
+## Configuration Management
+
+### Editing Instance Settings
+
+1. Click the **"Configure"** button
+2. Modify settings in the configuration dialog
+3. Changes require instance restart to take effect
+4. Click **"Save"** to apply changes
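+
+The same edit-and-restart cycle is available programmatically; as a sketch, assuming an instance named `my-instance` on the default port:
+
+```bash
+# Apply new options via the API, then restart so they take effect
+curl -X PUT http://localhost:8080/api/instances/my-instance \
+  -H "Content-Type: application/json" \
+  -d '{"options": {"threads": 8}}'
+curl -X POST http://localhost:8080/api/instances/my-instance/restart
+```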
+
+### Advanced Options
+
+Access advanced Llama.cpp options:
+
+```yaml
+# Example advanced configuration
+options:
+  rope_freq_base: 10000
+  rope_freq_scale: 1.0
+  yarn_ext_factor: -1.0
+  yarn_attn_factor: 1.0
+  yarn_beta_fast: 32.0
+  yarn_beta_slow: 1.0
+```
+
+## System Information
+
+### Health Dashboard
+
+Monitor overall system health:
+
+- **System Resources**: CPU, memory, disk usage
+- **Instance Summary**: Running/stopped instance counts
+- **Performance Metrics**: Request rates, response times
+
+### Resource Usage
+
+Track resource consumption:
+
+- Per-instance memory usage
+- CPU utilization
+- GPU memory (if applicable)
+- Network I/O
+
+## User Interface Features
+
+### Theme Support
+
+Switch between light and dark themes:
+
+1. Click the theme toggle button
+2. Setting is remembered across sessions
+
+### Responsive Design
+
+The UI adapts to different screen sizes:
+
+- **Desktop**: Full-featured dashboard
+- **Tablet**: Condensed layout
+- **Mobile**: Stack-based navigation
+
+### Keyboard Shortcuts
+
+- `Ctrl+N`: Create new instance
+- `Ctrl+R`: Refresh dashboard
+- `Ctrl+L`: Open logs for selected instance
+- `Esc`: Close dialogs
+
+## Authentication
+
+### Login
+
+If authentication is enabled:
+
+1. Navigate to the web UI
+2. Enter your credentials
+3. JWT token is stored for the session
+4. Automatic logout on token expiry
+
+### Session Management
+
+- Sessions persist across browser restarts
+- Logout clears authentication tokens
+- Configurable session timeout
+
+## Troubleshooting
+
+### Common UI Issues
+
+**Page won't load:**
+- Check if LlamaCtl server is running
+- Verify the correct URL and port
+- Check browser console for errors
+
+**Instance won't start from UI:**
+- Verify model path is correct
+- Check for port conflicts
+- Review instance logs for errors
+
+**Real-time updates not working:**
+- Check WebSocket connection
+- Verify firewall settings
+- Try refreshing the page
+
+### Browser Compatibility
+
+Supported browsers:
+- Chrome/Chromium 90+
+- Firefox 88+
+- Safari 14+
+- Edge 90+
+
+## Mobile Access
+
+### Responsive Features
+
+On mobile devices:
+
+- Touch-friendly interface
+- Swipe gestures for navigation
+- Optimized button sizes
+- Condensed information display
+
+### Limitations
+
+Some features may be limited on mobile:
+- Log viewing (use horizontal scrolling)
+- Complex configuration forms
+- File browser functionality
+
+## Next Steps
+
+- Learn about [API Reference](api-reference.md) for programmatic access
+- Set up [Monitoring](../advanced/monitoring.md) for production use
+- Explore [Advanced Configuration](../advanced/backends.md) options
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..f23c70e
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,75 @@
+site_name: LlamaCtl Documentation
+site_description: User documentation for LlamaCtl - A management tool for Llama.cpp instances
+site_author: LlamaCtl Team
+site_url: https://llamactl.org
+
+repo_name: lordmathis/llamactl
+repo_url: https://github.com/lordmathis/llamactl
+
+theme:
+  name: material
+  palette:
+    # Palette toggle for light mode
+    - scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Palette toggle for dark mode
+    - scheme: slate
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+  features:
+    - navigation.tabs
+    - navigation.sections
+    - navigation.expand
+    - navigation.top
+    - search.highlight
+    - search.share
+    - content.code.copy
+
+markdown_extensions:
+  - pymdownx.highlight:
+      anchor_linenums: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  - pymdownx.superfences
+  - admonition
+  - pymdownx.details
+  - pymdownx.tabbed:
+      alternate_style: true
+  - attr_list
+  - md_in_html
+  - toc:
+      permalink: true
+
+nav:
+  - Home: index.md
+  - Getting Started:
+      - Installation: getting-started/installation.md
+      - Quick Start: getting-started/quick-start.md
+      - Configuration: getting-started/configuration.md
+  - User Guide:
+      - Managing Instances: user-guide/managing-instances.md
+      - Web UI: user-guide/web-ui.md
+      - API Reference: user-guide/api-reference.md
+  - Advanced:
+      - Backends: advanced/backends.md
+      - Monitoring: advanced/monitoring.md
+      - Troubleshooting: advanced/troubleshooting.md
+  - Development:
+      - Contributing: development/contributing.md
+      - Building from Source: development/building.md
+
+plugins:
+  - search
+  - git-revision-date-localized
+
+extra:
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/lordmathis/llamactl