Add Ollama service to docker-compose, expand seed skills with D&D and monitoring, create entrypoint for auto-model-pull

This commit is contained in:
Lukas Parsons 2026-03-22 22:41:49 -04:00
parent e4dd4da188
commit 6853999534
6 changed files with 208 additions and 5 deletions

View file

@ -6,9 +6,15 @@ COPY requirements.txt .
# Install Python dependencies first so this layer caches across code-only changes
RUN pip install --no-cache-dir -r requirements.txt
# Copy application source into the image
COPY . .
# Install entrypoint deps (curl for health check)
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Entrypoint script (Python, despite the .sh extension — shebang is python3);
# it waits for Ollama and pre-pulls the compression model before exec'ing CMD
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Run as an unprivileged user; chown so the app can write under /app
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE 8080
# ENTRYPOINT wraps CMD: entrypoint.sh performs setup, then exec's the uvicorn command
ENTRYPOINT ["/entrypoint.sh"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

View file

@ -20,11 +20,11 @@ rag:
# Compression settings
compression:
enabled: true
strategy: "extractive" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
strategy: "ollama" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
keep_last_n: 3 # Number of recent exchanges to keep uncompressed
max_tokens: 2000 # Target token budget for conversation history
ollama_model: "phi3:mini" # Only used if strategy is "ollama"
ollama_url: "http://localhost:11434" # Ollama API endpoint
ollama_url: "http://ollama:11434" # Ollama API endpoint (uses docker service name)
# Authentication (set and forget - simple API key)
auth:

View file

@ -8,9 +8,25 @@ services:
volumes:
- ./data:/app/data
- ./config.yaml:/app/config.yaml:ro
depends_on:
- ollama
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
interval: 30s
timeout: 10s
retries: 3
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  ports:
    - "11434:11434"  # Optional: expose for debugging
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image ships neither curl nor wget, so a curl-based
    # check always fails. The bundled ollama CLI queries the local API and
    # exits non-zero when the server is unreachable — use it as the probe.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
    # Give the server time to initialize before counting failures
    start_period: 15s

76
entrypoint.sh Normal file
View file

@ -0,0 +1,76 @@
#!/usr/bin/env python3
"""
Entrypoint for AI Skills API.
Ensures Ollama model is available if compression uses ollama.
"""
import os
import sys
import time
import logging
import httpx
logger = logging.getLogger("entrypoint")
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Poll Ollama's /api/tags endpoint until it responds or the deadline passes.

    Args:
        ollama_url: Base URL of the Ollama server, e.g. "http://ollama:11434".
        timeout: Maximum number of seconds to keep retrying.

    Returns:
        True once the endpoint answers HTTP 200, False if the deadline expires.
    """
    # time.monotonic() is immune to system clock adjustments (NTP steps),
    # unlike time.time(), so the deadline cannot jump forward or backward.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
        except Exception as exc:
            # Connection refused is expected while the service boots;
            # log at debug instead of silently discarding the reason.
            logger.debug(f"Ollama not ready yet: {exc}")
        time.sleep(2)
    return False
def ensure_model(model: str, ollama_url: str) -> bool:
    """Check if a model is installed in Ollama, pulling it if missing.

    Args:
        model: Model name, e.g. "phi3:mini". Ollama stores untagged names
            with an implicit ":latest" tag, so a bare "phi3" is also matched
            against "phi3:latest" to avoid re-pulling on every boot.
        ollama_url: Base URL of the Ollama server.

    Returns:
        True if the model is (or becomes) available, False otherwise.
    """
    # Normalize for comparison: /api/tags reports "name:tag" entries.
    wanted = model if ":" in model else f"{model}:latest"
    try:
        resp = httpx.get(f"{ollama_url}/api/tags", timeout=10)
        resp.raise_for_status()
        installed = {m["name"] for m in resp.json().get("models", [])}
        if model in installed or wanted in installed:
            logger.info(f"Model {model} already available")
            return True
    except Exception as e:
        # Can't even list models — the server is unusable; don't attempt a pull.
        logger.warning(f"Could not check models: {e}")
        return False

    # Model missing: ask Ollama to pull it (may download several GB).
    logger.info(f"Pulling model {model}...")
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            json={"name": model},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        logger.info(f"Model {model} pulled successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to pull model {model}: {e}")
        return False
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Guard: os.execvp(sys.argv[1], ...) would raise IndexError if the
    # container were started with no CMD — fail with a clear message instead.
    if len(sys.argv) < 2:
        logger.error("No command given to entrypoint; nothing to exec")
        sys.exit(2)

    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")

    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)
        if not ensure_model(ollama_model, ollama_url):
            logger.warning(f"Model {ollama_model} not available, falling back to extractive")
            # Set env var to override strategy for this run; execvp preserves
            # os.environ, so the child process sees the override.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"

    # Replace this process with the real server command (keeps PID 1 semantics
    # so signals from Docker reach uvicorn directly).
    os.execvp(sys.argv[1], sys.argv[1:])

View file

@ -124,6 +124,95 @@ SKILLS = [
- Body wraps at 72 chars
- Reference issues/PRs when applicable""",
"tags": ["git", "workflow", "documentation"]
},
{
"id": "dnd-npc-creation",
"name": "D&D NPC Creation",
"category": "dnd",
"description": "Standards for creating memorable non-player characters",
"content": """NPC creation guidelines:
- Give each NPC one distinctive trait (speech pattern, habit, appearance)
- Motivation > backstory - what do they want NOW?
- Tie NPCs to locations or other NPCs (web of connections)
- Use the "Three Details" rule: name, appearance, mannerism
- Avoid stereotypes; subvert expectations thoughtfully
- Consider how they change over time (arcs aren't just for PCs)""",
"tags": ["dnd", "npc", "character", "writing"]
},
{
"id": "dnd-plot-hooks",
"name": "D&D Plot Hook Generation",
"category": "dnd",
"description": "Patterns for compelling quest seeds and story hooks",
"content": """Effective plot hooks include:
- Personal connection to a PC's backstory
- Urgent need (timer = engagement)
- Moral ambiguity (not just "kill monsters")
- Mystery with multiple potential solutions
- Hook should lead to 3+ possible directions
- Include a "weird" element to spark curiosity
- Avoid railroading; present options, not one path""",
"tags": ["dnd", "plot", "quest", "writing"]
},
{
"id": "homelab-backup-strategy",
"name": "Home Lab Backup Standards",
"category": "homelab",
"description": "Reliable backup patterns for self-hosted services",
"content": """Backup best practices:
- 3-2-1 rule: 3 copies, 2 media types, 1 offsite
- Use Borg/Restic with deduplication and encryption
- Test restores quarterly (backup is worthless without verification)
- Backup databases with point-in-time recovery (WAL for Postgres)
- Store backups on different physical disks than production
- Automate with systemd timers or cron, monitor failures
- Document restore procedures in runbooks""",
"tags": ["backup", "borg", "restic", "disaster-recovery"]
},
{
"id": "homelab-monitoring",
"name": "Home Lab Monitoring Stack",
"category": "homelab",
"description": "Prometheus + Grafana + Alertmanager setup patterns",
"content": """Monitoring standards:
- Prometheus scrapes metrics from all services (expose /metrics endpoint)
- Grafana dashboards for: system resources, app metrics, business KPIs
- Alertmanager with tiered alerts: info/warning/critical
- Use node_exporter for host metrics, docker_exporter for containers
- Retention: 30 days for warnings, 90 days for critical, 1 year for compliance
- Set up blackbox exporters for external uptime monitoring
- Document runbooks for each critical alert""",
"tags": ["monitoring", "prometheus", "grafana", "observability"]
},
{
"id": "python-testing-pytest",
"name": "Python Testing with pytest",
"category": "coding",
"description": "Comprehensive pytest patterns and practices",
"content": """Testing standards:
- Use pytest fixtures with function scope for isolation
- Test one behavior per test function (single responsibility)
- Use descriptive test names that explain the expectation
- Mock external services (HTTP, DB) with pytest-mock
- Parameterize tests for multiple input combinations
- Aim for 80%+ coverage, but prioritize critical paths
- Use hypothesis for property-based testing on complex logic""",
"tags": ["python", "testing", "pytest", "tdd"]
},
{
"id": "docker-security",
"name": "Docker Security Hardening",
"category": "security",
"description": "Security best practices for containerized applications",
"content": """Docker security checklist:
- Use distroless or alpine base images (minimal attack surface)
- Run as non-root user (USER directive in Dockerfile)
- Scan images with trivy or grype in CI
- Use read-only filesystems where possible (volumes for writes)
- Drop capabilities you don't need (--cap-drop ALL, then add back)
- Never store secrets in images - use Docker secrets or env files
- Keep base images updated (automate with Renovate/Dependabot)""",
"tags": ["docker", "security", "hardening"]
}
]

View file

@ -9,4 +9,20 @@ services:
volumes:
- ./logs:/app/logs
restart: unless-stopped
# Add your agent's specific configuration here
depends_on:
- skills-api
- ollama
# Only needed if you want compression to use Ollama
# The main skills-api already includes Ollama if you use the full-stack compose
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image ships neither curl nor wget, so a curl-based
    # check always fails. The bundled ollama CLI queries the local API and
    # exits non-zero when the server is unreachable — use it as the probe.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
    # Give the server time to initialize before counting failures
    start_period: 15s