Add Ollama service to docker-compose, expand seed skills with D&D and monitoring, create entrypoint for auto-model-pull

2026-03-22 22:41:49 -04:00 · 2026-03-22 22:41:49 -04:00 · 6853999534
commit 6853999534
parent e4dd4da188
6 changed files with 208 additions and 5 deletions
--- a/10
+++ b/10
@ -6,9 +6,15 @@ COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 COPY . .
 # Install entrypoint deps (curl for health check).
 # --no-install-recommends keeps the layer small by skipping optional packages;
 # the apt lists are removed in the same layer so they never reach the image.
 RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
 # Place the entrypoint at a fixed absolute path and make it executable
 COPY entrypoint.sh /entrypoint.sh
 RUN chmod +x /entrypoint.sh
 # Run the application as an unprivileged user that owns /app
 RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
 USER appuser
-EXPOSE 8080
 # Exec-form ENTRYPOINT: the CMD below is passed to /entrypoint.sh as its
 # arguments, and the entrypoint exec()s it once its startup checks are done.
+ENTRYPOINT ["/entrypoint.sh"]
 CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
--- a/config.yaml
+++ b/config.yaml
@ -20,11 +20,11 @@ rag:
 # Compression settings
 # NOTE(review): entrypoint.sh takes its strategy, model and URL from the
 # COMPRESSION_STRATEGY / OLLAMA_MODEL / OLLAMA_URL environment variables, not
 # from this file - confirm the two are kept in sync.
 compression:
  enabled: true
-  strategy: "extractive"  # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
+  strategy: "ollama"  # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
  keep_last_n: 3          # Number of recent exchanges to keep uncompressed
  max_tokens: 2000        # Target token budget for conversation history
  ollama_model: "phi3:mini"  # Only used if strategy is "ollama"
  # The host part must match the name of the Ollama service in docker-compose.yml
-  ollama_url: "http://localhost:11434"  # Ollama API endpoint
+  ollama_url: "http://ollama:11434"  # Ollama API endpoint (uses docker service name)
 # Authentication (set and forget - simple API key)
 auth:
--- a/docker-compose.yml
+++ b/docker-compose.yml
@ -8,9 +8,25 @@ services:
    volumes:
      - ./data:/app/data
      - ./config.yaml:/app/config.yaml:ro
    depends_on:
      - ollama
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
      interval: 30s
      timeout: 10s
      retries: 3
  # Local LLM server used for conversation compression
  ollama:
    # NOTE(review): ":latest" is unpinned - consider a fixed tag for reproducible deploys
    image: ollama/ollama:latest
    volumes:
      # Keep downloaded models on the host so they survive container re-creation
      - ./ollama:/root/.ollama
    ports:
      - "11434:11434"  # Optional: expose for debugging
    restart: unless-stopped
    command: serve
    healthcheck:
      # The ollama/ollama image does not ship curl, so a curl-based probe would
      # always fail; the bundled CLI queries the local server instead.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -0,0 +1,76 @@
 #!/usr/bin/env python3
 """
 Entrypoint for AI Skills API.
 Ensures Ollama model is available if compression uses ollama.
 """
 import os
 import sys
 import time
 import logging
 import httpx
 logger = logging.getLogger("entrypoint")
 def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Poll the Ollama API until it answers or the timeout elapses.

    Args:
        ollama_url: Base URL of the Ollama server, e.g. "http://ollama:11434".
        timeout: Maximum number of seconds to keep polling.

    Returns:
        True as soon as GET {ollama_url}/api/tags returns HTTP 200,
        False if that has not happened once ``timeout`` seconds have passed.
    """
    # monotonic() is unaffected by wall-clock adjustments during startup
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
            logger.debug(f"Ollama not ready yet: HTTP {resp.status_code}")
        except Exception as e:
            # Failures are expected while the service boots: keep polling, but
            # leave a trace so a wrong URL can be diagnosed from the logs.
            logger.debug(f"Ollama not ready yet: {e}")
        time.sleep(2)
    return False
 def ensure_model(model: str, ollama_url: str) -> bool:
    """Make sure ``model`` is installed on the Ollama server, pulling it if missing.

    Args:
        model: Model name, optionally with a tag (e.g. "phi3:mini").
        ollama_url: Base URL of the Ollama server.

    Returns:
        True if the model is already listed or was pulled successfully,
        False if the model list could not be read or the pull failed.
    """
    # Ollama defaults a missing tag to "latest", so an untagged request is
    # also satisfied by "<name>:latest". Only the last path segment is
    # inspected so a registry "host:port/" prefix is not mistaken for a tag.
    has_tag = ":" in model.rsplit("/", 1)[-1]
    wanted = {model} if has_tag else {model, f"{model}:latest"}
    try:
        resp = httpx.get(f"{ollama_url}/api/tags")
        resp.raise_for_status()
        # "models" may be missing or null when nothing is installed yet
        installed = {m["name"] for m in (resp.json().get("models") or [])}
        if wanted & installed:
            logger.info(f"Model {model} already available")
            return True
    except Exception as e:
        logger.warning(f"Could not check models: {e}")
        return False
    # Pull the model
    logger.info(f"Pulling model {model}...")
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            # stream=False: a single JSON reply once the pull has finished. The
            # default streamed reply reports failures inside an HTTP 200 body,
            # which raise_for_status() cannot detect.
            json={"name": model, "stream": False},
            timeout=600  # 10 minutes max for pull
        )
        resp.raise_for_status()
        error = resp.json().get("error")
        if error:
            logger.error(f"Failed to pull model {model}: {error}")
            return False
        logger.info(f"Model {model} pulled successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to pull model {model}: {e}")
        return False
 if __name__ == "__main__":
    # Startup sequence: optionally prepare the Ollama model, then replace this
    # process with the command given as arguments (the image's CMD).
    logging.basicConfig(level=logging.INFO)
    # Fail with a clear message instead of an IndexError when no command is given
    if len(sys.argv) < 2:
        logger.error("No command given; usage: entrypoint.sh COMMAND [ARGS...]")
        sys.exit(2)
    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")
    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)
        if not ensure_model(ollama_model, ollama_url):
            logger.warning(f"Model {ollama_model} not available, falling back to extractive")
            # Set env var to override strategy for this run; the exec'd
            # process inherits the modified environment.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"
    # Execute the main command (replaces this process, so nothing runs after it)
    os.execvp(sys.argv[1], sys.argv[1:])
--- a/examples/seed-data.py
+++ b/examples/seed-data.py
@ -124,6 +124,95 @@ SKILLS = [
 - Body wraps at 72 chars
 - Reference issues/PRs when applicable""",
        "tags": ["git", "workflow", "documentation"]
    },
    {
        "id": "dnd-npc-creation",
        "name": "D&D NPC Creation",
        "category": "dnd",
        "description": "Standards for creating memorable non-player characters",
        "content": """NPC creation guidelines:
 - Give each NPC one distinctive trait (speech pattern, habit, appearance)
 - Motivation > backstory - what do they want NOW?
 - Tie NPCs to locations or other NPCs (web of connections)
 - Use the "Three Details" rule: name, appearance, mannerism
 - Avoid stereotypes; subvert expectations thoughtfully
 - Consider how they change over time (arcs aren't just for PCs)""",
        "tags": ["dnd", "npc", "character", "writing"]
    },
    {
        "id": "dnd-plot-hooks",
        "name": "D&D Plot Hook Generation",
        "category": "dnd",
        "description": "Patterns for compelling quest seeds and story hooks",
        "content": """Effective plot hooks include:
 - Personal connection to a PC's backstory
 - Urgent need (timer = engagement)
 - Moral ambiguity (not just "kill monsters")
 - Mystery with multiple potential solutions
 - Hook should lead to 3+ possible directions
 - Include a "weird" element to spark curiosity
 - Avoid railroading; present options, not one path""",
        "tags": ["dnd", "plot", "quest", "writing"]
    },
    {
        "id": "homelab-backup-strategy",
        "name": "Home Lab Backup Standards",
        "category": "homelab",
        "description": "Reliable backup patterns for self-hosted services",
        "content": """Backup best practices:
 - 3-2-1 rule: 3 copies, 2 media types, 1 offsite
 - Use Borg/Restic with deduplication and encryption
 - Test restores quarterly (backup is worthless without verification)
 - Backup databases with point-in-time recovery (WAL for Postgres)
 - Store backups on different physical disks than production
 - Automate with systemd timers or cron, monitor failures
 - Document restore procedures in runbooks""",
        "tags": ["backup", "borg", "restic", "disaster-recovery"]
    },
    {
        "id": "homelab-monitoring",
        "name": "Home Lab Monitoring Stack",
        "category": "homelab",
        "description": "Prometheus + Grafana + Alertmanager setup patterns",
        "content": """Monitoring standards:
 - Prometheus scrapes metrics from all services (expose /metrics endpoint)
 - Grafana dashboards for: system resources, app metrics, business KPIs
 - Alertmanager with tiered alerts: info/warning/critical
 - Use node_exporter for host metrics, docker_exporter for containers
 - Retention: 30 days for warnings, 90 days for critical, 1 year for compliance
 - Set up blackbox exporters for external uptime monitoring
 - Document runbooks for each critical alert""",
        "tags": ["monitoring", "prometheus", "grafana", "observability"]
    },
    {
        "id": "python-testing-pytest",
        "name": "Python Testing with pytest",
        "category": "coding",
        "description": "Comprehensive pytest patterns and practices",
        "content": """Testing standards:
 - Use pytest fixtures with function scope for isolation
 - Test one behavior per test function (single responsibility)
 - Use descriptive test names that explain the expectation
 - Mock external services (HTTP, DB) with pytest-mock
 - Parameterize tests for multiple input combinations
 - Aim for 80%+ coverage, but prioritize critical paths
 - Use hypothesis for property-based testing on complex logic""",
        "tags": ["python", "testing", "pytest", "tdd"]
    },
    {
        "id": "docker-security",
        "name": "Docker Security Hardening",
        "category": "security",
        "description": "Security best practices for containerized applications",
        "content": """Docker security checklist:
 - Use distroless or alpine base images (minimal attack surface)
 - Run as non-root user (USER directive in Dockerfile)
 - Scan images with trivy or grype in CI
 - Use read-only filesystems where possible (volumes for writes)
 - Drop capabilities you don't need (--cap-drop ALL, then add back)
 - Never store secrets in images - use Docker secrets or env files
 - Keep base images updated (automate with Renovate/Dependabot)""",
        "tags": ["docker", "security", "hardening"]
    }
 ]
--- a/template/docker-compose.yml
+++ b/template/docker-compose.yml
@ -9,4 +9,20 @@ services:
    volumes:
      - ./logs:/app/logs
    restart: unless-stopped
-    # Add your agent's specific configuration here
+    depends_on:
      - skills-api
      - ollama
  # Only needed if you want compression to use Ollama
  # The main skills-api already includes Ollama if you use the full-stack compose
  ollama:
    # NOTE(review): ":latest" is unpinned - consider a fixed tag for reproducible deploys
    image: ollama/ollama:latest
    volumes:
      # Keep downloaded models on the host so they survive container re-creation
      - ./ollama:/root/.ollama
    restart: unless-stopped
    command: serve
    healthcheck:
      # The ollama/ollama image does not ship curl, so a curl-based probe would
      # always fail; the bundled CLI queries the local server instead.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3