diff --git a/Dockerfile b/Dockerfile index 3109694..0bd68b9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,9 +6,15 @@ COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt COPY . . + +# Install entrypoint deps (curl for health check) +RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/* + +COPY entrypoint.sh /entrypoint.sh +RUN chmod +x /entrypoint.sh + RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app USER appuser -EXPOSE 8080 - +ENTRYPOINT ["/entrypoint.sh"] CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/config.yaml b/config.yaml index 6a0cce2..08fbd91 100644 --- a/config.yaml +++ b/config.yaml @@ -20,11 +20,11 @@ rag: # Compression settings compression: enabled: true - strategy: "extractive" # "extractive" (sumy), "ollama" (phi-3-mini), or "none" + strategy: "ollama" # "extractive" (sumy), "ollama" (phi-3-mini), or "none" keep_last_n: 3 # Number of recent exchanges to keep uncompressed max_tokens: 2000 # Target token budget for conversation history ollama_model: "phi3:mini" # Only used if strategy is "ollama" - ollama_url: "http://localhost:11434" # Ollama API endpoint + ollama_url: "http://ollama:11434" # Ollama API endpoint (uses docker service name) # Authentication (set and forget - simple API key) auth: diff --git a/docker-compose.yml b/docker-compose.yml index 7f28cd9..27a87cd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -8,9 +8,25 @@ services: volumes: - ./data:/app/data - ./config.yaml:/app/config.yaml:ro + depends_on: + - ollama restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8080/health"] interval: 30s timeout: 10s retries: 3 + + ollama: + image: ollama/ollama:latest + volumes: + - ./ollama:/root/.ollama + ports: + - "11434:11434" # Optional: expose for debugging + restart: unless-stopped + command: serve + healthcheck: + test: ["CMD", "ollama", "list"] + interval: 30s + timeout: 
10s + retries: 3 diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..b15a637 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env python3 +""" +Entrypoint for AI Skills API. +Ensures Ollama model is available if compression uses ollama. +""" + +import os +import sys +import time +import logging +import httpx + +logger = logging.getLogger("entrypoint") + +def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool: + """Wait for Ollama service to be ready""" + start = time.time() + while time.time() - start < timeout: + try: + resp = httpx.get(f"{ollama_url}/api/tags", timeout=5) + if resp.status_code == 200: + logger.info("Ollama is ready") + return True + except Exception: + pass + time.sleep(2) + return False + +def ensure_model(model: str, ollama_url: str) -> bool: + """Check if model is installed, pull if missing""" + try: + resp = httpx.get(f"{ollama_url}/api/tags") + resp.raise_for_status() + models = [m["name"] for m in resp.json().get("models", [])] + if model in models: + logger.info(f"Model {model} already available") + return True + except Exception as e: + logger.warning(f"Could not check models: {e}") + return False + + # Pull the model + logger.info(f"Pulling model {model}...") + try: + resp = httpx.post( + f"{ollama_url}/api/pull", + json={"name": model, "stream": False}, + timeout=600 # 10 minutes max for pull + ) + resp.raise_for_status() + logger.info(f"Model {model} pulled successfully") + return True + except Exception as e: + logger.error(f"Failed to pull model {model}: {e}") + return False + +if __name__ == "__main__": + logging.basicConfig(level=logging.INFO) + + ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434") + compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive") + ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini") + + if compression_strategy == "ollama": + logger.info("Compression uses Ollama, checking model availability...") + if not wait_for_ollama(ollama_url): + 
logger.error("Ollama not ready after timeout") + sys.exit(1) + + if not ensure_model(ollama_model, ollama_url): + logger.warning(f"Model {ollama_model} not available, falling back to extractive") + # Set env var to override strategy for this run + os.environ["COMPRESSION_STRATEGY"] = "extractive" + + # Execute the main command + os.execvp(sys.argv[1], sys.argv[1:]) \ No newline at end of file diff --git a/examples/seed-data.py b/examples/seed-data.py index b347a70..e8ebd49 100644 --- a/examples/seed-data.py +++ b/examples/seed-data.py @@ -124,6 +124,95 @@ SKILLS = [ - Body wraps at 72 chars - Reference issues/PRs when applicable""", "tags": ["git", "workflow", "documentation"] + }, + { + "id": "dnd-npc-creation", + "name": "D&D NPC Creation", + "category": "dnd", + "description": "Standards for creating memorable non-player characters", + "content": """NPC creation guidelines: +- Give each NPC one distinctive trait (speech pattern, habit, appearance) +- Motivation > backstory - what do they want NOW? 
+- Tie NPCs to locations or other NPCs (web of connections) +- Use the "Three Details" rule: name, appearance, mannerism +- Avoid stereotypes; subvert expectations thoughtfully +- Consider how they change over time (arcs aren't just for PCs)""", + "tags": ["dnd", "npc", "character", "writing"] + }, + { + "id": "dnd-plot-hooks", + "name": "D&D Plot Hook Generation", + "category": "dnd", + "description": "Patterns for compelling quest seeds and story hooks", + "content": """Effective plot hooks include: +- Personal connection to a PC's backstory +- Urgent need (timer = engagement) +- Moral ambiguity (not just "kill monsters") +- Mystery with multiple potential solutions +- Hook should lead to 3+ possible directions +- Include a "weird" element to spark curiosity +- Avoid railroading; present options, not one path""", + "tags": ["dnd", "plot", "quest", "writing"] + }, + { + "id": "homelab-backup-strategy", + "name": "Home Lab Backup Standards", + "category": "homelab", + "description": "Reliable backup patterns for self-hosted services", + "content": """Backup best practices: +- 3-2-1 rule: 3 copies, 2 media types, 1 offsite +- Use Borg/Restic with deduplication and encryption +- Test restores quarterly (backup is worthless without verification) +- Backup databases with point-in-time recovery (WAL for Postgres) +- Store backups on different physical disks than production +- Automate with systemd timers or cron, monitor failures +- Document restore procedures in runbooks""", + "tags": ["backup", "borg", "restic", "disaster-recovery"] + }, + { + "id": "homelab-monitoring", + "name": "Home Lab Monitoring Stack", + "category": "homelab", + "description": "Prometheus + Grafana + Alertmanager setup patterns", + "content": """Monitoring standards: +- Prometheus scrapes metrics from all services (expose /metrics endpoint) +- Grafana dashboards for: system resources, app metrics, business KPIs +- Alertmanager with tiered alerts: info/warning/critical +- Use node_exporter for 
host metrics, docker_exporter for containers +- Retention: 30 days for warnings, 90 days for critical, 1 year for compliance +- Set up blackbox exporters for external uptime monitoring +- Document runbooks for each critical alert""", + "tags": ["monitoring", "prometheus", "grafana", "observability"] + }, + { + "id": "python-testing-pytest", + "name": "Python Testing with pytest", + "category": "coding", + "description": "Comprehensive pytest patterns and practices", + "content": """Testing standards: +- Use pytest fixtures with function scope for isolation +- Test one behavior per test function (single responsibility) +- Use descriptive test names that explain the expectation +- Mock external services (HTTP, DB) with pytest-mock +- Parameterize tests for multiple input combinations +- Aim for 80%+ coverage, but prioritize critical paths +- Use hypothesis for property-based testing on complex logic""", + "tags": ["python", "testing", "pytest", "tdd"] + }, + { + "id": "docker-security", + "name": "Docker Security Hardening", + "category": "security", + "description": "Security best practices for containerized applications", + "content": """Docker security checklist: +- Use distroless or alpine base images (minimal attack surface) +- Run as non-root user (USER directive in Dockerfile) +- Scan images with trivy or grype in CI +- Use read-only filesystems where possible (volumes for writes) +- Drop capabilities you don't need (--cap-drop ALL, then add back) +- Never store secrets in images - use Docker secrets or env files +- Keep base images updated (automate with Renovate/Dependabot)""", + "tags": ["docker", "security", "hardening"] } ] diff --git a/template/docker-compose.yml b/template/docker-compose.yml index 80144a6..69256e4 100644 --- a/template/docker-compose.yml +++ b/template/docker-compose.yml @@ -9,4 +9,20 @@ services: volumes: - ./logs:/app/logs restart: unless-stopped - # Add your agent's specific configuration here \ No newline at end of file + 
depends_on: + - skills-api + - ollama + + # Only needed if you want compression to use Ollama + # The main skills-api already includes Ollama if you use the full-stack compose + ollama: + image: ollama/ollama:latest + volumes: + - ./ollama:/root/.ollama + restart: unless-stopped + command: serve + healthcheck: + test: ["CMD", "ollama", "list"] + interval: 30s + timeout: 10s + retries: 3 \ No newline at end of file