Add Ollama service to docker-compose, expand seed skills with D&D and monitoring, create entrypoint for auto-model-pull

This commit is contained in:
Lukas Parsons 2026-03-22 22:41:49 -04:00
parent e4dd4da188
commit 6853999534
6 changed files with 208 additions and 5 deletions

View file

@ -6,9 +6,15 @@ COPY requirements.txt .
# Install Python dependencies first so this layer caches across code-only changes
RUN pip install --no-cache-dir -r requirements.txt
# Copy application source into the image
COPY . .
# Install entrypoint deps (curl for health check)
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
# Entrypoint script (Python, despite the .sh extension — shebang is python3);
# it waits for Ollama and pre-pulls the compression model before exec'ing CMD
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
# Run as an unprivileged user; chown so the app can write under /app
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser
EXPOSE 8080
# ENTRYPOINT wraps CMD: entrypoint.sh performs setup, then exec's the uvicorn command
ENTRYPOINT ["/entrypoint.sh"]
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

View file

@ -20,11 +20,11 @@ rag:
# Compression settings
compression:
enabled: true
strategy: "extractive" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
strategy: "ollama" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
keep_last_n: 3 # Number of recent exchanges to keep uncompressed
max_tokens: 2000 # Target token budget for conversation history
ollama_model: "phi3:mini" # Only used if strategy is "ollama"
ollama_url: "http://localhost:11434" # Ollama API endpoint
ollama_url: "http://ollama:11434" # Ollama API endpoint (uses docker service name)
# Authentication (set and forget - simple API key)
auth:

View file

@ -8,9 +8,25 @@ services:
volumes:
- ./data:/app/data
- ./config.yaml:/app/config.yaml:ro
depends_on:
- ollama
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
interval: 30s
timeout: 10s
retries: 3
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  ports:
    - "11434:11434"  # Optional: expose for debugging
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image ships neither curl nor wget, so a curl-based
    # check always fails. The bundled ollama CLI queries the local API and
    # exits non-zero when the server is unreachable — use it as the probe.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
    # Give the server time to initialize before counting failures
    start_period: 15s

76
entrypoint.sh Normal file
View file

@ -0,0 +1,76 @@
#!/usr/bin/env python3
"""
Entrypoint for AI Skills API.
Ensures Ollama model is available if compression uses ollama.
"""
import os
import sys
import time
import logging
import httpx
logger = logging.getLogger("entrypoint")
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Poll Ollama's /api/tags endpoint until it responds or the deadline passes.

    Args:
        ollama_url: Base URL of the Ollama server, e.g. "http://ollama:11434".
        timeout: Maximum number of seconds to keep retrying.

    Returns:
        True once the endpoint answers HTTP 200, False if the deadline expires.
    """
    # time.monotonic() is immune to system clock adjustments (NTP steps),
    # unlike time.time(), so the deadline cannot jump forward or backward.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
        except Exception as exc:
            # Connection refused is expected while the service boots;
            # log at debug instead of silently discarding the reason.
            logger.debug(f"Ollama not ready yet: {exc}")
        time.sleep(2)
    return False
def ensure_model(model: str, ollama_url: str) -> bool:
    """Check if a model is installed in Ollama, pulling it if missing.

    Args:
        model: Model name, e.g. "phi3:mini". Ollama stores untagged names
            with an implicit ":latest" tag, so a bare "phi3" is also matched
            against "phi3:latest" to avoid re-pulling on every boot.
        ollama_url: Base URL of the Ollama server.

    Returns:
        True if the model is (or becomes) available, False otherwise.
    """
    # Normalize for comparison: /api/tags reports "name:tag" entries.
    wanted = model if ":" in model else f"{model}:latest"
    try:
        resp = httpx.get(f"{ollama_url}/api/tags", timeout=10)
        resp.raise_for_status()
        installed = {m["name"] for m in resp.json().get("models", [])}
        if model in installed or wanted in installed:
            logger.info(f"Model {model} already available")
            return True
    except Exception as e:
        # Can't even list models — the server is unusable; don't attempt a pull.
        logger.warning(f"Could not check models: {e}")
        return False

    # Model missing: ask Ollama to pull it (may download several GB).
    logger.info(f"Pulling model {model}...")
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            json={"name": model},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        logger.info(f"Model {model} pulled successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to pull model {model}: {e}")
        return False
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Guard: os.execvp(sys.argv[1], ...) would raise IndexError if the
    # container were started with no CMD — fail with a clear message instead.
    if len(sys.argv) < 2:
        logger.error("No command given to entrypoint; nothing to exec")
        sys.exit(2)

    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")

    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)
        if not ensure_model(ollama_model, ollama_url):
            logger.warning(f"Model {ollama_model} not available, falling back to extractive")
            # Set env var to override strategy for this run; execvp preserves
            # os.environ, so the child process sees the override.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"

    # Replace this process with the real server command (keeps PID 1 semantics
    # so signals from Docker reach uvicorn directly).
    os.execvp(sys.argv[1], sys.argv[1:])

View file

@ -124,6 +124,95 @@ SKILLS = [
- Body wraps at 72 chars
- Reference issues/PRs when applicable""",
"tags": ["git", "workflow", "documentation"]
},
{
"id": "dnd-npc-creation",
"name": "D&D NPC Creation",
"category": "dnd",
"description": "Standards for creating memorable non-player characters",
"content": """NPC creation guidelines:
- Give each NPC one distinctive trait (speech pattern, habit, appearance)
- Motivation > backstory - what do they want NOW?
- Tie NPCs to locations or other NPCs (web of connections)
- Use the "Three Details" rule: name, appearance, mannerism
- Avoid stereotypes; subvert expectations thoughtfully
- Consider how they change over time (arcs aren't just for PCs)""",
"tags": ["dnd", "npc", "character", "writing"]
},
{
"id": "dnd-plot-hooks",
"name": "D&D Plot Hook Generation",
"category": "dnd",
"description": "Patterns for compelling quest seeds and story hooks",
"content": """Effective plot hooks include:
- Personal connection to a PC's backstory
- Urgent need (timer = engagement)
- Moral ambiguity (not just "kill monsters")
- Mystery with multiple potential solutions
- Hook should lead to 3+ possible directions
- Include a "weird" element to spark curiosity
- Avoid railroading; present options, not one path""",
"tags": ["dnd", "plot", "quest", "writing"]
},
{
"id": "homelab-backup-strategy",
"name": "Home Lab Backup Standards",
"category": "homelab",
"description": "Reliable backup patterns for self-hosted services",
"content": """Backup best practices:
- 3-2-1 rule: 3 copies, 2 media types, 1 offsite
- Use Borg/Restic with deduplication and encryption
- Test restores quarterly (backup is worthless without verification)
- Backup databases with point-in-time recovery (WAL for Postgres)
- Store backups on different physical disks than production
- Automate with systemd timers or cron, monitor failures
- Document restore procedures in runbooks""",
"tags": ["backup", "borg", "restic", "disaster-recovery"]
},
{
"id": "homelab-monitoring",
"name": "Home Lab Monitoring Stack",
"category": "homelab",
"description": "Prometheus + Grafana + Alertmanager setup patterns",
"content": """Monitoring standards:
- Prometheus scrapes metrics from all services (expose /metrics endpoint)
- Grafana dashboards for: system resources, app metrics, business KPIs
- Alertmanager with tiered alerts: info/warning/critical
- Use node_exporter for host metrics, docker_exporter for containers
- Retention: 30 days for warnings, 90 days for critical, 1 year for compliance
- Set up blackbox exporters for external uptime monitoring
- Document runbooks for each critical alert""",
"tags": ["monitoring", "prometheus", "grafana", "observability"]
},
{
"id": "python-testing-pytest",
"name": "Python Testing with pytest",
"category": "coding",
"description": "Comprehensive pytest patterns and practices",
"content": """Testing standards:
- Use pytest fixtures with function scope for isolation
- Test one behavior per test function (single responsibility)
- Use descriptive test names that explain the expectation
- Mock external services (HTTP, DB) with pytest-mock
- Parameterize tests for multiple input combinations
- Aim for 80%+ coverage, but prioritize critical paths
- Use hypothesis for property-based testing on complex logic""",
"tags": ["python", "testing", "pytest", "tdd"]
},
{
"id": "docker-security",
"name": "Docker Security Hardening",
"category": "security",
"description": "Security best practices for containerized applications",
"content": """Docker security checklist:
- Use distroless or alpine base images (minimal attack surface)
- Run as non-root user (USER directive in Dockerfile)
- Scan images with trivy or grype in CI
- Use read-only filesystems where possible (volumes for writes)
- Drop capabilities you don't need (--cap-drop ALL, then add back)
- Never store secrets in images - use Docker secrets or env files
- Keep base images updated (automate with Renovate/Dependabot)""",
"tags": ["docker", "security", "hardening"]
}
]

View file

@ -9,4 +9,20 @@ services:
volumes:
- ./logs:/app/logs
restart: unless-stopped
# Add your agent's specific configuration here
depends_on:
- skills-api
- ollama
# Only needed if you want compression to use Ollama
# The main skills-api already includes Ollama if you use the full-stack compose
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image ships neither curl nor wget, so a curl-based
    # check always fails. The bundled ollama CLI queries the local API and
    # exits non-zero when the server is unreachable — use it as the probe.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
    # Give the server time to initialize before counting failures
    start_period: 15s