Add Ollama service to docker-compose, expand seed skills with D&D and monitoring, create entrypoint for auto-model-pull
This commit is contained in:
parent
e4dd4da188
commit
6853999534
6 changed files with 208 additions and 5 deletions
10
Dockerfile
10
Dockerfile
|
|
@ -6,9 +6,15 @@ COPY requirements.txt .
|
|||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
# Install entrypoint deps (curl for health check)
|
||||
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY entrypoint.sh /entrypoint.sh
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
||||
USER appuser
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@ rag:
|
|||
# Compression settings
|
||||
compression:
|
||||
enabled: true
|
||||
strategy: "extractive" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
|
||||
strategy: "ollama" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
|
||||
keep_last_n: 3 # Number of recent exchanges to keep uncompressed
|
||||
max_tokens: 2000 # Target token budget for conversation history
|
||||
ollama_model: "phi3:mini" # Only used if strategy is "ollama"
|
||||
ollama_url: "http://localhost:11434" # Ollama API endpoint
|
||||
ollama_url: "http://ollama:11434" # Ollama API endpoint (uses docker service name)
|
||||
|
||||
# Authentication (set and forget - simple API key)
|
||||
auth:
|
||||
|
|
|
|||
|
|
@ -8,9 +8,25 @@ services:
|
|||
volumes:
|
||||
- ./data:/app/data
|
||||
- ./config.yaml:/app/config.yaml:ro
|
||||
depends_on:
|
||||
- ollama
|
||||
restart: unless-stopped
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
volumes:
|
||||
- ./ollama:/root/.ollama
|
||||
ports:
|
||||
- "11434:11434" # Optional: expose for debugging
|
||||
restart: unless-stopped
|
||||
command: serve
|
||||
healthcheck:
|
||||
# The ollama/ollama image does not ship curl, so probe with the bundled CLI
# (it talks to the local server and exits non-zero when it is unreachable).
test: ["CMD", "ollama", "list"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
|
|
|
|||
76
entrypoint.sh
Normal file
76
entrypoint.sh
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Entrypoint for AI Skills API.
|
||||
Ensures Ollama model is available if compression uses ollama.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import logging
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger("entrypoint")
|
||||
|
||||
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Wait for the Ollama service to be ready.

    Polls ``GET {ollama_url}/api/tags`` until it answers with HTTP 200 or
    ``timeout`` seconds have elapsed.

    Args:
        ollama_url: Base URL of the Ollama API, without a trailing slash.
        timeout: Maximum number of seconds to keep polling. A value of zero
            or less performs no request at all.

    Returns:
        True as soon as the endpoint answers 200, False if the deadline
        passes first.
    """
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
            logger.debug("Ollama answered HTTP %s, retrying", resp.status_code)
        except Exception as exc:
            # Best-effort probe: connection errors are expected while the
            # service boots, but keep the reason visible at debug level.
            logger.debug("Ollama not reachable yet: %s", exc)

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(2, remaining))
    return False
|
||||
|
||||
def ensure_model(model: str, ollama_url: str) -> bool:
    """Check if a model is installed in Ollama and pull it if missing.

    Args:
        model: Model name as passed to Ollama, e.g. ``"phi3:mini"``.
        ollama_url: Base URL of the Ollama API, without a trailing slash.

    Returns:
        True if the model is already listed by ``/api/tags`` or the pull
        completed without an error; False if the model list could not be
        read or the pull failed.
    """
    # Local import: only this function needs it, and the file-level import
    # block is left untouched.
    import json

    try:
        resp = httpx.get(f"{ollama_url}/api/tags")
        resp.raise_for_status()
        installed = {m["name"] for m in resp.json().get("models", [])}
    except Exception as e:
        logger.warning(f"Could not check models: {e}")
        return False

    # Ollama lists a model pulled without a tag as "<name>:latest"; accept
    # that form too. Only the last path segment can carry the tag.
    has_tag = ":" in model.rsplit("/", 1)[-1]
    candidates = {model} if has_tag else {model, f"{model}:latest"}
    if candidates & installed:
        logger.info(f"Model {model} already available")
        return True

    # Pull the model
    logger.info(f"Pulling model {model}...")
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            # stream=False asks for one final JSON object instead of a
            # progress stream whose errors arrive inside an HTTP 200 body.
            json={"name": model, "stream": False},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        # Read the last non-empty line so a server that streams anyway
        # (newline-delimited JSON) is still interpreted correctly.
        lines = [ln for ln in resp.text.splitlines() if ln.strip()]
        final = json.loads(lines[-1]) if lines else {}
    except Exception as e:
        logger.error(f"Failed to pull model {model}: {e}")
        return False

    if isinstance(final, dict) and "error" in final:
        logger.error(f"Failed to pull model {model}: {final['error']}")
        return False

    logger.info(f"Model {model} pulled successfully")
    return True
|
||||
|
||||
if __name__ == "__main__":
    # Entrypoint flow: optionally make sure the Ollama model exists, then
    # replace this process with the container command (sys.argv[1:]).
    logging.basicConfig(level=logging.INFO)

    # Fail early with a clear message instead of an IndexError at exec time.
    if len(sys.argv) < 2:
        logger.error("No command given to entrypoint; nothing to execute")
        sys.exit(2)

    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    # NOTE(review): the strategy is read only from the environment and
    # defaults to "extractive"; config.yaml is not consulted here. Confirm
    # COMPRESSION_STRATEGY is set wherever the config selects "ollama".
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")

    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)

        if not ensure_model(ollama_model, ollama_url):
            logger.warning(f"Model {ollama_model} not available, falling back to extractive")
            # Set env var to override strategy for this run; the exec'd
            # process inherits os.environ.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"

    # Execute the main command (replaces the current process; does not return)
    os.execvp(sys.argv[1], sys.argv[1:])
|
||||
|
|
@ -124,6 +124,95 @@ SKILLS = [
|
|||
- Body wraps at 72 chars
|
||||
- Reference issues/PRs when applicable""",
|
||||
"tags": ["git", "workflow", "documentation"]
|
||||
},
|
||||
{
|
||||
"id": "dnd-npc-creation",
|
||||
"name": "D&D NPC Creation",
|
||||
"category": "dnd",
|
||||
"description": "Standards for creating memorable non-player characters",
|
||||
"content": """NPC creation guidelines:
|
||||
- Give each NPC one distinctive trait (speech pattern, habit, appearance)
|
||||
- Motivation > backstory - what do they want NOW?
|
||||
- Tie NPCs to locations or other NPCs (web of connections)
|
||||
- Use the "Three Details" rule: name, appearance, mannerism
|
||||
- Avoid stereotypes; subvert expectations thoughtfully
|
||||
- Consider how they change over time (arcs aren't just for PCs)""",
|
||||
"tags": ["dnd", "npc", "character", "writing"]
|
||||
},
|
||||
{
|
||||
"id": "dnd-plot-hooks",
|
||||
"name": "D&D Plot Hook Generation",
|
||||
"category": "dnd",
|
||||
"description": "Patterns for compelling quest seeds and story hooks",
|
||||
"content": """Effective plot hooks include:
|
||||
- Personal connection to a PC's backstory
|
||||
- Urgent need (timer = engagement)
|
||||
- Moral ambiguity (not just "kill monsters")
|
||||
- Mystery with multiple potential solutions
|
||||
- Hook should lead to 3+ possible directions
|
||||
- Include a "weird" element to spark curiosity
|
||||
- Avoid railroading; present options, not one path""",
|
||||
"tags": ["dnd", "plot", "quest", "writing"]
|
||||
},
|
||||
{
|
||||
"id": "homelab-backup-strategy",
|
||||
"name": "Home Lab Backup Standards",
|
||||
"category": "homelab",
|
||||
"description": "Reliable backup patterns for self-hosted services",
|
||||
"content": """Backup best practices:
|
||||
- 3-2-1 rule: 3 copies, 2 media types, 1 offsite
|
||||
- Use Borg/Restic with deduplication and encryption
|
||||
- Test restores quarterly (backup is worthless without verification)
|
||||
- Backup databases with point-in-time recovery (WAL for Postgres)
|
||||
- Store backups on different physical disks than production
|
||||
- Automate with systemd timers or cron, monitor failures
|
||||
- Document restore procedures in runbooks""",
|
||||
"tags": ["backup", "borg", "restic", "disaster-recovery"]
|
||||
},
|
||||
{
|
||||
"id": "homelab-monitoring",
|
||||
"name": "Home Lab Monitoring Stack",
|
||||
"category": "homelab",
|
||||
"description": "Prometheus + Grafana + Alertmanager setup patterns",
|
||||
"content": """Monitoring standards:
|
||||
- Prometheus scrapes metrics from all services (expose /metrics endpoint)
|
||||
- Grafana dashboards for: system resources, app metrics, business KPIs
|
||||
- Alertmanager with tiered alerts: info/warning/critical
|
||||
- Use node_exporter for host metrics, docker_exporter for containers
|
||||
- Retention: 30 days for warnings, 90 days for critical, 1 year for compliance
|
||||
- Set up blackbox exporters for external uptime monitoring
|
||||
- Document runbooks for each critical alert""",
|
||||
"tags": ["monitoring", "prometheus", "grafana", "observability"]
|
||||
},
|
||||
{
|
||||
"id": "python-testing-pytest",
|
||||
"name": "Python Testing with pytest",
|
||||
"category": "coding",
|
||||
"description": "Comprehensive pytest patterns and practices",
|
||||
"content": """Testing standards:
|
||||
- Use pytest fixtures with function scope for isolation
|
||||
- Test one behavior per test function (single responsibility)
|
||||
- Use descriptive test names that explain the expectation
|
||||
- Mock external services (HTTP, DB) with pytest-mock
|
||||
- Parameterize tests for multiple input combinations
|
||||
- Aim for 80%+ coverage, but prioritize critical paths
|
||||
- Use hypothesis for property-based testing on complex logic""",
|
||||
"tags": ["python", "testing", "pytest", "tdd"]
|
||||
},
|
||||
{
|
||||
"id": "docker-security",
|
||||
"name": "Docker Security Hardening",
|
||||
"category": "security",
|
||||
"description": "Security best practices for containerized applications",
|
||||
"content": """Docker security checklist:
|
||||
- Use distroless or alpine base images (minimal attack surface)
|
||||
- Run as non-root user (USER directive in Dockerfile)
|
||||
- Scan images with trivy or grype in CI
|
||||
- Use read-only filesystems where possible (volumes for writes)
|
||||
- Drop capabilities you don't need (--cap-drop ALL, then add back)
|
||||
- Never store secrets in images - use Docker secrets or env files
|
||||
- Keep base images updated (automate with Renovate/Dependabot)""",
|
||||
"tags": ["docker", "security", "hardening"]
|
||||
}
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -9,4 +9,20 @@ services:
|
|||
volumes:
|
||||
- ./logs:/app/logs
|
||||
restart: unless-stopped
|
||||
# Add your agent's specific configuration here
|
||||
depends_on:
|
||||
- skills-api
|
||||
- ollama
|
||||
|
||||
# Only needed if you want compression to use Ollama
|
||||
# The main skills-api already includes Ollama if you use the full-stack compose
|
||||
ollama:
|
||||
image: ollama/ollama:latest
|
||||
volumes:
|
||||
- ./ollama:/root/.ollama
|
||||
restart: unless-stopped
|
||||
command: serve
|
||||
healthcheck:
|
||||
# The ollama/ollama image does not ship curl, so probe with the bundled CLI
# (it talks to the local server and exits non-zero when it is unreachable).
test: ["CMD", "ollama", "list"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
Loading…
Add table
Reference in a new issue