Add Ollama service to docker-compose, expand seed skills with D&D and monitoring, create entrypoint for auto-model-pull
This commit is contained in:
parent
e4dd4da188
commit
6853999534
6 changed files with 208 additions and 5 deletions
10
Dockerfile
10
Dockerfile
|
|
@ -6,9 +6,15 @@ COPY requirements.txt .
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
# Install entrypoint deps (curl for health check)
|
||||||
|
RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY entrypoint.sh /entrypoint.sh
|
||||||
|
RUN chmod +x /entrypoint.sh
|
||||||
|
|
||||||
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
|
||||||
USER appuser
|
USER appuser
|
||||||
|
|
||||||
EXPOSE 8080
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
|
|
||||||
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
|
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]
|
||||||
|
|
|
||||||
|
|
@ -20,11 +20,11 @@ rag:
|
||||||
# Compression settings
|
# Compression settings
|
||||||
compression:
|
compression:
|
||||||
enabled: true
|
enabled: true
|
||||||
strategy: "extractive" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
|
strategy: "ollama" # "extractive" (sumy), "ollama" (phi-3-mini), or "none"
|
||||||
keep_last_n: 3 # Number of recent exchanges to keep uncompressed
|
keep_last_n: 3 # Number of recent exchanges to keep uncompressed
|
||||||
max_tokens: 2000 # Target token budget for conversation history
|
max_tokens: 2000 # Target token budget for conversation history
|
||||||
ollama_model: "phi3:mini" # Only used if strategy is "ollama"
|
ollama_model: "phi3:mini" # Only used if strategy is "ollama"
|
||||||
ollama_url: "http://localhost:11434" # Ollama API endpoint
|
ollama_url: "http://ollama:11434" # Ollama API endpoint (uses docker service name)
|
||||||
|
|
||||||
# Authentication (set and forget - simple API key)
|
# Authentication (set and forget - simple API key)
|
||||||
auth:
|
auth:
|
||||||
|
|
|
||||||
|
|
@ -8,9 +8,25 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./data:/app/data
|
- ./data:/app/data
|
||||||
- ./config.yaml:/app/config.yaml:ro
|
- ./config.yaml:/app/config.yaml:ro
|
||||||
|
depends_on:
|
||||||
|
- ollama
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
test: ["CMD", "curl", "-f", "http://localhost:8080/health"]
|
||||||
interval: 30s
|
interval: 30s
|
||||||
timeout: 10s
|
timeout: 10s
|
||||||
retries: 3
|
retries: 3
|
||||||
|
|
||||||
|
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  ports:
    - "11434:11434" # Optional: expose for debugging
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image does not bundle curl, so a curl-based probe
    # always fails. The bundled CLI talks to the local server and exits
    # non-zero when it is not reachable.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
|
||||||
|
|
|
||||||
76
entrypoint.sh
Normal file
76
entrypoint.sh
Normal file
|
|
@ -0,0 +1,76 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Entrypoint for AI Skills API.
|
||||||
|
Ensures Ollama model is available if compression uses ollama.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger("entrypoint")
|
||||||
|
|
||||||
|
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Poll Ollama's ``/api/tags`` endpoint until it answers or time runs out.

    Args:
        ollama_url: Base URL of the Ollama API (no trailing slash expected).
        timeout: Maximum number of seconds to keep polling. A value of zero
            or less returns False without sending any request.

    Returns:
        True as soon as the endpoint replies with HTTP 200, False if the
        deadline passes first.
    """
    poll_interval = 2
    # monotonic() cannot jump when the wall clock is adjusted (e.g. NTP sync
    # during container start), so the deadline stays accurate.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
            logger.debug("Ollama answered with HTTP %s, retrying", resp.status_code)
        except Exception as exc:
            # Best-effort retry loop: any failure just means "not ready yet",
            # but record it so a misconfigured URL is diagnosable.
            logger.debug("Ollama not reachable yet: %s", exc)

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))
    return False
|
||||||
|
|
||||||
|
def ensure_model(model: str, ollama_url: str) -> bool:
    """Make sure *model* is installed in Ollama, pulling it when missing.

    Args:
        model: Model name, optionally with a tag (e.g. ``phi3:mini``).
        ollama_url: Base URL of the Ollama API.

    Returns:
        True if the model is already listed or the pull succeeded; False if
        the model list could not be fetched or the pull failed.
    """
    try:
        resp = httpx.get(f"{ollama_url}/api/tags", timeout=10)
        resp.raise_for_status()
        installed = {m.get("name") for m in resp.json().get("models", [])}
    except Exception as e:
        logger.warning(f"Could not check models: {e}")
        return False

    # Ollama lists a model requested without a tag as "<name>:latest".
    tagged = model if ":" in model else f"{model}:latest"
    if model in installed or tagged in installed:
        logger.info(f"Model {model} already available")
        return True

    # Pull the model
    logger.info(f"Pulling model {model}...")
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            # stream=False asks for one final JSON object instead of a
            # progress stream, so the outcome can be inspected directly.
            json={"name": model, "stream": False},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        try:
            payload = resp.json()
        except ValueError:
            # Body was not a single JSON document; rely on the HTTP status.
            payload = {}
        # A pull can fail while still returning HTTP 200; the failure is
        # then reported through an "error" field in the body.
        if isinstance(payload, dict) and payload.get("error"):
            raise RuntimeError(payload["error"])
        logger.info(f"Model {model} pulled successfully")
        return True
    except Exception as e:
        logger.error(f"Failed to pull model {model}: {e}")
        return False
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Entrypoint flow: optionally make sure the Ollama model exists, then
    # replace this process with the command passed as arguments.
    logging.basicConfig(level=logging.INFO)

    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    # NOTE(review): the strategy is read only from the environment and
    # defaults to "extractive"; confirm the deployment exports
    # COMPRESSION_STRATEGY when Ollama compression is wanted.
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")

    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)

        if not ensure_model(ollama_model, ollama_url):
            logger.warning(f"Model {ollama_model} not available, falling back to extractive")
            # Set env var to override strategy for this run; the exec'd
            # process inherits this environment.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"

    # Execute the main command
    if len(sys.argv) < 2:
        # Without this guard a missing command surfaces as a bare IndexError.
        logger.error("No command given to entrypoint; nothing to execute")
        sys.exit(2)

    # exec* replaces the process without flushing open file objects.
    sys.stdout.flush()
    sys.stderr.flush()
    os.execvp(sys.argv[1], sys.argv[1:])
|
||||||
|
|
@ -124,6 +124,95 @@ SKILLS = [
|
||||||
- Body wraps at 72 chars
|
- Body wraps at 72 chars
|
||||||
- Reference issues/PRs when applicable""",
|
- Reference issues/PRs when applicable""",
|
||||||
"tags": ["git", "workflow", "documentation"]
|
"tags": ["git", "workflow", "documentation"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "dnd-npc-creation",
|
||||||
|
"name": "D&D NPC Creation",
|
||||||
|
"category": "dnd",
|
||||||
|
"description": "Standards for creating memorable non-player characters",
|
||||||
|
"content": """NPC creation guidelines:
|
||||||
|
- Give each NPC one distinctive trait (speech pattern, habit, appearance)
|
||||||
|
- Motivation > backstory - what do they want NOW?
|
||||||
|
- Tie NPCs to locations or other NPCs (web of connections)
|
||||||
|
- Use the "Three Details" rule: name, appearance, mannerism
|
||||||
|
- Avoid stereotypes; subvert expectations thoughtfully
|
||||||
|
- Consider how they change over time (arcs aren't just for PCs)""",
|
||||||
|
"tags": ["dnd", "npc", "character", "writing"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "dnd-plot-hooks",
|
||||||
|
"name": "D&D Plot Hook Generation",
|
||||||
|
"category": "dnd",
|
||||||
|
"description": "Patterns for compelling quest seeds and story hooks",
|
||||||
|
"content": """Effective plot hooks include:
|
||||||
|
- Personal connection to a PC's backstory
|
||||||
|
- Urgent need (timer = engagement)
|
||||||
|
- Moral ambiguity (not just "kill monsters")
|
||||||
|
- Mystery with multiple potential solutions
|
||||||
|
- Hook should lead to 3+ possible directions
|
||||||
|
- Include a "weird" element to spark curiosity
|
||||||
|
- Avoid railroading; present options, not one path""",
|
||||||
|
"tags": ["dnd", "plot", "quest", "writing"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "homelab-backup-strategy",
|
||||||
|
"name": "Home Lab Backup Standards",
|
||||||
|
"category": "homelab",
|
||||||
|
"description": "Reliable backup patterns for self-hosted services",
|
||||||
|
"content": """Backup best practices:
|
||||||
|
- 3-2-1 rule: 3 copies, 2 media types, 1 offsite
|
||||||
|
- Use Borg/Restic with deduplication and encryption
|
||||||
|
- Test restores quarterly (backup is worthless without verification)
|
||||||
|
- Backup databases with point-in-time recovery (WAL for Postgres)
|
||||||
|
- Store backups on different physical disks than production
|
||||||
|
- Automate with systemd timers or cron, monitor failures
|
||||||
|
- Document restore procedures in runbooks""",
|
||||||
|
"tags": ["backup", "borg", "restic", "disaster-recovery"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "homelab-monitoring",
|
||||||
|
"name": "Home Lab Monitoring Stack",
|
||||||
|
"category": "homelab",
|
||||||
|
"description": "Prometheus + Grafana + Alertmanager setup patterns",
|
||||||
|
"content": """Monitoring standards:
|
||||||
|
- Prometheus scrapes metrics from all services (expose /metrics endpoint)
|
||||||
|
- Grafana dashboards for: system resources, app metrics, business KPIs
|
||||||
|
- Alertmanager with tiered alerts: info/warning/critical
|
||||||
|
- Use node_exporter for host metrics, docker_exporter for containers
|
||||||
|
- Retention: 30 days for warnings, 90 days for critical, 1 year for compliance
|
||||||
|
- Set up blackbox exporters for external uptime monitoring
|
||||||
|
- Document runbooks for each critical alert""",
|
||||||
|
"tags": ["monitoring", "prometheus", "grafana", "observability"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "python-testing-pytest",
|
||||||
|
"name": "Python Testing with pytest",
|
||||||
|
"category": "coding",
|
||||||
|
"description": "Comprehensive pytest patterns and practices",
|
||||||
|
"content": """Testing standards:
|
||||||
|
- Use pytest fixtures with function scope for isolation
|
||||||
|
- Test one behavior per test function (single responsibility)
|
||||||
|
- Use descriptive test names that explain the expectation
|
||||||
|
- Mock external services (HTTP, DB) with pytest-mock
|
||||||
|
- Parameterize tests for multiple input combinations
|
||||||
|
- Aim for 80%+ coverage, but prioritize critical paths
|
||||||
|
- Use hypothesis for property-based testing on complex logic""",
|
||||||
|
"tags": ["python", "testing", "pytest", "tdd"]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "docker-security",
|
||||||
|
"name": "Docker Security Hardening",
|
||||||
|
"category": "security",
|
||||||
|
"description": "Security best practices for containerized applications",
|
||||||
|
"content": """Docker security checklist:
|
||||||
|
- Use distroless or alpine base images (minimal attack surface)
|
||||||
|
- Run as non-root user (USER directive in Dockerfile)
|
||||||
|
- Scan images with trivy or grype in CI
|
||||||
|
- Use read-only filesystems where possible (volumes for writes)
|
||||||
|
- Drop capabilities you don't need (--cap-drop ALL, then add back)
|
||||||
|
- Never store secrets in images - use Docker secrets or env files
|
||||||
|
- Keep base images updated (automate with Renovate/Dependabot)""",
|
||||||
|
"tags": ["docker", "security", "hardening"]
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,4 +9,20 @@ services:
|
||||||
volumes:
|
volumes:
|
||||||
- ./logs:/app/logs
|
- ./logs:/app/logs
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
# Add your agent's specific configuration here
|
depends_on:
|
||||||
|
- skills-api
|
||||||
|
- ollama
|
||||||
|
|
||||||
|
# Only needed if you want compression to use Ollama
|
||||||
|
# The main skills-api already includes Ollama if you use the full-stack compose
|
||||||
|
ollama:
  image: ollama/ollama:latest
  volumes:
    # Persist downloaded models across container restarts
    - ./ollama:/root/.ollama
  restart: unless-stopped
  command: serve
  healthcheck:
    # The ollama/ollama image does not bundle curl, so a curl-based probe
    # always fails. The bundled CLI talks to the local server and exits
    # non-zero when it is not reachable.
    test: ["CMD", "ollama", "list"]
    interval: 30s
    timeout: 10s
    retries: 3
|
||||||
Loading…
Add table
Reference in a new issue