ai-skills-api/entrypoint.sh

76 lines
No EOL
2.4 KiB
Bash

#!/usr/bin/env python3
"""
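Entrypoint for the AI Skills API.

When COMPRESSION_STRATEGY is "ollama", waits for the Ollama server and makes
sure the configured model is installed (pulling it if missing), falling back
to the "extractive" strategy if the model cannot be made available. It then
replaces this process with the command given as arguments.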
"""
import os
import sys
import time
import logging
import httpx
logger = logging.getLogger("entrypoint")
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Wait for the Ollama service to answer on its tags endpoint.

    Polls ``GET {ollama_url}/api/tags`` until it returns HTTP 200 or the
    time budget is used up.

    Args:
        ollama_url: Base URL of the Ollama server, without a trailing slash.
        timeout: Overall time budget in seconds for the whole wait. A value
            of zero or less returns False without probing.

    Returns:
        True as soon as a probe returns status 200, False if the budget
        runs out first.
    """
    poll_interval = 2  # seconds between probes

    # A monotonic clock keeps the budget correct even if the wall clock is
    # adjusted while we wait (e.g. a time sync during container start-up).
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
        except Exception as exc:
            # Best-effort probe: any failure just means "not ready yet", but
            # record the reason so a wrong URL can be diagnosed at DEBUG level.
            logger.debug("Ollama not reachable yet: %s", exc)
        else:
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
            logger.debug("Ollama probe returned HTTP %s", resp.status_code)

        # Do not sleep past the deadline once the budget is exhausted.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))
    return False
def ensure_model(model: str, ollama_url: str) -> bool:
    """Make sure ``model`` is installed on the Ollama server, pulling it if not.

    Args:
        model: Model name as passed to Ollama, e.g. ``"phi3:mini"``. A name
            without a tag is also compared as ``<name>:latest`` against the
            installed list.
        ollama_url: Base URL of the Ollama server, without a trailing slash.

    Returns:
        True if the model is already installed or was pulled successfully;
        False if the installed list could not be read or the pull failed.
    """
    # Ollama lists installed models with an explicit tag, so an untagged
    # request such as "phi3" must also be matched as "phi3:latest". Only the
    # last path segment is inspected so a registry "host:port/" prefix is not
    # mistaken for a tag.
    has_tag = ":" in model.rsplit("/", 1)[-1]
    wanted = {model} if has_tag else {model, f"{model}:latest"}

    try:
        resp = httpx.get(f"{ollama_url}/api/tags")
        resp.raise_for_status()
        installed = {m["name"] for m in resp.json().get("models", [])}
    except Exception as e:
        # Broad on purpose: the caller treats False as "fall back", so a
        # malformed reply must not crash the entrypoint.
        logger.warning("Could not check models: %s", e)
        return False

    if wanted & installed:
        logger.info("Model %s already available", model)
        return True

    # Pull the model
    logger.info("Pulling model %s...", model)
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            # In the default streaming mode the server answers 200 right away
            # and reports failures inside the stream, so raise_for_status()
            # would not notice them. A non-streaming pull returns a single
            # JSON object once the pull has finished.
            json={"name": model, "stream": False},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        try:
            payload = resp.json()
        except ValueError:
            # Body is not a single JSON document; rely on the HTTP status.
            payload = None
        error = payload.get("error") if isinstance(payload, dict) else None
        if error:
            logger.error("Failed to pull model %s: %s", model, error)
            return False
        logger.info("Model %s pulled successfully", model)
        return True
    except Exception as e:
        logger.error("Failed to pull model %s: %s", model, e)
        return False
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Without a command there is nothing to hand control to. Fail up front
    # with a usage message instead of an IndexError on sys.argv[1] after the
    # Ollama checks have already run.
    if len(sys.argv) < 2:
        logger.error("Usage: %s COMMAND [ARGS...]", sys.argv[0])
        sys.exit(2)

    # Configuration comes from the environment, with these defaults.
    ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
    compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
    ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")

    if compression_strategy == "ollama":
        logger.info("Compression uses Ollama, checking model availability...")
        # An unreachable Ollama server is fatal for this strategy.
        if not wait_for_ollama(ollama_url):
            logger.error("Ollama not ready after timeout")
            sys.exit(1)
        # A missing model is not fatal: downgrade the strategy instead.
        if not ensure_model(ollama_model, ollama_url):
            logger.warning(
                "Model %s not available, falling back to extractive",
                ollama_model,
            )
            # Set env var to override strategy for this run; the command
            # exec'd below inherits this process's environment.
            os.environ["COMPRESSION_STRATEGY"] = "extractive"

    # Execute the main command, replacing this process (does not return).
    os.execvp(sys.argv[1], sys.argv[1:])