#!/usr/bin/env python3
"""
Entrypoint for AI Skills API.

Ensures Ollama model is available if compression uses ollama.
"""
import logging
import os
import sys
import time

import httpx

logger = logging.getLogger("entrypoint")
def wait_for_ollama(ollama_url: str, timeout: int = 30) -> bool:
    """Poll Ollama until it answers or the timeout elapses.

    Args:
        ollama_url: Base URL of the Ollama service (no trailing slash).
        timeout: Maximum seconds to wait before giving up.

    Returns:
        True once ``GET /api/tags`` returns HTTP 200, False on timeout.
    """
    # monotonic() is immune to wall-clock adjustments (NTP, DST), which
    # time.time() is not — a backwards clock jump would extend the wait.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            resp = httpx.get(f"{ollama_url}/api/tags", timeout=5)
            if resp.status_code == 200:
                logger.info("Ollama is ready")
                return True
        except Exception as exc:
            # Expected while the service is still starting; log at debug
            # instead of silently swallowing so failures are diagnosable.
            logger.debug("Ollama not ready yet: %s", exc)
        time.sleep(2)  # back off between probes
    return False
def ensure_model(model: str, ollama_url: str) -> bool:
    """Ensure *model* is installed in Ollama, pulling it if missing.

    Args:
        model: Ollama model name, e.g. ``"phi3:mini"`` or ``"phi3"``.
        ollama_url: Base URL of the Ollama service (no trailing slash).

    Returns:
        True if the model is available (already installed or pulled
        successfully), False if the check or the pull failed.
    """
    try:
        resp = httpx.get(f"{ollama_url}/api/tags", timeout=10)
        resp.raise_for_status()
        models = [m["name"] for m in resp.json().get("models", [])]
        # Ollama normalizes untagged names to "<name>:latest" in /api/tags;
        # accept both forms so "phi3" matches "phi3:latest" and we don't
        # re-pull an already-installed model on every boot.
        if model in models or f"{model}:latest" in models:
            logger.info("Model %s already available", model)
            return True
    except Exception as exc:
        logger.warning("Could not check models: %s", exc)
        return False

    # Model missing — pull it. "stream": False makes /api/pull respond with
    # a single final status object instead of an NDJSON progress stream
    # (which would return 200 as soon as streaming starts, even on failure).
    logger.info("Pulling model %s...", model)
    try:
        resp = httpx.post(
            f"{ollama_url}/api/pull",
            json={"name": model, "stream": False},
            timeout=600,  # 10 minutes max for pull
        )
        resp.raise_for_status()
        logger.info("Model %s pulled successfully", model)
        return True
    except Exception as exc:
        logger.error("Failed to pull model %s: %s", model, exc)
        return False
if __name__ == "__main__":
|
|
logging.basicConfig(level=logging.INFO)
|
|
|
|
ollama_url = os.getenv("OLLAMA_URL", "http://ollama:11434")
|
|
compression_strategy = os.getenv("COMPRESSION_STRATEGY", "extractive")
|
|
ollama_model = os.getenv("OLLAMA_MODEL", "phi3:mini")
|
|
|
|
if compression_strategy == "ollama":
|
|
logger.info("Compression uses Ollama, checking model availability...")
|
|
if not wait_for_ollama(ollama_url):
|
|
logger.error("Ollama not ready after timeout")
|
|
sys.exit(1)
|
|
|
|
if not ensure_model(ollama_model, ollama_url):
|
|
logger.warning(f"Model {ollama_model} not available, falling back to extractive")
|
|
# Set env var to override strategy for this run
|
|
os.environ["COMPRESSION_STRATEGY"] = "extractive"
|
|
|
|
# Execute the main command
|
|
os.execvp(sys.argv[1], sys.argv[1:]) |