diff --git a/CLAUDE.md b/CLAUDE.md index f335a91..621a353 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,7 +34,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_ "command": "python", "args": ["/path/to/ai-skills-api/mcp/skills.py"], "env": { - "SKILLS_API_URL": "http://localhost:8080" + "SKILLS_API_URL": "http://helm:8675" } } } diff --git a/README.md b/README.md index 6acc982..3c1e547 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ pip install -r requirements.txt uvicorn main:app --reload ``` -API available at `http://localhost:8080` -Docs at `http://localhost:8080/docs` +API available at `http://helm:8675` +Docs at `http://helm:8675/docs` ## Endpoints @@ -52,7 +52,7 @@ Docs at `http://localhost:8080/docs` ### Create a skill ```bash -curl -X POST http://localhost:8080/skills \ +curl -X POST http://helm:8675/skills \ -H "Content-Type: application/json" \ -d '{ "id": "homelab-docker-compose", @@ -65,12 +65,12 @@ curl -X POST http://localhost:8080/skills \ ### Get context bundle ```bash -curl "http://localhost:8080/context?project=/home/server/apps/media-server&skills=homelab-docker-compose,react-v2" +curl "http://helm:8675/context?project=/home/server/apps/media-server&skills=homelab-docker-compose,react-v2" ``` ### Check cache ```bash -curl -X POST http://localhost:8080/cache/lookup \ +curl -X POST http://helm:8675/cache/lookup \ -H "Content-Type: application/json" \ -d '{ "prompt": "How do I configure traefik?", diff --git a/TOKEN-SAVING-PATTERN.md b/TOKEN-SAVING-PATTERN.md index 493a106..333c513 100644 --- a/TOKEN-SAVING-PATTERN.md +++ b/TOKEN-SAVING-PATTERN.md @@ -11,14 +11,14 @@ This is what actually reduces API consumption. 
```bash # First ask (miss - hits API) -curl -X POST http://localhost:8080/cache/semantic-lookup \ +curl -X POST http://helm:8675/cache/semantic-lookup \ -H "Content-Type: application/json" \ -d '{"prompt": "How do I setup Traefik?", "model": "claude-3-opus"}' # Response: {"hit": false} # -> Call LLM, get response # -> Store response: -curl -X POST http://localhost:8080/cache/semantic-store \ +curl -X POST http://helm:8675/cache/semantic-store \ -H "Content-Type: application/json" \ -d '{ "prompt": "How do I setup Traefik?", @@ -29,7 +29,7 @@ curl -X POST http://localhost:8080/cache/semantic-store \ }' # Second ask, slightly different (HIT - no API call) -curl -X POST http://localhost:8080/cache/semantic-lookup \ +curl -X POST http://helm:8675/cache/semantic-lookup \ -H "Content-Type: application/json" \ -d '{"prompt": "Traefik setup help", "model": "claude-3-opus"}' @@ -50,8 +50,8 @@ -curl "http://localhost:8080/context?project=/opt/home-server" +curl "http://helm:8675/context?project=/opt/home-server" # Returns: 50 skills, 10 conventions = ~3000 tokens # RAG endpoint - returns only relevant -curl "http://localhost:8080/context/rag?query=How+do+I+setup+Docker+Compose&project=/opt/home-server" +curl "http://helm:8675/context/rag?query=How+do+I+setup+Docker+Compose&project=/opt/home-server" # Returns: 3 skills about Docker, 2 conventions = ~600 tokens ``` @@ -66,7 +66,7 @@ curl "http://localhost:8080/context/rag?query=How+do+I+setup+Docker+Compose&proj ```bash # Compress a long conversation -curl -X POST http://localhost:8080/compress \ +curl -X POST http://helm:8675/compress \ -H "Content-Type: application/json" \ -d '{ "messages": [...], # Your conversation history @@ -95,7 +95,7 @@ curl -X POST http://localhost:8080/compress \ async def query_llm(prompt, conversation_history, project=None): # 1. 
Check semantic cache FIRST cache_result = await httpx.post( - "http://localhost:8080/cache/semantic-lookup", + "http://helm:8675/cache/semantic-lookup", json={"prompt": prompt, "model": "claude-3-opus"} ) @@ -105,13 +105,13 @@ async def query_llm(prompt, conversation_history, project=None): # 2. Get ONLY relevant context (not everything) context = await httpx.get( - "http://localhost:8080/context/rag", + "http://helm:8675/context/rag", params={"query": prompt, "project": project} ) # 3. Compress conversation history compressed = await httpx.post( - "http://localhost:8080/compress", + "http://helm:8675/compress", json={"messages": conversation_history, "keep_last_n": 3} ) @@ -130,7 +130,7 @@ async def query_llm(prompt, conversation_history, project=None): # 6. Store in semantic cache await httpx.post( - "http://localhost:8080/cache/semantic-store", + "http://helm:8675/cache/semantic-store", json={ "prompt": prompt, "response": response, diff --git a/docker-compose.yml b/docker-compose.yml index af8aac4..26f84a7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ services: api: build: . ports: - - "8080:8080" + - "8675:8080" environment: - DATABASE_URL=sqlite+aiosqlite:///./ai.db volumes: diff --git a/examples/seed-data.py b/examples/seed-data.py index 846f819..b347a70 100644 --- a/examples/seed-data.py +++ b/examples/seed-data.py @@ -3,7 +3,7 @@ import httpx -BASE_URL = "http://localhost:8080" +BASE_URL = "http://helm:8675" SKILLS = [ { @@ -214,7 +214,7 @@ def seed(): except Exception as e: print(f" ✗ {snippet['id']}: {e}") - print("\nDone! Check http://localhost:8080/docs") + print("\nDone! 
Check http://helm:8675/docs") if __name__ == "__main__": diff --git a/mcp/.env.example b/mcp/.env.example index 1937ed1..743ddb7 100644 --- a/mcp/.env.example +++ b/mcp/.env.example @@ -1,2 +1,2 @@ -SKILLS_API_URL=http://localhost:8080 +SKILLS_API_URL=http://helm:8675 GAME_SERVERS_DIR=/opt/game-servers diff --git a/mcp/docker-compose.yml b/mcp/docker-compose.yml index 8ef8398..b8dbce2 100644 --- a/mcp/docker-compose.yml +++ b/mcp/docker-compose.yml @@ -29,7 +29,7 @@ services: dockerfile: mcp/Dockerfile command: python skills.py environment: - - SKILLS_API_URL=http://host.docker.internal:8080 + - SKILLS_API_URL=http://host.docker.internal:8675 extra_hosts: - "host.docker.internal:host-gateway" network_mode: host diff --git a/mcp/skills.py b/mcp/skills.py index 151aef6..e4e1cc8 100644 --- a/mcp/skills.py +++ b/mcp/skills.py @@ -4,7 +4,7 @@ import os mcp = FastMCP("skills") -SKILLS_API_URL = os.getenv("SKILLS_API_URL", "http://localhost:8080") +SKILLS_API_URL = os.getenv("SKILLS_API_URL", "http://helm:8675") @mcp.tool()