diff --git a/CLAUDE.md b/CLAUDE.md index f335a91..621a353 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -34,7 +34,7 @@ Add to your Claude Desktop config (`~/Library/Application Support/Claude/claude_ "command": "python", "args": ["/path/to/ai-skills-api/mcp/skills.py"], "env": { - "SKILLS_API_URL": "http://localhost:8080" + "SKILLS_API_URL": "http://helm:8675" } } } diff --git a/README.md b/README.md index 6acc982..3c1e547 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ pip install -r requirements.txt uvicorn main:app --reload ``` -API available at `http://localhost:8080` -Docs at `http://localhost:8080/docs` +API available at `http://helm:8675` +Docs at `http://helm:8675/docs` ## Endpoints @@ -52,7 +52,7 @@ Docs at `http://localhost:8080/docs` ### Create a skill ```bash -curl -X POST http://localhost:8080/skills \ +curl -X POST http://helm:8675/skills \ -H "Content-Type: application/json" \ -d '{ "id": "homelab-docker-compose", @@ -65,12 +65,12 @@ curl -X POST http://localhost:8080/skills \ ### Get context bundle ```bash -curl "http://localhost:8080/context?project=/home/server/apps/media-server&skills=homelab-docker-compose,react-v2" +curl "http://helm:8675/context?project=/home/server/apps/media-server&skills=homelab-docker-compose,react-v2" ``` ### Check cache ```bash -curl -X POST http://localhost:8080/cache/lookup \ +curl -X POST http://helm:8675/cache/lookup \ -H "Content-Type: application/json" \ -d '{ "prompt": "How do I configure traefik?", diff --git a/TOKEN-SAVING-PATTERN.md b/TOKEN-SAVING-PATTERN.md index 493a106..333c513 100644 --- a/TOKEN-SAVING-PATTERN.md +++ b/TOKEN-SAVING-PATTERN.md @@ -11,14 +11,14 @@ This is what actually reduces API consumption. 
```bash # First ask (miss - hits API) -curl -X POST http://localhost:8080/cache/semantic-lookup \ +curl -X POST http://helm:8675/cache/semantic-lookup \ -H "Content-Type: application/json" \ -d '{"prompt": "How do I setup Traefik?", "model": "claude-3-opus"}' # Response: {"hit": false} # -> Call LLM, get response # -> Store response: -curl -X POST http://localhost:8080/cache/semantic-store \ +curl -X POST http://helm:8675/cache/semantic-store \ -H "Content-Type: application/json" \ -d '{ "prompt": "How do I setup Traefik?", @@ -29,7 +29,7 @@ curl -X POST http://localhost:8080/cache/semantic-store \ }' # Second ask, slightly different (HIT - no API call) -curl -X POST http://localhost:8080/cache/semantic-lookup \ +curl -X POST http://helm:8675/cache/semantic-lookup \ -H "Content-Type: application/json" \ -d '{"prompt": "Traefik setup help", "model": "claude-3-opus"}' @@ -50,8 +50,8 @@ -curl "http://localhost:8080/context?project=/opt/home-server" +curl "http://helm:8675/context?project=/opt/home-server" # Returns: 50 skills, 10 conventions = ~3000 tokens # RAG endpoint - returns only relevant -curl "http://localhost:8080/context/rag?query=How+do+I+setup+Docker+Compose&project=/opt/home-server" +curl "http://helm:8675/context/rag?query=How+do+I+setup+Docker+Compose&project=/opt/home-server" # Returns: 3 skills about Docker, 2 conventions = ~600 tokens ``` @@ -66,7 +66,7 @@ curl "http://localhost:8080/context/rag?query=How+do+I+setup+Docker+Compose&proj ```bash # Compress a long conversation -curl -X POST http://localhost:8080/compress \ +curl -X POST http://helm:8675/compress \ -H "Content-Type: application/json" \ -d '{ "messages": [...], # Your conversation history @@ -95,7 +95,7 @@ curl -X POST http://localhost:8080/compress \ async def query_llm(prompt, conversation_history, project=None): # 1. 
Check semantic cache FIRST cache_result = await httpx.post( - "http://localhost:8080/cache/semantic-lookup", + "http://helm:8675/cache/semantic-lookup", json={"prompt": prompt, "model": "claude-3-opus"} ) @@ -105,13 +105,13 @@ async def query_llm(prompt, conversation_history, project=None): # 2. Get ONLY relevant context (not everything) context = await httpx.get( - "http://localhost:8080/context/rag", + "http://helm:8675/context/rag", params={"query": prompt, "project": project} ) # 3. Compress conversation history compressed = await httpx.post( - "http://localhost:8080/compress", + "http://helm:8675/compress", json={"messages": conversation_history, "keep_last_n": 3} ) @@ -130,7 +130,7 @@ async def query_llm(prompt, conversation_history, project=None): # 6. Store in semantic cache await httpx.post( - "http://localhost:8080/cache/semantic-store", + "http://helm:8675/cache/semantic-store", json={ "prompt": prompt, "response": response, diff --git a/docker-compose.yml b/docker-compose.yml index af8aac4..26f84a7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ services: api: build: . ports: - - "8080:8080" + - "8675:8080" environment: - DATABASE_URL=sqlite+aiosqlite:///./ai.db volumes: diff --git a/examples/seed-data.py b/examples/seed-data.py index 846f819..b347a70 100644 --- a/examples/seed-data.py +++ b/examples/seed-data.py @@ -3,7 +3,7 @@ import httpx -BASE_URL = "http://localhost:8080" +BASE_URL = "http://helm:8675" SKILLS = [ { @@ -214,7 +214,7 @@ def seed(): except Exception as e: print(f" ✗ {snippet['id']}: {e}") - print("\nDone! Check http://localhost:8080/docs") + print("\nDone! 
Check http://helm:8675/docs") if __name__ == "__main__": diff --git a/mcp/.env.example b/mcp/.env.example index 1937ed1..743ddb7 100644 --- a/mcp/.env.example +++ b/mcp/.env.example @@ -1,2 +1,2 @@ -SKILLS_API_URL=http://localhost:8080 +SKILLS_API_URL=http://helm:8675 GAME_SERVERS_DIR=/opt/game-servers diff --git a/mcp/docker-compose.yml b/mcp/docker-compose.yml index 8ef8398..b8dbce2 100644 --- a/mcp/docker-compose.yml +++ b/mcp/docker-compose.yml @@ -29,7 +29,7 @@ services: dockerfile: mcp/Dockerfile command: python skills.py environment: - - SKILLS_API_URL=http://host.docker.internal:8080 + - SKILLS_API_URL=http://host.docker.internal:8675 extra_hosts: - "host.docker.internal:host-gateway" network_mode: host diff --git a/mcp/skills.py b/mcp/skills.py index 151aef6..e4e1cc8 100644 --- a/mcp/skills.py +++ b/mcp/skills.py @@ -4,7 +4,7 @@ import os mcp = FastMCP("skills") -SKILLS_API_URL = os.getenv("SKILLS_API_URL", "http://localhost:8080") +SKILLS_API_URL = os.getenv("SKILLS_API_URL", "http://helm:8675") @mcp.tool()