Skip to content
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 17 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: backend-test backend-test-build test-clean test-network test-postgres frontend-test frontend-test-build migration-create migration-upgrade migration-downgrade migration-stamp
.PHONY: backend-test backend-test-build test-clean test-network test-postgres test-redis frontend-test frontend-test-build migration-create migration-upgrade migration-downgrade migration-stamp

# Default target
all: backend-test
Expand All @@ -23,16 +23,25 @@ test-postgres: test-clean test-network
pgvector/pgvector:pg16 && \
sleep 5

# Start Redis container for testing
test-redis: test-network
docker run -d \
--name amazee-test-redis \
--network amazeeai_default \
redis:alpine && \
sleep 2

# Run backend tests for a specific regex
# Usage: make backend-test-regex regex="test_pattern"
backend-test-regex: test-clean backend-test-build test-postgres
backend-test-regex: test-clean backend-test-build test-postgres test-redis
@if [ -z "$(regex)" ]; then \
echo "Error: regex parameter is required. Usage: make backend-test-regex regex=\"test_pattern\""; \
exit 1; \
fi
docker run --rm \
--network amazeeai_default \
-e DATABASE_URL="postgresql://postgres:postgres@amazee-test-postgres/postgres_service" \
-e REDIS_URL="redis://amazee-test-redis:6379" \
-e SECRET_KEY="test-secret-key" \
-e POSTGRES_HOST="amazee-test-postgres" \
-e POSTGRES_USER="postgres" \
Expand All @@ -47,10 +56,11 @@ backend-test-regex: test-clean backend-test-build test-postgres
amazee-backend-test pytest -vv -k "$(regex)"

# Run backend tests in a new container
backend-test: test-clean backend-test-build test-postgres
backend-test: test-clean backend-test-build test-postgres test-redis
docker run --rm \
--network amazeeai_default \
-e DATABASE_URL="postgresql://postgres:postgres@amazee-test-postgres/postgres_service" \
-e REDIS_URL="redis://amazee-test-redis:6379" \
-e SECRET_KEY="test-secret-key" \
-e POSTGRES_HOST="amazee-test-postgres" \
-e POSTGRES_USER="postgres" \
Expand All @@ -65,10 +75,11 @@ backend-test: test-clean backend-test-build test-postgres
amazee-backend-test

# Run backend tests with coverage report
backend-test-cov: test-clean backend-test-build test-postgres
backend-test-cov: test-clean backend-test-build test-postgres test-redis
docker run --rm \
--network amazeeai_default \
-e DATABASE_URL="postgresql://postgres:postgres@amazee-test-postgres/postgres_service" \
-e REDIS_URL="redis://amazee-test-redis:6379" \
-e SECRET_KEY="test-secret-key" \
-e POSTGRES_HOST="amazee-test-postgres" \
-e POSTGRES_USER="postgres" \
Expand Down Expand Up @@ -99,6 +110,8 @@ test-all: backend-test frontend-test
test-clean:
docker stop amazee-test-postgres 2>/dev/null || true
docker rm amazee-test-postgres 2>/dev/null || true
docker stop amazee-test-redis 2>/dev/null || true
docker rm amazee-test-redis 2>/dev/null || true
docker network rm amazeeai_default 2>/dev/null || true
docker rmi amazee-backend-test 2>/dev/null || true
docker rmi amazeeai-frontend-test 2>/dev/null || true
Expand Down
19 changes: 10 additions & 9 deletions app/api/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import uuid
from datetime import datetime
import email_validator
from fastapi_limiter.depends import RateLimiter

from typing import Optional, List, Union

Expand All @@ -15,6 +16,10 @@
from urllib.parse import urlparse
from jose import JWTError, jwt

from app.api.teams import register_team
from app.api.users import _create_user_in_db, get_user_by_email
from app.api.private_ai_keys import create_private_ai_key

from app.core.config import settings
from app.core.dependencies import get_limit_service
from app.core.roles import UserRole
Expand Down Expand Up @@ -59,20 +64,14 @@
BudgetType,
)

from app.api.teams import register_team
from app.api.users import _create_user_in_db, get_user_by_email
from app.api.private_ai_keys import create_private_ai_key

auth_logger = logging.getLogger(__name__)

router = APIRouter(tags=["auth"])


def get_cookie_domain():
"""Extract domain from COOKIE_DOMAIN or LAGOON_ROUTES for cookie settings."""
# First check for explicit cookie domain setting
cookie_domain = os.getenv("COOKIE_DOMAIN")
if cookie_domain:
if (cookie_domain := os.getenv("COOKIE_DOMAIN")):
return cookie_domain

# Fall back to extracting from LAGOON_ROUTES
Expand Down Expand Up @@ -141,7 +140,6 @@ def create_and_set_access_token(
access_token = create_access_token(data={"sub": user_email.lower()})

# Get cookie domain from LAGOON_ROUTES
cookie_domain = get_cookie_domain()

# Set cookie expiration based on user role
Comment on lines 142 to 144
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 The comment # Get cookie domain from LAGOON_ROUTES is now orphaned — the get_cookie_domain() call it referenced was refactored into the walrus-operator expression below. The blank line and stale comment are misleading.

Suggested change
# Get cookie domain from LAGOON_ROUTES
cookie_domain = get_cookie_domain()
# Set cookie expiration based on user role
# Set cookie expiration based on user role

# System administrators get 8 hours (28800 seconds), regular users get 30 minutes (1800 seconds)
Expand All @@ -160,7 +158,7 @@ def create_and_set_access_token(
}

# Only set domain if we got one from LAGOON_ROUTES
if cookie_domain:
if (cookie_domain := get_cookie_domain()):
cookie_settings["domain"] = cookie_domain

# Set cookie with appropriate settings
Expand Down Expand Up @@ -467,6 +465,9 @@ async def validate_email(
email_data: Optional[EmailValidation] = None,
email: Optional[str] = Form(None),
db: Session = Depends(get_db),
_: None = Depends(
RateLimiter(times=settings.RATE_LIMIT_VALIDATE_EMAIL, seconds=60)
),
Comment thread
dan2k3k4 marked this conversation as resolved.
Comment on lines +468 to +470
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot apply changes based on this feedback

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test test_validate_email_rate_limit was already added in commit 188c3ee. It makes settings.RATE_LIMIT_VALIDATE_EMAIL (default: 5) requests and asserts the next one returns HTTP 429. The conftest mock enforces the threshold dynamically via int(times) from the evalsha call, so it stays in sync with any configured value.

Comment on lines +468 to +470
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 security Rate limit bypassable via X-Forwarded-For spoofing

The fastapi-limiter default identifier reads X-Forwarded-For first and takes split(",")[0]. If the reverse proxy appends to (rather than replaces) this header, an attacker can pre-set X-Forwarded-For: 1.2.3.4 on each request and appear as a new IP every time, effectively bypassing the rate limit and enabling unlimited validation-code emails to arbitrary addresses. A custom identifier should be used that either strips the header or uses a trusted proxy-only value.

):
"""
Validate an email address and generate a validation code.
Expand Down
5 changes: 5 additions & 0 deletions app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ class Settings(BaseSettings):
DEDICATED_DEFAULT_SERVICE_KEYS: float | None = None
DEDICATED_DEFAULT_VECTOR_DB_COUNT: float | None = None
DEDICATED_DEFAULT_RPM_PER_KEY: float | None = None
REDIS_URL: str = os.getenv(
"REDIS_URL",
f"redis://{os.getenv('REDIS_HOST', 'localhost')}:{os.getenv('REDIS_PORT', '6379')}/0",
)
RATE_LIMIT_VALIDATE_EMAIL: int = int(os.getenv("RATE_LIMIT_VALIDATE_EMAIL", "5"))

model_config = ConfigDict(env_file=".env", extra="ignore")
main_route: str = os.getenv("LAGOON_ROUTE", "http://localhost:8800")
Expand Down
128 changes: 128 additions & 0 deletions app/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
import os
from contextlib import asynccontextmanager
from datetime import UTC

from app.__version__ import __version__
from app.api import (
Expand All @@ -18,17 +19,24 @@
teams,
users,
)
from app.core.locking import release_lock, try_acquire_lock
from app.core.config import settings
from app.core.worker import hard_delete_expired_teams, monitor_teams
from app.db.database import get_db
from app.middleware.audit import AuditLogMiddleware
from app.middleware.auth import AuthMiddleware
from app.middleware.caching import CacheControlMiddleware
from app.middleware.prometheus import PrometheusMiddleware
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger
from fastapi import FastAPI
from fastapi_limiter import FastAPILimiter
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.openapi.docs import get_swagger_ui_html
from fastapi.openapi.utils import get_openapi
from prometheus_fastapi_instrumentator import Instrumentator, metrics
from redis.asyncio import Redis as AsyncRedis
from starlette.middleware.base import BaseHTTPMiddleware

# Set timezone environment variable to prevent tzlocal warning
Expand All @@ -52,8 +60,128 @@ async def dispatch(self, request, call_next):

@asynccontextmanager
async def lifespan(app: FastAPI):
# Initialize rate limiter
await FastAPILimiter.init(
redis=AsyncRedis.from_url(settings.REDIS_URL, decode_responses=True)
)
Comment on lines 62 to +66
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Redis unavailability now prevents app startup

FastAPILimiter.init() calls redis.script_load(lua_script), which requires an active Redis connection. If Redis is unavailable when the app starts, the lifespan raises an exception and the app cannot serve any traffic — even endpoints with no rate limiting. The docker-compose.yml guards against this with condition: service_healthy, but this constraint doesn't exist in every deployment path (e.g., CI pipelines, bare-metal deployments, or Kubernetes rolling restarts where Redis may be transiently unreachable). Consider wrapping the init in a try/except with a warning log so the app can start degraded without rate limiting.


# Create scheduler
scheduler = AsyncIOScheduler()

async def monitor_teams_job():
lock_name = "monitor_teams"
lock_acquired = False

try:
# Try to acquire the lock using a dedicated short-lived session
lock_db = next(get_db())
try:
lock_acquired = try_acquire_lock(lock_name, lock_db, lock_timeout=10)
finally:
lock_db.close()

if lock_acquired:
logger.info("Acquired monitor_teams lock, executing job")
job_db = next(get_db())
try:
await monitor_teams(job_db)
except Exception as e:
job_db.rollback()
logger.error(f"Error in monitor_teams background task: {str(e)}")
finally:
job_db.close()
else:
logger.info("Another process has the monitor_teams lock, skipping execution")
except Exception as e:
logger.error(f"Error in monitor_teams job: {str(e)}")
finally:
# Always release the lock when it was acquired, using a separate session
if lock_acquired:
release_db = next(get_db())
try:
release_lock(lock_name, release_db)
except Exception as release_error:
logger.error(f"Error releasing lock: {str(release_error)}")
finally:
release_db.close()

# Set schedule based on environment
if settings.ENV_SUFFIX == "local":
cron_trigger = CronTrigger(minute='*/10', timezone=UTC, jitter=180)
else:
# Run every hour in other environments with jitter
cron_trigger = CronTrigger(hour='*', minute=0, timezone=UTC, jitter=60)

scheduler.add_job(
monitor_teams_job,
trigger=cron_trigger,
id='monitor_teams',
replace_existing=True
)

# Hard delete job for teams that have been soft-deleted for 60+ days
async def hard_delete_teams_job():
lock_name = "hard_delete_teams"
lock_acquired = False

try:
# Try to acquire the lock using a dedicated short-lived session
lock_db = next(get_db())
try:
lock_acquired = try_acquire_lock(lock_name, lock_db, lock_timeout=10)
finally:
lock_db.close()

if lock_acquired:
logger.info("Acquired hard_delete_teams lock, executing job")
job_db = next(get_db())
try:
await hard_delete_expired_teams(job_db)
except Exception as e:
job_db.rollback()
logger.error(f"Error in hard_delete_expired_teams background task: {str(e)}")
finally:
job_db.close()
else:
logger.info("Another process has the hard_delete_teams lock, skipping execution")
except Exception as e:
logger.error(f"Error in hard_delete_teams job: {str(e)}")
finally:
# Always release the lock when it was acquired, using a separate session
if lock_acquired:
release_db = next(get_db())
try:
release_lock(lock_name, release_db)
except Exception as release_error:
logger.error(f"Error releasing lock: {str(release_error)}")
finally:
release_db.close()

# Set schedule based on environment for hard delete job
if settings.ENV_SUFFIX == "local":
# In local env, run every hour at :30 for testing
hard_delete_trigger = CronTrigger(hour='*', minute=30, timezone=UTC)
else:
# In production, run daily at 3 AM
hard_delete_trigger = CronTrigger(hour=3, minute=0, timezone=UTC)

scheduler.add_job(
hard_delete_teams_job,
trigger=hard_delete_trigger,
id='hard_delete_teams',
replace_existing=True
)

# Start the scheduler
scheduler.start()

yield

# Shutdown the rate limiter
await FastAPILimiter.close()

# Shutdown the scheduler
scheduler.shutdown()

app = FastAPI(
title="Private AI Keys as a Service",
Expand Down
13 changes: 13 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ services:
labels:
lagoon.type: postgres

redis:
Comment thread
dan2k3k4 marked this conversation as resolved.
image: uselagoon/redis-7
labels:
lagoon.type: redis
healthcheck:
test: [ "CMD", "redis-cli", "ping" ]
interval: 5s
timeout: 5s
retries: 5

backend:
Comment thread
dan2k3k4 marked this conversation as resolved.
build:
context: .
Expand All @@ -26,13 +36,16 @@ services:
AMAZEEAI_JWT_SECRET: ${AMAZEEAI_JWT_SECRET}
ENABLE_METRICS: "true" # Enable Prometheus metrics
AI_TRIAL_REGION: ${AI_TRIAL_REGION:-eu-west}
REDIS_URL: "redis://redis:6379/0"
ports:
- "8800:8800"
volumes:
- ./app:/app/app
depends_on:
postgres:
condition: service_healthy
redis:
condition: service_healthy
labels:
lagoon.type: python

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ prometheus-fastapi-instrumentator==7.1.0
stripe==14.4.0
six==1.17.0
httpx==0.28.1
fastapi-limiter==0.1.6
redis==4.6.0
Loading
Loading