diff --git a/apps/web/src/services/api/client.ts b/apps/web/src/services/api/client.ts
index 216ec07b5..604c6601a 100644
--- a/apps/web/src/services/api/client.ts
+++ b/apps/web/src/services/api/client.ts
@@ -69,7 +69,7 @@ const DEFAULT_RETRY_CONFIG: RetryConfig = {
   maxRetries: 3,
   baseDelay: 1000, // 1 second
   maxDelay: 10000, // 10 seconds
-  retryableStatusCodes: [429, 500, 502, 503, 504], // Rate limit, server errors, gateway errors
+  retryableStatusCodes: [500, 502, 503, 504], // Server errors, gateway errors (429 excluded - don't retry rate limits)
   retryableNetworkErrors: [
     'ECONNABORTED', // Timeout
     'ETIMEDOUT', // Timeout
@@ -758,9 +758,32 @@ apiClient.interceptors.response.use(
     const retryCount = (originalRequest as any)?._retryCount || 0;
     const maxRetries = DEFAULT_RETRY_CONFIG.maxRetries;
     
-    // INT-API-005: For 429 rate limit errors, use a specific max retries limit (3)
+    // INT-API-005: For 429 rate limit errors, don't retry - respect the rate limit
     const isRateLimitError = status === 429;
-    const effectiveMaxRetries = isRateLimitError ? 3 : maxRetries; // Define here so it's accessible in both if blocks
+    // Don't retry 429 errors - respect the rate limit and show error immediately
+    if (isRateLimitError) {
+      const apiError = parseApiError(error);
+      // Retry-After may be an HTTP-date (parseInt -> NaN); accept only a positive second count, else default to 60s
+      const retryAfter = error.response?.headers?.['retry-after'] || error.response?.headers?.['Retry-After'];
+      const parsedRetryAfter = Number.parseInt(String(retryAfter ?? ''), 10);
+      const retryAfterSeconds = Number.isFinite(parsedRetryAfter) && parsedRetryAfter > 0 ? parsedRetryAfter : 60;
+      logger.warn('[API] Rate limit exceeded, not retrying', {
+        url: originalRequest?.url,
+        retry_after: retryAfterSeconds,
+        request_id: apiError.request_id,
+      });
+
+      // Show user-friendly error message
+      if (apiError.message) {
+        toast.error(apiError.message, {
+          duration: retryAfterSeconds * 1000, // Show for the retry-after duration
+        });
+      }
+
+      return Promise.reject(apiError);
+    }
+    
+    const effectiveMaxRetries = maxRetries; // Use default max retries for other errors
 
     // Check if error is retryable
     if (isRetryableError(error, DEFAULT_RETRY_CONFIG) && originalRequest && retryCount < effectiveMaxRetries) {
@@ -768,9 +791,9 @@ apiClient.interceptors.response.use(
       const method = originalRequest.method?.toUpperCase();
       const isIdempotent = isIdempotentMethod(method);
       
-      // INT-API-005: Allow retry for 429 rate limit errors even for non-idempotent methods
-      // For non-idempotent methods, only retry on network errors, 5xx errors, or 429 rate limit
-      if (!isIdempotent && status && status !== 429 && status !== 500 && status !== 502 && status !== 503 && status !== 504) {
+      // For non-idempotent methods, only retry on network errors or 5xx errors
+      // (429 rate limit errors are handled above and don't retry)
+      if (!isIdempotent && status && status !== 500 && status !== 502 && status !== 503 && status !== 504) {
         // Don't retry non-idempotent methods on client errors (except 429 and 5xx)
         const apiError = parseApiError(error);
         return Promise.reject(apiError);
@@ -779,15 +802,14 @@ apiClient.interceptors.response.use(
       // Mark that we're retrying this request
       (originalRequest as any)._retryCount = retryCount + 1;
 
-      // INT-API-005: Calculate delay (respect Retry-After header if present for 429, otherwise exponential backoff with jitter)
-      // For 429 rate limit errors, getRetryDelay will use Retry-After header if present
+      // Calculate delay (exponential backoff with jitter)
       const delay = getRetryDelay(error, retryCount, DEFAULT_RETRY_CONFIG.baseDelay, DEFAULT_RETRY_CONFIG.maxDelay);
 
       // Log retry attempt with request_id if available
       const apiError = parseApiError(error);
       const errorType = status ? `HTTP ${status}` : error.code || 'Network Error';
       
-      // INT-API-005: Log retry attempt with appropriate max retries (3 for 429, default for others)
+      // Log retry attempt
       if (apiError.request_id) {
         console.warn(
           `[API Retry] ${errorType} error, retrying (${retryCount + 1}/${effectiveMaxRetries}) - Request ID: ${apiError.request_id}`,
@@ -801,8 +823,6 @@ apiClient.interceptors.response.use(
             url: originalRequest?.url,
             method: originalRequest?.method,
             is_idempotent: isIdempotent,
-            is_rate_limit: isRateLimitError,
-            retry_after_header: error.response?.headers['retry-after'] || error.response?.headers['Retry-After'] || 'N/A',
           },
         );
       } else {
@@ -817,8 +837,6 @@ apiClient.interceptors.response.use(
             url: originalRequest?.url,
             method: originalRequest?.method,
             is_idempotent: isIdempotent,
-            is_rate_limit: isRateLimitError,
-            retry_after_header: error.response?.headers['retry-after'] || error.response?.headers['Retry-After'] || 'N/A',
           },
         );
       }
@@ -917,7 +935,7 @@ apiClient.interceptors.response.use(
       }
 
       toast.error(errorMessage, {
-        duration: status === 429 ? 8000 : 5000, // Longer duration for rate limit errors
+        duration: 5000, // Standard duration for errors
       });
     }
 
diff --git a/veza-backend-api/internal/api/router.go b/veza-backend-api/internal/api/router.go
index 1759319d6..4adc5ed8e 100644
--- a/veza-backend-api/internal/api/router.go
+++ b/veza-backend-api/internal/api/router.go
@@ -229,9 +229,7 @@ func (r *APIRouter) Setup(router *gin.Engine) error {
 	// Swagger Documentation
 	router.GET("/swagger/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
 	// INT-DOC-001: Expose /docs endpoint as alias for Swagger UI
-	router.GET("/docs", func(c *gin.Context) {
-		c.Redirect(302, "/swagger/index.html")
-	})
+	router.GET("/docs", func(c *gin.Context) { c.Redirect(302, "/swagger/index.html") }) // bare /docs names no Swagger asset for WrapHandler to serve, so redirect to the UI
 	router.GET("/docs/*any", ginSwagger.WrapHandler(swaggerFiles.Handler))
 
 	// BE-SVC-019: API versioning endpoint (before version middleware)