veza/tests/e2e/27-chunked-upload-s3.spec.ts
senke 8699004974 feat(track): native S3 multipart for chunked uploads (v1.0.9 item 1.5)
Replaces the historical chunked-upload flow when TRACK_STORAGE_BACKEND=s3:

  before: chunks → assembled file on disk → MigrateLocalToS3IfConfigured
          opens the file → manager.Uploader streams in 10 MB parts
  after:  chunks → io.Pipe → manager.Uploader streams in 10 MB parts
          (no assembled file on local disk)

Eliminates the second local copy of every upload and ~500 MB of disk
I/O per concurrent 500 MB upload. The local-storage path
(TRACK_STORAGE_BACKEND=local, default) is unchanged — it still goes
through CompleteChunkedUpload + CreateTrackFromPath because ClamAV needs
the assembled file (chunked path skips ClamAV by design, see audit).

New surface:
  - TrackChunkService.StreamChunkedUpload(ctx, uploadID, dst io.Writer)
    — extracted from CompleteChunkedUpload, writes chunks in order to
    any io.Writer, computes SHA-256 + verifies expected size, cleans
    up Redis state on success and preserves it on failure (resumable).
  - TrackService.CreateTrackFromChunkedUploadToS3 — orchestrates
    io.Pipe + goroutine, deletes orphan S3 objects on assembly failure,
    creates the Track row with storage_backend=s3 + storage_key.
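
A minimal sketch of the fast-path orchestration, assuming the backend
uses aws-sdk-go-v2's manager.Uploader for the 10 MB parts. The helper
name streamToS3 and its signature are invented for illustration; only
StreamChunkedUpload and the Track-row step come from the list above:

  package track

  import (
      "context"
      "io"

      "github.com/aws/aws-sdk-go-v2/aws"
      "github.com/aws/aws-sdk-go-v2/feature/s3/manager"
      "github.com/aws/aws-sdk-go-v2/service/s3"
  )

  // streamToS3 stands in for the core of CreateTrackFromChunkedUploadToS3:
  // pipe assembled chunks straight into a multipart upload, no temp file.
  func streamToS3(
      ctx context.Context,
      client *s3.Client,
      bucket, key string,
      stream func(ctx context.Context, dst io.Writer) error, // StreamChunkedUpload
  ) error {
      pr, pw := io.Pipe()

      // Writer side: StreamChunkedUpload writes chunks in order into the
      // pipe. CloseWithError(nil) reads as EOF on the other end; a non-nil
      // error aborts the uploader's in-flight multipart upload.
      go func() {
          pw.CloseWithError(stream(ctx, pw))
      }()

      // Reader side: the uploader consumes the pipe in 10 MB parts.
      uploader := manager.NewUploader(client, func(u *manager.Uploader) {
          u.PartSize = 10 * 1024 * 1024
      })
      _, err := uploader.Upload(ctx, &s3.PutObjectInput{
          Bucket: aws.String(bucket),
          Key:    aws.String(key),
          Body:   pr,
      })
      if err != nil {
          // Best-effort orphan cleanup; Redis chunk state was preserved
          // by StreamChunkedUpload, so the client can resume.
          client.DeleteObject(ctx, &s3.DeleteObjectInput{
              Bucket: aws.String(bucket),
              Key:    aws.String(key),
          })
      }
      // On success the real method then creates the Track row with
      // storage_backend=s3 + storage_key.
      return err
  }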

Tests: 4 chunk-service stream tests (happy / writer error / size
mismatch / delegation) + 4 service tests (happy / wrong backend /
stream error / S3 upload error). One E2E @critical-s3 spec gated on
S3 availability via /health/deep so it ships today and starts running
once MinIO is added to the e2e workflow services block.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

import { test, expect } from '@chromatic-com/playwright';
import { CONFIG } from './helpers';

/**
 * v1.0.9 item 1.5 — chunked uploads stream straight to S3 multipart
 * (no local assembled file) when TRACK_STORAGE_BACKEND=s3. Verifies
 * the fast path is wired by:
 *
 * 1. Detecting S3 availability at runtime via /api/v1/health/deep —
 *    skip with a clear message when the CI environment is local-only,
 *    so this spec is shippable today and starts running automatically
 *    once MinIO is added to the e2e workflow services block.
 * 2. Uploading via the chunked endpoints (initiate / chunk / complete).
 * 3. Asserting the resulting track is served from S3: the stream
 *    endpoint must 302-redirect to a signed URL rather than serve a
 *    local FilePath (storage_backend / storage_key themselves are
 *    admin-only, see the inline note below). That's the single
 *    observable difference between the new fast path and the legacy
 *    local-then-migrate path; if a refactor accidentally reverts to
 *    local-first, this assertion fires.
 */
async function s3IsAvailable(
  request: import('@playwright/test').APIRequestContext,
): Promise<boolean> {
  const resp = await request.get(`${CONFIG.apiURL}/api/v1/health/deep`).catch(() => null);
  if (!resp || resp.status() !== 200) return false;
  const body = await resp.json().catch(() => null);
  const checks = body?.data?.checks ?? body?.checks ?? {};
  const s3 = checks?.s3_storage;
  // /health/deep returns an object per check with a `status` field;
  // any of "ok" / "healthy" / "up" indicates the service is reachable.
  if (!s3) return false;
  const status = (s3.status ?? '').toString().toLowerCase();
  return status === 'ok' || status === 'healthy' || status === 'up';
}

async function loginAsCreator(
  request: import('@playwright/test').APIRequestContext,
): Promise<{ accessToken: string; cookies: string }> {
  const resp = await request.post(`${CONFIG.apiURL}/api/v1/auth/login`, {
    data: {
      email: CONFIG.users.creator.email,
      password: CONFIG.users.creator.password,
      remember_me: false,
    },
  });
  expect(resp.status(), 'creator login must succeed for the chunked upload test').toBe(200);
  const body = await resp.json();
  const data = body?.data ?? body;
  const accessToken: string = data?.token?.access_token ?? '';
  expect(accessToken.length).toBeGreaterThan(0);
  const cookieHeader = resp
    .headersArray()
    .filter((h) => h.name.toLowerCase() === 'set-cookie')
    .map((h) => h.value.split(';')[0])
    .join('; ');
  return { accessToken, cookies: cookieHeader };
}

test.describe('UPLOAD — chunked native S3 multipart (v1.0.9 item 1.5)', () => {
  test('28. chunked upload routes through CreateTrackFromChunkedUploadToS3 when backend=s3 @critical-s3', async ({
    request,
  }) => {
    test.setTimeout(60_000);

    if (!(await s3IsAvailable(request))) {
      test.skip(true, 'S3 backend not configured in this environment — fast path skipped.');
    }

    const { accessToken } = await loginAsCreator(request);
    const authHeader = { Authorization: `Bearer ${accessToken}` };

    // Build a 32 KB synthetic MP3 split into 4 × 8 KB chunks. Real
    // audio isn't required — the upload pipeline is content-agnostic
    // until ClamAV (which the chunked path skips, see audit). The
    // assertion is on storage_backend, not playback.
    const chunkSize = 8 * 1024;
    const totalChunks = 4;
    const totalSize = chunkSize * totalChunks;

    const initResp = await request.post(`${CONFIG.apiURL}/api/v1/tracks/initiate`, {
      headers: authHeader,
      data: {
        filename: `e2e-s3-${Date.now()}.mp3`,
        total_chunks: totalChunks,
        total_size: totalSize,
      },
    });
    expect(initResp.status()).toBe(200);
    const initBody = await initResp.json();
    const uploadID: string = initBody?.data?.upload_id ?? initBody?.upload_id;
    expect(uploadID, 'initiate must return an upload_id').toBeTruthy();

    for (let i = 1; i <= totalChunks; i++) {
      const chunk = Buffer.alloc(chunkSize, i);
      const chunkResp = await request.post(`${CONFIG.apiURL}/api/v1/tracks/chunk`, {
        headers: authHeader,
        multipart: {
          upload_id: uploadID,
          chunk_number: String(i),
          chunk: { name: `chunk-${i}`, mimeType: 'application/octet-stream', buffer: chunk },
        },
      });
      expect(chunkResp.status(), `chunk ${i} upload must succeed`).toBe(200);
    }

    const completeResp = await request.post(`${CONFIG.apiURL}/api/v1/tracks/complete`, {
      headers: authHeader,
      data: { upload_id: uploadID },
    });
    expect(completeResp.status(), 'complete must return 201 even on the S3 fast path').toBe(201);
    const completeBody = await completeResp.json();
    const track = completeBody?.data?.track ?? completeBody?.track;
    expect(track?.id, 'complete must return the track row').toBeTruthy();

    // The DB row carries storage_backend / storage_key on the fast path.
    // We re-fetch via the public track endpoint to assert from the
    // server's perspective rather than trusting the complete response
    // (which mirrors the in-memory model).
    const detailResp = await request.get(`${CONFIG.apiURL}/api/v1/tracks/${track.id}`, {
      headers: authHeader,
    });
    expect(detailResp.status()).toBe(200);

    // storage_backend / storage_key are JSON-tagged "-" on the model
    // (admin-only fields), so we can't assert their values from a
    // public detail response. The strongest assertion the public API
    // surface allows is that the stream URL is a signed S3 URL
    // (302 redirect to https://… with a presigned query string)
    // rather than a local-disk path. v1.0.8 Phase 2 wired this for
    // both backends: GET /tracks/:id/stream → 302 to the signed URL
    // when storage_backend=s3.
    const streamResp = await request.get(`${CONFIG.apiURL}/api/v1/tracks/${track.id}/stream`, {
      headers: authHeader,
      maxRedirects: 0, // capture the 302 itself, don't follow it
    });

    // 302 = S3 redirect path (storage_backend=s3 confirmed); 200 =
    // local-disk serve, meaning we accidentally took the legacy path,
    // so fail the test loudly.
    expect(
      streamResp.status(),
      'with TRACK_STORAGE_BACKEND=s3 the stream endpoint must redirect to a signed URL',
    ).toBe(302);
    const location = streamResp.headers()['location'] ?? '';
    expect(
      location,
      'redirect target must be an HTTPS signed URL (S3-style), not a local /uploads path',
    ).toMatch(/^https?:\/\/.*\?.*[Xx]-[Aa]mz-/);
  });
});