Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions apps/backend/__tests__/integration/s3-sdk/index.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -415,4 +415,100 @@ describe('AWS S3 - SDK', () => {
expect(result.Metadata?.cid).toBeDefined()
})
})

// Raw HTTP requests that mimic the AWS CLI / botocore, which (unlike the JS
// SDK used above) does NOT send the `x-id` query param for GetObject/
// PutObject and sends object bodies with no Content-Type header. These guard
// two regressions:
// 1. getS3Method must fall back to the HTTP method (GET->GetObject,
// PUT->PutObject) when `x-id` is absent — otherwise dispatch returns
// "Method not found".
// 2. The request body must be read as raw bytes regardless of Content-Type;
// a missing Content-Type previously left req.body as {} and broke
// uploads deep in the IPLD chunker.
describe('Raw HTTP requests (AWS CLI style: no x-id, no Content-Type)', () => {
  const S3_BASE = `${BASE_PATH}/s3`
  // handleS3Auth only needs an Authorization header containing
  // `Credential=<alphanumeric>/`; AuthManager is mocked to return `user`.
  const AUTH =
    'AWS4-HMAC-SHA256 Credential=clitestkey/20200101/us-east-1/s3/aws4_request, SignedHeaders=host, Signature=deadbeef'

  /**
   * Issues a bare S3 request the way the AWS CLI would.
   *
   * Passing a Uint8Array/Buffer body to fetch leaves Content-Type unset,
   * reproducing the AWS CLI's behaviour. No `x-id` query param is added.
   *
   * @param method HTTP verb to send (e.g. 'GET', 'PUT', 'POST').
   * @param path Path and optional query string appended to the /s3 base URL.
   * @param body Optional binary payload; omitted for body-less requests.
   * @returns The fetch Response promise for the request.
   */
  const rawS3 = (method: string, path: string, body?: Uint8Array) =>
    fetch(`${S3_BASE}${path}`, {
      method,
      headers: { Authorization: AUTH },
      // Cast: TS 5.7 types Buffer/Uint8Array as Uint8Array<ArrayBufferLike>,
      // which doesn't structurally match the DOM BodyInit union. A binary
      // body still sends with no Content-Type, which is the point here.
      body: body as unknown as BodyInit | undefined,
    })

  // Payload shared by the PutObject and GetObject tests below.
  const cliBody = Buffer.from('hello from the aws cli')

  it('PutObject without x-id/Content-Type stores the object', async () => {
    const res = await rawS3('PUT', '/cli-test/hello.txt', cliBody)
    expect(res.status).toBe(200)
    expect(res.headers.get('etag')).toMatch(MD5_ETAG_RE)
  }, 15_000)

  // NOTE: reads the key written by the PutObject test above, so it relies on
  // that test having run first within this describe block.
  it('GetObject without x-id returns the exact bytes', async () => {
    const res = await rawS3('GET', '/cli-test/hello.txt')
    expect(res.status).toBe(200)
    const got = Buffer.from(await res.arrayBuffer())
    expect(got).toEqual(cliBody)
  }, 15_000)

  it('multipart upload via raw requests round-trips (the original 500)', async () => {
    const key = '/cli-test/mpu.bin'
    const part1 = Buffer.from('AAAAAAAAAAAAAAAA')
    const part2 = Buffer.from('BBBBBBBBBBBBBBBB')

    // CreateMultipartUpload: the upload id is scraped out of the XML response.
    const create = await rawS3('POST', `${key}?uploads`)
    expect(create.status).toBe(200)
    const uploadId = (await create.text()).match(
      /<UploadId>([^<]+)<\/UploadId>/,
    )?.[1]
    expect(uploadId).toBeDefined()

    // Parts must be uploaded sequentially (the chunker enforces ordering).
    const up1 = await rawS3(
      'PUT',
      `${key}?partNumber=1&uploadId=${uploadId}`,
      part1,
    )
    expect(up1.status).toBe(200)
    const etag1 = up1.headers.get('etag')!
    expect(etag1).toMatch(MD5_ETAG_RE)

    const up2 = await rawS3(
      'PUT',
      `${key}?partNumber=2&uploadId=${uploadId}`,
      part2,
    )
    expect(up2.status).toBe(200)
    const etag2 = up2.headers.get('etag')!
    // Validate the second part's ETag as well: it is interpolated into the
    // completion XML below, and a missing header would otherwise be sent as
    // the literal text "null" without failing here.
    expect(etag2).toMatch(MD5_ETAG_RE)

    // The part list in the body is used only to compute the composite ETag.
    const completeBody = Buffer.from(
      '<CompleteMultipartUpload>' +
        `<Part><ETag>${etag1}</ETag><PartNumber>1</PartNumber></Part>` +
        `<Part><ETag>${etag2}</ETag><PartNumber>2</PartNumber></Part>` +
        '</CompleteMultipartUpload>',
    )
    const complete = await rawS3(
      'POST',
      `${key}?uploadId=${uploadId}`,
      completeBody,
    )
    expect(complete.status).toBe(200)
    expect(complete.headers.get('etag')).toMatch(MULTIPART_ETAG_RE)

    // Round-trip: the stored object must be the parts concatenated in order.
    const get = await rawS3('GET', key)
    expect(get.status).toBe(200)
    const got = Buffer.from(await get.arrayBuffer())
    expect(got).toEqual(Buffer.concat([part1, part2]))
  }, 30_000)
})
})
123 changes: 123 additions & 0 deletions apps/backend/__tests__/unit/core/s3.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -369,4 +369,127 @@ describe('S3UseCases', () => {
)
})
})

describe('listObjects', () => {
  // dbLimit for delimiter listings is min(maxKeys * 10 + 100, 10_000), so
  // maxKeys=2 yields dbLimit=120 — small enough to construct test data for.
  const delimiterDbLimit = (maxKeys: number) =>
    Math.min(maxKeys * 10 + 100, 10_000)

  // Sentinel expected at the end of a continuation token that skips past a
  // folded CommonPrefix. Written as an escape because U+FFFF is a
  // noncharacter: a raw literal is invisible in most editors and easy to
  // mangle in transit.
  const PREFIX_SKIP_SENTINEL = '\uFFFF'

  // Minimal listing row; only `key` varies between fixtures.
  const makeListing = (key: string) => ({
    key,
    cid: 'cid',
    size: 0n,
    lastModified: new Date(0),
  })

  it('advances continuation token past a folded CommonPrefix when the DB batch is exhausted inside one prefix group', async () => {
    // Regression test for Cursor Bugbot finding on PR #696 / #709.
    //
    // Scenario: maxKeys=2, delimiter='/', and a single virtual directory
    // ('big/') contains more keys than fit in one DB batch. Every fetched
    // row folds into the same CommonPrefix, so the in-loop maxKeys cap is
    // never hit and the loop exhausts the batch with isTruncated=false.
    // The fallback branch must then set the continuation token to a value
    // that sorts *after* every key in 'big/' — otherwise the next page
    // re-scans the rest of that directory and emits 'big/' again.
    const maxKeys = 2
    const dbLimit = delimiterDbLimit(maxKeys)

    // Fill the entire DB batch with keys that all fold into 'big/'.
    const fullBatch = Array.from({ length: dbLimit }, (_, i) =>
      makeListing(`big/${String(i).padStart(6, '0')}`),
    )

    jest
      .spyOn(s3ObjectMappingsRepository, 'listObjects')
      .mockResolvedValue(fullBatch as any)

    const result = await S3UseCases.listObjects({
      bucket: 'my-bucket',
      prefix: '',
      delimiter: '/',
      maxKeys,
      continuationToken: null,
    })

    expect(result.commonPrefixes).toEqual(['big/'])
    expect(result.objects).toEqual([])
    expect(result.isTruncated).toBe(true)
    // Token must start with the folded prefix and sort strictly after every
    // key inside it; U+FFFF is the sentinel chosen for this purpose.
    expect(result.nextContinuationToken).toBe(`big/${PREFIX_SKIP_SENTINEL}`)
    // Sanity: the token sorts after the last key we returned in the batch.
    expect(
      result.nextContinuationToken! > fullBatch[fullBatch.length - 1].key,
    ).toBe(true)
  })

  it('uses the raw last key as the token when the last scanned key did not fold into a prefix', async () => {
    // If the DB batch is full but the last key has no delimiter occurrence
    // after the prefix, there's no CommonPrefix to skip past — fall back to
    // the raw last key, which is the safe pre-fix behaviour.
    const maxKeys = 2
    const dbLimit = delimiterDbLimit(maxKeys)

    // Pad the batch with folded entries, but make the LAST one a top-level
    // key with no delimiter after the prefix.
    const batch = [
      ...Array.from({ length: dbLimit - 1 }, (_, i) =>
        makeListing(`folder/${String(i).padStart(6, '0')}`),
      ),
      makeListing('zzz-top-level'),
    ]

    jest
      .spyOn(s3ObjectMappingsRepository, 'listObjects')
      .mockResolvedValue(batch as any)

    const result = await S3UseCases.listObjects({
      bucket: 'my-bucket',
      prefix: '',
      delimiter: '/',
      maxKeys,
      continuationToken: null,
    })

    expect(result.isTruncated).toBe(true)
    // The last key doesn't fold into a CommonPrefix, so the token stays as
    // the raw key — no sentinel needed.
    expect(result.nextContinuationToken).toBe('zzz-top-level')
  })

  it('uses the raw last key as the token when no delimiter is set', async () => {
    // Without a delimiter, the dbLimit is maxKeys + 1, and there are no
    // CommonPrefixes to repeat — the safe fallback is just the last key.
    const maxKeys = 2
    const dbLimit = maxKeys + 1 // = 3

    const batch = [
      makeListing('a.txt'),
      makeListing('b.txt'),
      makeListing('c.txt'),
    ]
    // Fixture precondition: the mocked batch is exactly dbLimit rows, i.e.
    // one more than maxKeys, so the third row is what signals truncation.
    expect(batch.length).toBe(dbLimit)

    jest
      .spyOn(s3ObjectMappingsRepository, 'listObjects')
      .mockResolvedValue(batch as any)

    const result = await S3UseCases.listObjects({
      bucket: 'my-bucket',
      prefix: '',
      delimiter: null,
      maxKeys,
      continuationToken: null,
    })

    // maxKeys=2 ⇒ first two keys returned, third triggers truncation in
    // buildListResult (not the fallback), token = key just returned.
    expect(result.objects.map((o) => o.key)).toEqual(['a.txt', 'b.txt'])
    expect(result.isTruncated).toBe(true)
    // Plain last-returned key: no sentinel is appended on this path.
    expect(result.nextContinuationToken).toBe('b.txt')
  })
})
})
64 changes: 53 additions & 11 deletions apps/backend/__tests__/unit/repositories/nodes.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ import {
nodesRepository,
Node,
} from '../../../src/infrastructure/repositories/objects/nodes.js'
import { metadataRepository } from '../../../src/infrastructure/repositories/objects/metadata.js'
import { dbMigration } from '../../utils/dbMigrate.js'
import { MetadataType } from '@autonomys/auto-dag-data'
import { MetadataType, OffchainMetadata } from '@autonomys/auto-dag-data'

describe('Nodes Repository', () => {
beforeAll(async () => {
Expand Down Expand Up @@ -370,22 +371,22 @@ describe('Nodes Repository', () => {
expect(result?.piece_offset).toBe(100)
})

it('should remove nodes by root CID', async () => {
it('should remove encoded_node only for published nodes by root CID', async () => {
const rootCid = 'test-root-cid-remove'
const nodes: Node[] = [
{
cid: 'test-cid-remove-1',
cid: 'test-cid-remove-published',
root_cid: rootCid,
head_cid: 'test-head-cid-remove',
type: 'file',
encoded_node: 'test-encoded-node-remove-1',
piece_index: null,
piece_offset: null,
block_published_on: null,
block_published_on: 100,
tx_published_on: null,
},
{
cid: 'test-cid-remove-2',
cid: 'test-cid-remove-unpublished',
root_cid: rootCid,
head_cid: 'test-head-cid-remove',
type: 'file',
Expand All @@ -399,13 +400,16 @@ describe('Nodes Repository', () => {

await nodesRepository.saveNodes(nodes)
await nodesRepository.removeNodeDataByRootCid(rootCid)
const results = await nodesRepository.getNodesByRootCid(rootCid)
const fullNodes = await Promise.all(
results.map((r) => nodesRepository.getNode(r.cid)),

const publishedNode = await nodesRepository.getNode(
'test-cid-remove-published',
)
fullNodes.forEach((n) => {
expect(n?.encoded_node).toBeNull()
})
expect(publishedNode?.encoded_node).toBeNull()

const unpublishedNode = await nodesRepository.getNode(
'test-cid-remove-unpublished',
)
expect(unpublishedNode?.encoded_node).toBe('test-encoded-node-remove-2')
})

it('should get nodes by CIDs', async () => {
Expand Down Expand Up @@ -466,6 +470,44 @@ describe('Nodes Repository', () => {

expect(result?.block_published_on).toBe(12345)
expect(result?.tx_published_on).toBe('tx-hash')
expect(result?.encoded_node).toBe('test-encoded-node-published')
})

it('should clear encoded_node on publish when metadata is already archived', async () => {
  // Arrange: register metadata for the head CID and flag it as archived
  // before any node for it has been published.
  const rootCid = 'test-root-cid-publish-archived'
  const headCid = 'test-head-cid-publish-archived'
  const archivedMetadata: OffchainMetadata = {
    totalSize: 100n,
    type: 'file',
    dataCid: 'test-data-cid-publish-archived',
    totalChunks: 1,
    chunks: [],
    name: 'test-file-publish-archived',
  }
  await metadataRepository.setMetadata(rootCid, headCid, archivedMetadata)
  await metadataRepository.markAsArchived(headCid)

  // Arrange: a not-yet-published node under that head, still carrying data.
  const latePublishedNode: Node = {
    cid: 'test-cid-publish-after-archive',
    root_cid: rootCid,
    head_cid: headCid,
    type: 'file',
    encoded_node: 'data-that-should-be-cleared',
    piece_index: null,
    piece_offset: null,
    block_published_on: null,
    tx_published_on: null,
  }
  await nodesRepository.saveNode(latePublishedNode)

  // Act: record the publication after the archive flag was already set.
  await nodesRepository.updateNodePublishedOn(
    latePublishedNode.cid,
    99999,
    'tx-recovery',
  )

  // Assert: publication details are stored and the encoded payload is gone.
  const stored = await nodesRepository.getNode(latePublishedNode.cid)
  expect(stored?.block_published_on).toBe(99999)
  expect(stored?.tx_published_on).toBe('tx-recovery')
  expect(stored?.encoded_node).toBeNull()
})

it('should get uploaded nodes by root CID', async () => {
Expand Down
35 changes: 20 additions & 15 deletions apps/backend/src/app/apis/download.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,26 @@ const createServer = async () => {
logger.debug('Initializing download API server')
const app = express()

if (config.express.corsAllowedOrigins) {
logger.debug(
'Configuring CORS with allowed origins: %j',
config.express.corsAllowedOrigins,
)
app.use(
cors({
origin: config.express.corsAllowedOrigins,
}),
)
} else {
logger.warn('CORS is not configured - no allowed origins specified, blocking cross-origin requests')
}

// The S3 controller handles its own raw body parsing (binary object
// payloads). It is mounted before the JSON/urlencoded parsers below so those
// never run for /s3 — otherwise body-parser would set req.body to {} and the
// raw object bytes would be lost.
app.use('/s3', s3Controller)

app.use(
express.json({
limit: config.express.requestSizeLimit,
Expand All @@ -36,22 +56,7 @@ const createServer = async () => {
config.express.requestSizeLimit,
)

if (config.express.corsAllowedOrigins) {
logger.debug(
'Configuring CORS with allowed origins: %j',
config.express.corsAllowedOrigins,
)
app.use(
cors({
origin: config.express.corsAllowedOrigins,
}),
)
} else {
logger.warn('CORS is not configured - no allowed origins specified, blocking cross-origin requests')
}

app.use('/downloads', downloadController)
app.use('/s3', s3Controller)
app.use('/features', featuresController)

logger.debug('Download controller mounted at /downloads')
Expand Down
Loading
Loading