autonomys · EmilFattakhov · May 27, 2026 · May 22, 2026 · May 26, 2026 · May 26, 2026
diff --git a/apps/backend/__tests__/integration/s3-sdk/index.spec.ts b/apps/backend/__tests__/integration/s3-sdk/index.spec.ts
@@ -415,4 +415,100 @@ describe('AWS S3 - SDK', () => {
       expect(result.Metadata?.cid).toBeDefined()
     })
   })
+
+  // Raw HTTP requests that mimic the AWS CLI / botocore, which (unlike the JS
+  // SDK used above) does NOT send the `x-id` query param for GetObject/
+  // PutObject and sends object bodies with no Content-Type header. These guard
+  // two regressions:
+  //   1. getS3Method must fall back to the HTTP method (GET->GetObject,
+  //      PUT->PutObject) when `x-id` is absent — otherwise dispatch returns
+  //      "Method not found".
+  //   2. The request body must be read as raw bytes regardless of Content-Type;
+  //      a missing Content-Type previously left req.body as {} and broke
+  //      uploads deep in the IPLD chunker.
+  describe('Raw HTTP requests (AWS CLI style: no x-id, no Content-Type)', () => {
+    const S3_BASE = `${BASE_PATH}/s3`
+    // handleS3Auth only needs an Authorization header containing
+    // `Credential=<alphanumeric>/`; AuthManager is mocked to return `user`.
+    const AUTH =
+      'AWS4-HMAC-SHA256 Credential=clitestkey/20200101/us-east-1/s3/aws4_request, SignedHeaders=host, Signature=deadbeef'
+
+    // Passing a Uint8Array/Buffer body to fetch leaves Content-Type unset,
+    // reproducing the AWS CLI's behaviour. No `x-id` query param is added.
+    const rawS3 = (method: string, path: string, body?: Uint8Array) =>
+      fetch(`${S3_BASE}${path}`, {
+        method,
+        headers: { Authorization: AUTH },
+        // Cast: TS 5.7 types Buffer/Uint8Array as Uint8Array<ArrayBufferLike>,
+        // which doesn't structurally match the DOM BodyInit union. A binary
+        // body still sends with no Content-Type, which is the point here.
+        body: body as unknown as BodyInit | undefined,
+      })
+
+    const CliBody = Buffer.from('hello from the aws cli')
+
+    it('PutObject without x-id/Content-Type stores the object', async () => {
+      const res = await rawS3('PUT', '/cli-test/hello.txt', CliBody)
+      expect(res.status).toBe(200)
+      expect(res.headers.get('etag')).toMatch(MD5_ETAG_RE)
+    }, 15_000)
+
+    it('GetObject without x-id returns the exact bytes', async () => {
+      const res = await rawS3('GET', '/cli-test/hello.txt')
+      expect(res.status).toBe(200)
+      const got = Buffer.from(await res.arrayBuffer())
+      expect(got).toEqual(CliBody)
+    }, 15_000)
+
+    it('multipart upload via raw requests round-trips (the original 500)', async () => {
+      const key = '/cli-test/mpu.bin'
+      const part1 = Buffer.from('AAAAAAAAAAAAAAAA')
+      const part2 = Buffer.from('BBBBBBBBBBBBBBBB')
+
+      const create = await rawS3('POST', `${key}?uploads`)
+      expect(create.status).toBe(200)
+      const uploadId = (await create.text()).match(
+        /<UploadId>([^<]+)<\/UploadId>/,
+      )?.[1]
+      expect(uploadId).toBeDefined()
+
+      // Parts must be uploaded sequentially (the chunker enforces ordering).
+      const up1 = await rawS3(
+        'PUT',
+        `${key}?partNumber=1&uploadId=${uploadId}`,
+        part1,
+      )
+      expect(up1.status).toBe(200)
+      const etag1 = up1.headers.get('etag')!
+      expect(etag1).toMatch(MD5_ETAG_RE)
+
+      const up2 = await rawS3(
+        'PUT',
+        `${key}?partNumber=2&uploadId=${uploadId}`,
+        part2,
+      )
+      expect(up2.status).toBe(200)
+      const etag2 = up2.headers.get('etag')!
+
+      // The part list in the body is used only to compute the composite ETag.
+      const completeBody = Buffer.from(
+        '<CompleteMultipartUpload>' +
+          `<Part><ETag>${etag1}</ETag><PartNumber>1</PartNumber></Part>` +
+          `<Part><ETag>${etag2}</ETag><PartNumber>2</PartNumber></Part>` +
+          '</CompleteMultipartUpload>',
+      )
+      const complete = await rawS3(
+        'POST',
+        `${key}?uploadId=${uploadId}`,
+        completeBody,
+      )
+      expect(complete.status).toBe(200)
+      expect(complete.headers.get('etag')).toMatch(MULTIPART_ETAG_RE)
+
+      const get = await rawS3('GET', key)
+      expect(get.status).toBe(200)
+      const got = Buffer.from(await get.arrayBuffer())
+      expect(got).toEqual(Buffer.concat([part1, part2]))
+    }, 30_000)
+  })
 })
diff --git a/apps/backend/__tests__/unit/core/s3.spec.ts b/apps/backend/__tests__/unit/core/s3.spec.ts
@@ -369,4 +369,127 @@ describe('S3UseCases', () => {
       )
     })
   })
+
+  describe('listObjects', () => {
+    // dbLimit for delimiter listings is min(maxKeys * 10 + 100, 10_000), so
+    // maxKeys=2 yields dbLimit=120 — small enough to construct test data for.
+    const DELIMITER_DB_LIMIT = (maxKeys: number) =>
+      Math.min(maxKeys * 10 + 100, 10_000)
+
+    const makeListing = (key: string) => ({
+      key,
+      cid: 'cid',
+      size: 0n,
+      lastModified: new Date(0),
+    })
+
+    it('advances continuation token past a folded CommonPrefix when the DB batch is exhausted inside one prefix group', async () => {
+      // Regression test for Cursor Bugbot finding on PR #696 / #709.
+      //
+      // Scenario: maxKeys=2, delimiter='/', and a single virtual directory
+      // ('big/') contains more keys than fit in one DB batch.  Every fetched
+      // row folds into the same CommonPrefix, so the in-loop maxKeys cap is
+      // never hit and the loop exhausts the batch with isTruncated=false.
+      // The fallback branch must then set the continuation token to a value
+      // that sorts *after* every key in 'big/' — otherwise the next page
+      // re-scans the rest of that directory and emits 'big/' again.
+      const maxKeys = 2
+      const dbLimit = DELIMITER_DB_LIMIT(maxKeys)
+
+      // Fill the entire DB batch with keys that all fold into 'big/'.
+      const fullBatch = Array.from({ length: dbLimit }, (_, i) =>
+        makeListing(`big/${String(i).padStart(6, '0')}`),
+      )
+
+      jest
+        .spyOn(s3ObjectMappingsRepository, 'listObjects')
+        .mockResolvedValue(fullBatch as any)
+
+      const result = await S3UseCases.listObjects({
+        bucket: 'my-bucket',
+        prefix: '',
+        delimiter: '/',
+        maxKeys,
+        continuationToken: null,
+      })
+
+      expect(result.commonPrefixes).toEqual(['big/'])
+      expect(result.objects).toEqual([])
+      expect(result.isTruncated).toBe(true)
+      // Token must start with the folded prefix and sort strictly after every
+      // key inside it.  `\uffff` (U+FFFF) is the sentinel chosen for this purpose.
+      expect(result.nextContinuationToken).toBe('big/\uffff')
+      // Sanity: the token sorts after the last key in the mocked DB batch.
+      expect(
+        result.nextContinuationToken! > fullBatch[fullBatch.length - 1].key,
+      ).toBe(true)
+    })
+
+    it('uses the raw last key as the token when the last scanned key did not fold into a prefix', async () => {
+      // If the DB batch is full but the last key has no delimiter occurrence
+      // after the prefix, there's no CommonPrefix to skip past — fall back to
+      // the raw last key, which is the safe pre-fix behaviour.
+      const maxKeys = 2
+      const dbLimit = DELIMITER_DB_LIMIT(maxKeys)
+
+      // Pad the batch with folded entries, but make the LAST one a top-level
+      // key with no delimiter after the prefix.
+      const batch = [
+        ...Array.from({ length: dbLimit - 1 }, (_, i) =>
+          makeListing(`folder/${String(i).padStart(6, '0')}`),
+        ),
+        makeListing('zzz-top-level'),
+      ]
+
+      jest
+        .spyOn(s3ObjectMappingsRepository, 'listObjects')
+        .mockResolvedValue(batch as any)
+
+      const result = await S3UseCases.listObjects({
+        bucket: 'my-bucket',
+        prefix: '',
+        delimiter: '/',
+        maxKeys,
+        continuationToken: null,
+      })
+
+      expect(result.isTruncated).toBe(true)
+      // The last key doesn't fold into a CommonPrefix, so the token stays as
+      // the raw key — no sentinel needed.
+      expect(result.nextContinuationToken).toBe('zzz-top-level')
+    })
+
+    it('uses the raw last key as the token when no delimiter is set', async () => {
+      // Without a delimiter, the dbLimit is maxKeys + 1, and there are no
+      // CommonPrefixes to repeat — the safe fallback is just the last key.
+      const maxKeys = 2
+      const dbLimit = maxKeys + 1 // = 3
+
+      const batch = [
+        makeListing('a.txt'),
+        makeListing('b.txt'),
+        makeListing('c.txt'),
+      ]
+
+      jest
+        .spyOn(s3ObjectMappingsRepository, 'listObjects')
+        .mockResolvedValue(batch as any)
+
+      const result = await S3UseCases.listObjects({
+        bucket: 'my-bucket',
+        prefix: '',
+        delimiter: null,
+        maxKeys,
+        continuationToken: null,
+      })
+
+      // maxKeys=2 ⇒ first two keys returned, third triggers truncation in
+      // buildListResult (not the fallback), token = key just returned.
+      expect(result.objects.map((o) => o.key)).toEqual(['a.txt', 'b.txt'])
+      expect(result.isTruncated).toBe(true)
+      expect(result.nextContinuationToken).toBe('b.txt')
+      // Fixture check: the batch is a full dbLimit rows, yet the token above has no sentinel.
+      expect(batch.length).toBe(dbLimit)
+    })
+  })
 })
diff --git a/apps/backend/__tests__/unit/repositories/nodes.spec.ts b/apps/backend/__tests__/unit/repositories/nodes.spec.ts
@@ -10,8 +10,9 @@ import {
   nodesRepository,
   Node,
 } from '../../../src/infrastructure/repositories/objects/nodes.js'
+import { metadataRepository } from '../../../src/infrastructure/repositories/objects/metadata.js'
 import { dbMigration } from '../../utils/dbMigrate.js'
-import { MetadataType } from '@autonomys/auto-dag-data'
+import { MetadataType, OffchainMetadata } from '@autonomys/auto-dag-data'
 
 describe('Nodes Repository', () => {
   beforeAll(async () => {
@@ -370,22 +371,22 @@ describe('Nodes Repository', () => {
     expect(result?.piece_offset).toBe(100)
   })
 
-  it('should remove nodes by root CID', async () => {
+  it('should remove encoded_node only for published nodes by root CID', async () => {
     const rootCid = 'test-root-cid-remove'
     const nodes: Node[] = [
       {
-        cid: 'test-cid-remove-1',
+        cid: 'test-cid-remove-published',
         root_cid: rootCid,
         head_cid: 'test-head-cid-remove',
         type: 'file',
         encoded_node: 'test-encoded-node-remove-1',
         piece_index: null,
         piece_offset: null,
-        block_published_on: null,
+        block_published_on: 100,
         tx_published_on: null,
       },
       {
-        cid: 'test-cid-remove-2',
+        cid: 'test-cid-remove-unpublished',
         root_cid: rootCid,
         head_cid: 'test-head-cid-remove',
         type: 'file',
@@ -399,13 +400,16 @@ describe('Nodes Repository', () => {
 
     await nodesRepository.saveNodes(nodes)
     await nodesRepository.removeNodeDataByRootCid(rootCid)
-    const results = await nodesRepository.getNodesByRootCid(rootCid)
-    const fullNodes = await Promise.all(
-      results.map((r) => nodesRepository.getNode(r.cid)),
+
+    const publishedNode = await nodesRepository.getNode(
+      'test-cid-remove-published',
     )
-    fullNodes.forEach((n) => {
-      expect(n?.encoded_node).toBeNull()
-    })
+    expect(publishedNode?.encoded_node).toBeNull()
+
+    const unpublishedNode = await nodesRepository.getNode(
+      'test-cid-remove-unpublished',
+    )
+    expect(unpublishedNode?.encoded_node).toBe('test-encoded-node-remove-2')
   })
 
   it('should get nodes by CIDs', async () => {
@@ -466,6 +470,44 @@ describe('Nodes Repository', () => {
 
     expect(result?.block_published_on).toBe(12345)
     expect(result?.tx_published_on).toBe('tx-hash')
+    expect(result?.encoded_node).toBe('test-encoded-node-published')
+  })
+
+  it('should clear encoded_node on publish when metadata is already archived', async () => {
+    const rootCid = 'test-root-cid-publish-archived'
+    const headCid = 'test-head-cid-publish-archived'
+    const metadata: OffchainMetadata = {
+      totalSize: 100n,
+      type: 'file',
+      dataCid: 'test-data-cid-publish-archived',
+      totalChunks: 1,
+      chunks: [],
+      name: 'test-file-publish-archived',
+    }
+
+    await metadataRepository.setMetadata(rootCid, headCid, metadata)
+    await metadataRepository.markAsArchived(headCid)
+
+    const node: Node = {
+      cid: 'test-cid-publish-after-archive',
+      root_cid: rootCid,
+      head_cid: headCid,
+      type: 'file',
+      encoded_node: 'data-that-should-be-cleared',
+      piece_index: null,
+      piece_offset: null,
+      block_published_on: null,
+      tx_published_on: null,
+    }
+
+    await nodesRepository.saveNode(node)
+    await nodesRepository.updateNodePublishedOn(node.cid, 99999, 'tx-recovery')
+
+    const result = await nodesRepository.getNode(node.cid)
+
+    expect(result?.block_published_on).toBe(99999)
+    expect(result?.tx_published_on).toBe('tx-recovery')
+    expect(result?.encoded_node).toBeNull()
   })
 
   it('should get uploaded nodes by root CID', async () => {

diff --git a/apps/backend/src/app/apis/download.ts b/apps/backend/src/app/apis/download.ts
@@ -15,6 +15,26 @@ const createServer = async () => {
   logger.debug('Initializing download API server')
   const app = express()
 
+  if (config.express.corsAllowedOrigins) {
+    logger.debug(
+      'Configuring CORS with allowed origins: %j',
+      config.express.corsAllowedOrigins,
+    )
+    app.use(
+      cors({
+        origin: config.express.corsAllowedOrigins,
+      }),
+    )
+  } else {
+    logger.warn('CORS is not configured - no allowed origins specified, blocking cross-origin requests')
+  }
+
+  // The S3 controller handles its own raw body parsing (binary object
+  // payloads). It is mounted before the JSON/urlencoded parsers below so those
+  // never run for /s3 — otherwise body-parser would set req.body to {} and the
+  // raw object bytes would be lost.
+  app.use('/s3', s3Controller)
+
   app.use(
     express.json({
       limit: config.express.requestSizeLimit,
@@ -36,22 +56,7 @@ const createServer = async () => {
     config.express.requestSizeLimit,
   )
 
-  if (config.express.corsAllowedOrigins) {
-    logger.debug(
-      'Configuring CORS with allowed origins: %j',
-      config.express.corsAllowedOrigins,
-    )
-    app.use(
-      cors({
-        origin: config.express.corsAllowedOrigins,
-      }),
-    )
-  } else {
-    logger.warn('CORS is not configured - no allowed origins specified, blocking cross-origin requests')
-  }
-
   app.use('/downloads', downloadController)
-  app.use('/s3', s3Controller)
   app.use('/features', featuresController)
 
   logger.debug('Download controller mounted at /downloads')