diff --git a/CHANGES b/CHANGES index 8ae835854..416ce003e 100644 --- a/CHANGES +++ b/CHANGES @@ -1,6 +1,17 @@ Changes with FreeUnit 1.35.4 xx xxx 2026 + *) Security: bounds-check untrusted shared-memory offsets on the port + IPC and libunit deserialization paths (audit PR-B): reject + out-of-range chunk_id and chunk_id+nchunks in mmap messages + (nxt_port_mmap_get_incoming_buf), close a TOCTOU window between + incoming-mmap lookup and first hdr dereference by retaining the + handler refcount across the unlock, reject integer overflow in + the application-supplied (max_fields_count, max_fields_size) + response-buffer formula in nxt_unit_response_init/realloc, and + validate every nxt_unit_request_t sptr at request arrival before + any libunit consumer dereferences it. + *) Bugfix: fix router process CPU spin and connection hang under port scanning load; CLOSE-WAIT sockets are now cleaned up properly on client FIN, idle connection queue iteration fixed, systemd file diff --git a/src/nxt_port_memory.c b/src/nxt_port_memory.c index ad6e97ade..3dbcf0039 100644 --- a/src/nxt_port_memory.c +++ b/src/nxt_port_memory.c @@ -531,6 +531,16 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) if (nxt_fast_path(process->incoming.size > id)) { mmap_handler = process->incoming.elts[id].mmap_handler; + /* + * Bump refcount under the mutex so the handler cannot be unmapped + * by a concurrent peer-side close between this lookup and the + * caller's first dereference of mmap_handler->hdr. The caller + * adopts this reference and is responsible for releasing it. + */ + if (mmap_handler != NULL) { + nxt_port_mmap_handler_use(mmap_handler, 1); + } + } else { mmap_handler = NULL; @@ -543,6 +553,26 @@ nxt_port_get_port_incoming_mmap(nxt_task_t *task, nxt_pid_t spid, uint32_t id) } +/* + * Validate that a peer-supplied (chunk_id, nchunks) pair refers to a + * region wholly inside the mapped data area. Returns non-zero on + * success. Underflow-safe: subtracts on the constant side. + */ +nxt_inline nxt_bool_t +nxt_port_mmap_chunk_range_valid(nxt_chunk_id_t chunk_id, size_t nchunks) +{ + if (chunk_id >= PORT_MMAP_CHUNK_COUNT) { + return 0; + } + + if (nchunks > (size_t) PORT_MMAP_CHUNK_COUNT - chunk_id) { + return 0; + } + + return 1; +} + + nxt_buf_t * nxt_port_mmap_get_buf(nxt_task_t *task, nxt_port_mmaps_t *mmaps, size_t size) { @@ -679,8 +709,31 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, return NULL; } + /* + * mmap_msg fields originate from a peer process; reject offsets + * that would point outside the mapped data area before they reach + * pointer arithmetic below. See security-audit.md V5. + */ + nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; + if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { + nchunks++; + } + + if (nxt_slow_path(!nxt_port_mmap_chunk_range_valid(mmap_msg->chunk_id, + nchunks))) + { + nxt_alert(task, "invalid mmap message from pid %PI: " + "chunk_id %uD, size %uD (chunks %uz, max %d)", + spid, mmap_msg->chunk_id, mmap_msg->size, + nchunks, PORT_MMAP_CHUNK_COUNT); + + nxt_port_mmap_handler_use(mmap_handler, -1); + return NULL; + } + b = nxt_buf_mem_ts_alloc(task, port->mem_pool, 0); if (nxt_slow_path(b == NULL)) { + nxt_port_mmap_handler_use(mmap_handler, -1); return NULL; } @@ -688,11 +741,6 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, nxt_buf_set_port_mmap(b); - nchunks = mmap_msg->size / PORT_MMAP_CHUNK_SIZE; - if ((mmap_msg->size % PORT_MMAP_CHUNK_SIZE) != 0) { - nchunks++; - } - hdr = mmap_handler->hdr; b->mem.start = nxt_port_mmap_chunk_start(hdr, mmap_msg->chunk_id); @@ -701,7 +749,7 @@ nxt_port_mmap_get_incoming_buf(nxt_task_t *task, nxt_port_t *port, b->mem.end = b->mem.start + nchunks * PORT_MMAP_CHUNK_SIZE; b->parent = mmap_handler; - nxt_port_mmap_handler_use(mmap_handler, 1); + /* Adopts the reference taken by nxt_port_get_port_incoming_mmap(). */ nxt_debug(task, "incoming mmap buf allocation: %p [%p,%uz] %PI->%PI,%d,%d", b, b->mem.start, b->mem.end - b->mem.start, diff --git a/src/nxt_unit.c b/src/nxt_unit.c index 7d523beb8..bfb187ef1 100644 --- a/src/nxt_unit.c +++ b/src/nxt_unit.c @@ -203,6 +203,87 @@ static void nxt_unit_lvlhsh_free(void *data, void *p); static int nxt_unit_memcasecmp(const void *p1, const void *p2, size_t length); +/* + * Compute the response buffer size for a given (max_fields_count, + * max_fields_size) pair, rejecting integer overflow. Both inputs come + * from the application; an overflow here would yield an undersized + * allocation that subsequent field memcpy()s overrun. See + * security-audit.md V10. + */ +static int +nxt_unit_response_buf_size(uint32_t max_fields_count, + uint32_t max_fields_size, uint32_t *buf_size) +{ + /* + * Each field name and value is 0-terminated by libunit, hence + * the '+ 2' per field (matches the historical formula). + */ + uint32_t total; + + if (max_fields_count + > (UINT32_MAX - (uint32_t) sizeof(nxt_unit_response_t)) + / (uint32_t) (sizeof(nxt_unit_field_t) + 2)) + { + return NXT_UNIT_ERROR; + } + + total = (uint32_t) sizeof(nxt_unit_response_t) + + max_fields_count * (uint32_t) (sizeof(nxt_unit_field_t) + 2); + + if (max_fields_size > UINT32_MAX - total) { + return NXT_UNIT_ERROR; + } + + *buf_size = total + max_fields_size; + + return NXT_UNIT_OK; +} + + +/* + * Validate that an sptr field within a peer-supplied buffer dereferences + * to a [length]-byte range that is wholly inside the buffer. Used at + * request-arrival time to vet every sptr in nxt_unit_request_t before + * the application sees it. See security-audit.md V10. + * + * sptr->base aliases the address of the sptr itself (the union encodes + * an offset relative to that location), so this also implicitly checks + * that the sptr is inside the buffer. + */ +static int +nxt_unit_sptr_in_buf(nxt_unit_sptr_t *sptr, uint32_t length, + void *buf_start, uint32_t buf_size) +{ + size_t sptr_off, end_off; + + if ((uint8_t *) sptr < (uint8_t *) buf_start) { + return 0; + } + + sptr_off = (uint8_t *) sptr - (uint8_t *) buf_start; + /* + * Underflow-safe: subtract on the constant side throughout. The + * sptr struct itself must fit inside the buffer before we + * dereference sptr->offset, so check sizeof(*sptr) here rather + * than just sptr_off > buf_size. + */ + if (sptr_off > buf_size - sizeof(nxt_unit_sptr_t)) { + return 0; + } + + if (sptr->offset > buf_size - sptr_off) { + return 0; + } + + end_off = sptr_off + sptr->offset; + if (length > buf_size - end_off) { + return 0; + } + + return 1; +} + + struct nxt_unit_mmap_buf_s { nxt_unit_buf_t buf; @@ -1303,6 +1384,46 @@ nxt_unit_process_req_headers(nxt_unit_ctx_t *ctx, nxt_unit_recv_msg_t *recv_msg, return NXT_UNIT_ERROR; } + /* + * Validate every sptr in the request struct before any code path + * dereferences it. Offsets originate from the router (a more + * privileged peer) but the libunit ABI is also reachable from + * attacker-influenced input shapes; keeping the validation + * co-located with arrival makes the trust boundary explicit. + * See security-audit.md V10 (sptr offset). + */ + { + nxt_unit_request_t *vr = recv_msg->start; + uint32_t vsize = recv_msg->size; + + if (nxt_slow_path( + !nxt_unit_sptr_in_buf(&vr->method, vr->method_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->version, vr->version_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->remote, vr->remote_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_addr, vr->local_addr_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->local_port, vr->local_port_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->server_name, vr->server_name_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->target, vr->target_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->path, vr->path_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->query, vr->query_length, + recv_msg->start, vsize) + || !nxt_unit_sptr_in_buf(&vr->preread_content, 0, + recv_msg->start, vsize))) + { + nxt_unit_warn(ctx, "#%"PRIu32": malformed request: " + "sptr out of buffer", recv_msg->stream); + return NXT_UNIT_ERROR; + } + } + req_impl = nxt_unit_request_info_get(ctx); if (nxt_slow_path(req_impl == NULL)) { nxt_unit_warn(ctx, "#%"PRIu32": request info allocation failed", @@ -2042,13 +2163,15 @@ nxt_unit_response_init(nxt_unit_request_info_t *req, nxt_unit_req_debug(req, "duplicate response init"); } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "init: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } if (nxt_slow_path(req->response_buf != NULL)) { buf = req->response_buf; @@ -2121,13 +2244,15 @@ nxt_unit_response_realloc(nxt_unit_request_info_t *req, return NXT_UNIT_ERROR; } - /* - * Each field name and value 0-terminated by libunit, - * this is the reason of '+ 2' below. - */ - buf_size = sizeof(nxt_unit_response_t) - + max_fields_count * (sizeof(nxt_unit_field_t) + 2) - + max_fields_size; + if (nxt_slow_path(nxt_unit_response_buf_size(max_fields_count, + max_fields_size, + &buf_size) != NXT_UNIT_OK)) + { + nxt_unit_req_alert(req, "realloc: response buffer size overflow " + "(max_fields_count=%"PRIu32", max_fields_size=%"PRIu32")", + max_fields_count, max_fields_size); + return NXT_UNIT_ERROR; + } nxt_unit_req_debug(req, "realloc %"PRIu32"", buf_size);