Skip to content

Commit e07a034

Browse files
[multicast] Add multicast replication support for softnpu/a4x2
Introduce two-group multicast replication (mcast_grp_a / mcast_grp_b) modeled after dendrite's replication contract and P4 sidecar expectations. When egress metadata declares the multicast field set, the generated pipeline replicates packets to group members with per-copy attribution tags, ingress-port exclusion, and mcast-over-broadcast precedence. ## Slice codegen fix Fix a latent bug where non-byte-aligned multi-byte slices (e.g., field[11:4] on bit<32>) produced incorrect bitvec ranges after header byte-reversal. Validation moves to HLIR so these are now rejected at compile time with a diagnostic rather than silently generating wrong code. ## Multicast contract The codegen path activates when egress_metadata_t declares all 4 fields: mcast_grp_a, mcast_grp_b, mcast_replication, and mcast_replicated. Partial declarations are caught at codegen time. A separate softnpu_mcast.p4 test platform definition keeps multicast opt-in. ## Runtime support McastReplicationTag values (0/1/2) match dendrite's MULTICAST_TAG_* wire encoding directly. We added required methods on the Pipeline trait to expose group management. ## Tests Integration tests cover varying multicast workflows. Two end-to-end HLIR tests verify slice alignment diagnostics through the full compiler pipeline.
1 parent 132cdc3 commit e07a034

File tree

9 files changed

+1175
-150
lines changed

9 files changed

+1175
-150
lines changed

codegen/rust/src/expression.rs

Lines changed: 226 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -111,22 +111,37 @@ impl<'a> ExpressionGenerator<'a> {
111111
}
112112
ExpressionKind::Index(lval, xpr) => {
113113
let mut ts = self.generate_lvalue(lval);
114-
ts.extend(self.generate_expression(xpr.as_ref()));
114+
// When the index expression is a slice (e.g. `field[31:28]`),
115+
// we need the parent field's bit width to adjust for the
116+
// byte reversal in header.rs. The HLIR resolves lvalue types
117+
// during type checking, so we look up the width here and
118+
// pass it to generate_slice. For non-bit types (or if the
119+
// lvalue is missing from the HLIR, which should not happen
120+
// for well-typed programs), we fall back to width 0 which
121+
// skips the adjustment.
122+
if let ExpressionKind::Slice(begin, end) = &xpr.kind {
123+
let field_width = self
124+
.hlir
125+
.lvalue_decls
126+
.get(lval)
127+
.map(|ni| match &ni.ty {
128+
p4::ast::Type::Bit(w)
129+
| p4::ast::Type::Varbit(w)
130+
| p4::ast::Type::Int(w) => *w,
131+
_ => 0,
132+
})
133+
.unwrap_or(0);
134+
ts.extend(self.generate_slice(begin, end, field_width));
135+
} else {
136+
ts.extend(self.generate_expression(xpr.as_ref()));
137+
}
115138
ts
116139
}
117140
ExpressionKind::Slice(begin, end) => {
118-
let l = match &begin.kind {
119-
ExpressionKind::IntegerLit(v) => *v as usize,
120-
_ => panic!("slice ranges can only be integer literals"),
121-
};
122-
let l = l + 1;
123-
let r = match &end.kind {
124-
ExpressionKind::IntegerLit(v) => *v as usize,
125-
_ => panic!("slice ranges can only be integer literals"),
126-
};
127-
quote! {
128-
[#r..#l]
129-
}
141+
// Bare slice outside ExpressionKind::Index context (should not
142+
// occur in well-typed programs per hlir.rs validation). No
143+
// field width available, so no byte-reversal adjustment.
144+
self.generate_slice(begin, end, 0)
130145
}
131146
ExpressionKind::Call(call) => {
132147
let lv: Vec<TokenStream> = call
@@ -158,6 +173,57 @@ impl<'a> ExpressionGenerator<'a> {
158173
}
159174
}
160175

176+
/// Generate a bitvec range for a P4 bit slice `[hi:lo]`.
177+
///
178+
/// # Confused-endian byte reversal
179+
///
180+
/// header.rs stores multi-byte fields (width > 8 bits) with bytes
181+
/// reversed relative to wire order. Bit positions within each byte
182+
/// are preserved (Msb0), only byte order is flipped.
183+
///
184+
/// P4 bit numbering is MSB-first: bit W-1 is the MSB. A naive
185+
/// `W-1-b` mapping is correct for wire order but wrong after byte
186+
/// reversal. The correct mapping:
187+
///
188+
/// ```text
189+
/// wire_idx = W - 1 - b
190+
/// wire_byte = wire_idx / 8
191+
/// bit_in_byte = wire_idx % 8
192+
/// storage_byte = W/8 - 1 - wire_byte
193+
/// bitvec_idx = storage_byte * 8 + bit_in_byte
194+
/// ```
195+
///
196+
/// Full-byte MSB-aligned slices (e.g. `[127:120]`) happen to
197+
/// produce the same range either way due to a numeric coincidence.
198+
/// Sub-byte or non-MSB-aligned slices (e.g. `[31:28]`) require
199+
/// the adjustment.
200+
fn generate_slice(
201+
&self,
202+
begin: &Expression,
203+
end: &Expression,
204+
field_width: FieldWidth,
205+
) -> TokenStream {
206+
let hi: P4Bit = match &begin.kind {
207+
ExpressionKind::IntegerLit(v) => *v as usize,
208+
_ => panic!("slice ranges can only be integer literals"),
209+
};
210+
let lo: P4Bit = match &end.kind {
211+
ExpressionKind::IntegerLit(v) => *v as usize,
212+
_ => panic!("slice ranges can only be integer literals"),
213+
};
214+
215+
if field_width > 8 {
216+
let (r, l) = reversed_slice_range(hi, lo, field_width);
217+
quote! { [#r..#l] }
218+
} else {
219+
// Fields <= 8 bits are not byte-reversed by header.rs,
220+
// so the naive P4-to-bitvec mapping is correct.
221+
let l = hi + 1;
222+
let r = lo;
223+
quote! { [#r..#l] }
224+
}
225+
}
226+
161227
pub(crate) fn generate_bit_literal(
162228
&self,
163229
width: u16,
@@ -223,3 +289,150 @@ impl<'a> ExpressionGenerator<'a> {
223289
}
224290
}
225291
}
292+
293+
/// P4 bit position (MSB-first index within a field).
294+
type P4Bit = usize;
295+
296+
/// Width of a P4 header field in bits.
297+
type FieldWidth = usize;
298+
299+
/// Half-open bitvec range `(start, end)` into the storage representation.
300+
type BitvecRange = (usize, usize);
301+
302+
/// Compute the bitvec range `(start, end)` for a P4 slice `[hi:lo]` on a
303+
/// byte-reversed field of the given width.
304+
///
305+
/// header.rs reverses byte order for multi-byte fields. The mapping from
306+
/// P4 bit positions to storage positions depends on whether the slice
307+
/// stays within a single wire byte or spans multiple bytes.
308+
///
309+
/// For a single-byte slice, we map each endpoint through the byte reversal:
310+
/// the target byte moves to a new position but bit ordering within the byte
311+
/// is preserved (Msb0). This handles sub-byte nibble extractions like
312+
/// `ipv4.dst[31:28]`.
313+
///
314+
/// For a multi-byte slice, the byte reversal makes the endpoints
315+
/// non-contiguous (byte 0 maps to the far end, byte 1 maps next to it,
316+
/// etc.). However, if the slice is byte-aligned, the reversed bytes form
317+
/// a contiguous block at a different offset. We compute the storage byte
318+
/// range directly. Non-byte-aligned multi-byte slices cannot be represented
319+
/// as a single contiguous range after reversal and will panic.
320+
fn reversed_slice_range(
321+
hi: P4Bit,
322+
lo: P4Bit,
323+
field_width: FieldWidth,
324+
) -> BitvecRange {
325+
// Wire byte indices for the slice endpoints. P4 bit W-1 is in wire
326+
// byte 0 (MSB-first), so higher bit numbers map to lower byte indices.
327+
let wire_byte_hi = (field_width - 1 - hi) / 8;
328+
let wire_byte_lo = (field_width - 1 - lo) / 8;
329+
330+
if wire_byte_hi == wire_byte_lo {
331+
// Single-byte slice: map each endpoint individually.
332+
let map_bit = |bit_pos: usize| -> usize {
333+
let wire_idx = field_width - 1 - bit_pos;
334+
let wire_byte = wire_idx / 8;
335+
let bit_in_byte = wire_idx % 8;
336+
let storage_byte = field_width / 8 - 1 - wire_byte;
337+
storage_byte * 8 + bit_in_byte
338+
};
339+
340+
let mapped_hi = map_bit(hi);
341+
let mapped_lo = map_bit(lo);
342+
(mapped_hi.min(mapped_lo), mapped_hi.max(mapped_lo) + 1)
343+
} else {
344+
// Multi-byte slice: the HLIR rejects non-byte-aligned cases
345+
// during validation.
346+
assert!(
347+
(hi + 1).is_multiple_of(8) && lo.is_multiple_of(8),
348+
"non-byte-aligned multi-byte slice [{hi}:{lo}] on \
349+
{field_width}-bit field reached codegen",
350+
);
351+
352+
// Reversed storage bytes form a contiguous block.
353+
let storage_byte_start = field_width / 8 - 1 - wire_byte_lo;
354+
let storage_byte_end = field_width / 8 - 1 - wire_byte_hi;
355+
(storage_byte_start * 8, (storage_byte_end + 1) * 8)
356+
}
357+
}
358+
359+
#[cfg(test)]
360+
mod test {
361+
use super::*;
362+
363+
// Verify the reversed slice range mapping against the byte reversal
364+
// in header.rs. For each case we check that the bitvec range lands
365+
// on the correct bits in the reversed storage layout.
366+
367+
// Sub-byte slices within a single wire byte.
368+
369+
#[test]
370+
fn slice_32bit_top_nibble() {
371+
// P4 [31:28] on 32-bit: top nibble of wire byte 0.
372+
// Storage: wire byte 0 -> storage byte 3.
373+
// High nibble of storage byte 3 = bitvec [24..28].
374+
assert_eq!(reversed_slice_range(31, 28, 32), (24, 28));
375+
}
376+
377+
#[test]
378+
fn slice_32bit_bottom_nibble() {
379+
// P4 [3:0] on 32-bit: bottom nibble of wire byte 3.
380+
// Storage: wire byte 3 -> storage byte 0.
381+
// Low nibble (Msb0) of storage byte 0 = bitvec [4..8].
382+
assert_eq!(reversed_slice_range(3, 0, 32), (4, 8));
383+
}
384+
385+
#[test]
386+
fn slice_16bit_top_nibble() {
387+
// P4 [15:12] on 16-bit: top nibble of wire byte 0.
388+
// Storage: wire byte 0 -> storage byte 1.
389+
// High nibble of storage byte 1 = bitvec [8..12].
390+
assert_eq!(reversed_slice_range(15, 12, 16), (8, 12));
391+
}
392+
393+
// Full-byte slices (single byte).
394+
395+
#[test]
396+
fn slice_128bit_top_byte() {
397+
// P4 [127:120] on 128-bit: wire byte 0 -> storage byte 15.
398+
// bitvec [120..128].
399+
assert_eq!(reversed_slice_range(127, 120, 128), (120, 128));
400+
}
401+
402+
#[test]
403+
fn slice_16bit_low_byte() {
404+
// P4 [7:0] on 16-bit: wire byte 1 -> storage byte 0.
405+
// bitvec [0..8].
406+
assert_eq!(reversed_slice_range(7, 0, 16), (0, 8));
407+
}
408+
409+
#[test]
410+
fn slice_32bit_middle_byte() {
411+
// P4 [23:16] on 32-bit: wire byte 1 -> storage byte 2.
412+
// bitvec [16..24].
413+
assert_eq!(reversed_slice_range(23, 16, 32), (16, 24));
414+
}
415+
416+
// Multi-byte byte-aligned slices.
417+
418+
#[test]
419+
fn slice_128bit_top_two_bytes() {
420+
// P4 [127:112] on 128-bit: wire bytes 0-1 -> storage bytes 14-15.
421+
// bitvec [112..128].
422+
assert_eq!(reversed_slice_range(127, 112, 128), (112, 128));
423+
}
424+
425+
#[test]
426+
fn slice_32bit_top_three_bytes() {
427+
// P4 [31:8] on 32-bit: wire bytes 0-2 -> storage bytes 1-3.
428+
// bitvec [8..32].
429+
assert_eq!(reversed_slice_range(31, 8, 32), (8, 32));
430+
}
431+
432+
#[test]
433+
fn slice_32bit_bottom_two_bytes() {
434+
// P4 [15:0] on 32-bit: wire bytes 2-3 -> storage bytes 0-1.
435+
// bitvec [0..16].
436+
assert_eq!(reversed_slice_range(15, 0, 32), (0, 16));
437+
}
438+
}

0 commit comments

Comments
 (0)