Skip to content
Draft
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
4e1d883
Add dynamic runtime sequence operations to AIEX dialect
jgmelber Feb 20, 2026
58e3d15
Add standalone TXN encoding library and EmitC-based C++ code generation
jgmelber Mar 11, 2026
ec123b2
Merge dynamic and static TXN paths to remove duplication
jgmelber Mar 11, 2026
a48a3ed
Add dynamic-size runtime TXN demo: compile once, run at any size
jgmelber Mar 11, 2026
1bb1992
clang-format
jgmelber Mar 11, 2026
73d54a9
Unify static and dynamic AIEX ops, eliminate Dyn op duplication
jgmelber Mar 12, 2026
f017bcf
Add IRON-level dynamic runtime support: RuntimeScalar, write_rtp, dyn…
jgmelber Mar 12, 2026
88de190
Move dynamic GEMM designs to single_core_dynamic/, add placed variant…
jgmelber Mar 12, 2026
3f8bcd3
Extract RTP address from static instructions instead of hardcoding
jgmelber Mar 12, 2026
6c28cc9
clang-format and black formatting
jgmelber Mar 12, 2026
0155ebd
Add dynamic TXN generation for runtime-configurable GEMM
jgmelber Mar 26, 2026
544aec0
Fix dynamic TXN generation after rebase on main
jgmelber Mar 26, 2026
39fed19
Address code review: fix correctness bugs and clean up
jgmelber Mar 26, 2026
0066a60
Reset cmake/modulesXilinx submodule to match main
jgmelber Mar 26, 2026
93acc43
Fix linker error: add AIETransforms dependency to AIETargets
jgmelber Mar 26, 2026
c97ce07
Remove IsolatedFromAbove from RuntimeSequenceOp, fix CI test failures
jgmelber Mar 26, 2026
5286ec4
Fix Python formatting for CI (black)
jgmelber Mar 26, 2026
934d90b
Reset cmake/modulesXilinx submodule to match main
jgmelber Mar 26, 2026
84a7704
Format Python files to match CI black version
jgmelber Mar 26, 2026
1fa8774
Clean branch for PR: remove hand-written TXN, stage all changes
jgmelber May 5, 2026
93c6450
Merge remote-tracking branch 'origin/main' into dynamic-runtime-seque…
jgmelber May 5, 2026
ba6319d
Fix critical and blocker issues from PR review
jgmelber May 5, 2026
0e9e2fe
Address remaining PR review findings: performance, code quality, docs
jgmelber May 5, 2026
90ab0cd
Fix remaining PR review items: docs, validation, code quality
jgmelber May 5, 2026
2b63765
Add FileCheck test coverage for EmitC TXN C++ generation (M7)
jgmelber May 5, 2026
fe0ba91
Fix passthrough dynamic Makefile: use aiecc instead of aie-translate
jgmelber May 5, 2026
2943c94
Format C++ and Python files for CI (clang-format, black)
jgmelber May 5, 2026
8da783e
Scope SCF-to-CF conversion to aie.core ops only (M3)
jgmelber May 5, 2026
240b7b3
Fix CI format check: clang-format and black
jgmelber May 5, 2026
88c66ee
Fix remaining clang-format issues
jgmelber May 5, 2026
bdf4d07
Fix Python formatting for CI (black 26.3.1)
jgmelber May 5, 2026
1c58970
Reformat test files with black 26.3.1 to match CI
jgmelber May 5, 2026
f6e8a06
Fix buffer_resolution.py FileCheck: update rtp_write assembly format
jgmelber May 5, 2026
b8cafe9
Add lit test for dynamic GEMM TXN generation
jgmelber May 5, 2026
606ca9f
Add NPU1 (Phoenix) support and lit tests for dynamic TXN examples
jgmelber May 5, 2026
e508bb5
Support SSA i32 in npu.dma_memcpy_nd and lower floordivsi to EmitC
jgmelber May 6, 2026
1531551
Refactor single_core_dynamic.py as minimal delta over single_core.py
jgmelber May 6, 2026
e0d55eb
[WIP] testing and using blockwrites
jgmelber May 6, 2026
119053c
Collapse dynamic BD into a single blockwrite call
jgmelber May 6, 2026
5b59027
Keep dma_memcpy_nd at i64 + add static-vs-dynamic TXN equivalence test
jgmelber May 6, 2026
758ab3d
Apply clang-format to prior changes
jgmelber May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions include/aie/Conversion/AIEXToEmitC/AIEXToEmitC.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//===- AIEXToEmitC.h - AIEX to EmitC conversion -----------------*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2026 Advanced Micro Devices, Inc.
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

2026

//
//===----------------------------------------------------------------------===//

#ifndef AIE_CONVERSION_AIEXTOEMITC_AIEXTOEMITC_H
#define AIE_CONVERSION_AIEXTOEMITC_AIEXTOEMITC_H

#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include <memory>

namespace xilinx {

/// Factory for the AIEX-to-EmitC conversion pass.
///
/// This is the function named as the `constructor` of the
/// `convert-aiex-to-emitc` pass in Passes.td, whose summary is "Convert AIEX
/// dynamic runtime sequence ops to EmitC dialect".
///
/// \returns an owning pointer to a pass that is anchored on `mlir::ModuleOp`.
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createConvertAIEXToEmitCPass();

} // namespace xilinx

#endif // AIE_CONVERSION_AIEXTOEMITC_AIEXTOEMITC_H
1 change: 1 addition & 0 deletions include/aie/Conversion/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#include "aie/Conversion/AIEToConfiguration/AIEToConfiguration.h"
#include "aie/Conversion/AIEVecToLLVM/AIEVecToLLVM.h"
#include "aie/Conversion/AIEXToEmitC/AIEXToEmitC.h"
#include "aie/Conversion/PassesEnums.h.inc"

namespace xilinx {
Expand Down
22 changes: 22 additions & 0 deletions include/aie/Conversion/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,26 @@ def ConvertAIEToControlPackets : Pass<"convert-aie-to-control-packets",
];
}

//===----------------------------------------------------------------------===//
// AIEXToEmitC
//===----------------------------------------------------------------------===//

// Declarative specification of the `convert-aiex-to-emitc` pass, anchored on
// mlir::ModuleOp. The `constructor` field names the C++ factory function that
// creates the pass instance.
def ConvertAIEXToEmitC : Pass<"convert-aiex-to-emitc", "mlir::ModuleOp"> {
let summary = "Convert AIEX dynamic runtime sequence ops to EmitC dialect";
let description = [{
This pass converts AIEX runtime sequence operations (write32, maskwrite32,
sync — including their dynamic operand forms) along with static NPU ops,
SCF control flow, and arith operations into EmitC dialect ops. The
resulting EmitC IR can be translated to C++ code via translateToCpp()
that calls functions from the standalone TxnEncoding.h library to
generate TXN binaries at runtime.
}];
let constructor = "xilinx::createConvertAIEXToEmitCPass()";
// Dialects this pass declares as dependencies: EmitC (the conversion target
// named in the summary) plus the arith, SCF, AIE and AIEX dialects whose ops
// the description lists as inputs.
let dependentDialects = ["mlir::emitc::EmitCDialect",
"mlir::arith::ArithDialect",
"mlir::scf::SCFDialect",
"xilinx::AIE::AIEDialect",
"xilinx::AIEX::AIEXDialect"];
}

#endif // AIE_CONVERSION_PASSES
4 changes: 2 additions & 2 deletions include/aie/Dialect/AIE/IR/AIEOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2228,8 +2228,8 @@ def AIE_BDChainOp: AIE_Op<"bd_chain", [Symbol, SkipAccessibilityCheckTrait]> {

def AIE_RuntimeSequenceOp : AIE_Op<"runtime_sequence", [
Symbol,
NoTerminator,
HasParent<"DeviceOp">,
NoTerminator,
HasParent<"DeviceOp">,
]> {
let summary = "Program the configuration co-processor of the AI Engine array";
let description = [{
Expand Down
152 changes: 130 additions & 22 deletions include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -748,13 +748,19 @@ def AIE_NpuWriteRTPOp: AIEX_Op<"npu.rtp_write", []> {
let arguments = (
ins FlatSymbolRefAttr:$buffer,
UI32Attr:$index,
I32Attr:$value
OptionalAttr<I32Attr>:$value,
Optional<I32>:$dyn_value
);
let results = (outs );
let assemblyFormat = [{ `(` $buffer `,` $index `,` $value `)` attr-dict
}];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let description = [{
rtp write operator
rtp write operator.
When `dyn_value` is provided, it supplies the RTP value at runtime
instead of the static `value` attribute.
}];
let extraClassDeclaration = [{
bool hasDynamicValue() { return getDynValue() != nullptr; }
}];
Comment on lines +776 to 789
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little worried about code bloat with having every parameter for these ops duplicated, once as an attribute and once as an SSA value, along with the added custom verifier and assembly format for each op.

Could we consider removing the attributes altogether and instead use SSA values, with arith.constant for the static case? All existing lowerings can get the value from arith.constant and throw an error if it's not a constant, this emitC pass can use the actual SSA values. This approach would of course touch a lot of code (all examples etc. that use these ops with attributes would have to be rewritten to use arith.constant), but I think AI could handle it. I think it would be cleaner and might remove the need for customAssemblyFormat and hasVerifier for every op (haven't gotten to those yet but assume they're there because of this).

}

Expand All @@ -781,19 +787,20 @@ def AIE_NpuPushQueueOp: AIEX_Op<"npu.push_queue", []> {
}

// WRITE32
def AIE_NpuWrite32Op: AIEX_Op<"npu.write32", []> {
def AIE_NpuWrite32Op: AIEX_Op<"npu.write32", [AttrSizedOperandSegments]> {
let summary = "write32 operator";
let arguments = (
ins UI32Attr:$address,
UI32Attr:$value,
OptionalAttr<FlatSymbolRefAttr>:$buffer,
OptionalAttr<I32Attr>:$column,
OptionalAttr<I32Attr>:$row
OptionalAttr<I32Attr>:$row,
Optional<AnySignlessInteger>:$dyn_address,
Optional<AnySignlessInteger>:$dyn_value
);
let results = (outs );
let assemblyFormat = [{
attr-dict
}];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let description = [{
NPU write32 operator writes a 32bit value to the AIE array.
If 'buffer' is present then 'address' is interpreted as an offset into the
Expand All @@ -802,27 +809,57 @@ def AIE_NpuWrite32Op: AIEX_Op<"npu.write32", []> {
into the memory space of aie.tile(column, row).
If 'buffer' is not present and 'column' and 'row' are not present then
'address' is interpreted as a full 32-bit address in the AIE array.

Optionally, SSA values can be provided for 'dyn_address' and 'dyn_value'
to enable runtime-parameterized sequences. When dynamic operands are present,
the static attributes serve as placeholders (typically 0) and the SSA values
are used instead.

Static syntax: `aiex.npu.write32 {address = 123 : ui32, value = 456 : ui32}`
Dynamic syntax: `aiex.npu.write32(%addr, %val) {address = 0 : ui32, value = 0 : ui32} : i32, i32`
}];
let extraClassDeclaration = [{
std::optional<uint32_t> getAbsoluteAddress();
bool hasDynamicOperands() { return getDynAddress() != nullptr; }
}];
let builders = [
OpBuilder<(ins "uint32_t":$address, "uint32_t":$value,
"mlir::FlatSymbolRefAttr":$buffer,
"mlir::IntegerAttr":$column, "mlir::IntegerAttr":$row), [{
build($_builder, $_state,
$_builder.getUI32IntegerAttr(address),
$_builder.getUI32IntegerAttr(value),
buffer, column, row,
/*dyn_address=*/Value(), /*dyn_value=*/Value());
}]>,
OpBuilder<(ins "mlir::IntegerAttr":$address, "mlir::IntegerAttr":$value,
"mlir::FlatSymbolRefAttr":$buffer,
"mlir::IntegerAttr":$column, "mlir::IntegerAttr":$row), [{
build($_builder, $_state,
address, value,
buffer, column, row,
/*dyn_address=*/Value(), /*dyn_value=*/Value());
}]>
];
}

// MASKWRITE
def AIE_NpuMaskWrite32Op: AIEX_Op<"npu.maskwrite32", []> {
def AIE_NpuMaskWrite32Op: AIEX_Op<"npu.maskwrite32", [AttrSizedOperandSegments]> {
let summary = "Write a masked 32-bit value to the AIE array";
let arguments = (
ins UI32Attr:$address,
UI32Attr:$value,
UI32Attr:$mask,
OptionalAttr<FlatSymbolRefAttr>:$buffer,
OptionalAttr<I32Attr>:$column,
OptionalAttr<I32Attr>:$row
OptionalAttr<I32Attr>:$row,
Optional<AnySignlessInteger>:$dyn_address,
Optional<AnySignlessInteger>:$dyn_value,
Optional<AnySignlessInteger>:$dyn_mask
);
let results = (outs );
let assemblyFormat = [{
attr-dict
}];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let description = [{
NPU mask write32 operator writes a masked 32bit value to the AIE array.
If 'buffer' is present then 'address' is interpreted as an offset into the
Expand All @@ -831,10 +868,38 @@ def AIE_NpuMaskWrite32Op: AIEX_Op<"npu.maskwrite32", []> {
into the memory space of aie.tile(column, row).
If 'buffer' is not present and 'column' and 'row' are not present then
'address' is interpreted as a full 32-bit address in the AIE array.

Optionally, SSA values can be provided for 'dyn_address', 'dyn_value', and
'dyn_mask' to enable runtime-parameterized sequences.

Static syntax: `aiex.npu.maskwrite32 {address = 123 : ui32, ...}`
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggest removing "(unchanged)" from these comments

Dynamic syntax: `aiex.npu.maskwrite32(%addr, %val, %mask) {address = 0 : ui32, ...} : i32, i32, i32`
}];
let extraClassDeclaration = [{
std::optional<uint32_t> getAbsoluteAddress();
bool hasDynamicOperands() { return getDynAddress() != nullptr; }
}];
let builders = [
OpBuilder<(ins "uint32_t":$address, "uint32_t":$value, "uint32_t":$mask,
"mlir::FlatSymbolRefAttr":$buffer,
"mlir::IntegerAttr":$column, "mlir::IntegerAttr":$row), [{
build($_builder, $_state,
$_builder.getUI32IntegerAttr(address),
$_builder.getUI32IntegerAttr(value),
$_builder.getUI32IntegerAttr(mask),
buffer, column, row,
/*dyn_address=*/Value(), /*dyn_value=*/Value(), /*dyn_mask=*/Value());
}]>,
OpBuilder<(ins "mlir::IntegerAttr":$address, "mlir::IntegerAttr":$value,
"mlir::IntegerAttr":$mask,
"mlir::FlatSymbolRefAttr":$buffer,
"mlir::IntegerAttr":$column, "mlir::IntegerAttr":$row), [{
build($_builder, $_state,
address, value, mask,
buffer, column, row,
/*dyn_address=*/Value(), /*dyn_value=*/Value(), /*dyn_mask=*/Value());
}]>
];
}

// BLOCKWRITE
Expand Down Expand Up @@ -867,28 +932,67 @@ def AIE_NpuBlockWriteOp: AIEX_Op<"npu.blockwrite", []> {
}

// OP_SYNC
def AIE_NpuSyncOp: AIEX_Op<"npu.sync", []> {
def AIE_NpuSyncOp: AIEX_Op<"npu.sync", [AttrSizedOperandSegments]> {
let summary = "sync operator";
let arguments = (
ins I32Attr:$column,
I32Attr:$row,
I32Attr:$direction,
I32Attr:$channel,
I32Attr:$column_num,
I32Attr:$row_num
I32Attr:$row_num,
Optional<AnySignlessInteger>:$dyn_column,
Optional<AnySignlessInteger>:$dyn_row,
Optional<AnySignlessInteger>:$dyn_direction,
Optional<AnySignlessInteger>:$dyn_channel,
Optional<AnySignlessInteger>:$dyn_column_num,
Optional<AnySignlessInteger>:$dyn_row_num
);
let results = (outs );
let assemblyFormat = [{
attr-dict
}];
let hasCustomAssemblyFormat = 1;
let hasVerifier = 1;
let description = [{
The sync operation blocks execution of the instruction stream until a task-complete token (TCT) is received on `column`, `row`, channel `channel`, direction `direction` (where `0` is `S2MM` and `1` is `MM2S`).

#### Troubleshooting

If this operation appears to deadlock, ensure that at least one buffer descriptor is configured to issue a TCT on the channel you expect.
By default, `dma_memcpy_nd` operations only issue tokens for `S2MM` channels, and `issue_token` must be set to `true` to issue tokens for `MM2S` channels.

Optionally, SSA values can be provided for all parameters to enable
runtime-parameterized sequences.

Static syntax: `aiex.npu.sync {column = 0 : i32, ...}`
Dynamic syntax: `aiex.npu.sync(%col, %row, %dir, %chan, %ncol, %nrow) {column = 0 : i32, ...} : i32, i32, i32, i32, i32, i32`
}];
let extraClassDeclaration = [{
bool hasDynamicOperands() { return getDynColumn() != nullptr; }
}];
let builders = [
OpBuilder<(ins "int32_t":$column, "int32_t":$row,
"int32_t":$direction, "int32_t":$channel,
"int32_t":$column_num, "int32_t":$row_num), [{
build($_builder, $_state,
$_builder.getI32IntegerAttr(column),
$_builder.getI32IntegerAttr(row),
$_builder.getI32IntegerAttr(direction),
$_builder.getI32IntegerAttr(channel),
$_builder.getI32IntegerAttr(column_num),
$_builder.getI32IntegerAttr(row_num),
/*dyn_column=*/Value(), /*dyn_row=*/Value(),
/*dyn_direction=*/Value(), /*dyn_channel=*/Value(),
/*dyn_column_num=*/Value(), /*dyn_row_num=*/Value());
}]>,
OpBuilder<(ins "mlir::IntegerAttr":$column, "mlir::IntegerAttr":$row,
"mlir::IntegerAttr":$direction, "mlir::IntegerAttr":$channel,
"mlir::IntegerAttr":$column_num, "mlir::IntegerAttr":$row_num), [{
build($_builder, $_state,
column, row, direction, channel, column_num, row_num,
/*dyn_column=*/Value(), /*dyn_row=*/Value(),
/*dyn_direction=*/Value(), /*dyn_channel=*/Value(),
/*dyn_column_num=*/Value(), /*dyn_row_num=*/Value());
}]>
];
}

// XAIE_IO_CUSTOM_OP_BEGIN + 1 (address patch)
Expand All @@ -897,14 +1001,17 @@ def AIE_NpuAddressPatchOp: AIEX_Op<"npu.address_patch", []> {
let arguments = (
ins UI32Attr:$addr,
I32Attr:$arg_idx,
I32Attr:$arg_plus
I32Attr:$arg_plus,
Optional<I32>:$dyn_arg_plus
);
let results = (outs );
let assemblyFormat = [{
attr-dict
(`(` $dyn_arg_plus^ `:` type($dyn_arg_plus) `)`)? attr-dict
}];
let description = [{
address patch operator
address patch operator.
When `dyn_arg_plus` is provided, it is used instead of the static
`arg_plus` attribute. This enables runtime-parameterized buffer offsets.
}];
}

Expand Down Expand Up @@ -1015,6 +1122,7 @@ def AIE_NpuLoadPdiOp: AIEX_Op<"npu.load_pdi", []> {
let hasCanonicalizeMethod = 1;
}


def AIE_DMAConfigureTaskOp : AIEX_Op<"dma_configure_task", [HasParent<"AIE::RuntimeSequenceOp">, TileElement]>, Results<(outs Index:$result)> {
let summary = "Concrete Instantiation of a Buffer Descriptor Chain as a Task on a Channel and Direction on a Tile";
let description = [{
Expand Down
Loading
Loading