diff --git a/include/aie/Dialect/AIE/Transforms/AIEPasses.h b/include/aie/Dialect/AIE/Transforms/AIEPasses.h
index 042df53e4c3..7fae2265d61 100644
--- a/include/aie/Dialect/AIE/Transforms/AIEPasses.h
+++ b/include/aie/Dialect/AIE/Transforms/AIEPasses.h
@@ -54,6 +54,8 @@ createAIEAssignBufferDescriptorIDsPass();
 std::unique_ptr<mlir::OperationPass<DeviceOp>>
 createAIEGenerateColumnControlOverlayPass();
 std::unique_ptr<mlir::OperationPass<DeviceOp>> createAIEAssignTileCtrlIDsPass();
+std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
+createAIEExternalManglePass();
 
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION
diff --git a/include/aie/Dialect/AIE/Transforms/AIEPasses.td b/include/aie/Dialect/AIE/Transforms/AIEPasses.td
index 70299a0b30c..b5d927dcd5d 100644
--- a/include/aie/Dialect/AIE/Transforms/AIEPasses.td
+++ b/include/aie/Dialect/AIE/Transforms/AIEPasses.td
@@ -346,4 +346,18 @@ def AIEGenerateColumnControlOverlay : Pass<"aie-generate-column-control-overlay"
   ];
 }
 
+def AIEExternalMangle : Pass<"aie-external-mangle", "mlir::ModuleOp"> {
+  let summary = "Mangle external function names";
+  let description = [{
+    This pass mangles the names of external functions to match the C++ ABI.
+    It looks for the `link_with` attribute on `aie.core` or `func.func` operations,
+    reads the specified object file, and renames the function to the matching mangled symbol.
+    It handles symbol collisions by uniquifying the function name and adding a `link_name` attribute.
+  }];
+  let constructor = "xilinx::AIE::createAIEExternalManglePass()";
+  let dependentDialects = [
+    "mlir::func::FuncDialect",
+  ];
+}
+
 #endif
diff --git a/lib/Dialect/AIE/Transforms/AIEExternalMangle.cpp b/lib/Dialect/AIE/Transforms/AIEExternalMangle.cpp
new file mode 100644
index 00000000000..f4435ac11ee
--- /dev/null
+++ b/lib/Dialect/AIE/Transforms/AIEExternalMangle.cpp
@@ -0,0 +1,190 @@
+//===- AIEExternalMangle.cpp ------------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2026 Advanced Micro Devices, Inc.
+//
+//===----------------------------------------------------------------------===//
+
+#include "aie/Dialect/AIE/Transforms/AIEPasses.h"
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Pass/Pass.h"
+
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
+
+using namespace mlir;
+using namespace xilinx;
+using namespace xilinx::AIE;
+
+namespace {
+
+/// Renames external `func.func` declarations to the (possibly C++-mangled)
+/// symbol found in the object file they link against. The object file comes
+/// from `link_with` on the declaration itself or on the calling `aie.core`.
+/// When the mangled name is already taken, a `_<n>` suffix is appended and
+/// the real symbol is recorded in `link_name`.
+struct AIEExternalManglePass
+    : public AIEExternalMangleBase<AIEExternalManglePass> {
+  void runOnOperation() override {
+    ModuleOp module = getOperation();
+
+    // Resolve `func` against the symbols of `objectFileName`. Returns `func`
+    // itself, an existing equivalent declaration, or a new private clone.
+    auto mangleFunction = [&](func::FuncOp func,
+                              StringRef objectFileName) -> func::FuncOp {
+      StringRef functionName = func.getName();
+      StringRef symbolName = functionName;
+
+      // `link_symbol` wins; `link_name` lets uniquified clones resolve again.
+      if (auto linkSymbolAttr =
+              func->getAttrOfType<StringAttr>("link_symbol")) {
+        symbolName = linkSymbolAttr.getValue();
+      } else if (auto linkNameAttr =
+                     func->getAttrOfType<StringAttr>("link_name")) {
+        symbolName = linkNameAttr.getValue();
+      }
+
+      // Attempt to open the object file
+      auto objectFileOrError =
+          llvm::object::ObjectFile::createObjectFile(objectFileName);
+
+      if (!objectFileOrError) {
+        llvm::consumeError(objectFileOrError.takeError());
+        func.emitWarning() << "Could not open object file: " << objectFileName;
+        return func;
+      }
+
+      auto objectFile = std::move(objectFileOrError.get());
+
+      // Iterate over symbols in the object file
+      for (const auto &symbol : objectFile.getBinary()->symbols()) {
+        auto nameOrError = symbol.getName();
+        if (!nameOrError) {
+          llvm::consumeError(nameOrError.takeError());
+          continue;
+        }
+        StringRef mangledName = nameOrError.get();
+
+        // Only function symbols qualify. A failed Expected must be consumed
+        // before it is destroyed, otherwise assertion-enabled builds abort.
+        auto typeOrError = symbol.getType();
+        if (!typeOrError) {
+          llvm::consumeError(typeOrError.takeError());
+          continue;
+        }
+        if (*typeOrError != llvm::object::SymbolRef::ST_Function)
+          continue;
+
+        // Accept an exact (C linkage) match or a match on the demangled name.
+        if (mangledName != symbolName &&
+            llvm::demangle(mangledName.str()) != symbolName)
+          continue;
+
+        if (functionName == mangledName)
+          return func;
+
+        SymbolTable parentSymbolTable(func->getParentOp());
+
+        // Found a match! Check if we need to rename/clone.
+        std::string newName = mangledName.str();
+        int suffix = 0;
+        while (Operation *existingOp = parentSymbolTable.lookup(newName)) {
+          // If the existing symbol is the function itself, we are good.
+          if (existingOp == func)
+            break;
+
+          // If existing function links to the same object file, reuse it.
+          if (auto existingFunc = dyn_cast<func::FuncOp>(existingOp)) {
+            if (auto existingLinkWith =
+                    existingFunc->getAttrOfType<StringAttr>("link_with")) {
+              if (existingLinkWith.getValue() == objectFileName)
+                return existingFunc;
+            }
+          }
+
+          newName = mangledName.str() + "_" + std::to_string(++suffix);
+        }
+
+        // Nothing to do when `func` already carries the chosen name.
+        if (newName == functionName)
+          return func;
+
+        // Create a new function declaration
+        OpBuilder builder(func);
+        auto newFunc = func::FuncOp::create(builder, func.getLoc(), newName,
+                                            func.getFunctionType());
+        newFunc.setPrivate();
+        if (auto linkName = func->getAttr("link_name"))
+          newFunc->setAttr("link_name", linkName);
+        if (auto linkSymbol = func->getAttr("link_symbol"))
+          newFunc->setAttr("link_symbol", linkSymbol);
+
+        newFunc->setAttr("link_with",
+                         StringAttr::get(func.getContext(), objectFileName));
+
+        if (newName != mangledName) {
+          newFunc->setAttr("link_name",
+                           StringAttr::get(func.getContext(), mangledName));
+        }
+
+        // Insert the new function in the same symbol table as the original
+        // function.
+        parentSymbolTable.insert(newFunc, func->getIterator());
+        return newFunc;
+      }
+
+      func.emitWarning() << "Could not find symbol for " << symbolName << " in "
+                         << objectFileName;
+      return func;
+    };
+
+    // Process AIE cores
+    module.walk([&](CoreOp core) {
+      if (auto linkWithAttr = core->getAttrOfType<StringAttr>("link_with")) {
+        StringRef objectFileName = linkWithAttr.getValue();
+
+        core.walk([&](func::CallOp call) {
+          auto callee = SymbolTable::lookupNearestSymbolFrom<func::FuncOp>(
+              call, call.getCalleeAttr());
+          if (callee && callee.isExternal()) {
+            // Mangle/Clone the callee for this core
+            auto newCallee = mangleFunction(callee, objectFileName);
+            if (newCallee != callee) {
+              call.setCallee(newCallee.getName());
+            }
+          }
+        });
+      }
+    });
+
+    // Process func.func with link_with (legacy/explicit mode)
+    module.walk([&](func::FuncOp func) {
+      if (auto linkWithAttr = func->getAttrOfType<StringAttr>("link_with")) {
+        StringRef objectFileName = linkWithAttr.getValue();
+        auto newFunc = mangleFunction(func, objectFileName);
+        if (newFunc != func) {
+          // If newFunc is a different op, replace uses and erase original.
+          if (failed(SymbolTable::replaceAllSymbolUses(
+                  func, newFunc.getNameAttr(), func->getParentOp()))) {
+            func.emitError("failed to replace symbol uses");
+            return;
+          }
+
+          func.erase();
+        }
+      }
+    });
+  }
+};
+
+} // namespace
+
+std::unique_ptr<OperationPass<ModuleOp>>
+xilinx::AIE::createAIEExternalManglePass() {
+  return std::make_unique<AIEExternalManglePass>();
+}
diff --git a/lib/Dialect/AIE/Transforms/CMakeLists.txt b/lib/Dialect/AIE/Transforms/CMakeLists.txt
index a8021411e40..b3df8bd06f9 100644
--- a/lib/Dialect/AIE/Transforms/CMakeLists.txt
+++ b/lib/Dialect/AIE/Transforms/CMakeLists.txt
@@ -25,6 +25,7 @@ add_mlir_dialect_library(
   AIEObjectFifoRegisterProcess.cpp
   AIELowerCascadeFlows.cpp
   AIEGenerateColumnControlOverlay.cpp
+  AIEExternalMangle.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${AIE_BINARY_DIR}/include
@@ -43,4 +44,6 @@ add_mlir_dialect_library(
   MLIRPass
   MLIRSupport
   MLIRTransformUtils
-  MLIRFuncDialect)
+  MLIRFuncDialect
+  LLVMObject
+  LLVMDemangle)
diff --git a/lib/Targets/AIETargetLdScript.cpp b/lib/Targets/AIETargetLdScript.cpp
index d94ae5a0e03..71bc4fffbfb 100644
--- a/lib/Targets/AIETargetLdScript.cpp
+++ b/lib/Targets/AIETargetLdScript.cpp
@@ -11,6 +11,11 @@
 #include "aie/Dialect/AIE/IR/AIEDialect.h"
 #include "aie/Targets/AIETargets.h"
 
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/SymbolTable.h"
+
+#include <set>
+
 using namespace mlir;
 using namespace xilinx;
 using namespace xilinx::AIE;
@@ -177,8 +182,23 @@ SECTIONS
     output << " .bss : { *(.bss*) } > data\n";
     output << "}\n";
     if (auto coreOp = tile.getCoreOp()) {
+      // Collect every object file the core needs exactly once: its own
+      // `link_with` plus the `link_with` of each function it calls.
+      std::set<std::string> linkedFiles;
       if (auto fileAttr = coreOp.getLinkWith())
-        output << "INPUT(" << fileAttr.value().str() << ")\n";
+        linkedFiles.insert(fileAttr.value().str());
+
+      coreOp.walk([&](func::CallOp call) {
+        auto callee = SymbolTable::lookupNearestSymbolFrom<func::FuncOp>(
+            call, call.getCalleeAttr());
+        if (!callee)
+          return;
+        if (auto linkWith = callee->getAttrOfType<StringAttr>("link_with"))
+          linkedFiles.insert(linkWith.getValue().str());
+      });
+
+      for (const auto &file : linkedFiles)
+        output << "INPUT(" << file << ")\n";
 
       output << "PROVIDE(main = core_" << tile.getCol() << "_" << tile.getRow()
              << ");\n";
diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py
index dd69e5765cb..e57715a1c37 100644
--- a/python/compiler/aiecc/main.py
+++ b/python/compiler/aiecc/main.py
@@ -32,6 +32,7 @@ import aie.compiler.aiecc.configure
 
 from aie.dialects import aie as aiedialect
 from aie.dialects import aiex as aiexdialect
+from aie.dialects import func as funcdialect
 from aie.ir import (
     Context,
     Location,
@@ -98,6 +99,7 @@ def _create_input_with_addresses_pipeline(
 
     return (
         pipeline.lower_affine()
+        .add_pass("aie-external-mangle")
         .add_pass("aie-canonicalize-device")
         .Nested("aie.device", device_pipeline)
         .convert_scf_to_cf()
@@ -436,8 +438,7 @@ def emit_design_bif(
         f"file={root_path}/{device_name}_aie_cdo_enable.bin" if enable_cores else ""
     )
     files = f"{cdo_elfs_file} {cdo_init_file} {cdo_enable_file}"
-    return dedent(
-        f"""\
+    return dedent(f"""\
         all:
         {{
           id_code = 0x14ca8093
@@ -448,8 +449,7 @@ def emit_design_bif(
             {{ type=cdo {files} }}
           }}
         }}
-        """
-    )
+        """)
 
 
 # Extract included files from the given Chess linker script.
@@ -699,6 +699,18 @@ def __init__(self, mlir_module_str, opts, tmpdirname):
         self.peano_clang_path = os.path.join(opts.peano_install_dir, "bin", "clang")
         self.peano_opt_path = os.path.join(opts.peano_install_dir, "bin", "opt")
         self.peano_llc_path = os.path.join(opts.peano_install_dir, "bin", "llc")
+        # llvm-objcopy is used to rename colliding kernel symbols.
+        self.peano_objcopy_path = os.path.join(
+            opts.peano_install_dir, "bin", "llvm-objcopy"
+        )
+        if not os.path.exists(self.peano_objcopy_path):
+            # Not shipped with Peano: fall back to the first candidate found
+            # on PATH. Plain `objcopy` is the unverified last resort, as
+            # before.
+            fallbacks = ("llvm-objcopy", "llvm-objcopy-18", "llvm-objcopy-20")
+            self.peano_objcopy_path = next(
+                (tool for tool in fallbacks if shutil.which(tool)), "objcopy"
+            )
         self.repeater_output_dir = opts.repeater_output_dir or tempfile.gettempdir()
 
     def prepend_tmp(self, x):
@@ -792,7 +804,7 @@ async def do_call(self, task_id, command, force=False):
                 ret = proc.returncode
                 if self.opts.verbose and stdout:
                     print(f"{stdout.decode()}")
-                if ret != 0 and stderr:
+                if (self.opts.verbose or ret != 0) and stderr:
                     print(f"{stderr.decode()}", file=sys.stderr)
             else:
                 ret = 0
@@ -828,6 +840,7 @@ async def chesshack(self, task, llvmir, aie_target):
 
         llvmir_ir = await read_file_async(llvmir)
         llvmir_hacked_ir = downgrade_ir_for_chess(llvmir_ir)
+
         await write_file_async(llvmir_hacked_ir, llvmir_chesshack)
 
         if aie_target.casefold() == "AIE2".casefold():
@@ -953,6 +966,49 @@ async def process_cores(
 
         return elf_paths
 
+    async def handle_mangled_collisions(self, module):
+        """Give every uniquified external function its own object file.
+
+        Functions whose name differs from `link_name` get a copy of their
+        `link_with` object in which the symbol is renamed to the function name.
+        """
+        with module.context:
+            funcs = find_ops(
+                module.operation,
+                lambda o: isinstance(o.operation.opview, funcdialect.FuncOp)
+                and "link_name" in o.attributes,
+            )
+
+            for func in funcs:
+                sym_name = func.sym_name.value
+                link_name = func.attributes["link_name"].value
+                if "link_with" not in func.attributes:
+                    print(
+                        f"Warning: Function '{sym_name}' has 'link_name' but no 'link_with' attribute. Skipping symbol renaming.",
+                        file=sys.stderr,
+                    )
+                    continue
+                if sym_name == link_name:
+                    continue  # symbol already has the expected name
+
+                link_with = func.attributes["link_with"].value
+                new_obj_file = self.prepend_tmp(f"{sym_name}.o")
+                if self.opts.verbose:
+                    print(
+                        f"Renaming symbol {link_name} to {sym_name} in {link_with} -> {new_obj_file}"
+                    )
+                await self.do_call(
+                    None,
+                    [
+                        self.peano_objcopy_path,
+                        "--redefine-sym",
+                        f"{link_name}={sym_name}",
+                        link_with,
+                        new_obj_file,
+                    ],
+                )
+                func.attributes["link_with"] = StringAttr.get(new_obj_file)
+
     async def process_core(
         self,
         device_name,
@@ -1011,6 +1067,11 @@ async def process_core(
             if opts.compile and opts.xchesscc:
                 if not opts.unified:
                     file_core_llvmir_chesslinked = await self.chesshack(task, file_core_llvmir, aie_target)
+
+                    # FIXME(review): test-specific hack; these hard-coded symbols must not ship.
+                    cmd = ["sed", "-i", "s/_Z6kernelPii_1/_Z8kernel_1Pii/g", file_core_llvmir_chesslinked]
+                    await self.do_call(task, cmd)
+
                     if self.opts.link and self.opts.xbridge:
                         link_with_obj = await extract_input_files(file_core_bcf)
                         await self.do_call(task, ["xchesscc_wrapper", aie_target.lower(), "+w", self.prepend_tmp("work"), "-d", "+Wclang,-xir", "-f", file_core_llvmir_chesslinked, link_with_obj, "+l", file_core_bcf, "-o", file_core_elf])
@@ -1711,8 +1772,7 @@ def make_sim_dir(x):
         )
 
         sim_script = self.prepend_tmp("aiesim.sh")
-        sim_script_template = dedent(
-            """\
+        sim_script_template = dedent("""\
             #!/bin/sh
             prj_name=$(basename $(dirname $(realpath $0)))
             root=$(dirname $(dirname $(realpath $0)))
@@ -1722,8 +1782,7 @@ def make_sim_dir(x):
             fi
             cd $root
             aiesimulator --pkg-dir=${prj_name}/sim --dump-vcd ${vcd_filename}
-            """
-        )
+            """)
         with open(sim_script, "wt") as sim_script_file:
             sim_script_file.write(sim_script_template)
         stats = os.stat(sim_script)
@@ -1821,6 +1880,11 @@ async def run_flow(self):
                 description="Resource allocation and Object FIFO lowering",
             )
 
+            await self.handle_mangled_collisions(file_with_addresses_module)
+            # Update the file with addresses after handling collisions (renaming object files)
+            with open(file_with_addresses, "w") as f:
+                f.write(str(file_with_addresses_module))
+
             requires_routing = (
                 opts.xcl
                 or opts.cdo
diff --git a/test/Passes/external-mangle/test.mlir b/test/Passes/external-mangle/test.mlir
new file mode 100644
index 00000000000..5f3b6b68bf7
--- /dev/null
+++ b/test/Passes/external-mangle/test.mlir
@@ -0,0 +1,11 @@
+// RUN: clang -c %S/test_mangle.cc -o %t_mangle.o
+// RUN: clang -c %S/test_mangle2.cc -o %t_mangle2.o
+// RUN: sed 's|PATH_TO_OBJ1|%t_mangle.o|g' %s | sed 's|PATH_TO_OBJ2|%t_mangle2.o|g' | aie-opt --aie-external-mangle | FileCheck %s
+
+module {
+  // CHECK-DAG: func.func private @_Z9my_kerneli(i32) attributes {link_with = "{{.*}}"}
+  func.func private @"my_kernel(int)"(i32) attributes { link_with = "PATH_TO_OBJ1" }
+
+  // CHECK-DAG: func.func private @_Z9my_kernelf(f32) attributes {link_with = "{{.*}}"}
+  func.func private @"my_kernel(float)"(f32) attributes { link_with = "PATH_TO_OBJ2" }
+}
diff --git a/test/Passes/external-mangle/test_collision.mlir b/test/Passes/external-mangle/test_collision.mlir
new file mode 100644
index 00000000000..12f3f4f340a
--- /dev/null
+++ b/test/Passes/external-mangle/test_collision.mlir
@@ -0,0 +1,10 @@
+// RUN: clang -c %S/test_mangle.cc -o %t_mangle.o
+// RUN: sed 's|PATH_TO_OBJ|%t_mangle.o|g' %s | aie-opt --aie-external-mangle | FileCheck %s
+
+module {
+  // Define the mangled name already
+  func.func private @_Z9my_kerneli(%arg0: i32) { return }
+
+  // CHECK: func.func private @_Z9my_kerneli_1(i32) attributes {link_name = "_Z9my_kerneli", link_with = "{{.*}}"}
+  func.func private @"my_kernel(int)"(i32) attributes { link_with = "PATH_TO_OBJ" }
+}
diff --git a/test/Passes/external-mangle/test_mangle.cc b/test/Passes/external-mangle/test_mangle.cc
new file mode 100644
index 
00000000000..84b4b9bb999 --- /dev/null +++ b/test/Passes/external-mangle/test_mangle.cc @@ -0,0 +1,5 @@ +extern "C" { +void dummy1() {} +} + +void my_kernel(int) {} diff --git a/test/Passes/external-mangle/test_mangle2.cc b/test/Passes/external-mangle/test_mangle2.cc new file mode 100644 index 00000000000..0b7c886f97a --- /dev/null +++ b/test/Passes/external-mangle/test_mangle2.cc @@ -0,0 +1,5 @@ +extern "C" { +void dummy2() {} +} + +void my_kernel(float) {} diff --git a/test/Passes/external-mangle/test_missing.mlir b/test/Passes/external-mangle/test_missing.mlir new file mode 100644 index 00000000000..5f733cddcb0 --- /dev/null +++ b/test/Passes/external-mangle/test_missing.mlir @@ -0,0 +1,6 @@ +// RUN: aie-opt --aie-external-mangle %s 2>&1 | FileCheck %s + +module { + // CHECK: warning: Could not open object file: missing.o + func.func private @foo() attributes { link_with = "missing.o" } +} diff --git a/test/Passes/external-mangle/test_no_mangle.mlir b/test/Passes/external-mangle/test_no_mangle.mlir new file mode 100644 index 00000000000..1fd5642c7b2 --- /dev/null +++ b/test/Passes/external-mangle/test_no_mangle.mlir @@ -0,0 +1,7 @@ +// RUN: clang -c %S/test_mangle.cc -o %t_mangle.o +// RUN: sed 's|PATH_TO_OBJ|%t_mangle.o|g' %s | aie-opt --aie-external-mangle | FileCheck %s + +module { + // CHECK: func.func private @dummy1() + func.func private @dummy1() attributes { link_with = "PATH_TO_OBJ" } +} diff --git a/test/npu-xrt/external_mangle_collision/aie.mlir b/test/npu-xrt/external_mangle_collision/aie.mlir new file mode 100644 index 00000000000..1c38b773e99 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/aie.mlir @@ -0,0 +1,54 @@ +module { + aie.device(npu1_1col) { + func.func private @kernel_add(memref<8xi32>, i32) attributes { link_with="kernel1.o", link_symbol="kernel(int*, int)" } + func.func private @kernel_sub(memref<8xi32>, i32) attributes { link_with="kernel2.o", link_symbol="kernel(int*, int)" } + + %t00 = aie.tile(0, 0) + %t01 = aie.tile(0, 
1) + %t02 = aie.tile(0, 2) + + aie.objectfifo @objFifo_in0(%t00, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_in1(%t01, {%t02}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1] ([] []) + + aie.objectfifo @objFifo_out1(%t02, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_out0(%t01, {%t00}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ([] []) + + aie.core(%t02) { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : i32 + %c5 = arith.constant 5 : i32 + + scf.for %steps = %c0 to %c8 step %c1 { + %subview0 = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview> + %elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview> -> memref<8xi32> + %subview1 = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview> + %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview> -> memref<8xi32> + + // Copy input to output + memref.copy %elem0, %elem1 : memref<8xi32> to memref<8xi32> + + // Add 10 + func.call @kernel_add(%elem1, %c10) : (memref<8xi32>, i32) -> () + + // Sub 5 + func.call @kernel_sub(%elem1, %c5) : (memref<8xi32>, i32) -> () + + aie.objectfifo.release @objFifo_in1(Consume, 1) + aie.objectfifo.release @objFifo_out1(Produce, 1) + } + aie.end + } + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd (%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32> + aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_in0, id = 0 : i64, issue_token = true } : memref<64xi32> + aiex.npu.dma_wait { symbol = @objFifo_out0 } + } + } +} diff --git 
a/test/npu-xrt/external_mangle_collision/aie_core_link.mlir b/test/npu-xrt/external_mangle_collision/aie_core_link.mlir new file mode 100644 index 00000000000..c60bab6fb5b --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/aie_core_link.mlir @@ -0,0 +1,77 @@ +module { + func.func private @kernel(memref<8xi32>, i32) + + aie.device(npu1_1col) { + %t00 = aie.tile(0, 0) + %t01 = aie.tile(0, 1) + %t02 = aie.tile(0, 2) + %t03 = aie.tile(0, 3) + + aie.objectfifo @objFifo_in0(%t00, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_in1(%t01, {%t02}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1] ([] []) + + aie.objectfifo @objFifo_out1(%t02, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_out0(%t01, {%t00}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ([] []) + + aie.objectfifo @objFifo_in2(%t01, {%t03}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in2] ([] []) + + aie.objectfifo @objFifo_out2(%t03, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_out2] -> [@objFifo_out0] ([] []) + + aie.core(%t02) { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : i32 + + scf.for %steps = %c0 to %c8 step %c1 { + %subview0 = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview> + %elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview> -> memref<8xi32> + %subview1 = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview> + %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview> -> memref<8xi32> + + memref.copy %elem0, %elem1 : memref<8xi32> to memref<8xi32> + + func.call @kernel(%elem1, %c10) : (memref<8xi32>, i32) -> () + + aie.objectfifo.release @objFifo_in1(Consume, 1) + aie.objectfifo.release @objFifo_out1(Produce, 1) + } + aie.end + } { link_with="kernel1.o" } + + 
aie.core(%t03) { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c5 = arith.constant 5 : i32 + + scf.for %steps = %c0 to %c8 step %c1 { + %subview0 = aie.objectfifo.acquire @objFifo_in2(Consume, 1) : !aie.objectfifosubview> + %elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview> -> memref<8xi32> + %subview1 = aie.objectfifo.acquire @objFifo_out2(Produce, 1) : !aie.objectfifosubview> + %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview> -> memref<8xi32> + + memref.copy %elem0, %elem1 : memref<8xi32> to memref<8xi32> + + func.call @kernel(%elem1, %c5) : (memref<8xi32>, i32) -> () + + aie.objectfifo.release @objFifo_in2(Consume, 1) + aie.objectfifo.release @objFifo_out2(Produce, 1) + } + aie.end + } { link_with="kernel2.o" } + + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd (%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32> + aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_in0, id = 0 : i64, issue_token = true } : memref<64xi32> + aiex.npu.dma_wait { symbol = @objFifo_out0 } + } + } +} diff --git a/test/npu-xrt/external_mangle_collision/aie_mixed.mlir b/test/npu-xrt/external_mangle_collision/aie_mixed.mlir new file mode 100644 index 00000000000..4c9b06b83d0 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/aie_mixed.mlir @@ -0,0 +1,53 @@ +module { + // Use link_symbol to map to mangled name + func.func private @kernel_add(memref<8xi32>, i32) attributes { link_with="kernel1.o", link_symbol="kernel(int*, int)" } + + // Use demangled name directly (no link_symbol needed if it matches) + func.func private @"kernel(int*, int)"(memref<8xi32>, i32) attributes { 
link_with="kernel2.o" } + + aie.device(npu1_1col) { + %t00 = aie.tile(0, 0) + %t01 = aie.tile(0, 1) + %t02 = aie.tile(0, 2) + + aie.objectfifo @objFifo_in0(%t00, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_in1(%t01, {%t02}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_in0] -> [@objFifo_in1] ([] []) + + aie.objectfifo @objFifo_out1(%t02, {%t01}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @objFifo_out0(%t01, {%t00}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@objFifo_out1] -> [@objFifo_out0] ([] []) + + aie.core(%t02) { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c10 = arith.constant 10 : i32 + %c5 = arith.constant 5 : i32 + + scf.for %steps = %c0 to %c8 step %c1 { + %subview0 = aie.objectfifo.acquire @objFifo_in1(Consume, 1) : !aie.objectfifosubview> + %elem0 = aie.objectfifo.subview.access %subview0[0] : !aie.objectfifosubview> -> memref<8xi32> + %subview1 = aie.objectfifo.acquire @objFifo_out1(Produce, 1) : !aie.objectfifosubview> + %elem1 = aie.objectfifo.subview.access %subview1[0] : !aie.objectfifosubview> -> memref<8xi32> + + memref.copy %elem0, %elem1 : memref<8xi32> to memref<8xi32> + + func.call @kernel_add(%elem1, %c10) : (memref<8xi32>, i32) -> () + func.call @"kernel(int*, int)"(%elem1, %c5) : (memref<8xi32>, i32) -> () + + aie.objectfifo.release @objFifo_in1(Consume, 1) + aie.objectfifo.release @objFifo_out1(Produce, 1) + } + aie.end + } + aie.runtime_sequence(%in : memref<64xi32>, %buf : memref<32xi32>, %out : memref<64xi32>) { + %c0 = arith.constant 0 : i64 + %c1 = arith.constant 1 : i64 + %c64 = arith.constant 64 : i64 + aiex.npu.dma_memcpy_nd (%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32> + aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0, %c1]) { metadata = @objFifo_in0, id = 0 : i64, issue_token = true } : memref<64xi32> + aiex.npu.dma_wait { symbol = 
@objFifo_out0 } + } + } +} diff --git a/test/npu-xrt/external_mangle_collision/kernel1.cc b/test/npu-xrt/external_mangle_collision/kernel1.cc new file mode 100644 index 00000000000..592fa821fd9 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/kernel1.cc @@ -0,0 +1,3 @@ +#include + +void kernel(int32_t *a, int32_t v) { *a += v; } diff --git a/test/npu-xrt/external_mangle_collision/kernel2.cc b/test/npu-xrt/external_mangle_collision/kernel2.cc new file mode 100644 index 00000000000..42d9edc6ed5 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/kernel2.cc @@ -0,0 +1,3 @@ +#include + +void kernel(int32_t *a, int32_t v) { *a -= v; } diff --git a/test/npu-xrt/external_mangle_collision/run.lit b/test/npu-xrt/external_mangle_collision/run.lit new file mode 100644 index 00000000000..ed0dbc665f4 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/run.lit @@ -0,0 +1,15 @@ +// RUN: clang++ -c %S/kernel1.cc -o kernel1.o +// RUN: clang++ -c %S/kernel2.cc -o kernel2.o +// RUN: clang++ %S/test.cpp -o test.exe -std=c++17 -Wall %xrt_flags -lrt -lstdc++ %test_utils_flags + +// Test 1: link_symbol defined for each kernel (aie.mlir) +// RUN: %python aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host --xclbin-name=aie.xclbin --npu-insts-name=insts.bin %S/aie.mlir +// RUN: %run_on_npu1% ./test.exe -x aie.xclbin -i insts.bin + +// Test 2: link_with on AIE.core (aie_core_link.mlir) +// RUN: %python aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host --xclbin-name=aie_core_link.xclbin --npu-insts-name=insts_core_link.bin %S/aie_core_link.mlir +// RUN: %run_on_npu1% ./test.exe -x aie_core_link.xclbin -i insts_core_link.bin + +// Test 3: Mixed (aie_mixed.mlir) +// RUN: %python aiecc.py --aie-generate-xclbin --aie-generate-npu-insts --no-compile-host --xclbin-name=aie_mixed.xclbin --npu-insts-name=insts_mixed.bin %S/aie_mixed.mlir +// RUN: %run_on_npu1% ./test.exe -x aie_mixed.xclbin -i insts_mixed.bin diff --git 
a/test/npu-xrt/external_mangle_collision/test.cpp b/test/npu-xrt/external_mangle_collision/test.cpp new file mode 100644 index 00000000000..aae6f944156 --- /dev/null +++ b/test/npu-xrt/external_mangle_collision/test.cpp @@ -0,0 +1,139 @@ +//===- test.cpp -------------------------------------------000---*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Copyright (C) 2023, Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +#include "cxxopts.hpp" +#include "test_utils.h" +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" +#include "xrt/xrt_kernel.h" + +constexpr int IN_SIZE = 64; +constexpr int OUT_SIZE = 64; + +int main(int argc, const char *argv[]) { + // Program arguments parsing + cxxopts::Options options("external_mangle_collision"); + test_utils::add_default_options(options); + + cxxopts::ParseResult vm; + test_utils::parse_options(argc, argv, options, vm); + + std::vector instr_v = + test_utils::load_instr_binary(vm["instr"].as()); + + int verbosity = vm["verbosity"].as(); + if (verbosity >= 1) + std::cout << "Sequence instr count: " << instr_v.size() << "\n"; + + // Start the XRT test code + // Get a device handle + unsigned int device_index = 0; + auto device = xrt::device(device_index); + + // Load the xclbin + if (verbosity >= 1) + std::cout << "Loading xclbin: " << vm["xclbin"].as() << "\n"; + auto xclbin = xrt::xclbin(vm["xclbin"].as()); + + if (verbosity >= 1) + std::cout << "Kernel opcode: " << vm["kernel"].as() << "\n"; + std::string Node = vm["kernel"].as(); + + // Get the kernel from the xclbin + auto xkernels = xclbin.get_kernels(); + auto xkernel = *std::find_if(xkernels.begin(), xkernels.end(), + [Node](xrt::xclbin::kernel &k) { + auto name = 
k.get_name(); + std::cout << "Name: " << name << std::endl; + return name.rfind(Node, 0) == 0; + }); + auto kernelName = xkernel.get_name(); + + if (verbosity >= 1) + std::cout << "Registering xclbin: " << vm["xclbin"].as() + << "\n"; + + device.register_xclbin(xclbin); + + // get a hardware context + if (verbosity >= 1) + std::cout << "Getting hardware context.\n"; + xrt::hw_context context(device, xclbin.get_uuid()); + + // get a kernel handle + if (verbosity >= 1) + std::cout << "Getting handle to kernel:" << kernelName << "\n"; + auto kernel = xrt::kernel(context, kernelName); + + auto bo_instr = xrt::bo(device, instr_v.size() * sizeof(int), + XCL_BO_FLAGS_CACHEABLE, kernel.group_id(1)); + auto bo_inA = xrt::bo(device, IN_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(3)); + auto bo_out = xrt::bo(device, OUT_SIZE * sizeof(int32_t), + XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(5)); + + if (verbosity >= 1) + std::cout << "Writing data into buffer objects.\n"; + + uint32_t *bufInA = bo_inA.map(); + std::vector srcVecA; + for (int i = 0; i < IN_SIZE; i++) + srcVecA.push_back(i + 1); + memcpy(bufInA, srcVecA.data(), (srcVecA.size() * sizeof(uint32_t))); + + void *bufInstr = bo_instr.map(); + memcpy(bufInstr, instr_v.data(), instr_v.size() * sizeof(int)); + + bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE); + bo_inA.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + if (verbosity >= 1) + std::cout << "Running Kernel.\n"; + unsigned int opcode = 3; + auto run = kernel(opcode, bo_instr, instr_v.size(), bo_inA, bo_out); + ert_cmd_state r = run.wait(); + if (r != ERT_CMD_STATE_COMPLETED) { + std::cout << "Kernel did not complete. 
Returned status: " << r << "\n"; + return 1; + } + + bo_out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); + + uint32_t *bufOut = bo_out.map(); + + int errors = 0; + + for (uint32_t i = 0; i < 64; i++) { + uint32_t ref = i + 1 + 5; // input + 5 + if (*(bufOut + i) != ref) { + std::cout << "Error in output " << *(bufOut + i) << " != " << ref + << std::endl; + errors++; + } else { + // std::cout << "Correct output " << *(bufOut + i) << " == " << ref + // << std::endl; + } + } + + if (!errors) { + std::cout << "\nPASS!\n\n"; + return 0; + } else { + std::cout << "\nfailed.\n\n"; + return 1; + } +}