diff --git a/compiler/rustc_builtin_macros/src/eii.rs b/compiler/rustc_builtin_macros/src/eii.rs index fd0ef8500c6c3..c58050c5db7aa 100644 --- a/compiler/rustc_builtin_macros/src/eii.rs +++ b/compiler/rustc_builtin_macros/src/eii.rs @@ -12,8 +12,8 @@ use thin_vec::{ThinVec, thin_vec}; use crate::errors::{ EiiExternTargetExpectedList, EiiExternTargetExpectedMacro, EiiExternTargetExpectedUnsafe, EiiMacroExpectedMaxOneArgument, EiiOnlyOnce, EiiSharedMacroInStatementPosition, - EiiSharedMacroTarget, EiiStaticArgumentRequired, EiiStaticDefault, - EiiStaticMultipleImplementations, EiiStaticMutable, + EiiSharedMacroTarget, EiiStaticArgumentRequired, EiiStaticMultipleImplementations, + EiiStaticMutable, }; /// ```rust @@ -86,14 +86,6 @@ fn eii_( let (item_span, foreign_item_name) = match kind { ItemKind::Fn(func) => (func.sig.span, func.ident), ItemKind::Static(stat) => { - // Statics with a default are not supported yet - if let Some(stat_body) = &stat.expr { - ecx.dcx().emit_err(EiiStaticDefault { - span: stat_body.span, - name: path_to_string(&meta_item.path), - }); - return vec![]; - } // Statics must have an explicit name for the eii if meta_item.is_word() { ecx.dcx().emit_err(EiiStaticArgumentRequired { @@ -137,18 +129,16 @@ fn eii_( let mut module_items = Vec::new(); - if let ItemKind::Fn(func) = kind - && func.body.is_some() - { - module_items.push(generate_default_func_impl( - ecx, - &func, - impl_unsafe, - macro_name, - eii_attr_span, - item_span, - foreign_item_name, - )) + if let Some(default_impl) = generate_default_impl( + ecx, + kind, + impl_unsafe, + macro_name, + eii_attr_span, + item_span, + foreign_item_name, + ) { + module_items.push(default_impl); } module_items.push(generate_foreign_item( @@ -220,20 +210,33 @@ fn filter_attrs_for_multiple_eii_attr( .collect() } -fn generate_default_func_impl( +fn generate_default_impl( ecx: &mut ExtCtxt<'_>, - func: &ast::Fn, + item_kind: &ItemKind, impl_unsafe: bool, macro_name: Ident, eii_attr_span: Span, item_span: 
Span, foreign_item_name: Ident, -) -> Box { +) -> Option> { + match item_kind { + ItemKind::Fn(func) => { + if func.body.is_none() { + return None; + } + } + ItemKind::Static(stat) => { + if stat.expr.is_none() { + return None; + } + } + _ => unreachable!("Target was checked earlier"), + }; + // FIXME: re-add some original attrs let attrs = ThinVec::new(); - let mut default_func = func.clone(); - default_func.eii_impls.push(EiiImpl { + let eii_impl = EiiImpl { node_id: DUMMY_NODE_ID, inner_span: macro_name.span, eii_macro_path: ast::Path::from_ident(macro_name), @@ -253,7 +256,18 @@ fn generate_default_func_impl( ), impl_unsafe, }), - }); + }; + + let mut item_kind = item_kind.clone(); + match &mut item_kind { + ItemKind::Fn(func) => { + func.eii_impls.push(eii_impl); + } + ItemKind::Static(stat) => { + stat.eii_impls.push(eii_impl); + } + _ => unreachable!("Target was checked earlier"), + }; let anon_mod = |span: Span, stmts: ThinVec| { let unit = ecx.ty(item_span, ast::TyKind::Tup(ThinVec::new())); @@ -267,15 +281,12 @@ fn generate_default_func_impl( }; // const _: () = { - // + // // } - anon_mod( + Some(anon_mod( item_span, - thin_vec![ecx.stmt_item( - item_span, - ecx.item(item_span, attrs, ItemKind::Fn(Box::new(default_func))) - ),], - ) + thin_vec![ecx.stmt_item(item_span, ecx.item(item_span, attrs, item_kind))], + )) } /// Generates a foreign item, like @@ -362,6 +373,8 @@ fn generate_foreign_static(mut stat: Box) -> ast::ForeignItemKi stat.safety = ast::Safety::Safe(stat.ident.span); } + stat.expr = None; + ast::ForeignItemKind::Static(stat) } diff --git a/compiler/rustc_builtin_macros/src/errors.rs b/compiler/rustc_builtin_macros/src/errors.rs index c64d6871269a6..24ce4bda79c45 100644 --- a/compiler/rustc_builtin_macros/src/errors.rs +++ b/compiler/rustc_builtin_macros/src/errors.rs @@ -1134,14 +1134,6 @@ pub(crate) struct EiiStaticMultipleImplementations { pub span: Span, } -#[derive(Diagnostic)] -#[diag("`#[{$name}]` cannot be used on statics with a 
value")] -pub(crate) struct EiiStaticDefault { - #[primary_span] - pub span: Span, - pub name: String, -} - #[derive(Diagnostic)] #[diag("`#[{$name}]` requires the name as an explicit argument when used on a static")] pub(crate) struct EiiStaticArgumentRequired { diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs index 5bb65365ad6ad..ba7af7afcbdee 100644 --- a/compiler/rustc_codegen_gcc/src/asm.rs +++ b/compiler/rustc_codegen_gcc/src/asm.rs @@ -677,6 +677,8 @@ fn reg_class_to_gcc(reg_class: InlineAsmRegClass) -> &'static str { InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => { unreachable!("clobber-only") } + InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Sgpr(_)) => "Sg", + InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(_)) => "v", InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => "r", InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::dreg_low16) @@ -780,6 +782,7 @@ fn dummy_output_type<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, reg: InlineAsmRegCl InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => { unreachable!("clobber-only") } + InlineAsmRegClass::Amdgpu(_) => cx.type_i32(), InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => cx.type_i32(), InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => cx.type_f32(), @@ -983,6 +986,7 @@ fn modifier_to_gcc( InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::preg) => { unreachable!("clobber-only") } + InlineAsmRegClass::Amdgpu(_) => None, InlineAsmRegClass::Arm(ArmInlineAsmRegClass::reg) => None, InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg) | InlineAsmRegClass::Arm(ArmInlineAsmRegClass::sreg_low16) => None, diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs index c5ab9fc2336eb..4211c10c7ca61 100644 --- a/compiler/rustc_codegen_llvm/src/asm.rs +++ b/compiler/rustc_codegen_llvm/src/asm.rs @@ 
-43,7 +43,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { match *op { InlineAsmOperandRef::Out { reg, late, place } => { let is_target_supported = |reg_class: InlineAsmRegClass| { - for &(_, feature) in reg_class.supported_types(asm_arch, true) { + for &(_, feature) in reg_class.supported_types(asm_arch, true).as_ref() { if let Some(feature) = feature { if self .tcx @@ -229,6 +229,7 @@ impl<'ll, 'tcx> AsmBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC | InlineAsmArch::Arm => { constraints.push("~{cc}".to_string()); } + InlineAsmArch::Amdgpu => {} InlineAsmArch::X86 | InlineAsmArch::X86_64 => { constraints.extend_from_slice(&[ "~{dirflag}".to_string(), @@ -698,6 +699,8 @@ fn reg_to_llvm(reg: InlineAsmRegOrRegClass, layout: Option<&TyAndLayout<'_>>) -> | Arm(ArmInlineAsmRegClass::dreg_low8) | Arm(ArmInlineAsmRegClass::qreg_low4) => "x", Arm(ArmInlineAsmRegClass::dreg) | Arm(ArmInlineAsmRegClass::qreg) => "w", + Amdgpu(AmdgpuInlineAsmRegClass::Sgpr(_)) => "s", + Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(_)) => "v", Hexagon(HexagonInlineAsmRegClass::reg) => "r", Hexagon(HexagonInlineAsmRegClass::reg_pair) => "r", Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"), @@ -803,6 +806,7 @@ fn modifier_to_llvm( modifier } } + Amdgpu(_) => None, Hexagon(_) => None, LoongArch(_) => None, Mips(_) => None, @@ -883,6 +887,7 @@ fn dummy_output_type<'ll>(cx: &CodegenCx<'ll, '_>, reg: InlineAsmRegClass) -> &' Arm(ArmInlineAsmRegClass::qreg) | Arm(ArmInlineAsmRegClass::qreg_low8) | Arm(ArmInlineAsmRegClass::qreg_low4) => cx.type_vector(cx.type_i64(), 2), + Amdgpu(_) => cx.type_i32(), Hexagon(HexagonInlineAsmRegClass::reg) => cx.type_i32(), Hexagon(HexagonInlineAsmRegClass::reg_pair) => cx.type_i64(), Hexagon(HexagonInlineAsmRegClass::preg) => unreachable!("clobber-only"), diff --git a/compiler/rustc_hir_typeck/src/inline_asm.rs b/compiler/rustc_hir_typeck/src/inline_asm.rs index 
09d1d18ccfa85..68822a2eec2e7 100644 --- a/compiler/rustc_hir_typeck/src/inline_asm.rs +++ b/compiler/rustc_hir_typeck/src/inline_asm.rs @@ -465,7 +465,8 @@ impl<'a, 'tcx> InlineAsmCtxt<'a, 'tcx> { if let InlineAsmRegClass::Err = reg_class { continue; } - for &(_, feature) in reg_class.supported_types(asm_arch, allow_experimental_reg) + for &(_, feature) in + reg_class.supported_types(asm_arch, allow_experimental_reg).as_ref() { match feature { Some(feature) => { diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 69ed7314855f9..836a0676e50b1 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1835,6 +1835,12 @@ symbols! { self_in_typedefs, self_struct_ctor, semiopaque, + sgpr32, + sgpr64, + sgpr96, + sgpr128, + sgpr256, + sgpr512, sha2, sha3, sha512_sm_x86, @@ -2239,6 +2245,21 @@ symbols! { verbatim, version, vfp2, + vgpr16, + vgpr32, + vgpr64, + vgpr96, + vgpr128, + vgpr160, + vgpr192, + vgpr224, + vgpr256, + vgpr288, + vgpr320, + vgpr352, + vgpr384, + vgpr512, + vgpr1024, view_types, vis, visible_private_types, diff --git a/compiler/rustc_target/src/asm/amdgpu.rs b/compiler/rustc_target/src/asm/amdgpu.rs new file mode 100644 index 0000000000000..191cf6baa266f --- /dev/null +++ b/compiler/rustc_target/src/asm/amdgpu.rs @@ -0,0 +1,491 @@ +use std::fmt; + +use rustc_span::Symbol; + +use super::{InlineAsmArch, InlineAsmType, ModifierInfo}; + +// Types are listed as SGPR_*/VGPR_* in llvm/lib/Target/AMDGPU/SIRegisterInfo.td + +/// Amdgpu register classes +/// +/// The number is the size of the register class in bits. 
+#[derive( + Copy, + Clone, + rustc_macros::Encodable, + rustc_macros::Decodable, + Debug, + Eq, + PartialEq, + PartialOrd, + Hash, + rustc_macros::StableHash +)] +#[allow(non_camel_case_types)] +pub enum AmdgpuInlineAsmRegClass { + Sgpr(u16), + Vgpr(u16), +} + +pub(super) fn regclass_map() -> rustc_data_structures::fx::FxHashMap< + super::InlineAsmRegClass, + rustc_data_structures::fx::FxIndexSet, +> { + use rustc_data_structures::fx::{FxHashMap, FxIndexSet}; + + use super::InlineAsmRegClass; + let mut map = FxHashMap::default(); + + // SGPR and VGPR sizes + for i in [32, 64, 96, 128, 256, 512] { + map.insert( + InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Sgpr(i)), + FxIndexSet::default(), + ); + map.insert( + InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(i)), + FxIndexSet::default(), + ); + } + + // VGPR-only sizes + for i in [16, 160, 192, 224, 288, 320, 352, 384, 1024] { + map.insert( + InlineAsmRegClass::Amdgpu(AmdgpuInlineAsmRegClass::Vgpr(i)), + FxIndexSet::default(), + ); + } + + map +} + +// See https://llvm.org/docs/AMDGPUOperandSyntax.html +impl AmdgpuInlineAsmRegClass { + /// Prefix when printed and register constraint in LLVM + fn prefix(self) -> &'static str { + match self { + Self::Sgpr(_) => "s", + Self::Vgpr(_) => "v", + } + } + + /// Return size of the register class in bits + fn bits(self) -> u16 { + let (Self::Sgpr(i) | Self::Vgpr(i)) = self; + i + } + + /// Return size of the register class in bytes + fn bytes(self) -> u16 { + self.bits() / 8 + } + + /// Returns the name or `None` if this is not a valid register class + fn try_get_name(self) -> Option { + let s = match self { + Self::Sgpr(32) => rustc_span::sym::sgpr32, + Self::Sgpr(64) => rustc_span::sym::sgpr64, + Self::Sgpr(96) => rustc_span::sym::sgpr96, + Self::Sgpr(128) => rustc_span::sym::sgpr128, + Self::Sgpr(256) => rustc_span::sym::sgpr256, + Self::Sgpr(512) => rustc_span::sym::sgpr512, + Self::Vgpr(16) => rustc_span::sym::vgpr16, + Self::Vgpr(32) => 
rustc_span::sym::vgpr32, + Self::Vgpr(64) => rustc_span::sym::vgpr64, + Self::Vgpr(96) => rustc_span::sym::vgpr96, + Self::Vgpr(128) => rustc_span::sym::vgpr128, + Self::Vgpr(160) => rustc_span::sym::vgpr160, + Self::Vgpr(192) => rustc_span::sym::vgpr192, + Self::Vgpr(224) => rustc_span::sym::vgpr224, + Self::Vgpr(256) => rustc_span::sym::vgpr256, + Self::Vgpr(288) => rustc_span::sym::vgpr288, + Self::Vgpr(320) => rustc_span::sym::vgpr320, + Self::Vgpr(352) => rustc_span::sym::vgpr352, + Self::Vgpr(384) => rustc_span::sym::vgpr384, + Self::Vgpr(512) => rustc_span::sym::vgpr512, + Self::Vgpr(1024) => rustc_span::sym::vgpr1024, + _ => return None, + }; + Some(s) + } + + pub fn name(self) -> rustc_span::Symbol { + self.try_get_name().expect("Invalid amdgpu register class") + } + + pub fn parse(name: rustc_span::Symbol) -> Result { + match name { + rustc_span::sym::sgpr32 => Ok(Self::Sgpr(32)), + rustc_span::sym::sgpr64 => Ok(Self::Sgpr(64)), + rustc_span::sym::sgpr96 => Ok(Self::Sgpr(96)), + rustc_span::sym::sgpr128 => Ok(Self::Sgpr(128)), + rustc_span::sym::sgpr256 => Ok(Self::Sgpr(256)), + rustc_span::sym::sgpr512 => Ok(Self::Sgpr(512)), + rustc_span::sym::vgpr16 => Ok(Self::Vgpr(16)), + rustc_span::sym::vgpr32 => Ok(Self::Vgpr(32)), + rustc_span::sym::vgpr64 => Ok(Self::Vgpr(64)), + rustc_span::sym::vgpr96 => Ok(Self::Vgpr(96)), + rustc_span::sym::vgpr128 => Ok(Self::Vgpr(128)), + rustc_span::sym::vgpr160 => Ok(Self::Vgpr(160)), + rustc_span::sym::vgpr192 => Ok(Self::Vgpr(192)), + rustc_span::sym::vgpr224 => Ok(Self::Vgpr(224)), + rustc_span::sym::vgpr256 => Ok(Self::Vgpr(256)), + rustc_span::sym::vgpr288 => Ok(Self::Vgpr(288)), + rustc_span::sym::vgpr320 => Ok(Self::Vgpr(320)), + rustc_span::sym::vgpr352 => Ok(Self::Vgpr(352)), + rustc_span::sym::vgpr384 => Ok(Self::Vgpr(384)), + rustc_span::sym::vgpr512 => Ok(Self::Vgpr(512)), + rustc_span::sym::vgpr1024 => Ok(Self::Vgpr(1024)), + _ => Err(&[ + rustc_span::sym::sgpr32, + rustc_span::sym::sgpr64, + 
rustc_span::sym::sgpr96, + rustc_span::sym::sgpr128, + rustc_span::sym::sgpr256, + rustc_span::sym::sgpr512, + rustc_span::sym::vgpr16, + rustc_span::sym::vgpr32, + rustc_span::sym::vgpr64, + rustc_span::sym::vgpr96, + rustc_span::sym::vgpr128, + rustc_span::sym::vgpr160, + rustc_span::sym::vgpr192, + rustc_span::sym::vgpr224, + rustc_span::sym::vgpr256, + rustc_span::sym::vgpr288, + rustc_span::sym::vgpr320, + rustc_span::sym::vgpr352, + rustc_span::sym::vgpr384, + rustc_span::sym::vgpr512, + rustc_span::sym::vgpr1024, + ]), + } + } + + pub fn valid_modifiers(self, _arch: InlineAsmArch) -> &'static [char] { + &[] + } + + pub fn suggest_class(self, _arch: InlineAsmArch, ty: InlineAsmType) -> Option { + // 8-bit types and f128 are not supported + if matches!( + ty, + InlineAsmType::I8 + | InlineAsmType::VecI8(_) + | InlineAsmType::F128 + | InlineAsmType::VecF128(_) + ) { + return None; + } + + Some(Self::Vgpr(ty.size().bits().try_into().ok()?)) + } + + pub fn suggest_modifier( + self, + _arch: InlineAsmArch, + _ty: InlineAsmType, + ) -> Option { + None + } + + pub fn default_modifier(self, _arch: InlineAsmArch) -> Option { + None + } + + pub fn supported_types(self, _arch: InlineAsmArch) -> Vec<(InlineAsmType, Option)> { + use InlineAsmType::*; + let mut types = Vec::new(); + let mut add_types = |ts: &[_]| { + for t in ts { + types.push((*t, None)) + } + }; + let bits = self.bits() as u64; + + // Primitive types + match bits { + 16 => add_types(&[I16, F16]), + // Many 16-bit instructions take 32-bit registers, so allow 16-bit values + 32 => add_types(&[I16, F16, I32, F32]), + 64 => add_types(&[I64, F64]), + 128 => add_types(&[I128]), + _ => {} + } + + // Vector types + if bits == 1024 { + add_types(&[VecF32(1024 / 32)]); + } else { + if bits > 16 && bits.is_power_of_two() { + // 32, 64, 128, 256, 512 + add_types(&[VecI16(bits / 16), VecF16(bits / 16)]); + } + if bits > 32 { + // 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 512 + add_types(&[VecI32(bits / 
32), VecF32(bits / 32)]); + } + } + + // The LLVM backend supports more vector types, but these are rather uncommon + // and not systematic, so we only list common types here. + + types + } + + /// The number of supported registers in this class. + /// The returned number is the length, so supported register + /// indices are 0 to max_num()-1. + fn max_num(self) -> u16 { + if self == Self::Vgpr(16) { + return 512; + } + let size = self.bytes(); + match self { + Self::Sgpr(_) => 106 - (size / 4 - 1), + Self::Vgpr(_) => 256 - (size / 4 - 1), + } + } + + /// Get register class from prefix and size. + fn parse_with_prefix(prefix: char, bits: u16) -> Result { + let res = match prefix { + 's' => Self::Sgpr(bits), + 'v' => Self::Vgpr(bits), + _ => return Err("unknown register prefix"), + }; + + // Check that the size is valid by converting it to a symbol + if res.try_get_name().is_none() { + return Err("invalid register size for this class"); + } + + Ok(res) + } +} + +/// Start index of a register. +/// +/// Together with the register size this gives the range occupied by a register. 
+#[derive( + Copy, + Clone, + rustc_macros::Encodable, + rustc_macros::Decodable, + Debug, + Eq, + PartialEq, + PartialOrd, + Hash, + rustc_macros::StableHash +)] +enum AmdgpuRegStart { + /// Low 16-bit of the register at this index + Low(u16), + /// High 16-bit of the register at this index + High(u16), + /// One or more 32-bit registers, starting at this index + Full(u16), +} + +#[derive( + Copy, + Clone, + rustc_macros::Encodable, + rustc_macros::Decodable, + Debug, + Eq, + PartialEq, + PartialOrd, + Hash, + rustc_macros::StableHash +)] +#[allow(non_camel_case_types)] +pub struct AmdgpuInlineAsmReg { + class: AmdgpuInlineAsmRegClass, + range: AmdgpuRegStart, +} + +impl AmdgpuInlineAsmReg { + pub fn name(self) -> String { + let c = self.class.prefix(); + match self.range { + AmdgpuRegStart::Low(n) => format!("{c}{n}.l"), + AmdgpuRegStart::High(n) => format!("{c}{n}.h"), + AmdgpuRegStart::Full(n) if self.class.bytes() == 4 => format!("{c}{n}"), + AmdgpuRegStart::Full(n) => format!("{c}[{n}:{}]", n + self.class.bytes() / 4 - 1), + } + } + + pub fn reg_class(self) -> AmdgpuInlineAsmRegClass { + self.class + } + + pub fn parse(name: &str) -> Result { + if name.is_empty() { + return Err("invalid empty register"); + } + // s or v + let prefix = name.chars().next().unwrap(); + // Form with range, e.g. 
s[2:3] + let res; + if name.get(1..).is_some_and(|rest| rest.starts_with('[')) { + if !name.ends_with(']') { + return Err("invalid register, missing closing bracket"); + } + if let Some((start, end)) = name[2..name.len() - 1].split_once(':') { + let Ok(start) = start.parse::<u16>() else { + return Err("invalid register range start"); + }; + let Ok(end) = end.parse::<u16>() else { + return Err("invalid register range end"); + }; + + // Check range + if start > end { + return Err("invalid reversed register range"); + } + + let bits = (end - start).saturating_add(1).saturating_mul(32); + let class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, bits)?; + if start >= class.max_num() { + return Err("too large register for this class"); + } + res = Self { class, range: AmdgpuRegStart::Full(start) }; + } else { + return Err("invalid register range"); + } + } else { + let parse_num = |core: &str| { + let Ok(start) = core.parse() else { + return Err("invalid register number"); + }; + + let class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 32)?; + if start >= class.max_num() { + return Err("too large register for this class"); + } + + Ok(start) + }; + + let name = name.get(1..).ok_or("unknown register prefix")?; + let class; + let range = if let Some(name) = name.strip_suffix(".l") { + class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 16)?; + if matches!(class, AmdgpuInlineAsmRegClass::Sgpr(_)) { + return Err("invalid 16-bit SGPR register"); + } + AmdgpuRegStart::Low(parse_num(name)?) + } else if let Some(name) = name.strip_suffix(".h") { + class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 16)?; + if matches!(class, AmdgpuInlineAsmRegClass::Sgpr(_)) { + return Err("invalid 16-bit SGPR register"); + } + AmdgpuRegStart::High(parse_num(name)?) 
+ } else { + class = AmdgpuInlineAsmRegClass::parse_with_prefix(prefix, 32)?; + let start = parse_num(name)?; + AmdgpuRegStart::Full(start) + }; + res = Self { class, range }; + } + Ok(res) + } + + pub fn validate( + self, + _arch: super::InlineAsmArch, + _reloc_model: crate::spec::RelocModel, + _target_features: &rustc_data_structures::fx::FxIndexSet, + _target: &crate::spec::Target, + _is_clobber: bool, + ) -> Result<(), &'static str> { + Ok(()) + } +} + +pub(super) fn fill_reg_map( + _arch: super::InlineAsmArch, + _reloc_model: crate::spec::RelocModel, + _target_features: &rustc_data_structures::fx::FxIndexSet, + _target: &crate::spec::Target, + map: &mut rustc_data_structures::fx::FxHashMap< + super::InlineAsmRegClass, + rustc_data_structures::fx::FxIndexSet, + >, +) { + use super::{InlineAsmReg, InlineAsmRegClass}; + + #[allow(rustc::potential_query_instability)] + for class in regclass_map().keys() { + let InlineAsmRegClass::Amdgpu(class) = *class else { unreachable!("Must be amdgpu class") }; + if let Some(set) = map.get_mut(&InlineAsmRegClass::Amdgpu(class)) { + if class == AmdgpuInlineAsmRegClass::Vgpr(16) { + for i in 0..(class.max_num() / 2) { + set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { + class, + range: AmdgpuRegStart::Low(i), + })); + set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { + class, + range: AmdgpuRegStart::High(i), + })); + } + } else { + for i in 0..class.max_num() { + set.insert(InlineAsmReg::Amdgpu(AmdgpuInlineAsmReg { + class, + range: AmdgpuRegStart::Full(i), + })); + } + } + } + } +} + +impl AmdgpuInlineAsmReg { + pub fn emit( + self, + out: &mut dyn fmt::Write, + _arch: InlineAsmArch, + _modifier: Option, + ) -> fmt::Result { + out.write_str(&self.name()) + } + + pub fn overlapping_regs(self, mut cb: impl FnMut(AmdgpuInlineAsmReg)) { + if matches!(self.class, AmdgpuInlineAsmRegClass::Vgpr(_)) { + // Overlapping 16-bit registers (not supported for sgprs) + if let AmdgpuRegStart::Full(start) = self.range { + for i in 
start..(start + self.class.bytes().div_ceil(4)) { + cb(AmdgpuInlineAsmReg { + class: AmdgpuInlineAsmRegClass::Vgpr(16), + range: AmdgpuRegStart::Low(i), + }); + cb(AmdgpuInlineAsmReg { + class: AmdgpuInlineAsmRegClass::Vgpr(16), + range: AmdgpuRegStart::High(i), + }); + } + } + } + + // Overlapping 32-bit registers, up to size 32 + for size in 1..=32 { + let (AmdgpuRegStart::Low(start) + | AmdgpuRegStart::High(start) + | AmdgpuRegStart::Full(start)) = self.range; + + let size_range = size - 1; + for overlap_start in start.saturating_sub(size_range)..=(start + self.class.bytes().div_ceil(4) - 1) + { + let class = match self.class { + AmdgpuInlineAsmRegClass::Sgpr(_) => AmdgpuInlineAsmRegClass::Sgpr(size * 32), + AmdgpuInlineAsmRegClass::Vgpr(_) => AmdgpuInlineAsmRegClass::Vgpr(size * 32), + }; + cb(AmdgpuInlineAsmReg { class, range: AmdgpuRegStart::Full(overlap_start) }); + } + } + } +} diff --git a/compiler/rustc_target/src/asm/mod.rs b/compiler/rustc_target/src/asm/mod.rs index 14fef2880ff68..ec34bdf88e9d9 100644 --- a/compiler/rustc_target/src/asm/mod.rs +++ b/compiler/rustc_target/src/asm/mod.rs @@ -1,3 +1,4 @@ +use std::borrow::Cow; use std::fmt; use rustc_abi::Size; @@ -177,6 +178,7 @@ macro_rules! 
types { } mod aarch64; +mod amdgpu; mod arm; mod avr; mod bpf; @@ -196,6 +198,7 @@ mod wasm; mod x86; pub use aarch64::{AArch64InlineAsmReg, AArch64InlineAsmRegClass}; +pub use amdgpu::{AmdgpuInlineAsmReg, AmdgpuInlineAsmRegClass}; pub use arm::{ArmInlineAsmReg, ArmInlineAsmRegClass}; pub use avr::{AvrInlineAsmReg, AvrInlineAsmRegClass}; pub use bpf::{BpfInlineAsmReg, BpfInlineAsmRegClass}; @@ -224,6 +227,7 @@ pub enum InlineAsmArch { RiscV32, RiscV64, Nvptx64, + Amdgpu, Hexagon, LoongArch32, LoongArch64, @@ -252,6 +256,7 @@ impl InlineAsmArch { Arch::Arm => Some(Self::Arm), Arch::Arm64EC => Some(Self::Arm64EC), Arch::AArch64 => Some(Self::AArch64), + Arch::AmdGpu => Some(Self::Amdgpu), Arch::RiscV32 => Some(Self::RiscV32), Arch::RiscV64 => Some(Self::RiscV64), Arch::Nvptx64 => Some(Self::Nvptx64), @@ -273,7 +278,7 @@ impl InlineAsmArch { Arch::Msp430 => Some(Self::Msp430), Arch::M68k => Some(Self::M68k), Arch::CSky => Some(Self::CSKY), - Arch::AmdGpu | Arch::Xtensa | Arch::Other(_) => None, + Arch::Xtensa | Arch::Other(_) => None, } } } @@ -283,6 +288,7 @@ impl InlineAsmArch { pub enum InlineAsmReg { X86(X86InlineAsmReg), Arm(ArmInlineAsmReg), + Amdgpu(AmdgpuInlineAsmReg), AArch64(AArch64InlineAsmReg), RiscV(RiscVInlineAsmReg), Nvptx(NvptxInlineAsmReg), @@ -304,24 +310,25 @@ pub enum InlineAsmReg { } impl InlineAsmReg { - pub fn name(self) -> &'static str { + pub fn name(self) -> Cow<'static, str> { match self { - Self::X86(r) => r.name(), - Self::Arm(r) => r.name(), - Self::AArch64(r) => r.name(), - Self::RiscV(r) => r.name(), - Self::PowerPC(r) => r.name(), - Self::Hexagon(r) => r.name(), - Self::LoongArch(r) => r.name(), - Self::Mips(r) => r.name(), - Self::S390x(r) => r.name(), - Self::Sparc(r) => r.name(), - Self::Bpf(r) => r.name(), - Self::Avr(r) => r.name(), - Self::Msp430(r) => r.name(), - Self::M68k(r) => r.name(), - Self::CSKY(r) => r.name(), - Self::Err => "", + Self::X86(r) => r.name().into(), + Self::Arm(r) => r.name().into(), + Self::AArch64(r) => 
r.name().into(), + Self::Amdgpu(r) => r.name().into(), + Self::RiscV(r) => r.name().into(), + Self::PowerPC(r) => r.name().into(), + Self::Hexagon(r) => r.name().into(), + Self::LoongArch(r) => r.name().into(), + Self::Mips(r) => r.name().into(), + Self::S390x(r) => r.name().into(), + Self::Sparc(r) => r.name().into(), + Self::Bpf(r) => r.name().into(), + Self::Avr(r) => r.name().into(), + Self::Msp430(r) => r.name().into(), + Self::M68k(r) => r.name().into(), + Self::CSKY(r) => r.name().into(), + Self::Err => "".into(), } } @@ -330,6 +337,7 @@ impl InlineAsmReg { Self::X86(r) => InlineAsmRegClass::X86(r.reg_class()), Self::Arm(r) => InlineAsmRegClass::Arm(r.reg_class()), Self::AArch64(r) => InlineAsmRegClass::AArch64(r.reg_class()), + Self::Amdgpu(r) => InlineAsmRegClass::Amdgpu(r.reg_class()), Self::RiscV(r) => InlineAsmRegClass::RiscV(r.reg_class()), Self::PowerPC(r) => InlineAsmRegClass::PowerPC(r.reg_class()), Self::Hexagon(r) => InlineAsmRegClass::Hexagon(r.reg_class()), @@ -356,6 +364,7 @@ impl InlineAsmReg { InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC => { Self::AArch64(AArch64InlineAsmReg::parse(name)?) } + InlineAsmArch::Amdgpu => Self::Amdgpu(AmdgpuInlineAsmReg::parse(name)?), InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { Self::RiscV(RiscVInlineAsmReg::parse(name)?) 
} @@ -398,6 +407,7 @@ impl InlineAsmReg { Self::X86(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), Self::Arm(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), Self::AArch64(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), + Self::Amdgpu(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), Self::RiscV(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), Self::PowerPC(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), Self::Hexagon(r) => r.validate(arch, reloc_model, target_features, target, is_clobber), @@ -428,6 +438,7 @@ impl InlineAsmReg { Self::X86(r) => r.emit(out, arch, modifier), Self::Arm(r) => r.emit(out, arch, modifier), Self::AArch64(r) => r.emit(out, arch, modifier), + Self::Amdgpu(r) => r.emit(out, arch, modifier), Self::RiscV(r) => r.emit(out, arch, modifier), Self::PowerPC(r) => r.emit(out, arch, modifier), Self::Hexagon(r) => r.emit(out, arch, modifier), @@ -449,6 +460,7 @@ impl InlineAsmReg { Self::X86(r) => r.overlapping_regs(|r| cb(Self::X86(r))), Self::Arm(r) => r.overlapping_regs(|r| cb(Self::Arm(r))), Self::AArch64(_) => cb(self), + Self::Amdgpu(r) => r.overlapping_regs(|r| cb(Self::Amdgpu(r))), Self::RiscV(_) => cb(self), Self::PowerPC(r) => r.overlapping_regs(|r| cb(Self::PowerPC(r))), Self::Hexagon(r) => r.overlapping_regs(|r| cb(Self::Hexagon(r))), @@ -472,6 +484,7 @@ pub enum InlineAsmRegClass { X86(X86InlineAsmRegClass), Arm(ArmInlineAsmRegClass), AArch64(AArch64InlineAsmRegClass), + Amdgpu(AmdgpuInlineAsmRegClass), RiscV(RiscVInlineAsmRegClass), Nvptx(NvptxInlineAsmRegClass), PowerPC(PowerPCInlineAsmRegClass), @@ -497,6 +510,7 @@ impl InlineAsmRegClass { Self::X86(r) => r.name(), Self::Arm(r) => r.name(), Self::AArch64(r) => r.name(), + Self::Amdgpu(r) => r.name(), Self::RiscV(r) => r.name(), Self::Nvptx(r) => r.name(), Self::PowerPC(r) => r.name(), @@ -524,6 +538,7 @@ impl InlineAsmRegClass { 
Self::X86(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::X86), Self::Arm(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Arm), Self::AArch64(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::AArch64), + Self::Amdgpu(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Amdgpu), Self::RiscV(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::RiscV), Self::Nvptx(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::Nvptx), Self::PowerPC(r) => r.suggest_class(arch, ty).map(InlineAsmRegClass::PowerPC), @@ -554,6 +569,7 @@ impl InlineAsmRegClass { Self::X86(r) => r.suggest_modifier(arch, ty), Self::Arm(r) => r.suggest_modifier(arch, ty), Self::AArch64(r) => r.suggest_modifier(arch, ty), + Self::Amdgpu(r) => r.suggest_modifier(arch, ty), Self::RiscV(r) => r.suggest_modifier(arch, ty), Self::Nvptx(r) => r.suggest_modifier(arch, ty), Self::PowerPC(r) => r.suggest_modifier(arch, ty), @@ -584,6 +600,7 @@ impl InlineAsmRegClass { Self::X86(r) => r.default_modifier(arch), Self::Arm(r) => r.default_modifier(arch), Self::AArch64(r) => r.default_modifier(arch), + Self::Amdgpu(r) => r.default_modifier(arch), Self::RiscV(r) => r.default_modifier(arch), Self::Nvptx(r) => r.default_modifier(arch), Self::PowerPC(r) => r.default_modifier(arch), @@ -612,26 +629,27 @@ impl InlineAsmRegClass { self, arch: InlineAsmArch, allow_experimental_reg: bool, - ) -> &'static [(InlineAsmType, Option)] { + ) -> Cow<'static, [(InlineAsmType, Option)]> { match self { - Self::X86(r) => r.supported_types(arch, allow_experimental_reg), - Self::Arm(r) => r.supported_types(arch), - Self::AArch64(r) => r.supported_types(arch), - Self::RiscV(r) => r.supported_types(arch), - Self::Nvptx(r) => r.supported_types(arch), - Self::PowerPC(r) => r.supported_types(arch), - Self::Hexagon(r) => r.supported_types(arch), - Self::LoongArch(r) => r.supported_types(arch), - Self::Mips(r) => r.supported_types(arch), - Self::S390x(r) => r.supported_types(arch), - Self::Sparc(r) => 
r.supported_types(arch), - Self::SpirV(r) => r.supported_types(arch), - Self::Wasm(r) => r.supported_types(arch), - Self::Bpf(r) => r.supported_types(arch), - Self::Avr(r) => r.supported_types(arch), - Self::Msp430(r) => r.supported_types(arch), - Self::M68k(r) => r.supported_types(arch), - Self::CSKY(r) => r.supported_types(arch), + Self::X86(r) => r.supported_types(arch, allow_experimental_reg).into(), + Self::Arm(r) => r.supported_types(arch).into(), + Self::AArch64(r) => r.supported_types(arch).into(), + Self::Amdgpu(r) => r.supported_types(arch).into(), + Self::RiscV(r) => r.supported_types(arch).into(), + Self::Nvptx(r) => r.supported_types(arch).into(), + Self::PowerPC(r) => r.supported_types(arch).into(), + Self::Hexagon(r) => r.supported_types(arch).into(), + Self::LoongArch(r) => r.supported_types(arch).into(), + Self::Mips(r) => r.supported_types(arch).into(), + Self::S390x(r) => r.supported_types(arch).into(), + Self::Sparc(r) => r.supported_types(arch).into(), + Self::SpirV(r) => r.supported_types(arch).into(), + Self::Wasm(r) => r.supported_types(arch).into(), + Self::Bpf(r) => r.supported_types(arch).into(), + Self::Avr(r) => r.supported_types(arch).into(), + Self::Msp430(r) => r.supported_types(arch).into(), + Self::M68k(r) => r.supported_types(arch).into(), + Self::CSKY(r) => r.supported_types(arch).into(), Self::Err => unreachable!("Use of InlineAsmRegClass::Err"), } } @@ -645,6 +663,7 @@ impl InlineAsmRegClass { InlineAsmArch::AArch64 | InlineAsmArch::Arm64EC => { Self::AArch64(AArch64InlineAsmRegClass::parse(name)?) } + InlineAsmArch::Amdgpu => Self::Amdgpu(AmdgpuInlineAsmRegClass::parse(name)?), InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { Self::RiscV(RiscVInlineAsmRegClass::parse(name)?) 
} @@ -682,6 +701,7 @@ impl InlineAsmRegClass { Self::X86(r) => r.valid_modifiers(arch), Self::Arm(r) => r.valid_modifiers(arch), Self::AArch64(r) => r.valid_modifiers(arch), + Self::Amdgpu(r) => r.valid_modifiers(arch), Self::RiscV(r) => r.valid_modifiers(arch), Self::Nvptx(r) => r.valid_modifiers(arch), Self::PowerPC(r) => r.valid_modifiers(arch), @@ -843,6 +863,11 @@ pub fn allocatable_registers( aarch64::fill_reg_map(arch, reloc_model, target_features, target, &mut map); map } + InlineAsmArch::Amdgpu => { + let mut map = amdgpu::regclass_map(); + amdgpu::fill_reg_map(arch, reloc_model, target_features, target, &mut map); + map + } InlineAsmArch::RiscV32 | InlineAsmArch::RiscV64 => { let mut map = riscv::regclass_map(); riscv::fill_reg_map(arch, reloc_model, target_features, target, &mut map); diff --git a/library/alloctests/tests/str.rs b/library/alloctests/tests/str.rs index f1bd5325da587..dca2c49249aff 100644 --- a/library/alloctests/tests/str.rs +++ b/library/alloctests/tests/str.rs @@ -2348,6 +2348,7 @@ fn utf8_char_counts() { .flat_map(|n| n - spread..=n + spread) .collect::>(); if cfg!(not(miri)) { + // Miri is too slow reps.extend([1024, 1 << 16].iter().copied().flat_map(|n| n - spread..=n + spread)); } let counts = if cfg!(miri) { 0..1 } else { 0..8 }; diff --git a/library/alloctests/tests/sync.rs b/library/alloctests/tests/sync.rs index 6d3ab1b1d11e1..5742855768c70 100644 --- a/library/alloctests/tests/sync.rs +++ b/library/alloctests/tests/sync.rs @@ -469,8 +469,6 @@ fn test_weak_count_locked() { while !a2.load(SeqCst) { let n = Arc::weak_count(&a2); assert!(n < 2, "bad weak count: {}", n); - #[cfg(miri)] // Miri's scheduler does not guarantee liveness, and thus needs this hint. 
- std::hint::spin_loop(); } t.join().unwrap(); } diff --git a/library/core/src/ffi/c_short.md b/library/core/src/ffi/c_short.md index 3d1e53d1325f3..29415129b50a7 100644 --- a/library/core/src/ffi/c_short.md +++ b/library/core/src/ffi/c_short.md @@ -1,5 +1,3 @@ Equivalent to C's `signed short` (`short`) type. This type will almost always be [`i16`], but may differ on some esoteric systems. The C standard technically only requires that this type be a signed integer with at least 16 bits; some systems may define it as `i32`, for example. - -[`char`]: c_char diff --git a/library/core/src/fmt/num.rs b/library/core/src/fmt/num.rs index e9302a6127f5c..d395d0ab58354 100644 --- a/library/core/src/fmt/num.rs +++ b/library/core/src/fmt/num.rs @@ -299,8 +299,8 @@ macro_rules! impl_Display { } impl $Unsigned { - /// Allows users to write an integer (in signed decimal format) into a variable `buf` of - /// type [`NumBuffer`] that is passed by the caller by mutable reference. + /// Allows users to write an integer (in unsigned decimal format) into a variable `buf` + /// of type [`NumBuffer`] that is passed by the caller by mutable reference. /// /// # Examples /// @@ -738,7 +738,7 @@ impl u128 { offset } - /// Allows users to write an integer (in signed decimal format) into a variable `buf` of + /// Allows users to write an integer (in unsigned decimal format) into a variable `buf` of /// type [`NumBuffer`] that is passed by the caller by mutable reference. /// /// # Examples diff --git a/library/core/src/iter/traits/iterator.rs b/library/core/src/iter/traits/iterator.rs index 6dc748c568fcd..c5182cb3d0720 100644 --- a/library/core/src/iter/traits/iterator.rs +++ b/library/core/src/iter/traits/iterator.rs @@ -3674,7 +3674,7 @@ pub const trait Iterator { Sum::sum(self) } - /// Iterates over the entire iterator, multiplying all the elements + /// Iterates over the entire iterator, multiplying all the elements. /// /// An empty iterator returns the one value of the type. 
/// diff --git a/library/core/src/num/f128.rs b/library/core/src/num/f128.rs index c17f55a25896a..d381402b469f4 100644 --- a/library/core/src/num/f128.rs +++ b/library/core/src/num/f128.rs @@ -977,7 +977,7 @@ impl f128 { #[must_use = "this returns the result of the operation, \ without modifying the original"] pub const fn midpoint(self, other: f128) -> f128 { - const HI: f128 = f128::MAX / 2.; + const HI: f128 = f128::MAX * 0.5; let (a, b) = (self, other); let abs_a = a.abs(); @@ -985,9 +985,9 @@ impl f128 { if abs_a <= HI && abs_b <= HI { // Overflow is impossible - (a + b) / 2. + (a + b) * 0.5 } else { - (a / 2.) + (b / 2.) + (a * 0.5) + (b * 0.5) } } diff --git a/library/core/src/num/f16.rs b/library/core/src/num/f16.rs index 110465068b8f6..c26ae17d870cc 100644 --- a/library/core/src/num/f16.rs +++ b/library/core/src/num/f16.rs @@ -973,7 +973,7 @@ impl f16 { #[must_use = "this returns the result of the operation, \ without modifying the original"] pub const fn midpoint(self, other: f16) -> f16 { - const HI: f16 = f16::MAX / 2.; + const HI: f16 = f16::MAX * 0.5; let (a, b) = (self, other); let abs_a = a.abs(); @@ -981,9 +981,9 @@ impl f16 { if abs_a <= HI && abs_b <= HI { // Overflow is impossible - (a + b) / 2. + (a + b) * 0.5 } else { - (a / 2.) + (b / 2.) + (a * 0.5) + (b * 0.5) } } diff --git a/library/core/src/num/f32.rs b/library/core/src/num/f32.rs index f9cb7cc650f4f..24c97a6491c11 100644 --- a/library/core/src/num/f32.rs +++ b/library/core/src/num/f32.rs @@ -1164,10 +1164,10 @@ impl f32 { target_arch = "wasm32", target_arch = "wasm64", ) => { - ((self as f64 + other as f64) / 2.0) as f32 + ((self as f64 + other as f64) * 0.5) as f32 } _ => { - const HI: f32 = f32::MAX / 2.; + const HI: f32 = f32::MAX * 0.5; let (a, b) = (self, other); let abs_a = a.abs(); @@ -1175,9 +1175,9 @@ impl f32 { if abs_a <= HI && abs_b <= HI { // Overflow is impossible - (a + b) / 2. + (a + b) * 0.5 } else { - (a / 2.) + (b / 2.) 
+ (a * 0.5) + (b * 0.5) } } } diff --git a/library/core/src/num/f64.rs b/library/core/src/num/f64.rs index 87f5505ce2b33..be045033a3553 100644 --- a/library/core/src/num/f64.rs +++ b/library/core/src/num/f64.rs @@ -1150,7 +1150,7 @@ impl f64 { #[must_use = "this returns the result of the operation, \ without modifying the original"] pub const fn midpoint(self, other: f64) -> f64 { - const HI: f64 = f64::MAX / 2.; + const HI: f64 = f64::MAX * 0.5; let (a, b) = (self, other); let abs_a = a.abs(); @@ -1158,9 +1158,9 @@ impl f64 { if abs_a <= HI && abs_b <= HI { // Overflow is impossible - (a + b) / 2. + (a + b) * 0.5 } else { - (a / 2.) + (b / 2.) + (a * 0.5) + (b * 0.5) } } diff --git a/library/std/src/env.rs b/library/std/src/env.rs index f60c8392785c1..540dcfd204e9a 100644 --- a/library/std/src/env.rs +++ b/library/std/src/env.rs @@ -303,12 +303,12 @@ impl Error for VarError {} /// /// # Safety /// -/// This function is safe to call in a single-threaded program. +/// This function is sound to call in a single-threaded program. /// -/// This function is also always safe to call on Windows, in single-threaded +/// This function is also always sound to call on Windows, in single-threaded /// and multi-threaded programs. /// -/// In multi-threaded programs on other operating systems, the only safe option is +/// In multi-threaded programs on other operating systems, the only sound option is /// to not use `set_var` or `remove_var` at all. /// /// The exact requirement is: you @@ -322,7 +322,7 @@ impl Error for VarError {} /// lookups from [`std::net::ToSocketAddrs`]. No stable guarantee is made about /// which functions may read from the environment in future versions of a /// library. 
All this makes it not practically possible for you to guarantee -/// that no other thread will read the environment, so the only safe option is +/// that no other thread will read the environment, so the only sound option is /// to not use `set_var` or `remove_var` in multi-threaded programs at all. /// /// Discussion of this unsafety on Unix may be found in: @@ -366,12 +366,12 @@ pub unsafe fn set_var, V: AsRef>(key: K, value: V) { /// /// # Safety /// -/// This function is safe to call in a single-threaded program. +/// This function is sound to call in a single-threaded program. /// -/// This function is also always safe to call on Windows, in single-threaded +/// This function is also always sound to call on Windows, in single-threaded /// and multi-threaded programs. /// -/// In multi-threaded programs on other operating systems, the only safe option is +/// In multi-threaded programs on other operating systems, the only sound option is /// to not use `set_var` or `remove_var` at all. /// /// The exact requirement is: you @@ -385,7 +385,7 @@ pub unsafe fn set_var, V: AsRef>(key: K, value: V) { /// lookups from [`std::net::ToSocketAddrs`]. No stable guarantee is made about /// which functions may read from the environment in future versions of a /// library. All this makes it not practically possible for you to guarantee -/// that no other thread will read the environment, so the only safe option is +/// that no other thread will read the environment, so the only sound option is /// to not use `set_var` or `remove_var` in multi-threaded programs at all. /// /// Discussion of this unsafety on Unix may be found in: diff --git a/library/std/src/keyword_docs.rs b/library/std/src/keyword_docs.rs index e1dedad313ddc..5f94a13dad22a 100644 --- a/library/std/src/keyword_docs.rs +++ b/library/std/src/keyword_docs.rs @@ -238,7 +238,7 @@ mod break_keyword {} /// /// Turning a `fn` into a `const fn` has no effect on run-time uses of that function. 
/// -/// ## Other uses of `const` +/// ## raw pointers /// /// The `const` keyword is also used in raw pointers in combination with `mut`, as seen in `*const /// T` and `*mut T`. More about `const` as used in raw pointers can be read at the Rust docs for the [pointer primitive]. diff --git a/library/std/src/os/unix/process.rs b/library/std/src/os/unix/process.rs index a0defc39ac82e..71896d73670fd 100644 --- a/library/std/src/os/unix/process.rs +++ b/library/std/src/os/unix/process.rs @@ -494,9 +494,18 @@ impl ChildExt for process::Child { self.handle.send_process_group_signal(signal) } + #[cfg(not(target_os = "espidf"))] fn kill_process_group(&mut self) -> io::Result<()> { self.handle.send_process_group_signal(libc::SIGKILL) } + + #[cfg(target_os = "espidf")] + fn kill_process_group(&mut self) -> io::Result<()> { + Err(io::Error::new( + io::ErrorKind::Unsupported, + "process groups are not supported on espidf", + )) + } } #[stable(feature = "process_extensions", since = "1.2.0")] diff --git a/library/std/src/sys/fs/unix.rs b/library/std/src/sys/fs/unix.rs index 5d2cfdf6718a1..a7cb9ebad314e 100644 --- a/library/std/src/sys/fs/unix.rs +++ b/library/std/src/sys/fs/unix.rs @@ -1090,7 +1090,7 @@ impl DirEntry { target_os = "illumos", target_vendor = "apple", )), - miri + miri // no dirfd on Miri ))] pub fn metadata(&self) -> io::Result { run_path_with_cstr(&self.path(), &lstat) diff --git a/library/std/src/sys/pal/unix/conf.rs b/library/std/src/sys/pal/unix/conf.rs index a97173a1a35a1..4c00379a88439 100644 --- a/library/std/src/sys/pal/unix/conf.rs +++ b/library/std/src/sys/pal/unix/conf.rs @@ -12,9 +12,7 @@ pub fn page_size() -> usize { /// /// [posix_confstr]: /// https://pubs.opengroup.org/onlinepubs/9699919799/functions/confstr.html -// -// FIXME: Support `confstr` in Miri. 
-#[cfg(all(target_vendor = "apple", not(miri)))] +#[cfg(target_vendor = "apple")] pub fn confstr( key: crate::ffi::c_int, size_hint: Option, diff --git a/library/std/src/sys/pal/unix/conf/tests.rs b/library/std/src/sys/pal/unix/conf/tests.rs index 63a1cc1e94a1d..a84086037ce0b 100644 --- a/library/std/src/sys/pal/unix/conf/tests.rs +++ b/library/std/src/sys/pal/unix/conf/tests.rs @@ -25,7 +25,7 @@ fn test_parse_glibc_version() { // Smoke check `confstr`, do it for several hint values, to ensure our resizing // logic is correct. #[test] -#[cfg(all(target_vendor = "apple", not(miri)))] +#[cfg(target_vendor = "apple")] fn test_confstr() { for key in [libc::_CS_DARWIN_USER_TEMP_DIR, libc::_CS_PATH] { let value_nohint = super::confstr(key, None).unwrap_or_else(|e| { diff --git a/library/std/src/sys/pal/unix/mod.rs b/library/std/src/sys/pal/unix/mod.rs index 0be150a0bfaa8..cfd7acdb08ab2 100644 --- a/library/std/src/sys/pal/unix/mod.rs +++ b/library/std/src/sys/pal/unix/mod.rs @@ -78,7 +78,7 @@ pub unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) { // fast path with a single syscall for systems with poll() #[cfg(not(any( - miri, + miri, // no `poll` target_os = "emscripten", target_os = "fuchsia", target_os = "vxworks", @@ -125,8 +125,6 @@ pub unsafe fn init(argc: isize, argv: *const *const u8, sigpipe: u8) { // fallback in case poll isn't available or limited by RLIMIT_NOFILE #[cfg(not(any( - // The standard fds are always available in Miri. 
- miri, target_os = "emscripten", target_os = "fuchsia", target_os = "vxworks", diff --git a/library/std/src/sys/paths/unix.rs b/library/std/src/sys/paths/unix.rs index 616456c6d4a47..e0b1aafda6eb0 100644 --- a/library/std/src/sys/paths/unix.rs +++ b/library/std/src/sys/paths/unix.rs @@ -393,7 +393,7 @@ pub fn current_exe() -> io::Result { if !path.is_absolute() { getcwd().map(|cwd| cwd.join(path)) } else { Ok(path) } } -#[cfg(all(target_vendor = "apple", not(miri)))] +#[cfg(target_vendor = "apple")] fn darwin_temp_dir() -> PathBuf { crate::sys::pal::conf::confstr(libc::_CS_DARWIN_USER_TEMP_DIR, Some(64)) .map(PathBuf::from) @@ -407,7 +407,7 @@ fn darwin_temp_dir() -> PathBuf { pub fn temp_dir() -> PathBuf { crate::env::var_os("TMPDIR").map(PathBuf::from).unwrap_or_else(|| { cfg_select! { - all(target_vendor = "apple", not(miri)) => darwin_temp_dir(), + target_vendor = "apple" => darwin_temp_dir(), target_os = "android" => PathBuf::from("/data/local/tmp"), _ => PathBuf::from("/tmp"), } diff --git a/library/std/tests/pipe_subprocess.rs b/library/std/tests/pipe_subprocess.rs index c51a4459e718b..dad1ea6c57377 100644 --- a/library/std/tests/pipe_subprocess.rs +++ b/library/std/tests/pipe_subprocess.rs @@ -1,4 +1,5 @@ fn main() { + // No `Command` on Miri and emscripten #[cfg(all(not(miri), any(unix, windows), not(target_os = "emscripten")))] { use std::io::{Read, pipe}; diff --git a/library/std/tests/windows_unix_socket.rs b/library/std/tests/windows_unix_socket.rs index 1d16ec9ed8414..dd71b1f135dce 100644 --- a/library/std/tests/windows_unix_socket.rs +++ b/library/std/tests/windows_unix_socket.rs @@ -1,5 +1,5 @@ #![cfg(windows)] -#![cfg(not(miri))] // no socket support in Miri +#![cfg(not(miri))] // no Windows socket support in Miri #![feature(windows_unix_domain_sockets)] // Now only test windows_unix_domain_sockets feature // in the future, will test both unix and windows uds diff --git a/src/bootstrap/src/core/build_steps/dist.rs 
b/src/bootstrap/src/core/build_steps/dist.rs index 222b982073280..360b8418d2194 100644 --- a/src/bootstrap/src/core/build_steps/dist.rs +++ b/src/bootstrap/src/core/build_steps/dist.rs @@ -27,7 +27,7 @@ use crate::core::build_steps::gcc::GccTargetPair; use crate::core::build_steps::tool::{ self, RustcPrivateCompilers, ToolTargetBuildMode, get_tool_target_compiler, }; -use crate::core::build_steps::vendor::{VENDOR_DIR, Vendor}; +use crate::core::build_steps::vendor::Vendor; use crate::core::build_steps::{compile, llvm}; use crate::core::builder::{Builder, Kind, RunConfig, ShouldRun, Step, StepMetadata}; use crate::core::config::{GccCiMode, TargetSelection}; @@ -1211,6 +1211,19 @@ impl Step for Src { &dst_src, ); + // Vendor all Cargo dependencies + let vendor = builder.ensure(Vendor { + sync_args: vec![], + versioned_dirs: true, + root_dir: dst_src.clone(), + output_dir: None, + only_library_workspace: true, + }); + + let library_cargo_config_dir = dst_src.join("library").join(".cargo"); + builder.create_dir(&library_cargo_config_dir); + builder.create(&library_cargo_config_dir.join("config.toml"), &vendor.config_library); + tarball.generate() } @@ -1376,12 +1389,17 @@ fn prepare_source_tarball<'a>( sync_args: pkgs_for_pgo_training.collect(), versioned_dirs: true, root_dir: plain_dst_src.into(), - output_dir: VENDOR_DIR.into(), + output_dir: None, + only_library_workspace: false, }); let cargo_config_dir = plain_dst_src.join(".cargo"); builder.create_dir(&cargo_config_dir); builder.create(&cargo_config_dir.join("config.toml"), &vendor.config); + + let library_cargo_config_dir = plain_dst_src.join("library").join(".cargo"); + builder.create_dir(&library_cargo_config_dir); + builder.create(&library_cargo_config_dir.join("config.toml"), &vendor.config_library); } // Delete extraneous directories diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index 897e03ce71813..2329bb93b4d3d 100644 --- 
a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -12,7 +12,7 @@ use clap_complete::{Generator, shells}; use crate::core::build_steps::dist::distdir; use crate::core::build_steps::test; use crate::core::build_steps::tool::{self, RustcPrivateCompilers, SourceType, Tool}; -use crate::core::build_steps::vendor::{Vendor, default_paths_to_vendor}; +use crate::core::build_steps::vendor::{VENDOR_DIR, Vendor, default_paths_to_vendor}; use crate::core::builder::{Builder, Kind, RunConfig, ShouldRun, Step, StepMetadata}; use crate::core::config::TargetSelection; use crate::core::config::flags::{get_completion, top_level_help}; @@ -275,9 +275,10 @@ impl Step for GenerateCopyright { sync_args: Vec::new(), versioned_dirs: true, root_dir: builder.src.clone(), - output_dir: cache_dir.clone(), + output_dir: Some(cache_dir.clone()), + only_library_workspace: false, }); - cache_dir + cache_dir.join(VENDOR_DIR) }; let _guard = builder.group("generate-copyright"); diff --git a/src/bootstrap/src/core/build_steps/vendor.rs b/src/bootstrap/src/core/build_steps/vendor.rs index 36a740c6f35fc..246598550553a 100644 --- a/src/bootstrap/src/core/build_steps/vendor.rs +++ b/src/bootstrap/src/core/build_steps/vendor.rs @@ -46,9 +46,15 @@ pub(crate) struct Vendor { /// Determines whether vendored dependencies use versioned directories. pub(crate) versioned_dirs: bool, /// The root directory of the source code. + /// + /// Vendored dependencies will be stored in /vendor and + /// /library/vendor unless overridden by `output_dir`. pub(crate) root_dir: PathBuf, - /// The target directory for storing vendored dependencies. - pub(crate) output_dir: PathBuf, + /// The root directory for storing vendored dependencies in /vendor + /// and /library/vendor. + pub(crate) output_dir: Option, + /// Only vendor crates necessary by the library workspace. 
+ pub(crate) only_library_workspace: bool, } impl Step for Vendor { @@ -68,7 +74,8 @@ impl Step for Vendor { sync_args: run.builder.config.cmd.vendor_sync_args(), versioned_dirs: run.builder.config.cmd.vendor_versioned_dirs(), root_dir: run.builder.src.clone(), - output_dir: run.builder.src.join(VENDOR_DIR), + output_dir: None, + only_library_workspace: false, }); } @@ -79,29 +86,53 @@ impl Step for Vendor { fn run(self, builder: &Builder<'_>) -> Self::Output { let _guard = builder.group(&format!("Vendoring sources to {:?}", self.root_dir)); - let mut cmd = command(&builder.initial_cargo); - cmd.arg("vendor"); + let config = if self.only_library_workspace { + String::new() + } else { + let mut cmd = command(&builder.initial_cargo); + cmd.arg("vendor"); - if self.versioned_dirs { - cmd.arg("--versioned-dirs"); - } + if self.versioned_dirs { + cmd.arg("--versioned-dirs"); + } - let to_vendor = default_paths_to_vendor(builder); - // These submodules must be present for `x vendor` to work. - for (_, submodules) in &to_vendor { - for submodule in submodules { - builder.build.require_submodule(submodule, None); + let to_vendor = default_paths_to_vendor(builder); + // These submodules must be present for `x vendor` to work. + for (_, submodules) in &to_vendor { + for submodule in submodules { + builder.build.require_submodule(submodule, None); + } } - } - // Sync these paths by default. - for (p, _) in &to_vendor { - cmd.arg("--sync").arg(p); - } + // Sync these paths by default. + for (p, _) in &to_vendor { + cmd.arg("--sync").arg(p); + } - // Also sync explicitly requested paths. - for sync_arg in self.sync_args { - cmd.arg("--sync").arg(sync_arg); + // Also sync explicitly requested paths. + for sync_arg in self.sync_args { + cmd.arg("--sync").arg(sync_arg); + } + + // Will read the libstd Cargo.toml + // which uses the unstable `public-dependency` feature. 
+ cmd.env("RUSTC_BOOTSTRAP", "1"); + cmd.env("RUSTC", &builder.initial_rustc); + + cmd.current_dir(&self.root_dir); + match &self.output_dir { + None => cmd.arg(VENDOR_DIR), + Some(output_dir) => cmd.arg(output_dir.join(VENDOR_DIR)), + }; + + cmd.run_capture_stdout(builder).stdout() + }; + + let mut cmd = command(&builder.initial_cargo); + cmd.arg("vendor"); + + if self.versioned_dirs { + cmd.arg("--versioned-dirs"); } // Will read the libstd Cargo.toml @@ -109,10 +140,15 @@ impl Step for Vendor { cmd.env("RUSTC_BOOTSTRAP", "1"); cmd.env("RUSTC", &builder.initial_rustc); - cmd.current_dir(self.root_dir).arg(&self.output_dir); + cmd.current_dir(self.root_dir.join("library")); + match &self.output_dir { + None => cmd.arg(VENDOR_DIR), + Some(output_dir) => cmd.arg(output_dir.join("library").join(VENDOR_DIR)), + }; + + let config_library = cmd.run_capture_stdout(builder).stdout(); - let config = cmd.run_capture_stdout(builder); - VendorOutput { config: config.stdout() } + VendorOutput { config, config_library } } } @@ -120,4 +156,5 @@ impl Step for Vendor { #[derive(Debug, Clone)] pub(crate) struct VendorOutput { pub(crate) config: String, + pub(crate) config_library: String, } diff --git a/tests/assembly-llvm/asm/amdgpu-types.rs b/tests/assembly-llvm/asm/amdgpu-types.rs new file mode 100644 index 0000000000000..fe8ae88ee83ef --- /dev/null +++ b/tests/assembly-llvm/asm/amdgpu-types.rs @@ -0,0 +1,232 @@ +//@ add-minicore +//@ revisions: gfx11 gfx12 +//@ assembly-output: emit-asm +//@ compile-flags: --target amdgcn-amd-amdhsa +//@[gfx11] compile-flags: -Ctarget-cpu=gfx1100 +//@[gfx12] compile-flags: -Ctarget-cpu=gfx1200 +//@ needs-llvm-components: amdgpu +//@ needs-rust-lld + +#![feature(abi_gpu_kernel, no_core, asm_experimental_arch, f16)] +#![crate_type = "rlib"] +#![no_core] +#![allow(asm_sub_register, non_camel_case_types, unused_assignments, unused_variables)] + +extern crate minicore; +use minicore::*; + +type ptr = *mut u8; + +macro_rules! 
check { + ($func:ident $ty:ident $class:ident $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " {}, {}"), out($class) y, in($class) x); + } + }; + + ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)* + $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " {}", $(", {", stringify!($arg_name), "}",)*), + out($ret_class) result, $($arg_name = in($arg_class) $arg_name,)*); + } + }; +} + +macro_rules! check_reg { + ($func:ident $ty:ident $reg:tt $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x); + } + }; + + ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)* + $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " ", $ret_reg, $(", ", $arg_reg,)*), lateout($ret_reg) result, + $(in($arg_reg) $arg_name,)*); + } + }; +} + +// CHECK-LABEL: sgpr_i16: +// CHECK: #ASMSTART +// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check!(sgpr_i16 i32 sgpr32 x: i16 sgpr32, y: i16 sgpr32, "s_pack_ll_b32_b16"); + +// gfx11-LABEL: vgpr_i16: +// gfx11: #ASMSTART +// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_i16 i16 vgpr32 "v_mov_b16"); + +// gfx12-LABEL: sgpr_f16: +// gfx12: #ASMSTART +// gfx12: s_add_f16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check!(sgpr_f16 f16 sgpr32 x: f16 sgpr32, y: f16 sgpr32, "s_add_f16"); + +// gfx11-LABEL: vgpr_f16: +// gfx11: #ASMSTART +// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_f16 f16 vgpr32 
"v_mov_b16"); + +// CHECK-LABEL: sgpr_i32: +// CHECK: #ASMSTART +// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i32 i32 sgpr32 "s_mov_b32"); + +// CHECK-LABEL: vgpr_i32: +// CHECK: #ASMSTART +// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}} +// CHECK: #ASMEND +check!(vgpr_i32 i32 vgpr32 "v_mov_b32"); + +// CHECK-LABEL: sgpr_f32: +// CHECK: #ASMSTART +// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f32 f32 sgpr32 "s_mov_b32"); + +// CHECK-LABEL: vgpr_f32: +// CHECK: #ASMSTART +// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}} +// CHECK: #ASMEND +check!(vgpr_f32 f32 vgpr32 "v_mov_b32"); + +// CHECK-LABEL: sgpr_i64: +// CHECK: #ASMSTART +// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(sgpr_i64 i64 sgpr64 "s_mov_b64"); + +// CHECK-LABEL: vgpr_i64: +// CHECK: #ASMSTART +// CHECK: v_lshlrev_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i64 i64 vgpr64 x: i32 vgpr32, y: i64 vgpr64, "v_lshlrev_b64"); + +// CHECK-LABEL: sgpr_f64: +// CHECK: #ASMSTART +// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(sgpr_f64 f64 sgpr64 "s_mov_b64"); + +// CHECK-LABEL: vgpr_f64: +// CHECK: #ASMSTART +// CHECK: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f64 f64 vgpr64 x: f64 vgpr64, y: f64 vgpr64, "v_add_f64"); + +// CHECK-LABEL: sgpr_i128: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i128 i128 sgpr128 x: ptr sgpr64, y: i32 sgpr32, "s_load_b128"); + +// CHECK-LABEL: vgpr_i128: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i128 i128 vgpr128 x: i32 vgpr32, y: ptr sgpr64, "global_load_b128"); + +// CHECK-LABEL: s0_i16: +// CHECK: #ASMSTART +// CHECK: 
s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check_reg!(s0_i16 i32 "s0" x: i16 "s1", y: i16 "s2", "s_pack_ll_b32_b16"); + +// gfx11-LABEL: v0_i16: +// gfx11: #ASMSTART +// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_i16 i16 "v0.l" "v_mov_b16"); + +// gfx12-LABEL: s0_f16: +// gfx12: #ASMSTART +// gfx12: s_add_f16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(s0_f16 f16 "s0" x: f16 "s1", y: f16 "s2", "s_add_f16"); + +// gfx11-LABEL: v0_f16: +// gfx11: #ASMSTART +// gfx11: v_mov_b16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_f16 f16 "v0.l" "v_mov_b16"); + +// CHECK-LABEL: s0_i32: +// CHECK: #ASMSTART +// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i32 i32 "s0" "s_mov_b32"); + +// CHECK-LABEL: v0_i32: +// CHECK: #ASMSTART +// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(v0_i32 i32 "v0" "v_mov_b32"); + +// CHECK-LABEL: s0_f32: +// CHECK: #ASMSTART +// CHECK: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f32 f32 "s0" "s_mov_b32"); + +// CHECK-LABEL: v0_f32: +// CHECK: #ASMSTART +// CHECK: v_mov_b32 v{{[0-9]+}}, v{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(v0_f32 f32 "v0" "v_mov_b32"); + +// CHECK-LABEL: s0_i64: +// CHECK: #ASMSTART +// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(s0_i64 i64 "s[0:1]" "s_mov_b64"); + +// CHECK-LABEL: v0_i64: +// CHECK: #ASMSTART +// CHECK: v_lshlrev_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i64 i64 "v[0:1]" x: i32 "v2", y: i64 "v[0:1]", "v_lshlrev_b64"); + +// CHECK-LABEL: s0_f64: +// CHECK: #ASMSTART +// CHECK: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(s0_f64 f64 "s[0:1]" "s_mov_b64"); + +// CHECK-LABEL: v0_f64: +// CHECK: 
#ASMSTART +// CHECK: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f64 f64 "v[0:1]" x: f64 "v[0:1]", y: f64 "v[2:3]", "v_add_f64"); + +// CHECK-LABEL: s0_i128: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i128 i128 "s[0:3]" x: ptr "s[0:1]", y: i32 "s0", "s_load_b128"); + +// CHECK-LABEL: v0_i128: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i128 i128 "v[0:3]" x: i32 "v0", y: ptr "s[0:1]", "global_load_b128"); diff --git a/tests/assembly-llvm/asm/amdgpu-vec-types.rs b/tests/assembly-llvm/asm/amdgpu-vec-types.rs new file mode 100644 index 0000000000000..64d643d91530e --- /dev/null +++ b/tests/assembly-llvm/asm/amdgpu-vec-types.rs @@ -0,0 +1,732 @@ +//@ add-minicore +//@ revisions: gfx11 gfx12 +//@ assembly-output: emit-asm +//@ compile-flags: --target amdgcn-amd-amdhsa +//@[gfx11] compile-flags: -Ctarget-cpu=gfx1100 +//@[gfx12] compile-flags: -Ctarget-cpu=gfx1200 +//@ needs-llvm-components: amdgpu +//@ needs-rust-lld +// ignore-tidy-linelength + +#![feature(abi_gpu_kernel, no_core, asm_experimental_arch, repr_simd, f16)] +#![crate_type = "rlib"] +#![no_core] +#![allow( + asm_sub_register, + improper_gpu_kernel_arg, + improper_ctypes_definitions, + non_camel_case_types, + unused_assignments, + unused_variables +)] + +extern crate minicore; +use minicore::*; + +type ptr = *mut u8; + +#[repr(simd)] +pub struct i16x2([i16; 2]); +#[repr(simd)] +pub struct f16x2([f16; 2]); + +#[repr(simd)] +pub struct i16x4([i16; 4]); +#[repr(simd)] +pub struct f16x4([f16; 4]); +#[repr(simd)] +pub struct i32x2([i32; 2]); +#[repr(simd)] +pub struct f32x2([f32; 2]); + +#[repr(simd)] +pub struct i32x3([i32; 3]); +#[repr(simd)] +pub struct f32x3([f32; 3]); + +#[repr(simd)] +pub struct i16x8([i16; 8]); +#[repr(simd)] +pub struct 
f16x8([f16; 8]); +#[repr(simd)] +pub struct i32x4([i32; 4]); +#[repr(simd)] +pub struct f32x4([f32; 4]); + +#[repr(simd)] +pub struct i32x5([i32; 5]); +#[repr(simd)] +pub struct f32x5([f32; 5]); + +#[repr(simd)] +pub struct i32x6([i32; 6]); +#[repr(simd)] +pub struct f32x6([f32; 6]); + +#[repr(simd)] +pub struct i32x7([i32; 7]); +#[repr(simd)] +pub struct f32x7([f32; 7]); + +#[repr(simd)] +pub struct i16x16([i16; 16]); +#[repr(simd)] +pub struct f16x16([f16; 16]); +#[repr(simd)] +pub struct i32x8([i32; 8]); +#[repr(simd)] +pub struct f32x8([f32; 8]); + +#[repr(simd)] +pub struct i32x10([i32; 10]); +#[repr(simd)] +pub struct f32x10([f32; 10]); + +#[repr(simd)] +pub struct i16x32([i16; 32]); +#[repr(simd)] +pub struct f16x32([f16; 32]); +#[repr(simd)] +pub struct i32x16([i32; 16]); +#[repr(simd)] +pub struct f32x16([f32; 16]); + +macro_rules! impl_copy { + ($($ty:ident)*) => { + $( + impl Copy for $ty {} + )* + }; +} + +impl_copy!( + i16x2 f16x2 i16x4 f16x4 i32x2 f32x2 i32x3 f32x3 i16x8 f16x8 i32x4 f32x4 + i32x5 f32x5 i32x6 f32x6 i32x7 f32x7 i16x16 f16x16 i32x8 f32x8 i32x10 f32x10 + i16x32 f16x32 i32x16 f32x16 +); + +macro_rules! check { + ($func:ident $ty:ident $class:ident $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " {}, {}"), out($class) y, in($class) x); + } + }; + + ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)* + $mov:literal) => { + check!($func $ret_ty $ret_class $($arg_name: $arg_ty $arg_class,)* $mov, ""); + }; + + ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)* + $mov:literal, $tail:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " {}", $(", {", stringify!($arg_name), "}",)* $tail), + out($ret_class) result, $($arg_name = in($arg_class) $arg_name,)*); + } + }; +} + +macro_rules! 
check_reg { + ($func:ident $ty:ident $reg:tt $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x); + } + }; + + ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)* + $mov:literal) => { + check_reg!($func $ret_ty $ret_reg $($arg_name: $arg_ty $arg_reg,)* $mov, ""); + }; + + ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)* + $mov:literal, $tail:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " ", $ret_reg, $(", ", $arg_reg,)* $tail), lateout($ret_reg) result, + $(in($arg_reg) $arg_name,)*); + } + }; +} + +// CHECK-LABEL: sgpr_i16x2: +// CHECK: #ASMSTART +// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check!(sgpr_i16x2 i16x2 sgpr32 x: i16 sgpr32, y: i16 sgpr32, "s_pack_ll_b32_b16"); + +// CHECK-LABEL: sgpr_f16x2: +// CHECK: #ASMSTART +// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check!(sgpr_f16x2 f16x2 sgpr32 x: i16 sgpr32, y: i16 sgpr32, "s_pack_ll_b32_b16"); + +// CHECK-LABEL: vgpr_i16x2: +// CHECK: #ASMSTART +// CHECK: v_pk_add_i16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// CHECK: #ASMEND +check!(vgpr_i16x2 i16x2 vgpr32 x: i16x2 vgpr32, y: i16x2 vgpr32, "v_pk_add_i16"); + +// CHECK-LABEL: vgpr_f16x2: +// CHECK: #ASMSTART +// CHECK: v_pk_add_f16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// CHECK: #ASMEND +check!(vgpr_f16x2 f16x2 vgpr32 x: f16x2 vgpr32, y: f16x2 vgpr32, "v_pk_add_f16"); + +// CHECK-LABEL: sgpr_i16x4: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i16x4 i16x4 sgpr64 x: ptr sgpr64, y: i32 sgpr32, "s_load_b64"); + +// CHECK-LABEL: sgpr_f16x4: +// CHECK: #ASMSTART 
+// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f16x4 f16x4 sgpr64 x: ptr sgpr64, y: i32 sgpr32, "s_load_b64"); + +// CHECK-LABEL: sgpr_i32x2: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i32x2 i32x2 sgpr64 x: ptr sgpr64, y: i32 sgpr32, "s_load_b64"); + +// CHECK-LABEL: sgpr_f32x2: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f32x2 f32x2 sgpr64 x: ptr sgpr64, y: i32 sgpr32, "s_load_b64"); + +// CHECK-LABEL: vgpr_i16x4: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i16x4 i16x4 vgpr64 x: i32 vgpr32, y: ptr sgpr64, "global_load_b64"); + +// CHECK-LABEL: vgpr_f16x4: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f16x4 f16x4 vgpr64 x: i32 vgpr32, y: ptr sgpr64, "global_load_b64"); + +// CHECK-LABEL: vgpr_i32x2: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i32x2 i32x2 vgpr64 x: i32 vgpr32, y: ptr sgpr64, "global_load_b64"); + +// CHECK-LABEL: vgpr_f32x2: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f32x2 f32x2 vgpr64 x: i32 vgpr32, y: ptr sgpr64, "global_load_b64"); + +// gfx12-LABEL: sgpr_i32x3: +// gfx12: #ASMSTART +// gfx12: s_load_b96 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check!(sgpr_i32x3 i32x3 sgpr96 x: ptr sgpr64, y: i32 sgpr32, "s_load_b96"); + +// gfx12-LABEL: sgpr_f32x3: +// gfx12: #ASMSTART +// gfx12: s_load_b96 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// gfx12: #ASMEND 
+#[cfg(gfx12)] +check!(sgpr_f32x3 f32x3 sgpr96 x: ptr sgpr64, y: i32 sgpr32, "s_load_b96"); + +// CHECK-LABEL: vgpr_i32x3: +// CHECK: #ASMSTART +// CHECK: global_load_b96 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i32x3 i32x3 vgpr96 x: i32 vgpr32, y: ptr sgpr64, "global_load_b96"); + +// CHECK-LABEL: vgpr_f32x3: +// CHECK: #ASMSTART +// CHECK: global_load_b96 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f32x3 f32x3 vgpr96 x: i32 vgpr32, y: ptr sgpr64, "global_load_b96"); + +// CHECK-LABEL: sgpr_i16x8: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i16x8 i16x8 sgpr128 x: ptr sgpr64, y: i32 sgpr32, "s_load_b128"); + +// CHECK-LABEL: sgpr_f16x8: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f16x8 f16x8 sgpr128 x: ptr sgpr64, y: i32 sgpr32, "s_load_b128"); + +// CHECK-LABEL: sgpr_i32x4: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i32x4 i32x4 sgpr128 x: ptr sgpr64, y: i32 sgpr32, "s_load_b128"); + +// CHECK-LABEL: sgpr_f32x4: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f32x4 f32x4 sgpr128 x: ptr sgpr64, y: i32 sgpr32, "s_load_b128"); + +// CHECK-LABEL: vgpr_i16x8: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i16x8 i16x8 vgpr128 x: i32 vgpr32, y: ptr sgpr64, "global_load_b128"); + +// CHECK-LABEL: vgpr_f16x8: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f16x8 f16x8 vgpr128 x: i32 vgpr32, y: ptr sgpr64, "global_load_b128"); + +// 
CHECK-LABEL: vgpr_i32x4: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_i32x4 i32x4 vgpr128 x: i32 vgpr32, y: ptr sgpr64, "global_load_b128"); + +// CHECK-LABEL: vgpr_f32x4: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check!(vgpr_f32x4 f32x4 vgpr128 x: i32 vgpr32, y: ptr sgpr64, "global_load_b128"); + +// CHECK-LABEL: vgpr_i32x5: +// CHECK: #ASMSTART +// CHECK: image_load v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_1D tfe +// CHECK: #ASMEND +check!(vgpr_i32x5 i32x5 vgpr160 x: i32 vgpr32, y: i32x8 sgpr256, "image_load", + " dmask:0xf dim:SQ_RSRC_IMG_1D tfe"); + +// CHECK-LABEL: vgpr_f32x5: +// CHECK: #ASMSTART +// CHECK: image_load v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_1D tfe +// CHECK: #ASMEND +check!(vgpr_f32x5 f32x5 vgpr160 x: i32 vgpr32, y: i32x8 sgpr256, "image_load", + " dmask:0xf dim:SQ_RSRC_IMG_1D tfe"); + +// gfx11-LABEL: vgpr_i32x6: +// gfx11: #ASMSTART +// gfx11: image_sample_d v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_i32x6 i32x4 vgpr128 x: i32x6 vgpr192, y: i32x8 sgpr256, z: i32x4 sgpr128, + "image_sample_d", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: vgpr_f32x6: +// gfx11: #ASMSTART +// gfx11: image_sample_d v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_f32x6 i32x4 vgpr128 x: f32x6 vgpr192, y: i32x8 sgpr256, z: i32x4 sgpr128, + "image_sample_d", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: vgpr_i32x7: +// gfx11: #ASMSTART +// gfx11: image_sample_d_cl v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 
s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_i32x7 i32x4 vgpr128 x: i32x7 vgpr224, y: i32x8 sgpr256, z: i32x4 sgpr128, + "image_sample_d_cl", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: vgpr_f32x7: +// gfx11: #ASMSTART +// gfx11: image_sample_d_cl v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_f32x7 i32x4 vgpr128 x: f32x7 vgpr224, y: i32x8 sgpr256, z: i32x4 sgpr128, + "image_sample_d_cl", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// CHECK-LABEL: sgpr_i16x16: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i16x16 i16x16 sgpr256 x: ptr sgpr64, y: i32 sgpr32, "s_load_b256"); + +// CHECK-LABEL: sgpr_f16x16: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f16x16 f16x16 sgpr256 x: ptr sgpr64, y: i32 sgpr32, "s_load_b256"); + +// CHECK-LABEL: sgpr_i32x8: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i32x8 i32x8 sgpr256 x: ptr sgpr64, y: i32 sgpr32, "s_load_b256"); + +// CHECK-LABEL: sgpr_f32x8: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f32x8 f32x8 sgpr256 x: ptr sgpr64, y: i32 sgpr32, "s_load_b256"); + +// gfx11-LABEL: vgpr_i16x16: +// gfx11: #ASMSTART +// gfx11: v_wmma_f32_16x16x16_bf16 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_i16x16 f32x8 vgpr256 x: i32x8 vgpr256, y: i16x16 vgpr256, z: f32x8 vgpr256, + "v_wmma_f32_16x16x16_bf16"); + +// gfx11-LABEL: vgpr_f16x16: +// gfx11: #ASMSTART +// gfx11: v_wmma_f32_16x16x16_f16 
v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_f16x16 f32x8 vgpr256 x: f16x16 vgpr256, y: f16x16 vgpr256, z: f32x8 vgpr256, + "v_wmma_f32_16x16x16_f16"); + +// gfx11-LABEL: vgpr_i32x8: +// gfx11: #ASMSTART +// gfx11: v_wmma_i32_16x16x16_iu8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check!(vgpr_i32x8 i32x8 vgpr256 x: i32x4 vgpr128, y: i32x4 vgpr128, z: i32x8 vgpr256, + "v_wmma_i32_16x16x16_iu8"); + +// gfx12-LABEL: vgpr_f32x8: +// gfx12: #ASMSTART +// gfx12: v_wmma_f32_16x16x16_fp8_fp8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check!(vgpr_f32x8 f32x8 vgpr256 x: f32x2 vgpr64, y: f32x2 vgpr64, z: f32x8 vgpr256, + "v_wmma_f32_16x16x16_fp8_fp8"); + +// gfx12-LABEL: vgpr_i32x10: +// gfx12: #ASMSTART +// gfx12: image_bvh8_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +// gfx12: #ASMEND +#[cfg(gfx12)] +check!(vgpr_i32x10 i32x10 vgpr320 "image_bvh8_intersect_ray", + ", [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]"); + +// gfx12-LABEL: vgpr_f32x10: +// gfx12: #ASMSTART +// gfx12: image_bvh8_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +// gfx12: #ASMEND +#[cfg(gfx12)] +check!(vgpr_f32x10 f32x10 vgpr320 "image_bvh8_intersect_ray", + ", [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]"); + +// CHECK-LABEL: sgpr_i16x32: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i16x32 i16x32 sgpr512 x: ptr sgpr64, y: i32 sgpr32, "s_load_b512"); + +// CHECK-LABEL: sgpr_f16x32: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f16x32 f16x32 sgpr512 x: ptr sgpr64, y: 
i32 sgpr32, "s_load_b512"); + +// CHECK-LABEL: sgpr_i32x16: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_i32x16 i32x16 sgpr512 x: ptr sgpr64, y: i32 sgpr32, "s_load_b512"); + +// CHECK-LABEL: sgpr_f32x16: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check!(sgpr_f32x16 f32x16 sgpr512 x: ptr sgpr64, y: i32 sgpr32, "s_load_b512"); + +// CHECK-LABEL: s0_i16x2: +// CHECK: #ASMSTART +// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check_reg!(s0_i16x2 i16x2 "s0" x: i16 "s1", y: i16 "s2", "s_pack_ll_b32_b16"); + +// CHECK-LABEL: s0_f16x2: +// CHECK: #ASMSTART +// CHECK: s_pack_ll_b32_b16 s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}}, s{{[a-z0-9.]+}} +// CHECK: #ASMEND +check_reg!(s0_f16x2 f16x2 "s0" x: i16 "s1", y: i16 "s2", "s_pack_ll_b32_b16"); + +// CHECK-LABEL: v0_i16x2: +// CHECK: #ASMSTART +// CHECK: v_pk_add_i16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// CHECK: #ASMEND +check_reg!(v0_i16x2 i16x2 "v0" x: i16x2 "v1", y: i16x2 "v2", "v_pk_add_i16"); + +// CHECK-LABEL: v0_f16x2: +// CHECK: #ASMSTART +// CHECK: v_pk_add_f16 v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}}, v{{[a-z0-9.]+}} +// CHECK: #ASMEND +check_reg!(v0_f16x2 f16x2 "v0" x: f16x2 "v1", y: f16x2 "v2", "v_pk_add_f16"); + +// CHECK-LABEL: s0_i16x4: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i16x4 i16x4 "s[0:1]" x: ptr "s[2:3]", y: i32 "s4", "s_load_b64"); + +// CHECK-LABEL: s0_f16x4: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f16x4 f16x4 "s[0:1]" x: ptr "s[2:3]", y: i32 "s4", "s_load_b64"); + +// CHECK-LABEL: s0_i32x2: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// 
CHECK: #ASMEND +check_reg!(s0_i32x2 i32x2 "s[0:1]" x: ptr "s[2:3]", y: i32 "s4", "s_load_b64"); + +// CHECK-LABEL: s0_f32x2: +// CHECK: #ASMSTART +// CHECK: s_load_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f32x2 f32x2 "s[0:1]" x: ptr "s[2:3]", y: i32 "s4", "s_load_b64"); + +// CHECK-LABEL: v0_i16x4: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i16x4 i16x4 "v[0:1]" x: i32 "v2", y: ptr "s[0:1]", "global_load_b64"); + +// CHECK-LABEL: v0_f16x4: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f16x4 f16x4 "v[0:1]" x: i32 "v2", y: ptr "s[0:1]", "global_load_b64"); + +// CHECK-LABEL: v0_i32x2: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i32x2 i32x2 "v[0:1]" x: i32 "v2", y: ptr "s[0:1]", "global_load_b64"); + +// CHECK-LABEL: v0_f32x2: +// CHECK: #ASMSTART +// CHECK: global_load_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f32x2 f32x2 "v[0:1]" x: i32 "v2", y: ptr "s[0:1]", "global_load_b64"); + +// gfx12-LABEL: s0_i32x3: +// gfx12: #ASMSTART +// gfx12: s_load_b96 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(s0_i32x3 i32x3 "s[0:2]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b96"); + +// gfx12-LABEL: s0_f32x3: +// gfx12: #ASMSTART +// gfx12: s_load_b96 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(s0_f32x3 f32x3 "s[0:2]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b96"); + +// CHECK-LABEL: v0_i32x3: +// CHECK: #ASMSTART +// CHECK: global_load_b96 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i32x3 i32x3 "v[0:2]" x: i32 "v3", y: 
ptr "s[0:1]", "global_load_b96"); + +// CHECK-LABEL: v0_f32x3: +// CHECK: #ASMSTART +// CHECK: global_load_b96 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f32x3 f32x3 "v[0:2]" x: i32 "v3", y: ptr "s[0:1]", "global_load_b96"); + +// CHECK-LABEL: s0_i16x8: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i16x8 i16x8 "s[0:3]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b128"); + +// CHECK-LABEL: s0_f16x8: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f16x8 f16x8 "s[0:3]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b128"); + +// CHECK-LABEL: s0_i32x4: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i32x4 i32x4 "s[0:3]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b128"); + +// CHECK-LABEL: s0_f32x4: +// CHECK: #ASMSTART +// CHECK: s_load_b128 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f32x4 f32x4 "s[0:3]" x: ptr "s[4:5]", y: i32 "s6", "s_load_b128"); + +// CHECK-LABEL: v0_i16x8: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i16x8 i16x8 "v[0:3]" x: i32 "v4", y: ptr "s[0:1]", "global_load_b128"); + +// CHECK-LABEL: v0_f16x8: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f16x8 f16x8 "v[0:3]" x: i32 "v4", y: ptr "s[0:1]", "global_load_b128"); + +// CHECK-LABEL: v0_i32x4: +// CHECK: #ASMSTART +// CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_i32x4 i32x4 "v[0:3]" x: i32 "v4", y: ptr "s[0:1]", "global_load_b128"); + +// CHECK-LABEL: v0_f32x4: +// CHECK: #ASMSTART +// 
CHECK: global_load_b128 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} +// CHECK: #ASMEND +check_reg!(v0_f32x4 f32x4 "v[0:3]" x: i32 "v4", y: ptr "s[0:1]", "global_load_b128"); + +// CHECK-LABEL: v0_i32x5: +// CHECK: #ASMSTART +// CHECK: image_load v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_1D tfe +// CHECK: #ASMEND +check_reg!(v0_i32x5 i32x5 "v[0:4]" x: i32 "v5", y: i32x8 "s[0:7]", "image_load", + " dmask:0xf dim:SQ_RSRC_IMG_1D tfe"); + +// CHECK-LABEL: v0_f32x5: +// CHECK: #ASMSTART +// CHECK: image_load v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_1D tfe +// CHECK: #ASMEND +check_reg!(v0_f32x5 f32x5 "v[0:4]" x: i32 "v5", y: i32x8 "s[0:7]", "image_load", + " dmask:0xf dim:SQ_RSRC_IMG_1D tfe"); + +// gfx11-LABEL: v0_i32x6: +// gfx11: #ASMSTART +// gfx11: image_sample_d v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_i32x6 i32x4 "v[0:3]" x: i32x6 "v[4:9]", y: i32x8 "s[0:7]", z: i32x4 "s[8:11]", + "image_sample_d", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: v0_f32x6: +// gfx11: #ASMSTART +// gfx11: image_sample_d v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_f32x6 i32x4 "v[0:3]" x: f32x6 "v[4:9]", y: i32x8 "s[0:7]", z: i32x4 "s[8:11]", + "image_sample_d", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: v0_i32x7: +// gfx11: #ASMSTART +// gfx11: image_sample_d_cl v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_i32x7 i32x4 "v[0:3]" x: i32x7 "v[4:10]", y: i32x8 "s[0:7]", z: i32x4 "s[8:11]", + "image_sample_d_cl", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// gfx11-LABEL: v0_f32x7: +// gfx11: 
#ASMSTART +// gfx11: image_sample_d_cl v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} dmask:0xf dim:SQ_RSRC_IMG_2D +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_f32x7 i32x4 "v[0:3]" x: f32x7 "v[4:10]", y: i32x8 "s[0:7]", z: i32x4 "s[8:11]", + "image_sample_d_cl", " dmask:0xf dim:SQ_RSRC_IMG_2D"); + +// CHECK-LABEL: s0_i16x16: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i16x16 i16x16 "s[0:7]" x: ptr "s[8:9]", y: i32 "s10", "s_load_b256"); + +// CHECK-LABEL: s0_f16x16: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f16x16 f16x16 "s[0:7]" x: ptr "s[8:9]", y: i32 "s10", "s_load_b256"); + +// CHECK-LABEL: s0_i32x8: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i32x8 i32x8 "s[0:7]" x: ptr "s[8:9]", y: i32 "s10", "s_load_b256"); + +// CHECK-LABEL: s0_f32x8: +// CHECK: #ASMSTART +// CHECK: s_load_b256 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f32x8 f32x8 "s[0:7]" x: ptr "s[8:9]", y: i32 "s10", "s_load_b256"); + +// gfx11-LABEL: v0_i16x16: +// gfx11: #ASMSTART +// gfx11: v_wmma_f32_16x16x16_bf16 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_i16x16 f32x8 "v[0:7]" x: i32x8 "v[8:15]", y: i16x16 "v[16:23]", z: f32x8 "v[24:31]", + "v_wmma_f32_16x16x16_bf16"); + +// gfx11-LABEL: v0_f16x16: +// gfx11: #ASMSTART +// gfx11: v_wmma_f32_16x16x16_f16 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_f16x16 f32x8 "v[0:7]" x: f16x16 "v[8:15]", y: f16x16 "v[16:23]", z: f32x8 "v[24:31]", + "v_wmma_f32_16x16x16_f16"); + +// 
gfx11-LABEL: v0_i32x8: +// gfx11: #ASMSTART +// gfx11: v_wmma_i32_16x16x16_iu8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx11: #ASMEND +#[cfg(gfx11)] +check_reg!(v0_i32x8 i32x8 "v[0:7]" x: i32x4 "v[8:11]", y: i32x4 "v[16:19]", z: i32x8 "v[24:31]", + "v_wmma_i32_16x16x16_iu8"); + +// gfx12-LABEL: v0_f32x8: +// gfx12: #ASMSTART +// gfx12: v_wmma_f32_16x16x16_fp8_fp8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(v0_f32x8 f32x8 "v[0:7]" x: f32x2 "v[8:9]", y: f32x2 "v[16:17]", z: f32x8 "v[24:31]", + "v_wmma_f32_16x16x16_fp8_fp8"); + +// gfx12-LABEL: v0_i32x10: +// gfx12: #ASMSTART +// gfx12: image_bvh8_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(v0_i32x10 i32x10 "v[0:9]" "image_bvh8_intersect_ray", + ", [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]"); + +// gfx12-LABEL: v0_f32x10: +// gfx12: #ASMSTART +// gfx12: image_bvh8_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3] +// gfx12: #ASMEND +#[cfg(gfx12)] +check_reg!(v0_f32x10 f32x10 "v[0:9]" "image_bvh8_intersect_ray", + ", [v[0:1], v[2:3], v[16:18], v[19:21], v9], s[0:3]"); + +// CHECK-LABEL: s0_i16x32: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i16x32 i16x32 "s[0:15]" x: ptr "s[16:17]", y: i32 "s18", "s_load_b512"); + +// CHECK-LABEL: s0_f16x32: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f16x32 f16x32 "s[0:15]" x: ptr "s[16:17]", y: i32 "s18", "s_load_b512"); + +// CHECK-LABEL: s0_i32x16: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_i32x16 i32x16 "s[0:15]" x: ptr 
"s[16:17]", y: i32 "s18", "s_load_b512"); + +// CHECK-LABEL: s0_f32x16: +// CHECK: #ASMSTART +// CHECK: s_load_b512 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} +// CHECK: #ASMEND +check_reg!(s0_f32x16 f32x16 "s[0:15]" x: ptr "s[16:17]", y: i32 "s18", "s_load_b512"); diff --git a/tests/assembly-llvm/asm/amdgpu-vec-types2.rs b/tests/assembly-llvm/asm/amdgpu-vec-types2.rs new file mode 100644 index 0000000000000..1257abe932b0b --- /dev/null +++ b/tests/assembly-llvm/asm/amdgpu-vec-types2.rs @@ -0,0 +1,264 @@ +//@ add-minicore +//@ revisions: gfx942 gfx950 gfx1030 +//@ assembly-output: emit-asm +//@ compile-flags: --target amdgcn-amd-amdhsa +//@[gfx942] compile-flags: -Ctarget-cpu=gfx942 +//@[gfx950] compile-flags: -Ctarget-cpu=gfx950 +//@[gfx1030] compile-flags: -Ctarget-cpu=gfx1030 +//@ needs-llvm-components: amdgpu +//@ needs-rust-lld +// ignore-tidy-linelength + +// Tests for different gfx versions that do not fit in gfx11 and 12 + +#![feature(abi_gpu_kernel, no_core, asm_experimental_arch, repr_simd, f16)] +#![crate_type = "rlib"] +#![no_core] +#![allow( + asm_sub_register, + improper_gpu_kernel_arg, + improper_ctypes_definitions, + non_camel_case_types, + unused_assignments, + unused_variables +)] + +extern crate minicore; +use minicore::*; + +type ptr = *mut u8; + +#[repr(simd)] +pub struct i32x4([i32; 4]); +#[repr(simd)] +pub struct f32x4([f32; 4]); + +#[repr(simd)] +pub struct i32x9([i32; 9]); +#[repr(simd)] +pub struct f32x9([f32; 9]); + +#[repr(simd)] +pub struct i32x11([i32; 11]); +#[repr(simd)] +pub struct f32x11([f32; 11]); + +#[repr(simd)] +pub struct i32x12([i32; 12]); +#[repr(simd)] +pub struct f32x12([f32; 12]); + +#[repr(simd)] +pub struct i16x32([i16; 32]); +#[repr(simd)] +pub struct f16x32([f16; 32]); +#[repr(simd)] +pub struct i32x16([i32; 16]); +#[repr(simd)] +pub struct f32x16([f32; 16]); + +#[repr(simd)] +pub struct f32x32([f32; 32]); + +macro_rules! 
impl_copy { + ($($ty:ident)*) => { + $( + impl Copy for $ty {} + )* + }; +} + +impl_copy!( + i32x4 f32x4 i32x9 f32x9 i32x11 f32x11 i32x12 f32x12 i16x32 f16x32 + i32x16 f32x16 f32x32 +); + +macro_rules! check { + ($func:ident $ty:ident $class:ident $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " {}, {}"), out($class) y, in($class) x); + } + }; + + ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)* + $mov:literal) => { + check!($func $ret_ty $ret_class $($arg_name: $arg_ty $arg_class,)* $mov, ""); + }; + + ($func:ident $ret_ty:ident $ret_class:ident $($arg_name:ident: $arg_ty:ident $arg_class:ident,)* + $mov:literal, $tail:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " {}", $(", {", stringify!($arg_name), "}",)* $tail), + out($ret_class) result, $($arg_name = in($arg_class) $arg_name,)*); + } + }; +} + +macro_rules! 
check_reg { + ($func:ident $ty:ident $reg:tt $mov:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func(x: $ty) { + let y: $ty; + asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x); + } + }; + + ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)* + $mov:literal) => { + check_reg!($func $ret_ty $ret_reg $($arg_name: $arg_ty $arg_reg,)* $mov, ""); + }; + + ($func:ident $ret_ty:ident $ret_reg:tt $($arg_name:ident: $arg_ty:ident $arg_reg:tt,)* + $mov:literal, $tail:literal) => { + #[no_mangle] + pub unsafe extern "gpu-kernel" fn $func($($arg_name: $arg_ty,)*) { + let result: $ret_ty; + asm!(concat!($mov, " ", $ret_reg, $(", ", $arg_reg,)* $tail), lateout($ret_reg) result, + $(in($arg_reg) $arg_name,)*); + } + }; +} + +// gfx1030-LABEL: vgpr_i32x9: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} a16 +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_i32x9 i32x4 vgpr128 x: i32x9 vgpr288, y: i32x4 sgpr128, "image_bvh64_intersect_ray", + " a16"); + +// gfx1030-LABEL: vgpr_f32x9: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} a16 +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_f32x9 i32x4 vgpr128 x: f32x9 vgpr288, y: i32x4 sgpr128, "image_bvh64_intersect_ray", + " a16"); + +// gfx1030-LABEL: vgpr_i32x11: +// gfx1030: #ASMSTART +// gfx1030: image_bvh_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_i32x11 i32x4 vgpr128 x: i32x11 vgpr352, y: i32x4 sgpr128, "image_bvh_intersect_ray"); + +// gfx1030-LABEL: vgpr_f32x11: +// gfx1030: #ASMSTART +// gfx1030: image_bvh_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_f32x11 i32x4 vgpr128 x: f32x11 vgpr352, y: i32x4 
sgpr128, "image_bvh_intersect_ray"); + +// gfx1030-LABEL: vgpr_i32x12: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_i32x12 i32x4 vgpr128 x: i32x12 vgpr384, y: i32x4 sgpr128, "image_bvh64_intersect_ray"); + +// gfx1030-LABEL: vgpr_f32x12: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check!(vgpr_f32x12 i32x4 vgpr128 x: f32x12 vgpr384, y: i32x4 sgpr128, "image_bvh64_intersect_ray"); + +// gfx950-LABEL: vgpr_i32x16: +// gfx950: #ASMSTART +// gfx950: v_mfma_i32_32x32x32_i8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx950: #ASMEND +#[cfg(gfx950)] +check!(vgpr_i32x16 i32x16 vgpr512 x: i32x4 vgpr128, y: i32x4 vgpr128, z: i16x32 vgpr512, + "v_mfma_i32_32x32x32_i8"); + +// gfx950-LABEL: vgpr_f32x16: +// gfx950: #ASMSTART +// gfx950: v_mfma_f32_32x32x16_f16 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx950: #ASMEND +#[cfg(gfx950)] +check!(vgpr_f32x16 f32x16 vgpr512 x: f32x4 vgpr128, y: f32x4 vgpr128, z: f16x32 vgpr512, + "v_mfma_f32_32x32x16_f16"); + +// gfx942-LABEL: vgpr_f32x32: +// gfx942: #ASMSTART +// gfx942: v_mfma_f32_32x32x1_2b_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx942: #ASMEND +#[cfg(gfx942)] +check!(vgpr_f32x32 f32x32 vgpr1024 x: f32 vgpr32, y: f32 vgpr32, "v_mfma_f32_32x32x1_2b_f32", + ", v[0:31]"); + +// gfx1030-LABEL: v0_i32x9: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} a16 +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_i32x9 i32x4 "v[0:3]" x: i32x9 "v[8:16]", y: i32x4 "s[0:3]", + "image_bvh64_intersect_ray", " a16"); + +// gfx1030-LABEL: v0_f32x9: +// 
gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} a16 +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_f32x9 i32x4 "v[0:3]" x: f32x9 "v[8:16]", y: i32x4 "s[0:3]", + "image_bvh64_intersect_ray", " a16"); + +// gfx1030-LABEL: v0_i32x11: +// gfx1030: #ASMSTART +// gfx1030: image_bvh_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_i32x11 i32x4 "v[0:3]" x: i32x11 "v[8:18]", y: i32x4 "s[0:3]", + "image_bvh_intersect_ray"); + +// gfx1030-LABEL: v0_f32x11: +// gfx1030: #ASMSTART +// gfx1030: image_bvh_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_f32x11 i32x4 "v[0:3]" x: f32x11 "v[8:18]", y: i32x4 "s[0:3]", + "image_bvh_intersect_ray"); + +// gfx1030-LABEL: v0_i32x12: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_i32x12 i32x4 "v[0:3]" x: i32x12 "v[8:19]", y: i32x4 "s[0:3]", + "image_bvh64_intersect_ray"); + +// gfx1030-LABEL: v0_f32x12: +// gfx1030: #ASMSTART +// gfx1030: image_bvh64_intersect_ray v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} +// gfx1030: #ASMEND +#[cfg(gfx1030)] +check_reg!(v0_f32x12 i32x4 "v[0:3]" x: f32x12 "v[8:19]", y: i32x4 "s[0:3]", + "image_bvh64_intersect_ray"); + +// gfx950-LABEL: v0_i32x16: +// gfx950: #ASMSTART +// gfx950: v_mfma_i32_32x32x32_i8 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx950: #ASMEND +#[cfg(gfx950)] +check_reg!(v0_i32x16 i32x16 "v[0:15]" x: i32x4 "v[16:19]", y: i32x4 "v[20:23]", z: i16x32 "v[0:15]", + "v_mfma_i32_32x32x32_i8"); + +// gfx950-LABEL: v0_f32x16: +// gfx950: #ASMSTART +// gfx950: v_mfma_f32_32x32x16_f16 v{{\[[0-9]+:[0-9]+\]}}, 
v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx950: #ASMEND +#[cfg(gfx950)] +check_reg!(v0_f32x16 f32x16 "v[0:15]" x: f32x4 "v[16:19]", y: f32x4 "v[20:23]", z: f16x32 "v[0:15]", + "v_mfma_f32_32x32x16_f16"); + +// gfx942-LABEL: v0_f32x32: +// gfx942: #ASMSTART +// gfx942: v_mfma_f32_32x32x1_2b_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}} +// gfx942: #ASMEND +#[cfg(gfx942)] +check_reg!(v0_f32x32 f32x32 "v[0:31]" x: f32 "v32", y: f32 "v33", "v_mfma_f32_32x32x1_2b_f32", + ", v[0:31]"); diff --git a/tests/ui/eii/static/auxiliary/decl_with_default.rs b/tests/ui/eii/static/auxiliary/decl_with_default.rs new file mode 100644 index 0000000000000..ba87b35deb457 --- /dev/null +++ b/tests/ui/eii/static/auxiliary/decl_with_default.rs @@ -0,0 +1,5 @@ +#![crate_type = "rlib"] +#![feature(extern_item_impls)] + +#[eii(eii1)] +pub static DECL1: u64 = 5; diff --git a/tests/ui/eii/static/auxiliary/impl_default_override.rs b/tests/ui/eii/static/auxiliary/impl_default_override.rs new file mode 100644 index 0000000000000..d70a137fbc459 --- /dev/null +++ b/tests/ui/eii/static/auxiliary/impl_default_override.rs @@ -0,0 +1,8 @@ +//@ aux-build: decl_with_default.rs +#![crate_type = "rlib"] +#![feature(extern_item_impls)] + +extern crate decl_with_default as decl; + +#[decl::eii1] +pub static EII1_IMPL: u64 = 10; diff --git a/tests/ui/eii/static/default.rs b/tests/ui/eii/static/default.rs new file mode 100644 index 0000000000000..02307432e2f81 --- /dev/null +++ b/tests/ui/eii/static/default.rs @@ -0,0 +1,15 @@ +//@ run-pass +//@ check-run-results +//@ ignore-backends: gcc +// FIXME: linking on windows (specifically mingw) not yet supported, see tracking issue #125418 +//@ ignore-windows +// Tests static EIIs with default implementations. 
+ +#![feature(extern_item_impls)] + +#[eii(eii1)] +pub static DECL1: u64 = 5; + +fn main() { + println!("{DECL1}"); +} diff --git a/tests/ui/eii/static/default.run.stdout b/tests/ui/eii/static/default.run.stdout new file mode 100644 index 0000000000000..7ed6ff82de6bc --- /dev/null +++ b/tests/ui/eii/static/default.run.stdout @@ -0,0 +1 @@ +5 diff --git a/tests/ui/eii/static/default_cross_crate.rs b/tests/ui/eii/static/default_cross_crate.rs new file mode 100644 index 0000000000000..4d6df4b7ee717 --- /dev/null +++ b/tests/ui/eii/static/default_cross_crate.rs @@ -0,0 +1,13 @@ +//@ aux-build: decl_with_default.rs +//@ run-pass +//@ check-run-results +//@ ignore-backends: gcc +// FIXME: linking on windows (specifically mingw) not yet supported, see tracking issue #125418 +//@ ignore-windows +// Tests that a static EII default can be used from another crate. + +extern crate decl_with_default; + +fn main() { + println!("{}", decl_with_default::DECL1); +} diff --git a/tests/ui/eii/static/default_cross_crate.run.stdout b/tests/ui/eii/static/default_cross_crate.run.stdout new file mode 100644 index 0000000000000..7ed6ff82de6bc --- /dev/null +++ b/tests/ui/eii/static/default_cross_crate.run.stdout @@ -0,0 +1 @@ +5 diff --git a/tests/ui/eii/static/default_cross_crate_explicit.rs b/tests/ui/eii/static/default_cross_crate_explicit.rs new file mode 100644 index 0000000000000..c3139f45d19c5 --- /dev/null +++ b/tests/ui/eii/static/default_cross_crate_explicit.rs @@ -0,0 +1,15 @@ +//@ aux-build: decl_with_default.rs +//@ aux-build: impl_default_override.rs +//@ run-pass +//@ check-run-results +//@ ignore-backends: gcc +// FIXME: linking on windows (specifically mingw) not yet supported, see tracking issue #125418 +//@ ignore-windows +// Tests that an explicit static EII implementation overrides a cross-crate default. 
+ +extern crate decl_with_default; +extern crate impl_default_override; + +fn main() { + println!("{}", decl_with_default::DECL1); +} diff --git a/tests/ui/eii/static/default_cross_crate_explicit.run.stdout b/tests/ui/eii/static/default_cross_crate_explicit.run.stdout new file mode 100644 index 0000000000000..f599e28b8ab0d --- /dev/null +++ b/tests/ui/eii/static/default_cross_crate_explicit.run.stdout @@ -0,0 +1 @@ +10 diff --git a/tests/ui/eii/static/default_explicit.rs b/tests/ui/eii/static/default_explicit.rs new file mode 100644 index 0000000000000..b18da35c3debe --- /dev/null +++ b/tests/ui/eii/static/default_explicit.rs @@ -0,0 +1,19 @@ +//@ run-pass +//@ check-run-results +//@ ignore-backends: gcc +// FIXME: linking on windows (specifically mingw) not yet supported, see tracking issue #125418 +//@ ignore-windows +// Tests that an explicit static EII implementation overrides a local default. + +#![feature(extern_item_impls)] +#![allow(dead_code)] + +#[eii(eii1)] +pub static DECL1: u64 = 5; + +#[eii1] +pub static EII1_IMPL: u64 = 10; + +fn main() { + println!("{DECL1}"); +} diff --git a/tests/ui/eii/static/default_explicit.run.stdout b/tests/ui/eii/static/default_explicit.run.stdout new file mode 100644 index 0000000000000..f599e28b8ab0d --- /dev/null +++ b/tests/ui/eii/static/default_explicit.run.stdout @@ -0,0 +1 @@ +10