diff --git a/Content/ReleaseBriefings/4_6/Section_BackendUpdates.tex b/Content/ReleaseBriefings/4_6/Section_BackendUpdates.tex new file mode 100644 index 00000000..52051ad2 --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_BackendUpdates.tex @@ -0,0 +1,88 @@ +%========================================================================== + +\begin{frame}[fragile] + + {\Huge Backend Updates} + + \vspace{10pt} + +\end{frame} + + +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! + +\begin{frame}[fragile]{CUDA, SYCL and Serial} + \begin{itemize} + \item SYCL: Improved sorting performance for non-contiguous views + \item Serial: Reduce fences overhead when using \texttt{Kokkos\_ENABLE\_ATOMICS\_BYPASS} + \item CUDA: Improved performance for \texttt{Kokkos::parallel\_reduce} on H100 and newer by removing limitations on the runtime thread configuration + \end{itemize} +\end{frame} + +%========================================================================== +\begin{frame}[fragile]{Performance of \texttt{parallel\_reduce}} + + \begin{center} + \begin{minipage}{.45\textwidth} + \begin{tikzpicture} + \begin{axis}[ + title={Computationally \textbf{cheap} kernel}, + ymin=0, + ymax=1.1, + xmin=-100000, + xmax=1200000, + ybar, + xtick={100000,1000000}, + width=0.9\textwidth, + legend style={at={(0.3,0.75)},anchor=west}, + xlabel=Num Elements, + ylabel=Runtime relative to 4.5] + \addplot coordinates {(100000,1.033333333) (1000000,0.876923077)}; + \addplot coordinates {(100000,1.0) (1000000,0.6559139)}; + \legend{V100,H100} + \end{axis} +\end{tikzpicture} +\end{minipage} +\begin{minipage}{.45\textwidth} + \begin{tikzpicture} + \begin{axis}[ + title={Computationally \textbf{expensive} kernel}, + ymin=0, + ymax=1.1, + xmin=-100000, + xmax=1200000, + ybar, + xtick={100000,1000000}, + width=0.9\textwidth, + legend style={at={(0.3,0.75)},anchor=west}, + xlabel=Num Elements, + % ylabel=Speedup 
relative to 4.5, + ] + \addplot coordinates {(100000,0.7956778) (1000000,0.785453609)}; + \addplot coordinates {(100000,0.7149638336) (1000000,0.6977690684)}; + \legend{V100,H100} + \end{axis} +\end{tikzpicture} +\end{minipage} + \end{center} + +\end{frame} +%========================================================================== + +\begin{frame}[fragile]{HIP} + \begin{itemize} + \item Change block size deduction to prefer smaller blocks/teams if possible + \item Allocate memory with stream ordered semantics (\emph{i.e.}\ use \texttt{hipMallocAsync}) + \item Fix a segfault when a virtual function called inside a kernel requires too many registers + \end{itemize} +\end{frame} + +%========================================================================== + + +%========================================================================== + diff --git a/Content/ReleaseBriefings/4_6/Section_BreakingChanges.tex b/Content/ReleaseBriefings/4_6/Section_BreakingChanges.tex new file mode 100644 index 00000000..21578a24 --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_BreakingChanges.tex @@ -0,0 +1,90 @@ +%========================================================================== + +\begin{frame}[fragile] + + {\Huge Deprecations and other breaking changes} + + \vspace{10pt} + +\end{frame} + + +\begin{frame}[fragile]{Dropping support for Intel C++ Compiler Classic} + \begin{itemize} + \item Intel has deprecated Intel Classic in 2022, and removed it from oneAPI 2024 + \item In order to focus on newer compilers, and reduce maintenance burden, we have \textbf{removed} support for Intel Classic (oneAPI Intel/icpx still supported of course!) 
+ \end{itemize} +\end{frame} + + +\begin{frame}[fragile]{DualView changes} + \textbf{Deprecate} direct access to \texttt{d\_view} and \texttt{h\_view} + \begin{itemize} + \item Modifying the allocations in d\_view and h\_view directly is dangerous, especially if \texttt{modify} and \texttt{sync} are skipped + \item Use \texttt{view\_host()} and \texttt{view\_device()} instead + \item These two functions return by value with deprecated code enabled and by const reference otherwise. This might have performance implications if used extensively, e.g., in loop bounds. + \end{itemize} +\end{frame} + + +\begin{frame}[fragile]{Experimental SIMD changes} + \begin{itemize} + \item \texttt{native\_simd}, \texttt{native\_simd\_mask} \textbf{deprecated} to align with the C++26 standard + \item \textbf{Removed} Obtaining a reference from SIMD \texttt{operator[]} to align with the C++26 Standard + \item \textbf{Changed} the return type of SIMD \texttt{operator==} and \texttt{operator!=} to return SIMD masks instead of \texttt{bool} + \begin{itemize} + \item If you want old behavior, use \texttt{all\_of(a == b)} + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Additional Deprecations and Removals} + \begin{itemize} + \item Already discussed deprecating the Makefile + \item StaticCrsGraph is \textbf{moved} to Kokkos Kernels and \textbf{deprecated} in Core + \begin{itemize} + \item See \url{https://github.com/kokkos/kokkos-kernels/pull/2419} + \item Symbol is in Kernels under \texttt{KokkosSparse::StaticCrsGraph} + \end{itemize} + \end{itemize} +\end{frame} +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! 
+ +%\begin{frame}[fragile]{Example list} +% \begin{itemize} +% \item Item 1 +% \item Item 2 with some \texttt{code} +% \begin{itemize} +% \item Sub-item 2.1 +% \item Sub-item 2.2 +% \end{itemize} +% \end{itemize} +%\end{frame} + +%\begin{frame}[fragile]{Example code} +% \begin{code}[keywords={std}] +% #include +% +% int main() { +% std::cout << "hello world\n"; +% } +% \end{code} +%\end{frame} + +%\begin{frame}[fragile]{Example table} +% \begin{center} +% \begin{tabular}{l|l} +% a & b \\\hline +% c & d +% \end{tabular} +% \end{center} +%\end{frame} + +%========================================================================== + + +%========================================================================== diff --git a/Content/ReleaseBriefings/4_6/Section_BugFixes.tex b/Content/ReleaseBriefings/4_6/Section_BugFixes.tex new file mode 100644 index 00000000..0a6676ae --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_BugFixes.tex @@ -0,0 +1,105 @@ +%========================================================================== + + +% Fix performance bug affecting atomic_fetch_{add,sub,min,max,and,or,xor} on integral types long and unsigned long with HIP #7816 +% Performance bug in RangePolicy: construct error message if and only if the precondition is violated #7809 +% Fix execution of ranges with more than 2B elements #7797 +% Fix clean target when embedding Kokkos in another project #7557 +% Build system: hint to ARCH_NATIVE if ARMv9 Grace arch is not explicitly supported by the compiler #7862 +% Fix Zen3 flag for NVHPC #7558 +% Use right arch for MI300A in makefiles #7786 +% graph: nodes must be stored by the graph #7619 +% Make sure lock arrays are on device before launching a graph #7685 +% Cuda: fix incorrect iteration in MDRangePolicy of rank > 4 for high iteration counts #7724 +% Cuda: ignore gcc assembler options in nvcc-wrapper #7492 + +% simd: fix a bug in scalar min/max #7813 +% simd: fix a bug in non-masked reductions #7845 +% Fix compiling BasicView on 
MSVC #7751 + + +\begin{frame}[fragile] + + {\Huge Bug Fixes} + + \vspace{10pt} + +\end{frame} + +\begin{frame}[fragile]{General bug fixes} + \begin{itemize} + \item Fix execution of ranges with more than 2 billion elements + \item Graph: + \begin{itemize} + \item Fix graph node lifetime issues + \item Fix lock-based atomics failure when launching CUDA and HIP graphs + \end{itemize} + \item CUDA backend: Fix incorrect iteration in MDRangePolicy of rank $> 4$ for high iteration counts + \item SIMD: + \begin{itemize} + \item fix a bug in scalar min/max + \item fix a bug in non-masked reductions + \end{itemize} + \item View: fix MSVC compilation + \end{itemize} + \end{frame} + +\begin{frame}[fragile]{Build system fixes} + \begin{itemize} + \item Fix \texttt{clean} target when embedding Kokkos in another project + \item Stop generation if ARMv9 Grace arch is not explicitly supported by the compiler when \texttt{KOKKOS\_ARCH\_ARMV9\_GRACE} is specified + \begin{itemize} + \item Can still try and configure with \texttt{ARCH\_NATIVE} + \end{itemize} + \item Fix Zen3 flag for NVHPC + \item Use right arch for MI300A in makefiles + \item (CUDA) ignore gcc assembler options in \texttt{nvcc\_wrapper} + \end{itemize} + \end{frame} + +\begin{frame}[fragile]{Performance bugfixes} + \begin{itemize} + \item Fix performance bug affecting atomic\_fetch\_\{add,sub,min,max,and,or,xor\} on integral types long and unsigned long with HIP + \item Fix performance of \texttt{RangePolicy} where an error message is generated even if precondition not violated + \end{itemize} +\end{frame} +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! 
+ +%\begin{frame}[fragile]{Example list} +% \begin{itemize} +% \item Item 1 +% \item Item 2 with some \texttt{code} +% \begin{itemize} +% \item Sub-item 2.1 +% \item Sub-item 2.2 +% \end{itemize} +% \end{itemize} +%\end{frame} + +%\begin{frame}[fragile]{Example code} +% \begin{code}[keywords={std}] +% #include +% +% int main() { +% std::cout << "hello world\n"; +% } +% \end{code} +%\end{frame} + +%\begin{frame}[fragile]{Example table} +% \begin{center} +% \begin{tabular}{l|l} +% a & b \\\hline +% c & d +% \end{tabular} +% \end{center} +%\end{frame} + +%========================================================================== + + +%========================================================================== diff --git a/Content/ReleaseBriefings/4_6/Section_BuildSystemUpdates.tex b/Content/ReleaseBriefings/4_6/Section_BuildSystemUpdates.tex new file mode 100644 index 00000000..4a0ed990 --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_BuildSystemUpdates.tex @@ -0,0 +1,43 @@ +%========================================================================== + +\begin{frame}[fragile] + + {\Huge Build Systems Updates} + + \vspace{10pt} + +\end{frame} + +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! 
+ +\begin{frame}[fragile]{New build system features} + \begin{itemize} + \item Add support for Zen 4 AMD microarchitecture (\texttt{Kokkos\_ARCH\_ZEN4}) + \item Enable NVIDIA Grace architecture with NVHPC (\texttt{Kokkos\_ARCH\_ARMV9\_GRACE}) + \item Support static library builds via \texttt{CMAKE\_CUDA\_RUNTIME\_LIBRARY=static} when using CUDA as CMake language + \end{itemize} + +\end{frame} + +%========================================================================== + +\begin{frame}[fragile]{Spack support for MI300A} + \begin{itemize} + \item Spack \textit{develop} branch now supports MI300A with a new variant \textcolor{red}{\texttt{apu}} + (\href{https://github.com/spack/spack/pull/48609}{spack/spack\#48609}) + + \item To compile Kokkos for MI300A, forcing the APU mode, use the following command: + \texttt{spack install kokkos +rocm amdgpu\_target=gfx942 \textcolor{red}{+apu}} + + % In pure CMake, this is equivalent to: + % cmake -DKokkos_ENABLE_ROCM=ON -DKokkos_ARCH_AMD_GFX942_APU=ON + \end{itemize} + +\end{frame} + + +%========================================================================== diff --git a/Content/ReleaseBriefings/4_6/Section_GeneralEnhancements.tex b/Content/ReleaseBriefings/4_6/Section_GeneralEnhancements.tex new file mode 100644 index 00000000..2cd505b2 --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_GeneralEnhancements.tex @@ -0,0 +1,182 @@ +%========================================================================== + +\begin{frame}[fragile] + + {\Huge General Enhancements} + + \vspace{10pt} + +\end{frame} + +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! 
+ +%\begin{frame}[fragile]{Example list} +% \begin{itemize} +% \item Item 1 +% \item Item 2 with some \texttt{code} +% \begin{itemize} +% \item Sub-item 2.1 +% \item Sub-item 2.2 +% \end{itemize} +% \end{itemize} +%\end{frame} + +%\begin{frame}[fragile]{Example code} +% \begin{code}[keywords={std}] +% #include +% +% int main() { +% std::cout << "hello world\n"; +% } +% \end{code} +%\end{frame} + +%\begin{frame}[fragile]{Example table} +% \begin{center} +% \begin{tabular}{l|l} +% a & b \\\hline +% c & d +% \end{tabular} +% \end{center} +%\end{frame} + +%========================================================================== + +\begin{frame}[fragile]{Fix a warning from kokkos\_check} + \begin{itemize} + \item \texttt{kokkos\_check}: Check at configure time that Kokkos was built with the requested backends and target architectures. + \item Fix a warning when a user calls the cmake function \texttt{kokkos\_check} from a \texttt{Config.cmake} "Find Module" file + {\tiny \begin{verbatim} +CMake Warning (dev) at /usr/share/cmake-3.22/Modules/FindPackageHandleStandardArgs.cmake:438 (message): + The package name passed to `find_package_handle_standard_args` + (Kokkos_DEVICES) does not match the name of the calling package (SomePackage). + This can lead to problems in calling code that expects `find_package` + result variables (e.g., `_FOUND`) to follow a certain pattern. +Call Stack (most recent call first): + ... /kokkos/lib/cmake/Kokkos/KokkosConfigCommon.cmake:110 (find_package_handle_standard_args) + ... +This warning is for project developers. Use -Wno-dev to suppress it. 
+ \end{verbatim}} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{\texttt{inclusive\_scan} performance improvements} + With the Cuda and HIP backends, \texttt{Kokkos::Experimental::inclusive\_scan} now calls the vendor versions in Thrust + \begin{itemize} + \item The vendor versions are up to 3x faster than the \texttt{Kokkos::parallel\_scan}-based default implementation + \item Thrust requires \texttt{Kokkos\_ENABLE\_ROCTHRUST} to be \texttt{ON} (which is the default) + \item Approximately 1.5-3x speed up (V100, MI300A) + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Reduce tooling interface overhead} + Reduced the overhead of Kokkos tools related checks + \begin{itemize} + \item Store the information whether Kokkos tools are enabled after each modification to the tools' callbacks + \item Previously, this value was recomputed for every event (\texttt{parallel\_for}, \texttt{fence}, etc.) + \item Most noticeable in small serial kernels (around 100 elements) + \item Reduction of launch time of approximately 10ns (About the time to increment 100 elements in a kernel) on CPU + \end{itemize} +\end{frame} + +%========================================================================== +\begin{frame}[fragile]{SIMD reductions and compound assignments} + Added reductions and remaining compound assignments to Kokkos SIMD + + \begin{itemize} + \item \texttt{basic\_simd\& operator/=(basic\_simd\&, U\&\&)} + \item \texttt{basic\_simd\& operator>>=(basic\_simd\&, U\&\&)} + \item \texttt{basic\_simd\& operator<<=(basic\_simd\&, U\&\&)} + \end{itemize} + + \vspace{5pt} + + \begin{itemize} + \item \texttt{T reduce\_min(const basic\_simd\& x)} + \item \texttt{T reduce\_max(const basic\_simd\& x)} + \item \texttt{T reduce(const basic\_simd\& x, const mask\_type\& mask, T identity\_element, BinaryOperation binary\_op)} + \begin{itemize} + \item Supported binary operations are: std::plus, std::multiplies, std::bit\_and, std::bit\_or and std::bit\_xor + \item std::plus is 
used if binary op is not specified + \end{itemize} + \end{itemize} + +\end{frame} +%========================================================================== +\begin{frame}[fragile]{Performance of algorithms} + + \begin{itemize} + \item In Kokkos 4.5, we fixed a performance bug in \texttt{Kokkos::sort} + \item Root cause is in an implementation detail in \texttt{RandomAccessIterator} + \end{itemize} + + \vspace{10pt} + + \begin{itemize} + \item In Kokkos 4.6 the root cause is fixed + \item Fixes all algorithms that rely on \texttt{RandomAccessIterator} (e.g. sort,search,etc.) + \end{itemize} + +\end{frame} +%========================================================================== +\begin{frame}[fragile]{Performance of \texttt{search}} +\begin{center} +\textbf{Improvement dependent on algorithm and hardware!} +\end{center} + + \begin{center} + \begin{tikzpicture} + \begin{axis}[ + title={Host only, Intel Skylake}, + legend pos=north west, + xmode=log, + height=0.4\textwidth, + xlabel=Number of Elements, + ylabel=Time in s] + \addplot[mark=x] coordinates { + (10000,3.0119e-05) + (100000,0.000321003) + (1000000,0.00270167) + (10000000,0.0288293) + (100000000,0.294202) + }; + + \addplot[mark=o] coordinates { + (10000,8.57e-06) + (100000,8.7155e-05) + (1000000,0.000909428) + (10000000,0.00823764) + (100000000,0.0830087) + }; + \legend{4.5,4.6} + \end{axis} +\end{tikzpicture} + + \end{center} + +\end{frame} +%========================================================================== +\begin{frame}[fragile]{Print support for system allocated memory} + \texttt{print\_configuration} outputs if system allocated memory is accessible on GPU + \begin{center} + \textbf{No guarantees about the print format!} + \end{center} + + \vspace{5pt} + + \begin{center} + Example output for MI300A with HIP backend + \end{center} + \begin{code} + XNACK environment variable set: yes + Kernel reports HMM module via `CONFIG_HMM_MIRROR=y` in `/boot/config`: yes + Architecture capable of 
accessing system allocated memory: 1, + System allows accessing system allocated memory on GPU: 1, + \end{code} + +\end{frame} +%========================================================================== diff --git a/Content/ReleaseBriefings/4_6/Section_NewFeatures.tex b/Content/ReleaseBriefings/4_6/Section_NewFeatures.tex new file mode 100644 index 00000000..75eb847e --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_NewFeatures.tex @@ -0,0 +1,151 @@ +%========================================================================== + +\begin{frame}[fragile] + + {\Huge Feature highlights} + + \vspace{10pt} + +\end{frame} + +%========================================================================== + +% Examples + +% note: always keep the [fragile] for your frames! + +%\begin{frame}[fragile]{Example list} +% \begin{itemize} +% \item Item 1 +% \item Item 2 with some \texttt{code} +% \begin{itemize} +% \item Sub-item 2.1 +% \item Sub-item 2.2 +% \end{itemize} +% \end{itemize} +%\end{frame} + +%\begin{frame}[fragile]{Example code} +% \begin{code}[keywords={std}] +% #include +% +% int main() { +% std::cout << "hello world\n"; +% } +% \end{code} +%\end{frame} + +%\begin{frame}[fragile]{Example table} +% \begin{center} +% \begin{tabular}{l|l} +% a & b \\\hline +% c & d +% \end{tabular} +% \end{center} +%\end{frame} + +%========================================================================== + +% \begin{frame}[fragile]\label{sec:new_features} + + % {\Huge Kokkos::Graph features} + + % \vspace{10pt} + +% \end{frame} + +\begin{frame}[fragile]{Kokkos::Graph recap} + \begin{itemize} + \item describes asynchronous workloads organised as a directed acyclic graph (DAG) + \item executed using \texttt{submit()}, possibly many times, observing dependencies + \begin{code}[keywords={auto}] +auto graph = Kokkos::create_graph([&](auto root) { + auto node_A = root.then_parallel_for("A", ...policy..., ...functor...); + auto node_B = node_A.then_parallel_for("B", ...policy..., 
...functor...); + auto node_C = node_A.then_parallel_for("C", ...policy..., ...functor...); + + auto node_D = Kokkos::when_all(node_B, node_C). + then_parallel_for("D", ...policy..., ...functor...); +}); + +graph.instantiate(); + +graph.submit(); + \end{code} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Kokkos::Graph new features} + \begin{itemize} + \item \texttt{then} node: executes a callable on device + \item Single call of the functor per \texttt{submit()} + \item Executed in the \texttt{ExecutionSpace} the graph is submitted to + \begin{code}[keywords={auto}] +auto graph = Kokkos::create_graph([&](auto root) { + auto node_A = root.then_parallel_for("A", ...policy..., ...functor...); + auto node_B = node_A.then("B", ...functor...); +}); + \end{code} + \item Functor passed to \texttt{then} must be callable without arguments and marked with \texttt{KOKKOS\_FUNCTION} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Kokkos::Graph new features} + \begin{itemize} + \item Interoperability: create a \texttt{Kokkos::Graph} from a native Cuda/HIP/Sycl graph + \begin{code}[keywords={create_graph_from_native}] +cudaGraph_t native_graph = nullptr; +cudaGraphCreate(&native_graph, 0); + +auto graph_from_native = + Kokkos::Experimental::create_graph_from_native(exec, native_graph); + \end{code} + \item Experimental, does not yet allow adding nodes created using the native API to a \texttt{Kokkos::Graph} + \end{itemize} +\end{frame} + +% \begin{frame}[fragile]\label{sec:new_features} + + % {\Huge Multi-GPU for HIP Backend} + + % \vspace{10pt} + +% \end{frame} + +\begin{frame}[fragile]{Multi-GPU for HIP Backend} + \begin{itemize} + \item Launch kernels on multiple devices from a single host process + \item Available for ROCm 5.6 and later + \item Requires direct use of HIP runtime API for creating and destroying streams + \item Experimental, still looking for feedback from new users + \item New documentation (for all backends) + \begin{itemize} + \item[] 
\url{https://kokkos.org/kokkos-core-wiki/API/core/MultiGPUSupport.html} + \end{itemize} + \end{itemize} +\end{frame} + +\begin{frame}[fragile]{Multi-GPU for HIP Backend} + \begin{code}[keywords={auto}] +// Create streams on different devices +hipStream_t streams[2]; +hipSetDevice(0); hipStreamCreate(&streams[0]); +hipSetDevice(1); hipStreamCreate(&streams[1]); +{ + // Creating execution spaces + Kokkos::HIP exec0(streams[0]), exec1(streams[1]); + + // Allocating views + Kokkos::View v0(Kokkos::view_alloc("v0", exec0), N); + Kokkos::View v1(Kokkos::view_alloc("v1", exec1), M); + + // Launch kernels (run concurrently) + Kokkos::parallel_for(Kokkos::RangePolicy(exec0, 0, N), functor0); + Kokkos::parallel_for(Kokkos::RangePolicy(exec1, 0, M), functor1); +} +// Destroy streams (after execution spaces are deleted) +hipStreamDestroy(streams[0]); hipStreamDestroy(streams[1]); + \end{code} +\end{frame} + +%========================================================================== diff --git a/Content/ReleaseBriefings/4_6/Section_Organizational.tex b/Content/ReleaseBriefings/4_6/Section_Organizational.tex new file mode 100644 index 00000000..e6622f3d --- /dev/null +++ b/Content/ReleaseBriefings/4_6/Section_Organizational.tex @@ -0,0 +1,146 @@ + +%========================================================================== + +\begin{frame}[fragile] + + {\Huge Organizational} + + \vspace{10pt} + + \textbf{Content:} + \begin{itemize} + \item HPSF and Kokkos Meeting 2025 + \item Targeting C++20 for Kokkos 5.0 + \item Makefile deprecation + \end{itemize} + +\end{frame} + +%========================================================================== + +\begin{frame}[fragile]{Kokkos User Group Meeting 2025} +\begin{center} +\textbf{Kokkos User Group Meeting 2025 @ HPSF Conference} +\end{center} + +\begin{itemize} +\item{\textit{When:} May 5th-8th 2025} +\item{\textit{Where:} Chicago} +\item{\textit{What:} 2-days HPSF plenary + 2-days Project meetings} +\item{\textit{KUG-Content:} 
Focused on user experiences +\begin{itemize} + \item{How do you leverage Kokkos?} + \item{What are pain points?} + \item{Kokkos-based libraries of interest to the community} +\end{itemize} +} +\end{itemize} + +\vspace{10pt} + +\begin{center} +\textit{Registration open now!} +\end{center} +\end{frame} + +\begin{frame}[fragile]{Kokkos User Group Meeting 2025} +\begin{center} +\textbf{What to expect from KUG} +\end{center} + +\begin{itemize} + \item{Eight 90-minute sessions featuring a dynamic blend of Kokkos developers and community users} + + \begin{multicols}{2} + \item{\textit{Day 1 Highlights:}} + \begin{itemize} + \item{Essential Updates} + \item{Kokkos in Applications} + \item{Adopting Kokkos} + \item{Lightning Talks} + \end{itemize} + + \columnbreak + + \item{\textit{Day 2 Highlights:}} + \begin{itemize} + \item{Kokkos Ecosystem} + \item{Tuning and Performance} + \item{Algorithms} + \item{Panel Discussion} + \end{itemize} + \end{multicols} +\end{itemize} +\end{frame} + +\begin{frame}[fragile]{HPSF Conference 2025} +\begin{center} +\textbf{Other reasons to go} +\end{center} + +\begin{itemize} + \item{General Poster Session} + \item{Updates on the HPSF project} + \item{Introduction to various working groups} + \item{Various Panel Discussions} + \item{Chance to meet all other members of HPSF} + \item{...} +\end{itemize} +\end{frame} + +\begin{frame}[fragile]{Other outreach} + \begin{itemize} + \item{HPSF will be present at \href{https://isc.app.swapcard.com/widget/event/isc-high-performance-2025/planning/UGxhbm5pbmdfMjU4NjE0MQ==}{ISC BOF 2025}} +\item{\href{https://kokkos.org/community/tea-time/}{Kokkos Tea-Time} on 2nd or 3rd Wed of the month} + \begin{itemize} + \item April 16th @ 11am EST ``Solomon: unified schemes for directive-based GPU offloading'' + \end{itemize} +\end{itemize} + +\end{frame} + + +\begin{frame}[fragile]{Kokkos 5 and ISO C++20} +\begin{center} +\textbf{Kokkos 5 is coming Summer 2025} + +\vspace{0.5cm} +\textbf{We will require C++20!} 
+\end{center} + +\textit{Start preparing now:} +\begin{itemize} + \item{Check availability of compilers on your systems} + \item{Test with C++20 enabled: start with a CPU build} + \item{Minimum Compiler requirements will change (more details later)} +\end{itemize} + +\vspace{0.5cm} +\begin{center} +\textit{Nothing wrong for your project to require C++20 now if you feel ready!} +\end{center} +\end{frame} + +\begin{frame}[fragile]{Makefile deprecation} +\begin{center} +\textbf{Makefile is officially deprecated and will be removed in the next major release} + +\textit{Start preparing now:} +\begin{itemize} + \item{Check if you can transition to CMake} + \item{Comment on pinned issue \href{https://github.com/kokkos/kokkos/issues/7610}{7610}} +\end{itemize} +\end{center} + +\end{frame} + +\begin{frame}[fragile]{Open SSF Scorecard} +\begin{center} +\textbf{We reached ``passed'' on the OSSF Best Practices Program} +\href{https://www.bestpractices.dev/en/projects/9344}{www.bestpractices.dev} + +\vspace{0.5cm} +\textit{This means Kokkos is continuously tracking and openly reporting the conformity with open source software practices.} +\end{center} + +\end{frame} diff --git a/Content/ReleaseBriefings/Makefile b/Content/ReleaseBriefings/Makefile index f88928b6..a258286e 100644 --- a/Content/ReleaseBriefings/Makefile +++ b/Content/ReleaseBriefings/Makefile @@ -1,5 +1,5 @@ -default: release-45 +default: release-46 workaround-on: sed -i.bak 's|%\\input|\\input|g' KokkosTutorial_PreTitle.tex @@ -34,5 +34,8 @@ release-44: release-45: pdflatex release-45.tex +release-46: + pdflatex release-46.tex + clean: rm -r *.aux *.log *.nav *.out *.snm *.toc *.vrb diff --git a/Content/ReleaseBriefings/release-46.tex b/Content/ReleaseBriefings/release-46.tex new file mode 100644 index 00000000..b0f9cced --- /dev/null +++ b/Content/ReleaseBriefings/release-46.tex @@ -0,0 +1,119 @@ +\input{KokkosTutorial_PreTitle} +\usepackage{tikz} +\graphicspath{{4_6/figures/}} +\usepackage{multicol} 
+\usepackage{pgfplots} + +%disclaimer for Sandia. uncomment and the whole blob goes away @ b80c116300122 +% \def\sandid{SANDXXXX PE} +\def\ornlid{THIS WILL NEED TO BE UPDATED AFTER RESOLUTION} + +% \title{Performance Portability with Kokkos} +\title{Kokkos 4.6 Release Briefing} + +%BAD misuse of author field +\author{New Capabilities} + +\date{04/10/2025} + +\input{KokkosTutorial_PostTitle} + +\begin{document} + +\begin{frame} + \titlepage +\end{frame} + + +\begin{frame}[fragile]{Outline} + + \textbf{4.6 Release Highlights} + + \begin{itemize} + \item{Organizational} + \item{Feature Highlights} + \item{General Enhancements} + \item{Backend updates} + \item{Build system updates} + \item{Deprecations and other breaking changes} + \item{Bug Fixes} + \end{itemize} + +\end{frame} + +\begin{frame}{Find More} + + \textbf{Online Resources}: + + \begin{itemize} + \item \url{https://github.com/kokkos}: + \begin{itemize} + \item Primary Kokkos GitHub Organization + \end{itemize} + \item \url{https://kokkos.org/kokkos-core-wiki/tutorials-and-examples.html}: + \begin{itemize} + \item{Tutorials, video lectures, and examples} + \end{itemize} + \item \url{https://kokkos.org/kokkos-core-wiki}: + \begin{itemize} + \item Wiki including API reference + \end{itemize} + \item \url{https://kokkosteam.slack.com}: + \begin{itemize} + \item Slack workspace for Kokkos. + \item Please join: fastest way to get your questions answered. + \item Can whitelist domains, or invite individual people. + \end{itemize} + \end{itemize} + +\end{frame} + +\begin{frame}[fragile]{Kokkos Usage} + \textbf{Would like to strengthen community bonds and discoverability} + + \vspace{10pt} + \textit{List of Applications and Libraries} + \begin{itemize} + \item Add your app to \url{https://github.com/kokkos/kokkos/issues/1950} + \item We are planning to add that to the Kokkos website. + \item Helps people discover each other when working on similar things. 
+ \end{itemize} + + \vspace{10pt} + \textit{GitHub Topics} + \begin{itemize} + \item Use \textit{kokkos} tag on your repos. + \item If you click on the topic you get a list of all projects on github with that topic. + \end{itemize} +\end{frame} + +\input{4_6/Section_Organizational.tex} +\input{4_6/Section_NewFeatures.tex} +\input{4_6/Section_GeneralEnhancements.tex} +\input{4_6/Section_BackendUpdates.tex} +\input{4_6/Section_BuildSystemUpdates.tex} +\input{4_6/Section_BreakingChanges.tex} +\input{4_6/Section_BugFixes.tex} + +%========================================================================== + +\begin{frame}[fragile] + + \vspace{10pt} + + \textbf{How to Get Your Fixes and Features into Kokkos} + \newline + \begin{itemize} + \item Fork the Kokkos repo (\url{https://github.com/kokkos/kokkos}) + \item Make topic branch from \textit{develop} for your code + \item Add tests for your code + \item Create a pull request (PR) on the main project \textit{develop} + \item Update the documentation (\url{https://github.com/kokkos/kokkos-core-wiki}) if your code changes the API + \item Get in touch if you have any question (\url{https://kokkosteam.slack.com}) + \end{itemize} + +\end{frame} + +%========================================================================== + +\end{document}