From ce6fc1b25faed3878b4eff96e2140c92f6d4f9b6 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Mon, 11 May 2026 16:01:10 -0700
Subject: [PATCH] overflow

---
 backends/aoti/slim/core/slim_tensor.h | 48 +++++++++++++++++++--------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/backends/aoti/slim/core/slim_tensor.h b/backends/aoti/slim/core/slim_tensor.h
index 391ae2248b4..5a58508a4a2 100644
--- a/backends/aoti/slim/core/slim_tensor.h
+++ b/backends/aoti/slim/core/slim_tensor.h
@@ -433,13 +433,19 @@ class SlimTensor {
   /**
    * Copy data from another tensor to this tensor.
    *
-   * Both tensors must have the same numel and dtype.
-   * Currently only supports CPU-to-CPU copy (contiguous tensors only).
+   * Both tensors must have the same numel, sizes and dtype.
    *
    * @param other The source tensor to copy from
    * @return Reference to this tensor
    */
   SlimTensor& copy_(const SlimTensor& other) {
+    ET_CHECK_MSG(
+        this->dim() == other.dim(),
+        "copy_: dim of tensors must match (%zu vs %zu)",
+        this->dim(),
+        other.dim());
+    ET_CHECK_MSG(
+        this->sizes() == other.sizes(), "copy_: sizes of tensors must match");
     ET_CHECK_MSG(
         this->numel() == other.numel(), "copy_: numel of tensors must match");
     ET_CHECK_MSG(this->dtype() == other.dtype(), "copy_: dtype must match");
@@ -463,29 +469,43 @@ class SlimTensor {
     std::vector<int64_t> counter(this->dim(), 0);
 
     for (size_t i = 0; i < this->numel(); i++) {
-      // Compute src offset in elements
       int64_t src_offset = 0;
-      for (size_t d = 0; d < other.dim(); d++) {
-        src_offset += counter[d] * other.stride(d);
-      }
-
-      // Compute dst offset in elements
       int64_t dst_offset = 0;
       for (size_t d = 0; d < this->dim(); d++) {
-        dst_offset += counter[d] * this->stride(d);
+        int64_t src_term = 0;
+        int64_t dst_term = 0;
+        // src_offset = src_offset + counter[d] * other.stride(d)
+        // dst_offset = dst_offset + counter[d] * this->stride(d)
+        ET_CHECK_MSG(
+            !::c10::mul_overflows(counter[d], other.stride(d), &src_term) &&
+                !::c10::add_overflows(src_offset, src_term, &src_offset) &&
+                !::c10::mul_overflows(counter[d], this->stride(d), &dst_term) &&
+                !::c10::add_overflows(dst_offset, dst_term, &dst_offset),
+            "copy_: offset computation overflow");
       }
+      size_t src_byte_offset = 0;
+      size_t dst_byte_offset = 0;
+      // src_byte_offset = src_offset * elem_size
+      // dst_byte_offset = dst_offset * elem_size
+      ET_CHECK_MSG(
+          src_offset >= 0 && dst_offset >= 0 &&
+              !::c10::mul_overflows(
+                  static_cast<size_t>(src_offset),
+                  elem_size,
+                  &src_byte_offset) &&
+              !::c10::mul_overflows(
+                  static_cast<size_t>(dst_offset), elem_size, &dst_byte_offset),
+          "copy_: byte offset overflow");
 
       // Copy elem_size bytes from src to dst
       if (this->device().is_cpu() && other.device().is_cpu()) {
         std::memcpy(
-            dst_data + dst_offset * elem_size,
-            src_data + src_offset * elem_size,
-            elem_size);
+            dst_data + dst_byte_offset, src_data + src_byte_offset, elem_size);
       } else if (this->device().is_cuda() || other.device().is_cuda()) {
 #if defined(CUDA_AVAILABLE)
         DeviceTraits::memcpy(
-            dst_data + dst_offset * elem_size,
-            src_data + src_offset * elem_size,
+            dst_data + dst_byte_offset,
+            src_data + src_byte_offset,
             elem_size,
             device(), // dst device
             other.device() // src device