diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index da69fe2311..96e18085f6 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -83,10 +83,10 @@ jobs: name: Build and test Firedrake (Linux) strategy: # We want to know all of the tests which fail, so don't kill real if - # complex fails and vice-versa + # complex or single fails and vice-versa fail-fast: false matrix: - arch: [default, complex] + arch: [default, complex, single] runs-on: [self-hosted, Linux] container: image: ubuntu:latest diff --git a/firedrake/assemble.py b/firedrake/assemble.py index a9d15a7263..318d930379 100644 --- a/firedrake/assemble.py +++ b/firedrake/assemble.py @@ -28,7 +28,7 @@ from firedrake.petsc import PETSc from firedrake.slate import slac, slate from firedrake.slate.slac.kernel_builder import CellFacetKernelArg, LayerCountKernelArg -from firedrake.utils import ScalarType, assert_empty, tuplify +from firedrake.utils import IntType, ScalarType, assert_empty, tuplify from pyop2 import op2 from pyop2.exceptions import MapValueError, SparsityFormatError from functools import cached_property @@ -1783,8 +1783,8 @@ def _make_mat_global_kernel_arg(self, Vrow, Vcol): else: rmap_arg, cmap_arg = (V.topological.entity_node_map(self._mesh.topology, self._integral_type, self._subdomain_id, self._all_integer_subdomain_ids)._global_kernel_arg for V in [Vrow, Vcol]) # PyOP2 matrix objects have scalar dims so we flatten them here - rdim = numpy.prod(self._get_dim(relem), dtype=int) - cdim = numpy.prod(self._get_dim(celem), dtype=int) + rdim = numpy.prod(self._get_dim(relem), dtype=IntType) + cdim = numpy.prod(self._get_dim(celem), dtype=IntType) return op2.MatKernelArg((((rdim, cdim),),), (rmap_arg, cmap_arg), unroll=self._unroll) @staticmethod @@ -1879,7 +1879,7 @@ def _as_global_kernel_arg_coefficient(_, self): @_as_global_kernel_arg.register(kernel_args.ConstantKernelArg) def _as_global_kernel_arg_constant(_, self): const = next(self._constants) - value_size = numpy.prod(const.ufl_shape, dtype=int) + value_size = numpy.prod(const.ufl_shape, dtype=IntType) return op2.GlobalKernelArg((value_size,)) diff --git a/firedrake/cython/supermeshimpl.pyx b/firedrake/cython/supermeshimpl.pyx index fde3fa6ba9..6ce56b1c98 100644 --- a/firedrake/cython/supermeshimpl.pyx +++ b/firedrake/cython/supermeshimpl.pyx @@ -160,8 +160,8 @@ def intersection_finder(mesh_A, mesh_B): libsupermesh_tree_intersection_finder_query_output(&nindices) - indices = numpy.empty((nindices,), dtype=int) - indptr = numpy.empty((mesh_A.num_cells() + 1,), dtype=int) + indices = numpy.empty((nindices,), dtype=IntType) + indptr = numpy.empty((mesh_A.num_cells() + 1,), dtype=IntType) libsupermesh_tree_intersection_finder_get_output(&ncells_A, &nindices, indices.data, indptr.data) diff --git a/firedrake/evaluate.h b/firedrake/evaluate.h index 47bf93c23f..bd33a0e112 100644 --- a/firedrake/evaluate.h +++ b/firedrake/evaluate.h @@ -9,13 +9,13 @@ extern "C" { struct Function { /* Number of cells in the base mesh */ - int n_cols; + PetscInt n_cols; /* 1 if extruded, 0 if not */ int extruded; /* number of layers for extruded, otherwise 1 */ - int n_layers; + PetscInt n_layers; /* Coordinate values and node mapping */ PetscScalar *coords; @@ -36,26 +36,28 @@ struct Function { typedef PetscReal (*ref_cell_l1_dist)(void *data_, struct Function *f, - int cell, + PetscInt cell, double *x); typedef PetscReal (*ref_cell_l1_dist_xtr)(void *data_, struct Function *f, - int cell, - int layer, + PetscInt cell, + PetscInt layer, double *x); -extern int locate_cell(struct Function *f, +extern PetscInt locate_cell(struct Function *f, double *x, int dim, ref_cell_l1_dist try_candidate, ref_cell_l1_dist_xtr try_candidate_xtr, void *temp_ref_coords, void *found_ref_coords, - double *found_ref_cell_dist_l1, - size_t ncells_ignore, - int* cells_ignore); + PetscReal *found_ref_cell_dist_l1, + size_t ncells_ignore, + PetscInt* cells_ignore); +/* x is physical coordinates: always double (libspatialindex requires float64). + * Return value is a status code (-1 = not found, 0 = success), not a cell index. */ extern int evaluate(struct Function *f, double *x, PetscScalar *result); diff --git a/firedrake/function.py b/firedrake/function.py index 9d8a219fb7..31fadb03e2 100644 --- a/firedrake/function.py +++ b/firedrake/function.py @@ -37,9 +37,9 @@ class _CFunction(ctypes.Structure): r"""C struct collecting data from a :class:`Function`""" - _fields_ = [("n_cols", c_int), + _fields_ = [("n_cols", as_ctypes(IntType)), ("extruded", c_int), - ("n_layers", c_int), + ("n_layers", as_ctypes(IntType)), ("coords", c_void_p), ("coords_map", POINTER(as_ctypes(IntType))), ("f", c_void_p), @@ -564,7 +564,8 @@ def evaluate(self, coord, mapping, component, index_values): # Called by UFL when evaluating expressions at coordinates if component or index_values: raise NotImplementedError("Unsupported arguments when attempting to evaluate Function.") - coord = np.asarray(coord, dtype=utils.ScalarType) + # Point evaluation always uses float64 for geometric robustness + coord = np.asarray(coord, dtype=np.float64) evaluator = PointEvaluator(self.function_space().mesh(), coord) result = evaluator.evaluate(self) if len(coord.shape) == 1: diff --git a/firedrake/functionspaceimpl.py b/firedrake/functionspaceimpl.py index b5b2e90a0a..5422b9c32f 100644 --- a/firedrake/functionspaceimpl.py +++ b/firedrake/functionspaceimpl.py @@ -19,6 +19,7 @@ from pyop2.utils import as_tuple from firedrake import dmhooks +from firedrake.utils import IntType from firedrake.mesh import MeshGeometry, MeshSequenceTopology, MeshSequenceGeometry from firedrake.functionspacedata import get_shared_data, create_element from firedrake.petsc import PETSc @@ -569,7 +570,7 @@ def __init__(self, mesh, element, name=None): :class:`finat.ufl.mixedelement.TensorElement` have rank 1 and 2 respectively.""" - self.block_size = int(numpy.prod(self.shape, dtype=int)) + self.block_size = int(numpy.prod(self.shape, dtype=IntType)) r"""The total number of degrees of freedom at each function space node.""" self.name = name diff --git a/firedrake/locate.c b/firedrake/locate.c index f1fa6856a3..009bc5d173 100644 --- a/firedrake/locate.c +++ b/firedrake/locate.c @@ -1,22 +1,21 @@ #include #include #include -#include #include -int locate_cell(struct Function *f, +PetscInt locate_cell(struct Function *f, double *x, int dim, ref_cell_l1_dist try_candidate, ref_cell_l1_dist_xtr try_candidate_xtr, void *temp_ref_coords, void *found_ref_coords, - double *found_ref_cell_dist_l1, + PetscReal *found_ref_cell_dist_l1, size_t ncells_ignore, - int* cells_ignore) + PetscInt* cells_ignore) { RTreeError err; - int cell = -1; + PetscInt cell = -1; int cell_ignore_found = 0; /* NOTE: temp_ref_coords and found_ref_coords are actually of type struct ReferenceCoords but can't be declared as such in the function @@ -25,8 +24,8 @@ int locate_cell(struct Function *f, surrounds this is declared in pointquery_utils.py. We cast when we use the ref_coords_copy function and trust that the underlying memory which the pointers refer to is updated as necessary. */ - double ref_cell_dist_l1 = DBL_MAX; - double current_ref_cell_dist_l1 = -0.5; + PetscReal ref_cell_dist_l1 = PETSC_MAX_REAL; + PetscReal current_ref_cell_dist_l1 = -0.5; /* NOTE: `tolerance`, which is used throughout this funciton, is a static variable defined outside this function when putting together all the C code that needs to be compiled - see pointquery_utils.py */ @@ -73,9 +72,9 @@ int locate_cell(struct Function *f, } else { for (size_t i = 0; i < nids; i++) { - int nlayers = f->n_layers; - int c = ids[i] / nlayers; - int l = ids[i] % nlayers; + PetscInt nlayers = f->n_layers; + PetscInt c = ids[i] / nlayers; + PetscInt l = ids[i] % nlayers; current_ref_cell_dist_l1 = (*try_candidate_xtr)(temp_ref_coords, f, c, l, x); for (size_t j = 0; j < ncells_ignore; j++) { if (ids[i] == cells_ignore[j]) { diff --git a/firedrake/mesh.py b/firedrake/mesh.py index 64c929c210..36bcb7afc2 100644 --- a/firedrake/mesh.py +++ b/firedrake/mesh.py @@ -37,7 +37,7 @@ import firedrake.extrusion_utils as eutils import firedrake.cython.rtree as rtree import firedrake.utils as utils -from firedrake.utils import as_cstr, IntType, RealType +from firedrake.utils import as_cstr, as_ctypes, IntType, RealType, RealType_c from firedrake.logging import logger from firedrake.parameters import parameters from firedrake.petsc import PETSc, DEFAULT_PARTITIONER @@ -422,7 +422,7 @@ def _from_triangle(filename, dim, comm): nodecount = header[0] nodedim = header[1] assert nodedim == dim - coordinates = np.loadtxt(nodefile, usecols=list(range(1, dim+1)), skiprows=1, dtype=np.double) + coordinates = np.loadtxt(nodefile, usecols=list(range(1, dim+1)), skiprows=1, dtype=PETSc.RealType) assert nodecount == coordinates.shape[0] with open(basename+".ele") as elefile: @@ -472,12 +472,12 @@ def plex_from_cell_list(dim, cells, coords, comm, name=None): :arg comm: communicator to build the mesh on. Must be a PyOP2 internal communicator :kwarg name: name of the plex """ - # These types are /correct/, DMPlexCreateFromCellList wants int - # and double (not PetscInt, PetscReal). + # petsc4py's createFromCellList converts coords via iarray_r (NPY_PETSC_REAL), + # so passing PETSc.RealType is correct. The underlying C API accepts PetscReal. with temp_internal_comm(comm) as icomm: if comm.rank == 0: cells = np.asarray(cells, dtype=np.int32) - coords = np.asarray(coords, dtype=np.double) + coords = np.asarray(coords, dtype=PETSc.RealType) icomm.bcast(cells.shape, root=0) icomm.bcast(coords.shape, root=0) # Provide the actual data on rank 0. @@ -491,7 +491,7 @@ def plex_from_cell_list(dim, cells, coords, comm, name=None): # A subsequent call to plex.distribute() takes care of parallel partitioning plex = PETSc.DMPlex().createFromCellList(dim, np.zeros(cell_shape, dtype=np.int32), - np.zeros(coord_shape, dtype=np.double), + np.zeros(coord_shape, dtype=PETSc.RealType), comm=comm) if name is not None: plex.setName(name) @@ -570,7 +570,7 @@ def __init__(self, topology_dm, name, reorder, sfXB, perm_is, distribution_name, # Mark OP2 entities and derive the resulting Plex renumbering with PETSc.Log.Event("Mesh: numbering"): self._mark_entity_classes() - self._entity_classes = dmcommon.get_entity_classes(self.topology_dm).astype(int) + self._entity_classes = dmcommon.get_entity_classes(self.topology_dm).astype(IntType) if perm_is: self._dm_renumbering = perm_is else: @@ -1922,13 +1922,13 @@ def node_classes(self, nodes_per_entity, real_tensorproduct=False): :returns: the number of nodes in each of core, owned, and ghost classes. """ if real_tensorproduct: - nodes = np.asarray(nodes_per_entity) + nodes = np.asarray(nodes_per_entity, dtype=IntType) nodes_per_entity = sum(nodes[:, i] for i in range(2)) return super(ExtrudedMeshTopology, self).node_classes(nodes_per_entity) elif self.variable_layers: return extnum.node_classes(self, nodes_per_entity) else: - nodes = np.asarray(nodes_per_entity) + nodes = np.asarray(nodes_per_entity, dtype=IntType) if self.extruded_periodic: nodes_per_entity = sum(nodes[:, i]*(self.layers - 1) for i in range(2)) else: @@ -2656,7 +2656,7 @@ def locate_cell_and_reference_coordinate(self, x, tolerance=None, cell_ignore=No (cell number, reference coordinates) of type (int, numpy array), or, when point is not in the domain, (None, None). """ - x = np.asarray(x) + x = np.asarray(x, dtype=np.float64) if x.size != self.geometric_dimension: raise ValueError("Point must have the same geometric dimension as the mesh") x = x.reshape((1, self.geometric_dimension)) @@ -2694,11 +2694,13 @@ def locate_cells_ref_coords_and_dists(self, xs, tolerance=None, cells_ignore=Non tolerance = self.tolerance else: self.tolerance = tolerance - xs = np.asarray(xs, dtype=utils.ScalarType) + # Physical coordinates use float64 for the spatial index (libspatialindex requires double). + # Reference coordinates and distances use RealType (follows PETSc precision). + xs = np.asarray(xs, dtype=np.float64) xs = xs.real.copy() if xs.shape[1] != self.geometric_dimension: raise ValueError("Point coordinate dimension does not match mesh geometric dimension") - Xs = np.empty_like(xs) + Xs = np.empty((len(xs), self.geometric_dimension), dtype=RealType) npoints = len(xs) if cells_ignore is None or cells_ignore[0][0] is None: cells_ignore = np.full((npoints, 1), -1, dtype=IntType, order="C") @@ -2707,14 +2709,14 @@ def locate_cells_ref_coords_and_dists(self, xs, tolerance=None, cells_ignore=Non if cells_ignore.shape[0] != npoints: raise ValueError("Number of cells to ignore does not match number of points") assert cells_ignore.shape == (npoints, cells_ignore.shape[1]) - ref_cell_dists_l1 = np.empty(npoints, dtype=utils.RealType) + ref_cell_dists_l1 = np.empty(npoints, dtype=RealType) cells = np.empty(npoints, dtype=IntType) assert xs.size == npoints * self.geometric_dimension run_c = self._c_locator(tolerance=tolerance) - cells_data = cells.ctypes.data_as(ctypes.POINTER(ctypes.c_int)) - ref_cells_dists = ref_cell_dists_l1.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) + cells_data = cells.ctypes.data_as(ctypes.POINTER(as_ctypes(IntType))) + ref_cells_dists = ref_cell_dists_l1.ctypes.data_as(ctypes.POINTER(as_ctypes(RealType))) xs_data = xs.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) - Xs_data = Xs.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) + Xs_data = Xs.ctypes.data_as(ctypes.POINTER(as_ctypes(RealType))) with PETSc.Log.Event("c_locator_run"): run_c(self.coordinates._ctypes, xs_data, Xs_data, ref_cells_dists, cells_data, npoints, cells_ignore.shape[1], cells_ignore) return cells, Xs, ref_cell_dists_l1 @@ -2730,9 +2732,10 @@ def _c_locator(self, tolerance=None): return cache[tolerance] except KeyError: IntTypeC = as_cstr(IntType) + RealTypeC = RealType_c src = pq_utils.src_locate_cell(self, tolerance=tolerance) src += dedent(f""" - int locator(struct Function *f, double *x, double *X, double *ref_cell_dists_l1, {IntTypeC} *cells, {IntTypeC} npoints, size_t ncells_ignore, int* cells_ignore) + {IntTypeC} locator(struct Function *f, double *x, {RealTypeC} *X, {RealTypeC} *ref_cell_dists_l1, {IntTypeC} *cells, {IntTypeC} npoints, size_t ncells_ignore, {IntTypeC}* cells_ignore) {{ {IntTypeC} j = 0; /* index into x and X */ for({IntTypeC} i=0; i @@ -66,7 +67,7 @@ def to_reference_coordinates(ufl_coordinate_element, parameters=None): %(to_reference_coords_newton_step)s -static inline void to_reference_coords_kernel(PetscScalar *X, const PetscScalar *x0, const PetscScalar *C) +static inline void to_reference_coords_kernel(%(RealType)s *X, const PetscScalar *x0, const PetscScalar *C) { const int space_dim = %(geometric_dimension)d; @@ -78,7 +79,7 @@ def to_reference_coordinates(ufl_coordinate_element, parameters=None): int converged = 0; for (int it = 0; !converged && it < %(max_iteration_count)d; it++) { - double dX[%(topological_dimension)d] = { 0.0 }; + PetscReal dX[%(topological_dimension)d] = { 0.0 }; to_reference_coords_newton_step(C, x0, X, dX); if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) { @@ -192,13 +193,13 @@ def prolong_kernel(expression, Vf): static void pyop2_kernel_prolong(PetscScalar *R, PetscScalar *f, const PetscScalar *X, const PetscScalar *Xc %(cell_orient)s%(cell_sizes)s) { - PetscScalar Xref[%(tdim)d]; + PetscReal Xref[%(tdim)d]; int cell = -1; int bestcell = -1; - double bestdist = 1e10; + PetscReal bestdist = 1e10; for (int i = 0; i < %(ncandidate)d; i++) { const PetscScalar *Xci = Xc + i*%(Xc_cell_inc)d; - double celldist = 2*bestdist; + PetscReal celldist = 2*bestdist; to_reference_coords_kernel(Xref, X, Xci); if (%(inside_cell)s) { cell = i; @@ -284,13 +285,13 @@ def restrict_kernel(Vf, Vc): static void pyop2_kernel_restrict(PetscScalar *R, PetscScalar *b, const PetscScalar *X, const PetscScalar *Xc %(cell_orient)s%(cell_sizes)s) { - PetscScalar Xref[%(tdim)d]; + PetscReal Xref[%(tdim)d]; int cell = -1; int bestcell = -1; - double bestdist = 1e10; + PetscReal bestdist = 1e10; for (int i = 0; i < %(ncandidate)d; i++) { const PetscScalar *Xci = Xc + i*%(Xc_cell_inc)d; - double celldist = 2*bestdist; + PetscReal celldist = 2*bestdist; to_reference_coords_kernel(Xref, X, Xci); if (%(inside_cell)s) { cell = i; @@ -405,7 +406,7 @@ def set_coordinates(self, domain): def _coefficient(self, coefficient, name): element = create_element(coefficient.ufl_element()) shape = self.shape + element.index_shape - size = numpy.prod(shape, dtype=int) + size = numpy.prod(shape, dtype=IntType) funarg = lp.GlobalArg(name, dtype=ScalarType, shape=(size,)) expression = gem.reshape(gem.Variable(name, (size, )), shape) expression = gem.partial_indexed(expression, self.indices) diff --git a/firedrake/pointeval_utils.py b/firedrake/pointeval_utils.py index f6c24f8b30..d3ac3b9625 100644 --- a/firedrake/pointeval_utils.py +++ b/firedrake/pointeval_utils.py @@ -1,5 +1,5 @@ import loopy as lp -from firedrake.utils import IntType, as_cstr +from firedrake.utils import IntType, RealType_c, as_cstr from finat.element_factory import as_fiat_cell from finat.point_set import UnknownPointSet @@ -160,6 +160,7 @@ def predicate(index): "extruded_define": "1" if extruded else "0", "IntType": as_cstr(IntType), "scalar_type": utils.ScalarType_c, + "real_type": RealType_c, } # if maps are the same, only need to pass one of them if coordinates.cell_node_map() == coefficient.cell_node_map(): @@ -177,9 +178,9 @@ def predicate(index): int evaluate(struct Function *f, double *x, %(scalar_type)s *result) { /* The type definitions and arguments used here are defined as statics in pointquery_utils.py */ - double found_ref_cell_dist_l1 = DBL_MAX; + %(real_type)s found_ref_cell_dist_l1 = PETSC_MAX_REAL; struct ReferenceCoords temp_reference_coords, found_reference_coords; - int cells_ignore[1] = {-1}; + %(IntType)s cells_ignore[1] = {-1}; %(IntType)s cell = locate_cell(f, x, %(geometric_dimension)d, &to_reference_coords, &to_reference_coords_xtr, &temp_reference_coords, &found_reference_coords, &found_ref_cell_dist_l1, 1, cells_ignore); if (cell == -1) { return -1; diff --git a/firedrake/pointquery_utils.py b/firedrake/pointquery_utils.py index 141eb3a939..ddf842f07c 100644 --- a/firedrake/pointquery_utils.py +++ b/firedrake/pointquery_utils.py @@ -152,10 +152,10 @@ def to_reference_coords_newton_step(ufl_coordinate_element, parameters, x0_dtype x0_expr = builder._coefficient(x0, "x0") loopy_args = [ lp.GlobalArg( - "C", dtype=ScalarType, shape=(numpy.prod(Cexpr.shape, dtype=int),) + "C", dtype=ScalarType, shape=(numpy.prod(Cexpr.shape, dtype=IntType),) ), lp.GlobalArg( - "x0", dtype=x0_dtype, shape=(numpy.prod(x0_expr.shape, dtype=int),) + "x0", dtype=x0_dtype, shape=(numpy.prod(x0_expr.shape, dtype=IntType),) ), ] @@ -232,7 +232,7 @@ def compile_coordinate_element(mesh: MeshGeometry, contains_eps: float, paramete "to_reference_coords_newton_step": to_reference_coords_newton_step(ufl_coordinate_element, parameters), "init_X": init_X(element.cell, parameters), "max_iteration_count": 1 if is_affine(ufl_coordinate_element) else 16, - "convergence_epsilon": 1e-12, + "convergence_epsilon": 1e-6 if numpy.dtype(ScalarType) == numpy.float32 else 1e-12, "dX_norm_square": dX_norm_square(mesh.topological_dimension), "X_isub_dX": X_isub_dX(mesh.topological_dimension), "extruded_arg": f", {as_cstr(IntType)} const *__restrict__ layers" if mesh.extruded else "", @@ -246,7 +246,7 @@ def compile_coordinate_element(mesh: MeshGeometry, contains_eps: float, paramete evaluate_template_c = """#include struct ReferenceCoords { - %(ScalarType)s X[%(geometric_dimension)d]; + %(RealType)s X[%(geometric_dimension)d]; }; static %(RealType)s tolerance = %(tolerance)s; /* used in locate_cell */ @@ -261,12 +261,12 @@ def compile_coordinate_element(mesh: MeshGeometry, contains_eps: float, paramete * Mapping coordinates from physical to reference space */ - %(ScalarType)s *X = result->X; + %(RealType)s *X = result->X; %(init_X)s int converged = 0; for (int it = 0; !converged && it < %(max_iteration_count)d; it++) { - %(ScalarType)s dX[%(topological_dimension)d] = { 0.0 }; + %(RealType)s dX[%(topological_dimension)d] = { 0.0 }; to_reference_coords_newton_step(C, x0, X, dX); if (%(dX_norm_square)s < %(convergence_epsilon)g * %(convergence_epsilon)g) { @@ -283,14 +283,14 @@ def compile_coordinate_element(mesh: MeshGeometry, contains_eps: float, paramete void* const result_, double* const x, %(RealType)s* const cell_dist_l1, %(IntType)s const start, %(IntType)s const end%(extruded_arg)s, %(ScalarType)s const *__restrict__ coords, %(IntType)s const *__restrict__ coords_map); -%(RealType)s to_reference_coords(void *result_, struct Function *f, int cell, double *x) +%(RealType)s to_reference_coords(void *result_, struct Function *f, %(IntType)s cell, double *x) { %(RealType)s cell_dist_l1 = 0.0; %(extr_comment_out)swrap_to_reference_coords(result_, x, &cell_dist_l1, cell, cell+1, f->coords, f->coords_map); return cell_dist_l1; } -%(RealType)s to_reference_coords_xtr(void *result_, struct Function *f, int cell, int layer, double *x) +%(RealType)s to_reference_coords_xtr(void *result_, struct Function *f, %(IntType)s cell, %(IntType)s layer, double *x) { %(RealType)s cell_dist_l1 = 0.0; %(non_extr_comment_out)s%(IntType)s layers[2] = {0, layer+2}; // +2 because the layer loop goes to layers[1]-1, which is nlayers-1 diff --git a/firedrake/utility_meshes.py b/firedrake/utility_meshes.py index cdf2cd3df1..ef3b386cde 100644 --- a/firedrake/utility_meshes.py +++ b/firedrake/utility_meshes.py @@ -330,7 +330,7 @@ def OneElementThickMesh( right = np.roll(left, -1) cells = np.array([left, left, right, right]).T dx = Lx / ncells - X = np.arange(1.0 * ncells, dtype=np.double) * dx + X = np.arange(1.0 * ncells, dtype=PETSc.RealType) * dx Y = 0.0 * X coords = np.array([X, Y]).T @@ -1185,8 +1185,8 @@ def CircleManifoldMesh( vertices = radius * np.column_stack( ( - np.cos(np.arange(ncells, dtype=np.double) * (2 * np.pi / ncells)), - np.sin(np.arange(ncells, dtype=np.double) * (2 * np.pi / ncells)), + np.cos(np.arange(ncells, dtype=PETSc.RealType) * (2 * np.pi / ncells)), + np.sin(np.arange(ncells, dtype=PETSc.RealType) * (2 * np.pi / ncells)), ) ) @@ -1255,7 +1255,7 @@ def UnitDiskMesh( """ vertices = np.array( [[0, 0], [1, 0], [1, 1], [0, 1], [-1, 1], [-1, 0], [-1, -1], [0, -1], [1, -1]], - dtype=np.double, + dtype=PETSc.RealType, ) cells = np.array( @@ -1343,7 +1343,7 @@ def UnitBallMesh( [0, -1, 0], [0, 0, -1], ], - dtype=np.double, + dtype=PETSc.RealType, ) cells = np.array( @@ -1643,9 +1643,9 @@ def BoxMesh( comm=comm, ) else: - xcoords = np.linspace(0, Lx, nx + 1, dtype=np.double) - ycoords = np.linspace(0, Ly, ny + 1, dtype=np.double) - zcoords = np.linspace(0, Lz, nz + 1, dtype=np.double) + xcoords = np.linspace(0, Lx, nx + 1, dtype=PETSc.RealType) + ycoords = np.linspace(0, Ly, ny + 1, dtype=PETSc.RealType) + zcoords = np.linspace(0, Lz, nz + 1, dtype=PETSc.RealType) return TensorBoxMesh( xcoords, ycoords, @@ -1872,9 +1872,9 @@ def PeriodicBoxMesh( # TODO: When hexahedra -> simplex refinement is implemented this can go away. if tuple(directions) != (True, True, True): raise NotImplementedError("Can only specify directions with hexahedral = True") - xcoords = np.arange(0.0, Lx, Lx / nx, dtype=np.double) - ycoords = np.arange(0.0, Ly, Ly / ny, dtype=np.double) - zcoords = np.arange(0.0, Lz, Lz / nz, dtype=np.double) + xcoords = np.arange(0.0, Lx, Lx / nx, dtype=PETSc.RealType) + ycoords = np.arange(0.0, Ly, Ly / ny, dtype=PETSc.RealType) + zcoords = np.arange(0.0, Lz, Lz / nz, dtype=PETSc.RealType) coords = ( np.asarray(np.meshgrid(xcoords, ycoords, zcoords)).swapaxes(0, 3).reshape(-1, 3) ) @@ -2093,7 +2093,7 @@ def IcosahedralSphereMesh( [-phi, 0, -1], [-phi, 0, 1], ], - dtype=np.double, + dtype=PETSc.RealType, ) # faces of the base icosahedron faces = np.array( @@ -2420,7 +2420,7 @@ def _cubedsphere_cells_and_coords(radius, refinement_level): # transformation dtheta = 2 ** (-refinement_level + 1) * np.arctan(1.0) a = 3.0 ** (-0.5) * radius - theta = np.arange(np.arctan(-1.0), np.arctan(1.0) + dtheta, dtheta, dtype=np.double) + theta = np.arange(np.arctan(-1.0), np.arctan(1.0) + dtheta, dtheta, dtype=PETSc.RealType) x = a * np.tan(theta) Nx = x.size @@ -2506,7 +2506,7 @@ def _cubedsphere_cells_and_coords(radius, refinement_level): # Set up an array for all of the mesh coordinates Npoints = panel_numbering.max() + 1 - coords = np.zeros((Npoints, 3), dtype=np.double) + coords = np.zeros((Npoints, 3), dtype=PETSc.RealType) lX, lY = np.meshgrid(x, x) lX.shape = (Nx**2,) lY.shape = (Nx**2,) @@ -2711,7 +2711,7 @@ def TorusMesh( r * np.sin(idx_temp[:, 1] * (2 * np.pi / nr)), ) ), - dtype=np.double, + dtype=PETSc.RealType, ) # cell vertices @@ -2913,7 +2913,7 @@ def CylinderMesh( np.column_stack( (np.tile(coord_xy, (nl + 1, 1)), np.tile(coord_z, (1, nr)).reshape(-1, 1)) ), - dtype=np.double, + dtype=PETSc.RealType, ) # intervals on circumference @@ -2929,7 +2929,7 @@ def CylinderMesh( if not quadrilateral and diagonal == "crossed": dxy = np.pi / nr Lxy = 2 * np.pi - extra_uv = np.linspace(dxy, Lxy - dxy, nr, dtype=np.double) + extra_uv = np.linspace(dxy, Lxy - dxy, nr, dtype=PETSc.RealType) extra_xy = radius * np.column_stack((np.cos(extra_uv), np.sin(extra_uv))) dz = 1 * 0.5 / nl extra_z = depth * np.linspace(dz, 1 - dz, nl).reshape(-1, 1) @@ -2937,7 +2937,7 @@ def CylinderMesh( np.column_stack( (np.tile(extra_xy, (nl, 1)), np.tile(extra_z, (1, nr)).reshape(-1, 1)) ), - dtype=np.double, + dtype=PETSc.RealType, ) origvertices = vertices vertices = np.vstack([vertices, extras]) @@ -2974,10 +2974,10 @@ def CylinderMesh( cells = cells[:, idx].reshape(-1, 3) if longitudinal_direction == "x": - rotation = np.asarray([[0, 0, 1], [0, 1, 0], [-1, 0, 0]], dtype=np.double) + rotation = np.asarray([[0, 0, 1], [0, 1, 0], [-1, 0, 0]], dtype=PETSc.RealType) vertices = np.dot(vertices, rotation.T) elif longitudinal_direction == "y": - rotation = np.asarray([[1, 0, 0], [0, 0, 1], [0, -1, 0]], dtype=np.double) + rotation = np.asarray([[1, 0, 0], [0, 0, 1], [0, -1, 0]], dtype=PETSc.RealType) vertices = np.dot(vertices, rotation.T) elif longitudinal_direction != "z": raise ValueError("Unknown longitudinal direction '%s'" % longitudinal_direction) diff --git a/firedrake/utils.py b/firedrake/utils.py index fa58f1f7a0..361e560529 100644 --- a/firedrake/utils.py +++ b/firedrake/utils.py @@ -21,6 +21,7 @@ IntType_c = as_cstr(IntType) complex_mode = (petsctools.get_petscvariables()["PETSC_SCALAR"].lower() == "complex") +single_mode = (petsctools.get_petscvariables()["PETSC_PRECISION"].lower() == "single") # Remove this (and update test suite) when Slate supports complex mode. SLATE_SUPPORTS_COMPLEX = False diff --git a/pyop2/codegen/builder.py b/pyop2/codegen/builder.py index d05e5aeeae..59fa0c8548 100644 --- a/pyop2/codegen/builder.py +++ b/pyop2/codegen/builder.py @@ -641,10 +641,10 @@ def pack(self, loop_indices=None): # Need to compute row and col shape based on individual pack shapes for p in self.packs[:, 0]: shape, _ = p.shapes - rshape += numpy.prod(shape, dtype=int) + rshape += numpy.prod(shape, dtype=IntType) for p in self.packs[0, :]: _, shape = p.shapes - cshape += numpy.prod(shape, dtype=int) + cshape += numpy.prod(shape, dtype=IntType) shape = (rshape, cshape) if self.access in {WRITE, INC}: val = Zero((), self.dtype) diff --git a/scripts/firedrake-configure b/scripts/firedrake-configure index abcca15926..47e651517a 100755 --- a/scripts/firedrake-configure +++ b/scripts/firedrake-configure @@ -42,10 +42,12 @@ CUDA = GPUArch.CUDA class FiredrakeArch(enum.Enum): DEFAULT = "default" COMPLEX = "complex" + SINGLE = "single" ARCH_DEFAULT = FiredrakeArch.DEFAULT ARCH_COMPLEX = FiredrakeArch.COMPLEX +ARCH_SINGLE = FiredrakeArch.SINGLE SUPPORTED_PETSC_VERSION = "v3.25.0" @@ -324,10 +326,13 @@ MINIMAL_SYSTEM_PACKAGES = { SYSTEM_PACKAGES = { (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES_NOGPU, (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES_NOGPU, + (LINUX_APT_X86_64, ARCH_SINGLE, NO_GPU): LINUX_APT_PACKAGES_NOGPU, (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU): LINUX_APT_PACKAGES_NOGPU, (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU): LINUX_APT_PACKAGES_NOGPU, + (LINUX_APT_AARCH64, ARCH_SINGLE, NO_GPU): LINUX_APT_PACKAGES_NOGPU, (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU): MACOS_HOMEBREW_PACKAGES, (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU): MACOS_HOMEBREW_PACKAGES, + (MACOS_HOMEBREW_ARM64, ARCH_SINGLE, NO_GPU): MACOS_HOMEBREW_PACKAGES, (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA): LINUX_APT_PACKAGES_CUDA, } @@ -581,6 +586,8 @@ def prepare_configure_options( if arch == ARCH_COMPLEX: configure_options.append("--with-scalar-type=complex") + elif arch == ARCH_SINGLE: + configure_options.append("--with-precision=single") if gpu_arch == CUDA: configure_options.extend( @@ -588,6 +595,11 @@ def prepare_configure_options( ) external_packages = list(COMMON_PETSC_EXTERNAL_PACKAGES) + if arch == ARCH_SINGLE: + # These packages lack fp32 support + for pkg in ("fftw", "suitesparse"): + if pkg in external_packages: + external_packages.remove(pkg) if arch != ARCH_COMPLEX: external_packages.append("hypre") if gpu_arch == CUDA: @@ -603,12 +615,16 @@ def prepare_configure_options( PETSC_VALID_BUILD_COMBINATIONS = ( (LINUX_APT_X86_64, ARCH_DEFAULT, NO_GPU), (LINUX_APT_X86_64, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_X86_64, ARCH_SINGLE, NO_GPU), (LINUX_APT_AARCH64, ARCH_DEFAULT, NO_GPU), (LINUX_APT_AARCH64, ARCH_COMPLEX, NO_GPU), + (LINUX_APT_AARCH64, ARCH_SINGLE, NO_GPU), (MACOS_HOMEBREW_ARM64, ARCH_DEFAULT, NO_GPU), (MACOS_HOMEBREW_ARM64, ARCH_COMPLEX, NO_GPU), + (MACOS_HOMEBREW_ARM64, ARCH_SINGLE, NO_GPU), (None, ARCH_DEFAULT, NO_GPU), (None, ARCH_COMPLEX, NO_GPU), + (None, ARCH_SINGLE, NO_GPU), (LINUX_APT_X86_64, ARCH_DEFAULT, CUDA), (LINUX_APT_AARCH64, ARCH_DEFAULT, CUDA), (None, ARCH_DEFAULT, CUDA), diff --git a/tests/firedrake/adjoint/test_covariance_operator.py b/tests/firedrake/adjoint/test_covariance_operator.py index a1980eda7a..94601bf5ea 100644 --- a/tests/firedrake/adjoint/test_covariance_operator.py +++ b/tests/firedrake/adjoint/test_covariance_operator.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.skipsingle + from firedrake import * from firedrake.adjoint import * diff --git a/tests/firedrake/conftest.py b/tests/firedrake/conftest.py index 571b6082a6..271994cc32 100644 --- a/tests/firedrake/conftest.py +++ b/tests/firedrake/conftest.py @@ -170,10 +170,14 @@ def pytest_configure(config): "markers", "skipnogpu: mark as skipped when GPU hardware is unavailable" ) + config.addinivalue_line( + "markers", + "skipsingle: mark as skipped in single precision (fp32) mode" + ) def pytest_collection_modifyitems(session, config, items): - from firedrake.utils import complex_mode, device_matrix_type, SLATE_SUPPORTS_COMPLEX + from firedrake.utils import complex_mode, single_mode, device_matrix_type, SLATE_SUPPORTS_COMPLEX for item in items: if complex_mode: @@ -189,6 +193,10 @@ def pytest_collection_modifyitems(session, config, items): if item.get_closest_marker("skipnogpu") is not None: item.add_marker(pytest.mark.skip(reason="Test requires GPU hardware to run.")) + if single_mode: + if item.get_closest_marker("skipsingle") is not None: + item.add_marker(pytest.mark.skip(reason="Test not supported in single precision (fp32) mode")) + for dep, marker, reason in dependency_skip_markers_and_reasons: if item.get_closest_marker(marker) is not None and _skip_test_dependency(dep): item.add_marker(pytest.mark.skip(reason)) diff --git a/tests/firedrake/regression/test_adjoint_operators.py b/tests/firedrake/regression/test_adjoint_operators.py index 57faf80477..9d9c8ff627 100644 --- a/tests/firedrake/regression/test_adjoint_operators.py +++ b/tests/firedrake/regression/test_adjoint_operators.py @@ -564,6 +564,7 @@ def supermesh_setup(vector=False): return source, target_space +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_self_supermesh_project(): source, target_space = supermesh_setup() @@ -586,6 +587,7 @@ def test_self_supermesh_project(): assert np.isclose(rf(h), 10.0) +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_supermesh_project_function(): source, target_space = supermesh_setup() @@ -608,6 +610,7 @@ def test_supermesh_project_function(): assert np.isclose(rf(h), 10.0) +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_supermesh_project_to_function_space(): source, target_space = supermesh_setup() @@ -629,6 +632,7 @@ def test_supermesh_project_to_function_space(): assert np.isclose(rf(h), 10.0) +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_supermesh_project_gradient(vector, rg): source, target_space = supermesh_setup() @@ -644,6 +648,7 @@ def test_supermesh_project_gradient(vector, rg): assert minconv > 1.9 +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_supermesh_project_tlm(vector): source, target_space = supermesh_setup() @@ -664,6 +669,7 @@ def test_supermesh_project_tlm(vector): assert taylor_test(rf, source, h, dJdm=J.block_variable.tlm_value) > 1.9 +@pytest.mark.skipsingle @pytest.mark.skipcomplex # Taping for complex-valued 0-forms not yet done def test_supermesh_project_hessian(vector, rg): source, target_space = supermesh_setup() diff --git a/tests/firedrake/regression/test_covariance_operator.py b/tests/firedrake/regression/test_covariance_operator.py index c8bb09b87f..fa4fcdb0cd 100644 --- a/tests/firedrake/regression/test_covariance_operator.py +++ b/tests/firedrake/regression/test_covariance_operator.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.skipsingle + import numpy as np from scipy.sparse import csr_array import petsctools diff --git a/tests/firedrake/regression/test_interpolate_cross_mesh.py b/tests/firedrake/regression/test_interpolate_cross_mesh.py index c41f37dbab..a7ff194470 100644 --- a/tests/firedrake/regression/test_interpolate_cross_mesh.py +++ b/tests/firedrake/regression/test_interpolate_cross_mesh.py @@ -44,7 +44,8 @@ def make_high_order(m_low_order, degree): "circlemanifold_to_high_order", "unitsquare_from_high_order", "unitsquare_to_high_order", - "extrudedcube", + # This test uses VertexOnlyMesh which is broken in fp32 (petsc4py DMSwarm.getField/PETSC_DOUBLE) + pytest.param("extrudedcube", marks=pytest.mark.skipsingle), "unitsquare_vfs", "unitsquare_tfs", "unitsquare_N1curl_source", diff --git a/tests/firedrake/regression/test_locate_cell.py b/tests/firedrake/regression/test_locate_cell.py index 5968aac8ca..d0cb724fb7 100644 --- a/tests/firedrake/regression/test_locate_cell.py +++ b/tests/firedrake/regression/test_locate_cell.py @@ -151,6 +151,7 @@ def test_high_order_location_warped_interior_facet(): assert not np.isclose(dists[0], 0.0) +@pytest.mark.skipsingle # High-order cell location in warped mesh requires double precision; fails in fp32 @pytest.mark.parallel([1, 3]) def test_parallel_high_order_location(): mesh = UnitSquareMesh(2, 2) diff --git a/tests/firedrake/regression/test_matrix_free.py b/tests/firedrake/regression/test_matrix_free.py index aaf9f60f49..9157b403f8 100644 --- a/tests/firedrake/regression/test_matrix_free.py +++ b/tests/firedrake/regression/test_matrix_free.py @@ -1,11 +1,13 @@ from firedrake import * from firedrake.petsc import PETSc -from firedrake.utils import ScalarType +from firedrake.utils import ScalarType, single_mode from firedrake.solving_utils import DEFAULT_KSP_PARAMETERS import pytest import numpy as np from mpi4py import MPI +_fp32 = single_mode + @pytest.fixture def mesh(): @@ -205,7 +207,7 @@ def test_fieldsplitting(mesh, preassembled, parameters, rhs): f -= expect for d in f.dat.data_ro: - assert np.allclose(d, 0.0) + assert np.allclose(d, 0.0, atol=1e-5 if _fp32 else 1e-8) @pytest.mark.parallel(nprocs=4) diff --git a/tests/firedrake/regression/test_nullspace.py b/tests/firedrake/regression/test_nullspace.py index e7ba515579..5fc91729c5 100644 --- a/tests/firedrake/regression/test_nullspace.py +++ b/tests/firedrake/regression/test_nullspace.py @@ -1,7 +1,10 @@ from firedrake import * +from firedrake.utils import single_mode import pytest import numpy as np +_fp32 = single_mode + @pytest.fixture(scope='module', params=[False, True]) def V(request): @@ -194,8 +197,9 @@ def sigma(fn): 'mat_type': 'aij'}) # check that both solutions are equal to the exact solution - assert sqrt(assemble(inner(w1-w2, w1-w2)*dx)) < 1e-7 - assert sqrt(assemble(inner(w1-w_exact, w1-w_exact)*dx)) < 1e-7 + _tol = 1e-4 if _fp32 else 1e-7 + assert sqrt(assemble(inner(w1-w2, w1-w2)*dx)) < _tol + assert sqrt(assemble(inner(w1-w_exact, w1-w_exact)*dx)) < _tol with open(wo_nns_log, "r") as f: f.readline() diff --git a/tests/firedrake/regression/test_stokes_mini.py b/tests/firedrake/regression/test_stokes_mini.py index 4c0d33bd99..93ad298d96 100644 --- a/tests/firedrake/regression/test_stokes_mini.py +++ b/tests/firedrake/regression/test_stokes_mini.py @@ -62,6 +62,7 @@ def run_stokes_mini(mat_type, n): return errornorm(uexact, u, degree_rise=0), errornorm(pexact, p, degree_rise=0) +@pytest.mark.skipsingle # Schur complement fieldsplit stalls in fp32; needs a separate fp32 solver test @pytest.mark.parametrize('mat_type', ["aij", "nest"]) def test_stokes_mini(mat_type): u_err = [] diff --git a/tests/firedrake/regression/test_vertex_based_limiter.py b/tests/firedrake/regression/test_vertex_based_limiter.py index 6d3c0b6f0e..5cf56a31a4 100644 --- a/tests/firedrake/regression/test_vertex_based_limiter.py +++ b/tests/firedrake/regression/test_vertex_based_limiter.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.skipsingle + from firedrake import * import numpy as np diff --git a/tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py b/tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py index 9e8e07e013..f923623d71 100644 --- a/tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py +++ b/tests/firedrake/supermesh/test_assemble_mixed_mass_matrix.py @@ -3,6 +3,8 @@ from firedrake.supermeshing import * import pytest +pytestmark = pytest.mark.skipsingle + @pytest.fixture(params=[2, 3]) def mesh(request): diff --git a/tests/firedrake/supermesh/test_galerkin_projection.py b/tests/firedrake/supermesh/test_galerkin_projection.py index cc20b8b3b9..673fe5c6af 100644 --- a/tests/firedrake/supermesh/test_galerkin_projection.py +++ b/tests/firedrake/supermesh/test_galerkin_projection.py @@ -6,6 +6,8 @@ import numpy import pytest +pytestmark = pytest.mark.skipsingle + @pytest.fixture(params=[2, 3]) def mesh(request): diff --git a/tests/firedrake/supermesh/test_intersection_finder_nested.py b/tests/firedrake/supermesh/test_intersection_finder_nested.py index d842ac6706..fc75d4e7f3 100644 --- a/tests/firedrake/supermesh/test_intersection_finder_nested.py +++ b/tests/firedrake/supermesh/test_intersection_finder_nested.py @@ -2,6 +2,8 @@ from firedrake.supermeshing import * import pytest +pytestmark = pytest.mark.skipsingle + @pytest.fixture(params=[2, "q", 3]) def mesh(request): diff --git a/tests/firedrake/supermesh/test_nonnested_project.py b/tests/firedrake/supermesh/test_nonnested_project.py index 798228908d..8f1a766706 100644 --- a/tests/firedrake/supermesh/test_nonnested_project.py +++ b/tests/firedrake/supermesh/test_nonnested_project.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.skipsingle + import numpy from firedrake import * from firedrake.utils import IntType diff --git a/tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py b/tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py index e2173bb1e3..7d99b2dfaf 100644 --- a/tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py +++ b/tests/firedrake/supermesh/test_nonnested_project_no_hierarchy.py @@ -1,4 +1,7 @@ import pytest + +pytestmark = pytest.mark.skipsingle + import numpy from firedrake import * from itertools import product diff --git a/tests/firedrake/supermesh/test_periodic.py b/tests/firedrake/supermesh/test_periodic.py index 1780c6cc79..3ce2a1bde1 100644 --- a/tests/firedrake/supermesh/test_periodic.py +++ b/tests/firedrake/supermesh/test_periodic.py @@ -1,6 +1,8 @@ from firedrake import * import pytest +pytestmark = pytest.mark.skipsingle + @pytest.fixture(params=["scalar", "vector", "tensor"]) def shapify(request): diff --git a/tsfc/kernel_interface/common.py b/tsfc/kernel_interface/common.py index 40d585786f..3df6d33ff9 100644 --- a/tsfc/kernel_interface/common.py +++ b/tsfc/kernel_interface/common.py @@ -12,6 +12,7 @@ import gem.impero_utils as impero_utils import petsctools import numpy +from pyop2.datatypes import IntType from FIAT.reference_element import TensorProductCell from finat.cell_tools import max_complex from finat.quadrature import AbstractQuadratureRule @@ -478,7 +479,7 @@ def prepare_constant(constant, number): :returns: (funarg, expression) expression - GEM expression referring to the Constant value(s) """ - value_size = numpy.prod(constant.ufl_shape, dtype=int) + value_size = numpy.prod(constant.ufl_shape, dtype=IntType) return gem.reshape(gem.Variable(f"c_{number}", (value_size,)), constant.ufl_shape) @@ -504,7 +505,7 @@ def prepare_coefficient(coefficient, name, domain_integral_type_map): """ finat_element = create_element(coefficient.ufl_element()) shape = finat_element.index_shape - size = numpy.prod(shape, dtype=int) + size = numpy.prod(shape, dtype=IntType) domain = extract_unique_domain(coefficient) integral_type = domain_integral_type_map[domain] if integral_type is None: @@ -569,7 +570,7 @@ def expression(restricted): return gem.Indexed(gem.reshape(restricted, *shapes), tuple(chain(*multiindices))) - u_shape = numpy.array([numpy.prod(shape, dtype=int) for shape in shapes]) + u_shape = numpy.array([numpy.prod(shape, dtype=IntType) for shape in shapes]) c_shape = copy.deepcopy(u_shape) rs_tuples = [] for arg_num, arg in enumerate(arguments): diff --git a/tsfc/loopy.py b/tsfc/loopy.py index 6826f0b672..3077bfa671 100644 --- a/tsfc/loopy.py +++ b/tsfc/loopy.py @@ -237,7 +237,9 @@ def generate(impero_c, args, scalar_type, kernel_name="loopy_kernel", index_name for i, (temp, dtype) in enumerate(assign_dtypes(impero_c.temporaries, scalar_type)): name = "t%d" % i if isinstance(temp, gem.Constant): - data.append(lp.TemporaryVariable(name, shape=temp.shape, dtype=dtype, initializer=temp.array, address_space=lp.AddressSpace.LOCAL, read_only=True)) + # Cast initializer to match dtype — necessary for non-double precision (e.g., float32) + initializer = temp.array.astype(dtype) if temp.array.dtype != dtype else temp.array + data.append(lp.TemporaryVariable(name, shape=temp.shape, dtype=dtype, initializer=initializer, address_space=lp.AddressSpace.LOCAL, read_only=True)) else: shape = tuple([i.extent for i in ctx.indices[temp]]) + temp.shape data.append(lp.TemporaryVariable(name, shape=shape, dtype=dtype, initializer=None, address_space=lp.AddressSpace.LOCAL, read_only=False)) diff --git a/tsfc/parameters.py b/tsfc/parameters.py index af44ce0cd4..9992e06da9 100644 --- a/tsfc/parameters.py +++ b/tsfc/parameters.py @@ -1,6 +1,11 @@ import numpy from loopy.target.c import CWithGNULibcTarget +try: + from petsc4py.PETSc import ScalarType as _PETScScalarType +except ImportError: + _PETScScalarType = numpy.float64 + PARAMETERS = { "quadrature_rule": "auto", @@ -15,11 +20,15 @@ # that makes compilation time much shorter. "unroll_indexsum": 3, - # Scalar type numpy dtype - "scalar_type": numpy.dtype(numpy.float64), + # Scalar type numpy dtype — derived from PETSc compile-time precision + "scalar_type": numpy.dtype(_PETScScalarType), - # So that tests pass (needs to match scalar_type) - "scalar_type_c": "double", + # C type string matching scalar_type + "scalar_type_c": {numpy.dtype(numpy.float32): "float", + numpy.dtype(numpy.float64): "double", + numpy.dtype(numpy.complex128): "double complex", + numpy.dtype(numpy.complex64): "float complex", + }.get(numpy.dtype(_PETScScalarType), "double"), # Whether to wrap the generated kernels in a PETSc event "add_petsc_events": False, diff --git a/tsfc/ufl_utils.py b/tsfc/ufl_utils.py index c26febd68e..422196fb24 100644 --- a/tsfc/ufl_utils.py +++ b/tsfc/ufl_utils.py @@ -3,6 +3,7 @@ from functools import singledispatch import numpy +from pyop2.datatypes import IntType import ufl from ufl import as_tensor, indices, replace @@ -421,7 +422,7 @@ def apply_mapping(expression, element, domain): rvs = sub_elem.reference_value_shape seen = set() rpieces = [] - gm = int(numpy.prod(vs, dtype=int)) + gm = int(numpy.prod(vs, dtype=IntType)) for gi, ri in enumerate(fcm): # For each unique piece in reference space if ri in seen: