diff --git a/docs/user/next/workshop/exercises/helpers.py b/docs/user/next/workshop/exercises/helpers.py index 560252eb4f..301f4f064c 100644 --- a/docs/user/next/workshop/exercises/helpers.py +++ b/docs/user/next/workshop/exercises/helpers.py @@ -14,7 +14,7 @@ from gt4py.next import Dimension, DimensionKind, FieldOffset from gt4py.next.program_processors.runners import roundtrip from gt4py.next.program_processors.runners.gtfn import ( - run_gtfn_cached as gtfn_cpu, + run_gtfn as gtfn_cpu, run_gtfn_gpu as gtfn_gpu, ) diff --git a/src/gt4py/next/__init__.py b/src/gt4py/next/__init__.py index 04c2f12574..0a3702f0f3 100644 --- a/src/gt4py/next/__init__.py +++ b/src/gt4py/next/__init__.py @@ -93,10 +93,7 @@ where, ) from .otf.compiled_program import wait_for_compilation -from .program_processors.runners.gtfn import ( - run_gtfn_cached as gtfn_cpu, - run_gtfn_gpu_cached as gtfn_gpu, -) +from .program_processors.runners.gtfn import run_gtfn as gtfn_cpu, run_gtfn_gpu as gtfn_gpu from .program_processors.runners.roundtrip import default as itir_python diff --git a/src/gt4py/next/program_processors/formatters/gtfn.py b/src/gt4py/next/program_processors/formatters/gtfn.py index 1d65b8d8d0..75494a1759 100644 --- a/src/gt4py/next/program_processors/formatters/gtfn.py +++ b/src/gt4py/next/program_processors/formatters/gtfn.py @@ -16,11 +16,10 @@ @program_formatter.program_formatter def format_cpp(program: itir.Program, *args: Any, **kwargs: Any) -> str: - # TODO(tehrengruber): This is a little ugly. Revisit. - gtfn_translation = gtfn.GTFNBackendFactory().executor.translation # type: ignore[attr-defined] + gtfn_translation = gtfn.GTFNCompileWorkflowFactory(cached_translation=False).translation assert isinstance(gtfn_translation, GTFNTranslationStep) return gtfn_translation.generate_stencil_source( program, - offset_provider=kwargs.get("offset_provider", None), # type: ignore[arg-type] + offset_provider=kwargs.get("offset_provider", {}), column_axis=kwargs.get("column_axis", None), ) diff --git a/src/gt4py/next/program_processors/runners/dace/__init__.py b/src/gt4py/next/program_processors/runners/dace/__init__.py index 0bb2c40dc3..0e560fa761 100644 --- a/src/gt4py/next/program_processors/runners/dace/__init__.py +++ b/src/gt4py/next/program_processors/runners/dace/__init__.py @@ -11,10 +11,8 @@ from gt4py.next.program_processors.runners.dace.workflow.backend import ( make_dace_backend, run_dace_cpu, - run_dace_cpu_cached, run_dace_cpu_noopt, run_dace_gpu, - run_dace_gpu_cached, run_dace_gpu_noopt, ) @@ -23,9 +21,7 @@ "get_sdfg_args", "make_dace_backend", "run_dace_cpu", - "run_dace_cpu_cached", "run_dace_cpu_noopt", "run_dace_gpu", - "run_dace_gpu_cached", "run_dace_gpu_noopt", ] diff --git a/src/gt4py/next/program_processors/runners/dace/workflow/backend.py b/src/gt4py/next/program_processors/runners/dace/workflow/backend.py index de6778a750..ad95804ddb 100644 --- a/src/gt4py/next/program_processors/runners/dace/workflow/backend.py +++ b/src/gt4py/next/program_processors/runners/dace/workflow/backend.py @@ -52,6 +52,7 @@ class Params: hash_function = stages.compilation_hash otf_workflow = factory.SubFactory( DaCeWorkflowFactory, + cached_translation=True, device_type=factory.SelfAttribute("..device_type"), auto_optimize=factory.SelfAttribute("..auto_optimize"), ) @@ -127,7 +128,6 @@ def make_dace_backend( gpu=gpu, cached=cached, auto_optimize=auto_optimize, - otf_workflow__cached_translation=cached, otf_workflow__bare_translation__async_sdfg_call=(async_sdfg_call if gpu else False), otf_workflow__bare_translation__auto_optimize_args=optimization_args, otf_workflow__bare_translation__unstructured_horizontal_has_unit_stride=unstructured_horizontal_has_unit_stride, @@ -139,38 +139,22 @@ def make_dace_backend( run_dace_cpu = make_dace_backend( gpu=False, - cached=False, auto_optimize=True, async_sdfg_call=False, ) run_dace_cpu_noopt = make_dace_backend( gpu=False, - cached=False, auto_optimize=False, async_sdfg_call=False, ) -run_dace_cpu_cached = make_dace_backend( - gpu=False, - cached=True, - auto_optimize=True, - async_sdfg_call=False, -) run_dace_gpu = make_dace_backend( gpu=True, - cached=False, auto_optimize=True, async_sdfg_call=True, ) run_dace_gpu_noopt = make_dace_backend( gpu=True, - cached=False, auto_optimize=False, async_sdfg_call=True, ) -run_dace_gpu_cached = make_dace_backend( - gpu=True, - cached=True, - auto_optimize=True, - async_sdfg_call=True, -) diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py index d0e9478b09..039e83d481 100644 --- a/src/gt4py/next/program_processors/runners/gtfn.py +++ b/src/gt4py/next/program_processors/runners/gtfn.py @@ -137,7 +137,6 @@ class Params: ) translation = factory.LazyAttribute(lambda o: o.bare_translation) - bindings: workflow.Workflow[stages.ProgramSource, stages.CompilableProject] = ( nanobind.bind_source ) @@ -158,7 +157,6 @@ class Meta: class Params: name_device = "cpu" name_cached = "" - name_temps = "" name_postfix = "" gpu = factory.Trait( allocator=next_allocators.StandardGPUFieldBufferAllocator(), @@ -174,11 +172,13 @@ class Params: device_type = core_defs.DeviceType.CPU hash_function = stages.compilation_hash otf_workflow = factory.SubFactory( - GTFNCompileWorkflowFactory, device_type=factory.SelfAttribute("..device_type") + GTFNCompileWorkflowFactory, + cached_translation=True, + device_type=factory.SelfAttribute("..device_type"), ) name = factory.LazyAttribute( - lambda o: f"run_gtfn_{o.name_device}{o.name_temps}{o.name_cached}{o.name_postfix}" + lambda o: f"run_gtfn_{o.name_device}{o.name_cached}{o.name_postfix}" ) executor = factory.LazyAttribute(lambda o: o.otf_workflow) @@ -186,20 +186,12 @@ class Params: transforms = backend.DEFAULT_TRANSFORMS -run_gtfn = GTFNBackendFactory() +run_gtfn = GTFNBackendFactory(cached=True) run_gtfn_imperative = GTFNBackendFactory( - name_postfix="_imperative", otf_workflow__translation__use_imperative_backend=True + cached=True, + name_postfix="_imperative", + otf_workflow__translation__use_imperative_backend=True, ) -run_gtfn_cached = GTFNBackendFactory(cached=True, otf_workflow__cached_translation=True) - -run_gtfn_gpu = GTFNBackendFactory(gpu=True) - -run_gtfn_gpu_cached = GTFNBackendFactory( - gpu=True, cached=True, otf_workflow__cached_translation=True -) - -run_gtfn_no_transforms = GTFNBackendFactory( - otf_workflow__bare_translation__enable_itir_transforms=False -) +run_gtfn_gpu = GTFNBackendFactory(cached=True, gpu=True) diff --git a/tests/next_tests/benchmarks/benchmark_program_call.py b/tests/next_tests/benchmarks/benchmark_program_call.py index b031245ab5..166509e8f3 100644 --- a/tests/next_tests/benchmarks/benchmark_program_call.py +++ b/tests/next_tests/benchmarks/benchmark_program_call.py @@ -24,9 +24,9 @@ from gt4py.next.program_processors.runners import dace as dace_backends DACE_BACKENDS = ( - [dace_backends.run_dace_cpu_cached, dace_backends.run_dace_gpu_cached] + [dace_backends.run_dace_cpu, dace_backends.run_dace_gpu] if cp is not None - else [dace_backends.run_dace_cpu_cached] + else [dace_backends.run_dace_cpu] ) except ImportError: DACE_BACKENDS = [] @@ -357,9 +357,9 @@ def benchmark(*args, **kwargs): backend_name = (arg.split("=", 1)[-1]).strip() match backend_name: case "dace-cpu": - backends.append(dace_backends.run_dace_cpu_cached) + backends.append(dace_backends.run_dace_cpu) case "dace-gpu": - backends.append(dace_backends.run_dace_gpu_cached) + backends.append(dace_backends.run_dace_gpu) case "gtfn-cpu": backends.append(gtfn_cpu) case "gtfn-gpu": diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_temporaries_with_sizes.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_temporaries_with_sizes.py index 90c0d775f2..bc3b0f26ff 100644 --- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_temporaries_with_sizes.py +++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_temporaries_with_sizes.py @@ -11,7 +11,8 @@ from gt4py import next as gtx from gt4py.next import backend, common from gt4py.next.iterator.transforms import apply_common_transforms -from gt4py.next.program_processors.runners.gtfn import run_gtfn +from gt4py.next.program_processors.runners import gtfn +from gt4py.next import custom_layout_allocators as next_allocators from next_tests.integration_tests import cases from next_tests.integration_tests.cases import ( @@ -33,8 +34,8 @@ def exec_alloc_descriptor(): return backend.Backend( name="run_gtfn_with_temporaries_and_sizes", transforms=backend.DEFAULT_TRANSFORMS, - executor=run_gtfn.executor.replace( - translation=run_gtfn.executor.translation.replace( + executor=gtfn.GTFNCompileWorkflowFactory( + translation=gtfn.gtfn_module.GTFNTranslationStepFactory( symbolic_domain_sizes={ "Cell": "num_cells", "Edge": "num_edges", @@ -42,7 +43,7 @@ def exec_alloc_descriptor(): } ) ), - allocator=run_gtfn.allocator, + allocator=next_allocators.StandardCPUFieldBufferAllocator(), ) diff --git a/tests/next_tests/integration_tests/feature_tests/instrumentation_tests/test_hooks.py b/tests/next_tests/integration_tests/feature_tests/instrumentation_tests/test_hooks.py index 3862016ee4..9f51689d77 100644 --- a/tests/next_tests/integration_tests/feature_tests/instrumentation_tests/test_hooks.py +++ b/tests/next_tests/integration_tests/feature_tests/instrumentation_tests/test_hooks.py @@ -21,7 +21,7 @@ try: from gt4py.next.program_processors.runners import dace as dace_backends - BACKENDS = [None, gtfn_cpu, dace_backends.run_dace_cpu_cached] + BACKENDS = [None, gtfn_cpu, dace_backends.run_dace_cpu] except ImportError: BACKENDS = [None, gtfn_cpu] diff --git a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py index 4fff5192aa..131006c60b 100644 --- a/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py +++ b/tests/next_tests/integration_tests/feature_tests/iterator_tests/test_builtins.py @@ -50,7 +50,7 @@ abs, ) from gt4py.next.iterator.runtime import fendef, fundef, offset, set_at -from gt4py.next.program_processors.runners.gtfn import run_gtfn +from gt4py.next.program_processors.runners import gtfn from next_tests.integration_tests.feature_tests.math_builtin_test_data import math_builtin_test_data from next_tests.unit_tests.conftest import program_processor, run_processor @@ -189,11 +189,8 @@ def test_arithmetic_and_logical_functors_gtfn(builtin, inputs, expected): inps = field_maker(*array_maker(*inputs)) out = field_maker((np.zeros_like(*array_maker(expected))))[0] - gtfn_without_transforms = dataclasses.replace( - run_gtfn, - executor=run_gtfn.executor.replace( - translation=run_gtfn.executor.translation.replace(enable_itir_transforms=False), - ), # avoid inlining the function + gtfn_without_transforms = gtfn.GTFNBackendFactory( + otf_workflow__bare_translation__enable_itir_transforms=False ) fencil(builtin, out, *inps, processor=gtfn_without_transforms) diff --git a/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py b/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py index d027c9dcb1..fbafd21d71 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py +++ b/tests/next_tests/unit_tests/program_processor_tests/codegens_tests/gtfn_tests/test_gtfn_module.py @@ -133,13 +133,13 @@ def test_gtfn_file_cache(program_example): data=fencil, args=arguments.CompileTimeArgs.from_concrete(*parameters, **{"offset_provider": {}}), ) - cached_gtfn_translation_step = gtfn.GTFNBackendFactory( - gpu=False, cached=True, otf_workflow__cached_translation=True - ).executor.step.translation + cached_gtfn_translation_step = gtfn.GTFNCompileWorkflowFactory( + cached_translation=True + ).translation - bare_gtfn_translation_step = gtfn.GTFNBackendFactory( - gpu=False, cached=True, otf_workflow__cached_translation=False - ).executor.step.translation + bare_gtfn_translation_step = gtfn.GTFNCompileWorkflowFactory( + cached_translation=False + ).translation cache_key = stages.fingerprint_compilable_program(compilable_program) @@ -156,29 +156,3 @@ def test_gtfn_file_cache(program_example): bare_gtfn_translation_step(compilable_program) == cached_gtfn_translation_step.cache[cache_key] ) - - -# TODO(egparedes): we should switch to use the cached backend by default and then remove this test -def test_gtfn_file_cache_whole_workflow(cartesian_case_no_backend): - cartesian_case = cartesian_case_no_backend - cartesian_case.backend = gtfn.GTFNBackendFactory( - gpu=False, cached=True, otf_workflow__cached_translation=True - ) - cartesian_case.allocator = next_allocators.StandardCPUFieldBufferAllocator() - - assert cartesian_case.backend is not None - assert cartesian_case.allocator is not None - - @gtx.field_operator - def testee(a: cases.IJKField) -> cases.IJKField: - field_tuple = (a, a) - field_0 = field_tuple[0] - field_1 = field_tuple[1] - return field_0 - - # first call: this generates the cache file - cases.verify_with_default_data(cartesian_case, testee, ref=lambda a: a) - # clearing the OTFCompileWorkflow cache such that the OTFCompileWorkflow step is executed again - object.__setattr__(cartesian_case.backend.executor, "cache", {}) - # second call: the cache file is used - cases.verify_with_default_data(cartesian_case, testee, ref=lambda a: a) diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py index a204886690..a6375fbad6 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py @@ -37,9 +37,9 @@ # see https://docs.pytest.org/en/latest/how-to/fixtures.html#override-a-fixture-on-a-test-module-level @pytest.fixture( params=[ - pytest.param(dace_backends.run_dace_cpu_cached, marks=pytest.mark.requires_dace), + pytest.param(dace_backends.run_dace_cpu, marks=pytest.mark.requires_dace), pytest.param( - dace_backends.run_dace_gpu_cached, + dace_backends.run_dace_gpu, marks=(pytest.mark.requires_gpu, pytest.mark.requires_dace), ), ] diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py index 25dae344f2..cd8245ee99 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace_bindings.py @@ -8,6 +8,7 @@ """Test the bindings stage of the dace backend workflow.""" +import dataclasses import functools import numpy as np import pytest @@ -298,10 +299,12 @@ def testee( backend = dace_runner.make_dace_backend( gpu=False, cached=False, - auto_optimize=True, use_metrics=use_metrics, use_zero_origin=use_zero_origin, ) + backend = dataclasses.replace( + backend, executor=backend.executor.replace(translation=backend.executor.translation.step) + ) # TODO(epaone): remove this line when the workflow stage cache is fixed (PR#2609) monkeypatch.setattr( dace_workflow.compilation.DaCeCompiler, "__call__", @@ -354,10 +357,12 @@ def testee(a: cases.VField, b: cases.VField): backend = dace_runner.make_dace_backend( gpu=False, cached=False, - auto_optimize=True, use_metrics=use_metrics, use_zero_origin=use_zero_origin, ) + backend = dataclasses.replace( + backend, executor=backend.executor.replace(translation=backend.executor.translation.step) + ) # TODO(epaone): remove this line when the workflow stage cache is fixed (PR#2609) monkeypatch.setattr( dace_workflow.compilation.DaCeCompiler, "__call__", diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py index 96d8c6e27c..7188b11113 100644 --- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py +++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py @@ -34,8 +34,10 @@ def test_backend_factory_trait_device(): assert cpu_version.name == "run_gtfn_cpu" assert gpu_version.name == "run_gtfn_gpu" - assert cpu_version.executor.translation.device_type is core_defs.DeviceType.CPU - assert gpu_version.executor.translation.device_type is core_defs.DeviceType.CUDA + assert isinstance(cpu_version.executor.translation, workflow.CachedStep) + assert cpu_version.executor.translation.step.device_type is core_defs.DeviceType.CPU + assert isinstance(gpu_version.executor.translation, workflow.CachedStep) + assert gpu_version.executor.translation.step.device_type is core_defs.DeviceType.CUDA assert cpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CPU assert gpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CUDA