diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 00000000..2b14e412
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,104 @@
+name: NEW Unit Tests on GPU (Modal)  # also part of the concurrency group key via github.workflow
+
+# This CI is running on modal.com's GPUs.
+#
+# It's set up here on github actions and then the cloned repo is sent to modal and everything
+# happens on their hw - see ci/gpu_unit_tests.py for where the actual vm is loaded, updated and the
+# tests are run.
+#
+# Both files are annotated to explain what's important and how to change or update things if needed.
+#
+# Note that since this is a Required job we can't use the `on.<event>.paths` file filter - instead a
+# special quick collect-tests job does the filtering for us, so that the GPU job can be skipped and
+# still satisfy the Required status for PRs to pass.
+
+on:
+  # NOTE(review): `pull_request_target` runs in the context of the PR's base branch (with secrets),
+  # so `github.ref`/`github.sha` point at main rather than at the PR head - confirm this is intended.
+  pull_request_target:
+    branches: [main]
+  push:
+    branches: [main]
+    # no `paths` filters here: this is a Required job, and a run skipped by them leaves the check
+    # pending, which blocks the PR - the `collect-tests` job does the filtering instead.
+
+concurrency:  # one group per PR: with pull_request_target `github.ref` is the base branch (main)
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  collect-tests:
+    name: NEW Collect tests to run
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      pull-requests: read
+    outputs:  # `arctictraining` is 'true' when a test-relevant file changed; gates the deploy job
+      arctictraining: ${{ steps.filter.outputs.arctictraining }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Filter changed files
+        uses: dorny/paths-filter@v3
+        id: filter
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          # 'every': a file must match ALL patterns (hence the one `{..}` include list). With the default
+          # 'some', each `!dir/**` alone matches nearly every file, making the filter always 'true'.
+          predicate-quantifier: 'every'
+          filters: |
+            arctictraining:
+              - '{**.py,.github/workflows/test.yaml,ci/**,tests/**}'
+              - '!docs/**'
+              - '!projects/**'
+              - '!scripts/**'
+              - '!tutorial/**'
+
+  deploy:
+    name: NEW GPU Unit Tests
+    runs-on: ubuntu-latest
+    needs: collect-tests
+    if: needs.collect-tests.outputs.arctictraining == 'true'  # else skipped, which passes Required
+    permissions:
+      contents: read  # least privilege: this job only needs to clone the repo
+    env:
+      # modal tokens (account shared with deepspeedai): https://modal.com/settings/deepspeedai/tokens
+      # stored as repo secrets: https://github.com/snowflakedb/ArcticTraining/settings/secrets/actions
+      MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
+      MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
+      # HF token of the bot user (https://huggingface.co/settings/profile), stored in the same place.
+      # XXX: this is a placeholder - we haven't needed this one yet
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
+
+    steps:
+      # NOTE(review): no `ref` is set, so for `pull_request_target` runs this clones the base branch,
+      # not the PR head; checking out the PR head instead would expose the secrets above to PR code.
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+        with:
+          lfs: true
+
+      - name: Install Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: 'pip' # caching pip dependencies
+
+      - name: Install build dependencies
+        run: |
+          pip install uv # much faster than pip
+          uv pip install --system modal
+          # next we build requirements files since these help to cache the packages w/o rebuilding the modal image on each run
+          # 1. general packages
+          uv pip compile pyproject.toml --extra testing -o requirements-general.txt
+          echo "uv" >> requirements-general.txt # not a project dep, but the CI relies on uv later
+          # 2. install a specific torch/cuda combo in case deps compilation got it wrong
+          echo "--index-url https://download.pytorch.org/whl/cu129"   > requirements-torch.txt
+          echo "torch==2.8.0"                                        >> requirements-torch.txt
+          # 3. flash_attn needs special care
+          echo 'flash_attn' > requirements-flash_attn.txt
+
+      - name: Run tests
+        run: modal run -m ci.gpu_unit_tests