diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml
index 1ff0be8def..1db9de1b48 100644
--- a/.github/workflows/build-macos.yml
+++ b/.github/workflows/build-macos.yml
@@ -40,7 +40,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Restore LLVM source cache
         uses: actions/cache/restore@v4
@@ -135,7 +135,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Build TinyGo (LLVM ${{ matrix.version }})
         run: go install -tags=llvm${{ matrix.version }}
diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index d5a6619fbc..e11d8193af 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -137,7 +137,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Install wasmtime
         uses: bytecodealliance/actions/wasmtime/setup@v1
@@ -181,7 +181,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Install Node.js
         uses: actions/setup-node@v4
@@ -298,7 +298,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Restore LLVM source cache
         uses: actions/cache/restore@v4
diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml
index e65ae3193a..6c5b9f9a41 100644
--- a/.github/workflows/nix.yml
+++ b/.github/workflows/nix.yml
@@ -42,7 +42,7 @@ jobs:
           key: ${{ steps.cache-llvm-source.outputs.cache-primary-key }}
           path: |
             llvm-project/compiler-rt
-      - uses: cachix/install-nix-action@v22
+      - uses: cachix/install-nix-action@v31
       - name: Test
         run: |
           nix develop --ignore-environment --keep HOME --command bash -c "go install && ~/go/bin/tinygo version && ~/go/bin/tinygo build -o test ./testdata/cgo"
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 60b0d8cb5d..0cfeb518be 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -41,7 +41,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Restore cached LLVM source
         uses: actions/cache/restore@v4
@@ -147,7 +147,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Download TinyGo build
         uses: actions/download-artifact@v4
@@ -177,7 +177,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Download TinyGo build
         uses: actions/download-artifact@v4
@@ -213,7 +213,7 @@ jobs:
       - name: Install Go
         uses: actions/setup-go@v6
         with:
-          go-version: '1.25.5'
+          go-version: '1.25.7'
           cache: true
       - name: Download TinyGo build
         uses: actions/download-artifact@v4
diff --git a/GNUmakefile b/GNUmakefile
index 99a654ca7f..368fedc910 100644
--- a/GNUmakefile
+++ b/GNUmakefile
@@ -814,6 +814,8 @@ endif
 	@$(MD5SUM) test.hex
 	$(TINYGO) build -size short -o test.hex -target=waveshare-rp2040-tiny examples/echo
 	@$(MD5SUM) test.hex
+	$(TINYGO) build -size short -o test.hex -target=vicharak_shrike-lite examples/echo
+	@$(MD5SUM) test.hex
 	# test pwm
 	$(TINYGO) build -size short -o test.hex -target=itsybitsy-m0        examples/pwm
 	@$(MD5SUM) test.hex
@@ -896,6 +898,10 @@ endif
 	@$(MD5SUM) test.hex
 	$(TINYGO) build -size short -o test.hex -target=digispark           examples/blinky1
 	@$(MD5SUM) test.hex
+	$(TINYGO) build -size short -o test.hex -target=digispark           examples/pwm
+	@$(MD5SUM) test.hex
+	$(TINYGO) build -size short -o test.hex -target=digispark           examples/mcp3008
+	@$(MD5SUM) test.hex
 	$(TINYGO) build -size short -o test.hex -target=digispark -gc=leaking examples/blinky1
 	@$(MD5SUM) test.hex
 ifneq ($(XTENSA), 0)
@@ -917,6 +923,10 @@ ifneq ($(XTENSA), 0)
 	@$(MD5SUM) test.bin
 	$(TINYGO) build -size short -o test.bin -target=xiao-esp32s3   		examples/blinky1
 	@$(MD5SUM) test.bin
+	$(TINYGO) build -size short -o test.bin -target=xiao-esp32s3   		examples/mcp3008
+	@$(MD5SUM) test.bin
+	$(TINYGO) build -size short -o test.bin -target=esp32s3-wroom1	    examples/mcp3008
+	@$(MD5SUM) test.bin
 endif
 	$(TINYGO) build -size short -o test.bin -target=esp-c3-32s-kit      examples/blinky1
 	@$(MD5SUM) test.bin
diff --git a/README.md b/README.md
index 518dcdad18..86955a2d9d 100644
--- a/README.md
+++ b/README.md
@@ -6,6 +6,9 @@ TinyGo is a Go compiler intended for use in small places such as microcontroller
 
 It reuses libraries used by the [Go language tools](https://golang.org/pkg/go/) alongside [LLVM](http://llvm.org) to provide an alternative way to compile programs written in the Go programming language.
 
+> [!IMPORTANT]
+> You can help TinyGo with a financial contribution using OpenCollective. Please see https://opencollective.com/tinygo for more information. Thank you!
+
 ## Embedded
 
 Here is an example program that blinks the built-in LED when run directly on any supported board with onboard LED:
@@ -63,7 +66,7 @@ tinygo build -buildmode=c-shared -o add.wasm -target=wasip1 add.go
 You can also use the same syntax as Go 1.24+:
 
 ```shell
-GOARCH=wasip1 GOOS=wasm tinygo build -buildmode=c-shared -o add.wasm add.go
+GOOS=wasip1 GOARCH=wasm tinygo build -buildmode=c-shared -o add.wasm add.go
 ```
 
 ## Installation
diff --git a/builder/build.go b/builder/build.go
index a598f01965..44f41eb233 100644
--- a/builder/build.go
+++ b/builder/build.go
@@ -19,6 +19,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -281,9 +282,13 @@ func Build(pkgName, outpath, tmpdir string, config *compileopts.Config) (BuildRe
 				allFiles[file.Name] = append(allFiles[file.Name], file)
 			}
 		}
-		for name, files := range allFiles {
-			name := name
-			files := files
+		// Sort embedded files by name to maintain output determinism.
+		embedNames := make([]string, 0, len(allFiles))
+		for _, files := range allFiles {
+			embedNames = append(embedNames, files[0].Name)
+		}
+		slices.Sort(embedNames)
+		for _, name := range embedNames {
 			job := &compileJob{
 				description: "make object file for " + name,
 				run: func(job *compileJob) error {
@@ -298,7 +303,7 @@ func Build(pkgName, outpath, tmpdir string, config *compileopts.Config) (BuildRe
 					sum := sha256.Sum256(data)
 					hexSum := hex.EncodeToString(sum[:16])
 
-					for _, file := range files {
+					for _, file := range allFiles[name] {
 						file.Size = uint64(len(data))
 						file.Hash = hexSum
 						if file.NeedsData {
diff --git a/compiler/gc.go b/compiler/gc.go
index fc0e6e687f..5ca79b91ba 100644
--- a/compiler/gc.go
+++ b/compiler/gc.go
@@ -99,6 +99,9 @@ func typeHasPointers(t llvm.Type) bool {
 		}
 		return false
 	case llvm.ArrayTypeKind:
+		if t.ArrayLength() == 0 {
+			return false
+		}
 		if typeHasPointers(t.ElementType()) {
 			return true
 		}
diff --git a/compiler/llvm.go b/compiler/llvm.go
index de387b39c0..7ce6c7d615 100644
--- a/compiler/llvm.go
+++ b/compiler/llvm.go
@@ -1,10 +1,10 @@
 package compiler
 
 import (
+	"encoding/binary"
 	"fmt"
 	"go/token"
 	"go/types"
-	"math/big"
 	"strings"
 
 	"github.com/tinygo-org/tinygo/compileopts"
@@ -231,6 +231,12 @@ func (c *compilerContext) makeGlobalArray(buf []byte, name string, elementType l
 //
 // For details on what's in this value, see src/runtime/gc_precise.go.
 func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Value {
+	if !typeHasPointers(t) {
+		// There are no pointers in this type, so we can simplify the layout.
+		layout := (uint64(1) << 1) | 1
+		return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
+	}
+
 	// Use the element type for arrays. This works even for nested arrays.
 	for {
 		kind := t.TypeKind()
@@ -248,54 +254,29 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
 		break
 	}
 
-	// Do a few checks to see whether we need to generate any object layout
-	// information at all.
+	// Create the pointer bitmap.
 	objectSizeBytes := c.targetData.TypeAllocSize(t)
-	pointerSize := c.targetData.TypeAllocSize(c.dataPtrType)
-	pointerAlignment := c.targetData.PrefTypeAlignment(c.dataPtrType)
-	if objectSizeBytes < pointerSize {
-		// Too small to contain a pointer.
-		layout := (uint64(1) << 1) | 1
-		return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
-	}
-	bitmap := c.getPointerBitmap(t, pos)
-	if bitmap.BitLen() == 0 {
-		// There are no pointers in this type, so we can simplify the layout.
-		// TODO: this can be done in many other cases, e.g. when allocating an
-		// array (like [4][]byte, which repeats a slice 4 times).
-		layout := (uint64(1) << 1) | 1
-		return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
-	}
-	if objectSizeBytes%uint64(pointerAlignment) != 0 {
-		// This shouldn't happen except for packed structs, which aren't
-		// currently used.
-		c.addError(pos, "internal error: unexpected object size for object with pointer field")
-		return llvm.ConstNull(c.dataPtrType)
-	}
-	objectSizeWords := objectSizeBytes / uint64(pointerAlignment)
+	pointerAlignment := uint64(c.targetData.PrefTypeAlignment(c.dataPtrType))
+	bitmapLen := objectSizeBytes / pointerAlignment
+	bitmapBytes := (bitmapLen + 7) / 8
+	bitmap := make([]byte, bitmapBytes, max(bitmapBytes, 8))
+	c.buildPointerBitmap(bitmap, pointerAlignment, pos, t, 0)
 
+	// Try to encode the layout inline.
+	pointerSize := c.targetData.TypeAllocSize(c.dataPtrType)
 	pointerBits := pointerSize * 8
-	var sizeFieldBits uint64
-	switch pointerBits {
-	case 16:
-		sizeFieldBits = 4
-	case 32:
-		sizeFieldBits = 5
-	case 64:
-		sizeFieldBits = 6
-	default:
-		panic("unknown pointer size")
-	}
-	layoutFieldBits := pointerBits - 1 - sizeFieldBits
-
-	// Try to emit the value as an inline integer. This is possible in most
-	// cases.
-	if objectSizeWords < layoutFieldBits {
-		// If it can be stored directly in the pointer value, do so.
-		// The runtime knows that if the least significant bit of the pointer is
-		// set, the pointer contains the value itself.
-		layout := bitmap.Uint64()<<(sizeFieldBits+1) | (objectSizeWords << 1) | 1
-		return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
+	if bitmapLen < pointerBits {
+		rawMask := binary.LittleEndian.Uint64(bitmap[0:8])
+		layout := rawMask*pointerBits + bitmapLen
+		layout <<= 1
+		layout |= 1
+
+		// Check if the layout fits.
+		layout &= 1<<pointerBits - 1
+		if (layout>>1)/pointerBits == rawMask {
+			// No set bits were shifted off.
+			return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
+		}
 	}
 
 	// Unfortunately, the object layout is too big to fit in a pointer-sized
@@ -303,25 +284,24 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
 
 	// Try first whether the global already exists. All objects with a
 	// particular name have the same type, so this is possible.
-	globalName := "runtime/gc.layout:" + fmt.Sprintf("%d-%0*x", objectSizeWords, (objectSizeWords+15)/16, bitmap)
+	globalName := "runtime/gc.layout:" + fmt.Sprintf("%d-%0*x", bitmapLen, (bitmapLen+15)/16, bitmap)
 	global := c.mod.NamedGlobal(globalName)
 	if !global.IsNil() {
 		return global
 	}
 
 	// Create the global initializer.
-	bitmapBytes := make([]byte, int(objectSizeWords+7)/8)
-	bitmap.FillBytes(bitmapBytes)
-	reverseBytes(bitmapBytes) // big-endian to little-endian
-	var bitmapByteValues []llvm.Value
-	for _, b := range bitmapBytes {
-		bitmapByteValues = append(bitmapByteValues, llvm.ConstInt(c.ctx.Int8Type(), uint64(b), false))
+	bitmapByteValues := make([]llvm.Value, bitmapBytes)
+	i8 := c.ctx.Int8Type()
+	for i, b := range bitmap {
+		bitmapByteValues[i] = llvm.ConstInt(i8, uint64(b), false)
 	}
 	initializer := c.ctx.ConstStruct([]llvm.Value{
-		llvm.ConstInt(c.uintptrType, objectSizeWords, false),
-		llvm.ConstArray(c.ctx.Int8Type(), bitmapByteValues),
+		llvm.ConstInt(c.uintptrType, bitmapLen, false),
+		llvm.ConstArray(i8, bitmapByteValues),
 	}, false)
 
+	// Create the actual global.
 	global = llvm.AddGlobal(c.mod, initializer.Type(), globalName)
 	global.SetInitializer(initializer)
 	global.SetUnnamedAddr(true)
@@ -329,6 +309,7 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
 	global.SetLinkage(llvm.LinkOnceODRLinkage)
 	if c.targetData.PrefTypeAlignment(c.uintptrType) < 2 {
 		// AVR doesn't have alignment by default.
+		// The lowest bit must be unset to distinguish this from an inline layout.
 		global.SetAlignment(2)
 	}
 	if c.Debug && pos != token.NoPos {
@@ -360,52 +341,71 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
 	return global
 }
 
-// getPointerBitmap scans the given LLVM type for pointers and sets bits in a
-// bigint at the word offset that contains a pointer. This scan is recursive.
-func (c *compilerContext) getPointerBitmap(typ llvm.Type, pos token.Pos) *big.Int {
-	alignment := c.targetData.PrefTypeAlignment(c.dataPtrType)
-	switch typ.TypeKind() {
+// buildPointerBitmap scans the given LLVM type for pointers and sets bits in a
+// bitmap at the word offset that contains a pointer. This scan is recursive.
+func (c *compilerContext) buildPointerBitmap(
+	dst []byte,
+	ptrAlign uint64,
+	pos token.Pos,
+	t llvm.Type,
+	offset uint64,
+) {
+	switch t.TypeKind() {
 	case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind:
-		return big.NewInt(0)
+		// These types do not contain pointers.
+
 	case llvm.PointerTypeKind:
-		return big.NewInt(1)
+		// Set the corresponding position in the bitmap.
+		dst[offset/8] |= 1 << (offset % 8)
+
 	case llvm.StructTypeKind:
-		ptrs := big.NewInt(0)
-		for i, subtyp := range typ.StructElementTypes() {
-			subptrs := c.getPointerBitmap(subtyp, pos)
-			if subptrs.BitLen() == 0 {
-				continue
-			}
-			offset := c.targetData.ElementOffset(typ, i)
-			if offset%uint64(alignment) != 0 {
-				// This error will let the compilation fail, but by continuing
-				// the error can still easily be shown.
-				c.addError(pos, "internal error: allocated struct contains unaligned pointer")
+		// Recurse over struct elements.
+		for i, et := range t.StructElementTypes() {
+			eo := c.targetData.ElementOffset(t, i)
+			if eo%uint64(ptrAlign) != 0 {
+				if typeHasPointers(et) {
+					// This error will let the compilation fail, but by continuing
+					// the error can still easily be shown.
+					c.addError(pos, "internal error: allocated struct contains unaligned pointer")
+				}
 				continue
 			}
-			subptrs.Lsh(subptrs, uint(offset)/uint(alignment))
-			ptrs.Or(ptrs, subptrs)
+			c.buildPointerBitmap(
+				dst,
+				ptrAlign,
+				pos,
+				et,
+				offset+(eo/ptrAlign),
+			)
 		}
-		return ptrs
+
 	case llvm.ArrayTypeKind:
-		subtyp := typ.ElementType()
-		subptrs := c.getPointerBitmap(subtyp, pos)
-		ptrs := big.NewInt(0)
-		if subptrs.BitLen() == 0 {
-			return ptrs
+		// Recurse over array elements.
+		len := t.ArrayLength()
+		if len <= 0 {
+			return
 		}
-		elementSize := c.targetData.TypeAllocSize(subtyp)
-		if elementSize%uint64(alignment) != 0 {
-			// This error will let the compilation fail (but continues so that
-			// other errors can be shown).
-			c.addError(pos, "internal error: allocated array contains unaligned pointer")
-			return ptrs
+		et := t.ElementType()
+		elementSize := c.targetData.TypeAllocSize(et)
+		if elementSize%ptrAlign != 0 {
+			if typeHasPointers(et) {
+				// This error will let the compilation fail (but continues so that
+				// other errors can be shown).
+				c.addError(pos, "internal error: allocated array contains unaligned pointer")
+			}
+			return
 		}
-		for i := 0; i < typ.ArrayLength(); i++ {
-			ptrs.Lsh(ptrs, uint(elementSize)/uint(alignment))
-			ptrs.Or(ptrs, subptrs)
+		elementSize /= ptrAlign
+		for i := 0; i < len; i++ {
+			c.buildPointerBitmap(
+				dst,
+				ptrAlign,
+				pos,
+				et,
+				offset+uint64(i)*elementSize,
+			)
 		}
-		return ptrs
+
 	default:
 		// Should not happen.
 		panic("unknown LLVM type")
diff --git a/compiler/testdata/gc.go b/compiler/testdata/gc.go
index 20e5967028..9aa00a4c6f 100644
--- a/compiler/testdata/gc.go
+++ b/compiler/testdata/gc.go
@@ -24,6 +24,10 @@ var (
 		x *byte
 		y [61]uintptr
 	}
+	struct5 *struct {
+		x *byte
+		y [30]uintptr
+	}
 
 	slice1 []byte
 	slice2 []*int
@@ -58,6 +62,10 @@ func newStruct() {
 		x *byte
 		y [61]uintptr
 	})
+	struct5 = new(struct {
+		x *byte
+		y [30]uintptr
+	})
 }
 
 func newFuncValue() *func() {
diff --git a/compiler/testdata/gc.ll b/compiler/testdata/gc.ll
index d2be74cbcf..42a278b66e 100644
--- a/compiler/testdata/gc.ll
+++ b/compiler/testdata/gc.ll
@@ -16,11 +16,12 @@ target triple = "wasm32-unknown-wasi"
 @main.struct2 = hidden global ptr null, align 4
 @main.struct3 = hidden global ptr null, align 4
 @main.struct4 = hidden global ptr null, align 4
+@main.struct5 = hidden global ptr null, align 4
 @main.slice1 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
 @main.slice2 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
 @main.slice3 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
-@"runtime/gc.layout:62-2000000000000001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00 " }
-@"runtime/gc.layout:62-0001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00\00" }
+@"runtime/gc.layout:62-0100000000000020" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00 " }
+@"runtime/gc.layout:62-0100000000000000" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00\00" }
 @"reflect/types.type:basic:complex128" = linkonce_odr constant { i8, ptr } { i8 80, ptr @"reflect/types.type:pointer:basic:complex128" }, align 4
 @"reflect/types.type:pointer:basic:complex128" = linkonce_odr constant { i8, i16, ptr } { i8 -43, i16 0, ptr @"reflect/types.type:basic:complex128" }, align 4
 
@@ -80,12 +81,15 @@ entry:
   %new1 = call align 4 dereferenceable(8) ptr @runtime.alloc(i32 8, ptr nonnull inttoptr (i32 3 to ptr), ptr undef) #3
   call void @runtime.trackPointer(ptr nonnull %new1, ptr nonnull %stackalloc, ptr undef) #3
   store ptr %new1, ptr @main.struct2, align 4
-  %new2 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-2000000000000001", ptr undef) #3
+  %new2 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0100000000000020", ptr undef) #3
   call void @runtime.trackPointer(ptr nonnull %new2, ptr nonnull %stackalloc, ptr undef) #3
   store ptr %new2, ptr @main.struct3, align 4
-  %new3 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0001", ptr undef) #3
+  %new3 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0100000000000000", ptr undef) #3
   call void @runtime.trackPointer(ptr nonnull %new3, ptr nonnull %stackalloc, ptr undef) #3
   store ptr %new3, ptr @main.struct4, align 4
+  %new4 = call align 4 dereferenceable(124) ptr @runtime.alloc(i32 124, ptr nonnull inttoptr (i32 127 to ptr), ptr undef) #3
+  call void @runtime.trackPointer(ptr nonnull %new4, ptr nonnull %stackalloc, ptr undef) #3
+  store ptr %new4, ptr @main.struct5, align 4
   ret void
 }
 
diff --git a/flake.lock b/flake.lock
index 877c18b461..9ff701357d 100644
--- a/flake.lock
+++ b/flake.lock
@@ -20,16 +20,16 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1747953325,
-        "narHash": "sha256-y2ZtlIlNTuVJUZCqzZAhIw5rrKP4DOSklev6c8PyCkQ=",
+        "lastModified": 1770136044,
+        "narHash": "sha256-tlFqNG/uzz2++aAmn4v8J0vAkV3z7XngeIIB3rM3650=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "55d1f923c480dadce40f5231feb472e81b0bab48",
+        "rev": "e576e3c9cf9bad747afcddd9e34f51d18c855b4e",
         "type": "github"
       },
       "original": {
         "id": "nixpkgs",
-        "ref": "nixos-25.05",
+        "ref": "nixos-25.11",
         "type": "indirect"
       }
     },
diff --git a/flake.nix b/flake.nix
index 4feea0a4b8..85ab404940 100644
--- a/flake.nix
+++ b/flake.nix
@@ -34,7 +34,7 @@
   inputs = {
     # Use a recent stable release, but fix the version to make it reproducible.
     # This version should be updated from time to time.
-    nixpkgs.url = "nixpkgs/nixos-25.05";
+    nixpkgs.url = "nixpkgs/nixos-25.11";
     flake-utils.url = "github:numtide/flake-utils";
   };
   outputs = { self, nixpkgs, flake-utils }:
diff --git a/goenv/version.go b/goenv/version.go
index 423f95906e..9ade0e0b79 100644
--- a/goenv/version.go
+++ b/goenv/version.go
@@ -10,7 +10,7 @@ import (
 
 // Version of TinyGo.
 // Update this value before release of new version of software.
-const version = "0.40.1"
+const version = "0.41.0-dev"
 
 // Return TinyGo version, either in the form 0.30.0 or as a development version
 // (like 0.30.0-dev-abcd012).
diff --git a/src/examples/pwm/digispark.go b/src/examples/pwm/digispark.go
new file mode 100644
index 0000000000..848d518546
--- /dev/null
+++ b/src/examples/pwm/digispark.go
@@ -0,0 +1,12 @@
+//go:build digispark
+
+package main
+
+import "machine"
+
+var (
+	// Use Timer1 for PWM (recommended for ATtiny85)
+	pwm  = machine.Timer1
+	pinA = machine.P1 // PB1, Timer1 channel A (LED pin)
+	pinB = machine.P4 // PB4, Timer1 channel B
+)
diff --git a/src/machine/board_amken_trio.go b/src/machine/board_amken_trio.go
index 3382085bc0..dee207acc6 100644
--- a/src/machine/board_amken_trio.go
+++ b/src/machine/board_amken_trio.go
@@ -135,7 +135,7 @@ var (
 
 	// FDCAN1 on PD0 (RX) / PD1 (TX) with onboard transceiver
 	CAN1  = &_CAN1
-	_CAN1 = FDCAN{
+	_CAN1 = CAN{
 		Bus:             stm32.FDCAN1,
 		TxAltFuncSelect: AF3_FDCAN1_FDCAN2,
 		RxAltFuncSelect: AF3_FDCAN1_FDCAN2,
diff --git a/src/machine/board_digispark.go b/src/machine/board_digispark.go
index f380aae85c..d7106a5544 100644
--- a/src/machine/board_digispark.go
+++ b/src/machine/board_digispark.go
@@ -2,17 +2,26 @@
 
 package machine
 
+// Digispark is a tiny ATtiny85-based board with 6 I/O pins.
+//
+// PWM is available on the following pins:
+//   - P0 (PB0): Timer0 channel A
+//   - P1 (PB1): Timer0 channel B or Timer1 channel A (LED pin)
+//   - P4 (PB4): Timer1 channel B
+//
+// Timer1 is recommended for PWM as it provides more flexible frequency control.
+
 // Return the current CPU frequency in hertz.
 func CPUFrequency() uint32 {
 	return 16000000
 }
 
 const (
-	P0 Pin = PB0
-	P1 Pin = PB1
+	P0 Pin = PB0 // PWM available (Timer0 OC0A)
+	P1 Pin = PB1 // PWM available (Timer0 OC0B or Timer1 OC1A)
 	P2 Pin = PB2
 	P3 Pin = PB3
-	P4 Pin = PB4
+	P4 Pin = PB4 // PWM available (Timer1 OC1B)
 	P5 Pin = PB5
 
 	LED = P1
diff --git a/src/machine/board_esp32s3-wroom1.go b/src/machine/board_esp32s3-wroom1.go
new file mode 100644
index 0000000000..649745398d
--- /dev/null
+++ b/src/machine/board_esp32s3-wroom1.go
@@ -0,0 +1,15 @@
+//go:build esp32s3_wroom1
+
+package machine
+
+const (
+	SPI1_SCK_PIN  = GPIO12 // SCK
+	SPI1_MOSI_PIN = GPIO11 // SDO (MOSI)
+	SPI1_MISO_PIN = GPIO13 // SDI (MISO)
+	SPI1_CS_PIN   = GPIO10 // CS
+
+	SPI2_SCK_PIN  = GPIO36 // SCK
+	SPI2_MOSI_PIN = GPIO35 // SDO (MOSI)
+	SPI2_MISO_PIN = GPIO37 // SDI (MISO)
+	SPI2_CS_PIN   = GPIO34 // CS
+)
diff --git a/src/machine/board_feather-m0.go b/src/machine/board_feather-m0.go
index f38d8ec889..15ec77d37a 100644
--- a/src/machine/board_feather-m0.go
+++ b/src/machine/board_feather-m0.go
@@ -43,6 +43,15 @@ const (
 	USBCDC_DP_PIN = PA25
 )
 
+// UART0 pins
+const (
+	UART0_TX_PIN = D1
+	UART0_RX_PIN = D0
+)
+
+// UART0 on the Feather M0.
+var UART0 = &sercomUSART0
+
 // UART1 pins
 const (
 	UART_TX_PIN = D10
diff --git a/src/machine/board_nucleog0b1re.go b/src/machine/board_nucleog0b1re.go
index 8c048fd0fc..7280638f77 100644
--- a/src/machine/board_nucleog0b1re.go
+++ b/src/machine/board_nucleog0b1re.go
@@ -107,7 +107,7 @@ var (
 
 	// FDCAN1 on PA11 (TX) / PA12 (RX)
 	CAN1  = &_CAN1
-	_CAN1 = FDCAN{
+	_CAN1 = CAN{
 		Bus:             stm32.FDCAN1,
 		TxAltFuncSelect: AF9_FDCAN1_FDCAN2,
 		RxAltFuncSelect: AF9_FDCAN1_FDCAN2,
@@ -116,7 +116,7 @@ var (
 
 	// FDCAN2 on PD12 (TX) / PD13 (RX)
 	CAN2  = &_CAN2
-	_CAN2 = FDCAN{
+	_CAN2 = CAN{
 		Bus:             stm32.FDCAN2,
 		TxAltFuncSelect: AF3_FDCAN1_FDCAN2,
 		RxAltFuncSelect: AF3_FDCAN1_FDCAN2,
diff --git a/src/machine/board_vicharak_shrike-lite.go b/src/machine/board_vicharak_shrike-lite.go
new file mode 100644
index 0000000000..8899e7125c
--- /dev/null
+++ b/src/machine/board_vicharak_shrike-lite.go
@@ -0,0 +1,118 @@
+//go:build vicharak_shrike_lite
+
+// Pin mappings for Vicharak Shrike-Lite.
+//
+// Reference: https://vicharak-in.github.io/shrike/shrike_pinouts.html
+
+package machine
+
+// Digital
+const (
+	IO0  Pin = GPIO0
+	IO1  Pin = GPIO1
+	IO2  Pin = GPIO2
+	IO3  Pin = GPIO3
+	IO4  Pin = GPIO4
+	IO5  Pin = GPIO5
+	IO6  Pin = GPIO6
+	IO7  Pin = GPIO7
+	IO8  Pin = GPIO8
+	IO9  Pin = GPIO9
+	IO10 Pin = GPIO10
+	IO11 Pin = GPIO11
+	IO12 Pin = GPIO12
+	IO13 Pin = GPIO13
+	IO14 Pin = GPIO14
+	IO15 Pin = GPIO15
+	IO16 Pin = GPIO16
+	IO17 Pin = GPIO17
+	IO18 Pin = GPIO18
+	IO19 Pin = GPIO19
+	IO20 Pin = GPIO20
+	IO21 Pin = GPIO21
+	IO22 Pin = GPIO22
+	IO23 Pin = GPIO23
+	IO24 Pin = GPIO24
+	IO25 Pin = GPIO25
+	IO26 Pin = GPIO26
+	IO27 Pin = GPIO27
+	IO28 Pin = GPIO28
+	IO29 Pin = GPIO29
+)
+
+// FPGA Pins
+const (
+	FPGA_EN  Pin = IO13
+	FPGA_PWR Pin = IO12
+	// SPI_SCLK
+	F3 Pin = IO2
+	// SPI_SS
+	F4 Pin = IO1
+	// SPI_SI (MOSI)
+	F5 Pin = IO3
+	// SPI_SO (MISO) / CONFIG
+	F6  Pin = IO0
+	F18 Pin = IO14
+	F17 Pin = IO15
+)
+
+// Analog pins
+const (
+	A0 Pin = IO26
+	A1 Pin = IO27
+	A2 Pin = IO28
+	A3 Pin = IO29
+)
+
+// LED
+const (
+	LED = IO4
+)
+
+// I2C pins
+const (
+	I2C0_SDA_PIN Pin = IO24
+	I2C0_SCL_PIN Pin = IO25
+
+	I2C1_SDA_PIN Pin = IO6
+	I2C1_SCL_PIN Pin = IO7
+)
+
+// SPI pins
+const (
+	SPI0_SCK_PIN Pin = IO18
+	SPI0_SDO_PIN Pin = IO19
+	SPI0_SDI_PIN Pin = IO20
+
+	SPI1_SCK_PIN Pin = IO10
+	SPI1_SDO_PIN Pin = IO11
+	SPI1_SDI_PIN Pin = IO8
+)
+
+// Onboard crystal oscillator frequency, in MHz.
+const (
+	xoscFreq = 12 // MHz
+)
+
+// UART pins
+const (
+	UART0_TX_PIN = IO28
+	UART0_RX_PIN = IO29
+	UART_TX_PIN  = UART0_TX_PIN
+	UART_RX_PIN  = UART0_RX_PIN
+	UART1_TX_PIN = IO24
+	UART1_RX_PIN = IO25
+)
+
+var DefaultUART = UART0
+
+// USB CDC identifiers
+const (
+	usb_STRING_PRODUCT      = "Shrike-Lite"
+	usb_STRING_MANUFACTURER = "Vicharak"
+)
+
+var (
+	usb_VID uint16 = 0x2e8a
+	usb_PID uint16 = 0x0003
+)
diff --git a/src/machine/board_xiao-esp32s3.go b/src/machine/board_xiao-esp32s3.go
index 9181bffc2e..6e1e67b832 100644
--- a/src/machine/board_xiao-esp32s3.go
+++ b/src/machine/board_xiao-esp32s3.go
@@ -47,9 +47,15 @@ const (
 
 // SPI pins
 const (
-	SPI_SCK_PIN = GPIO7
-	SPI_SDI_PIN = GPIO9
-	SPI_SDO_PIN = GPIO8
+	SPI1_SCK_PIN  = GPIO7 // D8
+	SPI1_MISO_PIN = GPIO8 // D9
+	SPI1_MOSI_PIN = GPIO9 // D10
+	SPI1_CS_PIN   = NoPin
+
+	SPI2_SCK_PIN  = NoPin
+	SPI2_MOSI_PIN = NoPin
+	SPI2_MISO_PIN = NoPin
+	SPI2_CS_PIN   = NoPin
 )
 
 // Onboard LEDs
diff --git a/src/machine/can.go b/src/machine/can.go
new file mode 100644
index 0000000000..51fe151dca
--- /dev/null
+++ b/src/machine/can.go
@@ -0,0 +1,102 @@
+//go:build stm32g0
+
+package machine
+
+// unexported functions here are implemented in the device file
+// and added to the build tags of this file.
+
+// These types are an alias for documentation purposes exclusively. We wish
+// the interface to be used by other ecosystems besides TinyGo which is why
+// we need these types to be primitive types at the interface level.
+// If these types are defined at machine or machine/can level they are not
+// usable by non-TinyGo projects. This is not good news for fostering wider adoption
+// of our API in "big-Go" embedded system projects like TamaGo and periph.io
+type (
+	// CAN IDs in tinygo are stored in a uint32 where the low 29 bits (canIDExtendedMask)
+	// store the actual ID and bit 30 (canIDExtendedBit, 1 << 30) stores the IDE bit (if extended ID).
+	// We include the extended ID bit in the ID itself to make comparison of IDs easier for users
+	// since two identical IDs where one is extended and one is not are NOT equivalent IDs.
+	canID = uint32
+	// CAN flags bitmask are defined below.
+	canFlags = uint32
+)
+
+// CAN ID definitions.
+const (
+	canIDStdMask      canID = (1 << 11) - 1
+	canIDExtendedMask canID = (1 << 29) - 1
+	canIDExtendedBit  canID = 1 << 30
+)
+
+// CAN Flag bit definitions.
+const (
+	canFlagBRS canFlags = 1 << 0 // Bit Rate Switch active on tx/rx of frame.
+	canFlagFDF canFlags = 1 << 1 // Is a FD Frame.
+	canFlagRTR canFlags = 1 << 2 // Is a remote transmission request (RTR) frame.
+	canFlagESI canFlags = 1 << 3 // Error status indicator active on tx/rx of frame.
+	canFlagIDE canFlags = 1 << 4 // Extended ID.
+)
+
+// TxFIFOLevel returns amount of CAN frames stored for transmission and total Tx fifo length.
+func (can *CAN) TxFIFOLevel() (level int, maxlevel int) {
+	return can.txFIFOLevel()
+}
+
+// Tx puts a CAN frame in TxFIFO for transmission. Returns error if TxFIFO is full.
+func (can *CAN) Tx(id canID, flags canFlags, data []byte) error {
+	return can.tx(id, flags, data)
+}
+
+// RxFIFOLevel returns amount of CAN frames received and stored and total Rx fifo length.
+// If the hardware is interrupt driven RxFIFOLevel should return 0,0.
+func (can *CAN) RxFIFOLevel() (level int, maxlevel int) {
+	return can.rxFIFOLevel()
+}
+
+type canRxCallback = func(data []byte, id canID, timestamp uint32, flags canFlags)
+
+// SetRxCallback sets the receive callback. See [canFlags] for information on how bits are laid out.
+func (can *CAN) SetRxCallback(cb canRxCallback) {
+	can.setRxCallback(cb)
+}
+
+// RxPoll is called periodically for poll driven drivers. If the driver is interrupt driven
+// then RxPoll is a no-op and may return nil. Users may determine if a CAN is interrupt driven by
+// checking if RxFIFOLevel returns 0,0.
+func (can *CAN) RxPoll() error {
+	return can.rxPoll()
+}
+
+// DLC to bytes lookup table
+var dlcToBytes = [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64}
+
+// dlcToLength converts a DLC value to actual byte length
+func dlcToLength(dlc byte) uint8 {
+	if dlc > 15 {
+		dlc = 15
+	}
+	return dlcToBytes[dlc]
+}
+
+// lengthToDLC converts a byte length to DLC value
+func lengthToDLC(length uint8) (dlc byte) {
+	switch {
+	case length <= 8:
+		dlc = length
+	case length <= 12:
+		dlc = 9
+	case length <= 16:
+		dlc = 10
+	case length <= 20:
+		dlc = 11
+	case length <= 24:
+		dlc = 12
+	case length <= 32:
+		dlc = 13
+	case length <= 48:
+		dlc = 14
+	default:
+		dlc = 15
+	}
+	return dlc
+}
diff --git a/src/machine/machine_attiny85.go b/src/machine/machine_attiny85.go
index 33424c6052..6d31846b5a 100644
--- a/src/machine/machine_attiny85.go
+++ b/src/machine/machine_attiny85.go
@@ -21,3 +21,524 @@ func (p Pin) getPortMask() (*volatile.Register8, uint8) {
 	// Very simple for the attiny85, which only has a single port.
 	return avr.PORTB, 1 << uint8(p)
 }
+
+// PWM is one PWM peripheral, which consists of a counter and two output
+// channels (that can be connected to two fixed pins). You can set the frequency
+// using SetPeriod, but only for all the channels in this PWM peripheral at
+// once.
+type PWM struct {
+	num uint8 // timer index: 0 selects Timer0, 1 selects Timer1
+}
+
+var (
+	Timer0 = PWM{0} // 8-bit timer; channels on PB0 (OC0A) and PB1 (OC0B)
+	Timer1 = PWM{1} // 8-bit high-speed timer; channels on PB1 (OC1A) and PB4 (OC1B)
+)
+
+// GTCCR bit masks for Timer1 channel B, which are not defined in the device file
+const (
+	gtccrPWM1B  = 0x40 // Pulse Width Modulator B Enable
+	gtccrCOM1B0 = 0x10 // Comparator B Output Mode bit 0
+	gtccrCOM1B1 = 0x20 // Comparator B Output Mode bit 1
+)
+
+// Configure enables and configures this PWM.
+//
+// For Timer0, there is only a limited number of periods available, namely the
+// CPU frequency divided by 256 and again divided by 1, 8, 64, 256, or 1024.
+// For a MCU running at 8MHz, this would be a period of 32µs, 256µs, 2048µs,
+// 8192µs, or 32768µs.
+//
+// For Timer1, the period is more flexible as it uses OCR1C as the top value.
+// Timer1 also supports more prescaler values (1 to 16384). Periods shorter
+// than one timer tick are rounded up to one tick. ErrPWMPeriodTooLong is
+// returned when the requested period cannot be configured.
+func (pwm PWM) Configure(config PWMConfig) error {
+	switch pwm.num {
+	case 0: // Timer/Counter 0 (8-bit)
+		// Calculate the timer prescaler.
+		var prescaler uint8
+		switch config.Period {
+		case 0, (uint64(1e9) * 256 * 1) / uint64(CPUFrequency()):
+			prescaler = 1
+		case (uint64(1e9) * 256 * 8) / uint64(CPUFrequency()):
+			prescaler = 2
+		case (uint64(1e9) * 256 * 64) / uint64(CPUFrequency()):
+			prescaler = 3
+		case (uint64(1e9) * 256 * 256) / uint64(CPUFrequency()):
+			prescaler = 4
+		case (uint64(1e9) * 256 * 1024) / uint64(CPUFrequency()):
+			prescaler = 5
+		default:
+			return ErrPWMPeriodTooLong
+		}
+
+		avr.TCCR0B.Set(prescaler)
+		// Set the PWM mode to fast PWM (mode = 3).
+		avr.TCCR0A.Set(avr.TCCR0A_WGM00 | avr.TCCR0A_WGM01)
+
+	case 1: // Timer/Counter 1 (8-bit high-speed)
+		// 8-bit with configurable top (OCR1C); PWM is enabled per channel (PWM1A/PWM1B).
+		var top uint64
+		if config.Period == 0 {
+			// Use a top appropriate for LEDs.
+			top = 0xff
+		} else {
+			// Calculate top value: top = period * (CPUFrequency / 1e9)
+			top = config.Period * (uint64(CPUFrequency()) / 1000000) / 1000
+		}
+
+		// Timer1 prescaler values: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384
+		const maxTop = 256
+		var prescaler uint8
+		switch {
+		case top <= maxTop:
+			prescaler = 1 // prescaler 1
+		case top/2 <= maxTop:
+			prescaler = 2 // prescaler 2
+			top /= 2
+		case top/4 <= maxTop:
+			prescaler = 3 // prescaler 4
+			top /= 4
+		case top/8 <= maxTop:
+			prescaler = 4 // prescaler 8
+			top /= 8
+		case top/16 <= maxTop:
+			prescaler = 5 // prescaler 16
+			top /= 16
+		case top/32 <= maxTop:
+			prescaler = 6 // prescaler 32
+			top /= 32
+		case top/64 <= maxTop:
+			prescaler = 7 // prescaler 64
+			top /= 64
+		case top/128 <= maxTop:
+			prescaler = 8 // prescaler 128
+			top /= 128
+		case top/256 <= maxTop:
+			prescaler = 9 // prescaler 256
+			top /= 256
+		case top/512 <= maxTop:
+			prescaler = 10 // prescaler 512
+			top /= 512
+		case top/1024 <= maxTop:
+			prescaler = 11 // prescaler 1024
+			top /= 1024
+		case top/2048 <= maxTop:
+			prescaler = 12 // prescaler 2048
+			top /= 2048
+		case top/4096 <= maxTop:
+			prescaler = 13 // prescaler 4096
+			top /= 4096
+		case top/8192 <= maxTop:
+			prescaler = 14 // prescaler 8192
+			top /= 8192
+		case top/16384 <= maxTop:
+			prescaler = 15 // prescaler 16384
+			top /= 16384
+		default:
+			return ErrPWMPeriodTooLong
+		}
+
+		if top == 0 {
+			top = 1 // period below one tick: keep uint8(top - 1) from wrapping to 0xff
+		}
+		avr.TCCR1.Set(prescaler)      // prescaler (CS1[3:0] bits)
+		avr.OCR1C.Set(uint8(top - 1)) // top value
+	}
+	return nil
+}
+
+// SetPeriod updates the period of this PWM peripheral.
+// To set a particular frequency, use the following formula:
+//
+//	period = 1e9 / frequency
+//
+// If you use a period of 0, a period that works well for LEDs will be picked.
+// A non-zero period shorter than one timer tick is rounded up to one tick.
+//
+// SetPeriod will not change the prescaler, but also won't change the current
+// value in any of the channels, so you may need to update the channel value.
+//
+// Note that you cannot pick any arbitrary period after the PWM peripheral has
+// been configured. If you want to switch between frequencies, pick the lowest
+// frequency (longest period) once when calling Configure and adjust the
+// frequency here as needed. Timer0 always returns ErrPWMPeriodTooLong.
+func (pwm PWM) SetPeriod(period uint64) error {
+	if pwm.num == 0 {
+		return ErrPWMPeriodTooLong // Timer0 doesn't support dynamic period
+	}
+
+	// Timer1 can adjust period via OCR1C
+	var top uint64
+	if period == 0 {
+		top = 0xff
+	} else {
+		top = period * (uint64(CPUFrequency()) / 1000000) / 1000
+	}
+
+	// Scale by the current prescaler: setting n maps to divisor 2^(n-1).
+	prescaler := avr.TCCR1.Get() & 0x0f
+	if prescaler > 0 {
+		top >>= (prescaler - 1)
+	}
+
+	switch {
+	case top > 256:
+		return ErrPWMPeriodTooLong
+	case top == 0:
+		top = 1 // period below one tick: keep uint8(top - 1) from wrapping to 0xff
+	}
+
+	avr.OCR1C.Set(uint8(top - 1))
+	avr.TCNT1.Set(0)
+	return nil
+}
+
+// Top reports the counter top used for duty cycle calculations. The value
+// only changes through Configure or SetPeriod; between those calls it stays
+// constant.
+//
+// The result is hardware dependent, so it is best treated as an opaque value
+// that can be divided by some number and passed to Set (see the Set
+// documentation for more information).
+func (pwm PWM) Top() uint32 {
+	if pwm.num != 1 {
+		// Timer0 goes from 0 to 0xff (256 in total)
+		return 256
+	}
+	// Timer1 counts up to the configurable OCR1C register.
+	return 1 + uint32(avr.OCR1C.Get())
+}
+
+// Counter reports the present count of the timer backing this PWM
+// peripheral, which can help when debugging.
+func (pwm PWM) Counter() uint32 {
+	if pwm.num == 0 {
+		return uint32(avr.TCNT0.Get())
+	}
+	if pwm.num == 1 {
+		return uint32(avr.TCNT1.Get())
+	}
+	return 0
+}
+
+// Prescaler lookup tables using uint16 (more efficient than uint64 on AVR).
+// Timer0: index is the TCCR0B prescaler bits (0-7), value is the clock divisor (Period returns 0 for a 0 entry).
+var timer0Prescalers = [8]uint16{0, 1, 8, 64, 256, 1024, 0, 0}
+
+// Timer1: index is the TCCR1 prescaler bits (0-15), value is the clock divisor (Period returns 0 for a 0 entry).
+var timer1Prescalers = [16]uint16{0, 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}
+
+// Period reports the PWM period currently in use, in nanoseconds. Rounding
+// may make it differ slightly from the period that was configured. The result
+// is 0 when the lookup table holds no divisor for the timer's prescaler bits.
+func (pwm PWM) Period() uint64 {
+	// Look up the clock divisor selected by the timer's prescaler bits.
+	var divisor uint16
+	switch pwm.num {
+	case 0:
+		divisor = timer0Prescalers[avr.TCCR0B.Get()&0x7]
+	case 1:
+		divisor = timer1Prescalers[avr.TCCR1.Get()&0x0f]
+	}
+	if divisor == 0 {
+		return 0
+	}
+
+	// One period lasts divisor * Top() CPU cycles; convert that to nanoseconds
+	// using the CPU frequency expressed in MHz.
+	cycles := uint64(divisor) * uint64(pwm.Top())
+	return cycles * 1000 / uint64(CPUFrequency()/1e6)
+}
+
+// Channel sets pin up as a low output and returns its PWM channel, or ErrInvalidOutputPin if this timer cannot drive it.
+func (pwm PWM) Channel(pin Pin) (uint8, error) {
+	pin.Configure(PinConfig{Mode: PinOutput}) // NOTE: runs before validation, so a rejected pin is still reconfigured
+	pin.Low()
+	switch pwm.num {
+	case 0:
+		switch pin {
+		case PB0: // OC0A
+			avr.TCCR0A.SetBits(avr.TCCR0A_COM0A1)
+			return 0, nil
+		case PB1: // OC0B
+			avr.TCCR0A.SetBits(avr.TCCR0A_COM0B1)
+			return 1, nil
+		}
+	case 1:
+		switch pin {
+		case PB1: // OC1A
+			// Enable PWM on channel A
+			avr.TCCR1.SetBits(avr.TCCR1_PWM1A | avr.TCCR1_COM1A1)
+			return 0, nil
+		case PB4: // OC1B
+			// Enable PWM on channel B (controlled via GTCCR)
+			avr.GTCCR.SetBits(gtccrPWM1B | gtccrCOM1B1)
+			return 1, nil
+		}
+	}
+	return 0, ErrInvalidOutputPin
+}
+
+// SetInverting sets whether to invert the output of this channel.
+// Without inverting, a 25% duty cycle would mean the output is high for 25% of
+// the time and low for the rest. Inverting flips the output as if a NOT gate
+// was placed at the output, meaning that the output would be 25% low and 75%
+// high with a duty cycle of 25%. Unknown channel numbers are ignored.
+func (pwm PWM) SetInverting(channel uint8, inverting bool) {
+	switch pwm.num {
+	case 0:
+		switch channel {
+		case 0: // channel A, PB0
+			if inverting {
+				avr.PORTB.SetBits(1 << 0) // presumably the pin level used while Set(channel, 0) has the compare output switched off — confirm
+				avr.TCCR0A.SetBits(avr.TCCR0A_COM0A0)
+			} else {
+				avr.PORTB.ClearBits(1 << 0)
+				avr.TCCR0A.ClearBits(avr.TCCR0A_COM0A0)
+			}
+		case 1: // channel B, PB1
+			if inverting {
+				avr.PORTB.SetBits(1 << 1)
+				avr.TCCR0A.SetBits(avr.TCCR0A_COM0B0)
+			} else {
+				avr.PORTB.ClearBits(1 << 1)
+				avr.TCCR0A.ClearBits(avr.TCCR0A_COM0B0)
+			}
+		}
+	case 1:
+		switch channel {
+		case 0: // channel A, PB1
+			if inverting {
+				avr.PORTB.SetBits(1 << 1)
+				avr.TCCR1.SetBits(avr.TCCR1_COM1A0)
+			} else {
+				avr.PORTB.ClearBits(1 << 1)
+				avr.TCCR1.ClearBits(avr.TCCR1_COM1A0)
+			}
+		case 1: // channel B, PB4
+			if inverting {
+				avr.PORTB.SetBits(1 << 4)
+				avr.GTCCR.SetBits(gtccrCOM1B0) // channel B output mode bits live in GTCCR, not TCCR1
+			} else {
+				avr.PORTB.ClearBits(1 << 4)
+				avr.GTCCR.ClearBits(gtccrCOM1B0)
+			}
+		}
+	}
+}
+
+// Set updates the channel value. This is used to control the channel duty
+// cycle, in other words the fraction of time the channel output is high (or low
+// when inverted). For example, to set it to a 25% duty cycle, use:
+//
+//	pwm.Set(channel, pwm.Top() / 4)
+//
+// pwm.Set(channel, 0) will set the output to low and pwm.Set(channel,
+// pwm.Top()) will set the output to high, assuming the output isn't inverted.
+func (pwm PWM) Set(channel uint8, value uint32) {
+	switch pwm.num {
+	case 0:
+		switch channel {
+		case 0: // channel A, PB0
+			if value == 0 {
+				avr.TCCR0A.ClearBits(avr.TCCR0A_COM0A1) // zero duty: clear the COM bit that Channel set instead of writing a compare value
+			} else {
+				avr.OCR0A.Set(uint8(value - 1)) // compare register holds value-1; larger values are truncated to 8 bits
+				avr.TCCR0A.SetBits(avr.TCCR0A_COM0A1)
+			}
+		case 1: // channel B, PB1
+			if value == 0 {
+				avr.TCCR0A.ClearBits(avr.TCCR0A_COM0B1)
+			} else {
+				avr.OCR0B.Set(uint8(value - 1))
+				avr.TCCR0A.SetBits(avr.TCCR0A_COM0B1)
+			}
+		}
+	case 1:
+		switch channel {
+		case 0: // channel A, PB1
+			if value == 0 {
+				avr.TCCR1.ClearBits(avr.TCCR1_COM1A1)
+			} else {
+				avr.OCR1A.Set(uint8(value - 1))
+				avr.TCCR1.SetBits(avr.TCCR1_COM1A1)
+			}
+		case 1: // channel B, PB4
+			if value == 0 {
+				avr.GTCCR.ClearBits(gtccrCOM1B1)
+			} else {
+				avr.OCR1B.Set(uint8(value - 1))
+				avr.GTCCR.SetBits(gtccrCOM1B1)
+			}
+		}
+	}
+}
+
+// SPIConfig is used to store config info for SPI.
+type SPIConfig struct {
+	Frequency uint32 // SCK frequency in Hz; 0 or >= CPUFrequency()/2 runs at maximum speed
+	LSBFirst  bool   // transfer least-significant bit first (bits are reversed in software)
+	Mode      uint8  // Mode0..Mode3; any other value is treated as Mode0
+}
+
+// SPI is the USI-based SPI implementation for ATTiny85.
+// The ATTiny85 doesn't have dedicated SPI hardware, but uses the USI
+// (Universal Serial Interface) in three-wire mode.
+//
+// Fixed pin mapping (directly controlled by USI hardware):
+//   - PB2: SCK (clock)
+//   - PB1: DO/MOSI (data out)
+//   - PB0: DI/MISO (data in)
+//
+// Note: CS pin must be managed by the user.
+type SPI struct {
+	// Busy-wait loop count inserted after each clock toggle in Transfer (0 = max speed)
+	delayCycles uint16
+
+	// USICR value selected by Configure for the requested SPI mode
+	usicrValue uint8
+
+	// LSB-first mode (Transfer reverses the bits in software)
+	lsbFirst bool
+}
+
+// SPI0 is the USI-based SPI interface on the ATTiny85; its fields are filled in by Configure.
+var SPI0 = SPI{}
+
+// Configure sets up the USI pins, SPI mode, bit order and clock delay. It always returns nil.
+// Note: The user must configure and control the CS pin separately.
+func (s *SPI) Configure(config SPIConfig) error {
+	// Configure USI pins (fixed by hardware)
+	// PB1 (DO/MOSI) -> OUTPUT
+	// PB2 (USCK/SCK) -> OUTPUT
+	// PB0 (DI/MISO) -> INPUT
+	PB1.Configure(PinConfig{Mode: PinOutput})
+	PB2.Configure(PinConfig{Mode: PinOutput})
+	PB0.Configure(PinConfig{Mode: PinInput})
+
+	// Reset USI registers
+	avr.USIDR.Set(0)
+	avr.USISR.Set(0)
+
+	// Configure USI for SPI mode:
+	// - USIWM0: Three-wire mode (SPI)
+	// - USICS1: External clock source (software controlled via USITC)
+	// - USICLK: Clock strobe - enables counter increment on USITC toggle
+	// - USICS0: Controls clock phase (CPHA)
+	//
+	// SPI Modes:
+	//   Mode 0 (CPOL=0, CPHA=0): Clock idle low, sample on rising edge
+	//   Mode 1 (CPOL=0, CPHA=1): Clock idle low, sample on falling edge
+	//   Mode 2 (CPOL=1, CPHA=0): Clock idle high, sample on falling edge
+	//   Mode 3 (CPOL=1, CPHA=1): Clock idle high, sample on rising edge
+	//
+	// For USI, USICS0 controls the sampling edge when USICS1=1:
+	//   USICS0=0: Positive edge (rising)
+	//   USICS0=1: Negative edge (falling)
+	switch config.Mode {
+	case Mode0: // CPOL=0, CPHA=0: idle low, sample rising
+		PB2.Low()
+		s.usicrValue = avr.USICR_USIWM0 | avr.USICR_USICS1 | avr.USICR_USICLK
+	case Mode1: // CPOL=0, CPHA=1: idle low, sample falling
+		PB2.Low()
+		s.usicrValue = avr.USICR_USIWM0 | avr.USICR_USICS1 | avr.USICR_USICS0 | avr.USICR_USICLK
+	case Mode2: // CPOL=1, CPHA=0: idle high, sample falling
+		PB2.High()
+		s.usicrValue = avr.USICR_USIWM0 | avr.USICR_USICS1 | avr.USICR_USICS0 | avr.USICR_USICLK
+	case Mode3: // CPOL=1, CPHA=1: idle high, sample rising
+		PB2.High()
+		s.usicrValue = avr.USICR_USIWM0 | avr.USICR_USICS1 | avr.USICR_USICLK
+	default: // Default to Mode 0
+		PB2.Low()
+		s.usicrValue = avr.USICR_USIWM0 | avr.USICR_USICS1 | avr.USICR_USICLK
+	}
+	avr.USICR.Set(s.usicrValue)
+
+	// Calculate delay cycles for frequency control
+	// Each bit transfer requires 2 clock toggles (rising + falling edge)
+	// The loop overhead is approximately 10-15 cycles per toggle on AVR
+	// We calculate additional delay cycles needed to achieve the target frequency
+	s.delayCycles = 0 // max speed unless a slower clock is requested below
+	if config.Frequency > 0 && config.Frequency < CPUFrequency()/2 {
+		// Cycles per half-period = CPUFrequency / (2 * Frequency)
+		// Subtract loop overhead (~15 cycles) to get delay cycles
+		cyclesPerHalfPeriod := CPUFrequency() / (2 * config.Frequency)
+		const loopOverhead = 15
+		if cyclesPerHalfPeriod > loopOverhead {
+			delay := cyclesPerHalfPeriod - loopOverhead
+			if delay > 0xffff {
+				delay = 0xffff // saturate: uint16(delay) would otherwise wrap to a much shorter delay
+			}
+			s.delayCycles = uint16(delay)
+		}
+	}
+
+	// Store LSBFirst setting for use in Transfer
+	s.lsbFirst = config.LSBFirst
+
+	return nil
+}
+
+// reverseByte mirrors the bit order of b (bit 7 swaps with bit 0, and so on).
+// LSB-first transfers rely on it because the USI hardware is MSB-first only.
+func reverseByte(b byte) byte {
+	b = b>>4 | b<<4               // swap nibbles
+	b = (b>>2)&0x33 | (b<<2)&0xCC // swap bit pairs
+	b = (b>>1)&0x55 | (b<<1)&0xAA // swap neighbouring bits
+	return b
+}
+
+// Transfer performs a single byte SPI transfer (send and receive simultaneously) and always returns a nil error.
+// This implements the USI-based SPI transfer using the "clock strobing" technique
+func (s *SPI) Transfer(b byte) (byte, error) {
+	// For LSB-first mode, reverse the bits before sending
+	// USI hardware only supports MSB-first, so we do it in software
+	if s.lsbFirst {
+		b = reverseByte(b)
+	}
+
+	// Load the byte to transmit into the USI Data Register
+	avr.USIDR.Set(b)
+
+	// Clear the counter overflow flag by writing 1 to it (AVR quirk)
+	// This also resets the 4-bit counter to 0
+	avr.USISR.Set(avr.USISR_USIOIF)
+
+	// Clock the data out/in
+	// We need 16 clock toggles (8 bits × 2 edges per bit)
+	// The USI counter counts each clock edge, so it overflows at 16
+	// After 16 toggles, the clock returns to its idle state (set by CPOL in Configure)
+	//
+	// IMPORTANT: Only toggle USITC here!
+	// - USITC toggles the clock pin
+	// - The USICR mode bits (USIWM0, USICS1, USICS0, USICLK) were set in Configure()
+	// - SetBits preserves those bits and only sets USITC
+	if s.delayCycles == 0 {
+		// Fast path: no delay, run at maximum speed
+		for !avr.USISR.HasBits(avr.USISR_USIOIF) {
+			avr.USICR.SetBits(avr.USICR_USITC)
+		}
+	} else {
+		// Frequency-controlled path: add delay between clock toggles
+		for !avr.USISR.HasBits(avr.USISR_USIOIF) {
+			avr.USICR.SetBits(avr.USICR_USITC)
+			// Delay loop for frequency control
+			// Each iteration is approximately 3 cycles on AVR (dec, brne). NOTE(review): Configure derives delayCycles in CPU cycles, not iterations — confirm the resulting SCK rate
+			for i := s.delayCycles; i > 0; i-- {
+				avr.Asm("nop")
+			}
+		}
+	}
+
+	// Get the received byte
+	result := avr.USIDR.Get()
+
+	// For LSB-first mode, reverse the received bits
+	if s.lsbFirst {
+		result = reverseByte(result)
+	}
+
+	return result, nil
+}
diff --git a/src/machine/machine_esp32c3.go b/src/machine/machine_esp32c3.go
index eb2a18b5ee..f413a8c143 100644
--- a/src/machine/machine_esp32c3.go
+++ b/src/machine/machine_esp32c3.go
@@ -509,102 +509,6 @@ func (uart *UART) writeByte(b byte) error {
 
 func (uart *UART) flush() {}
 
-type Serialer interface {
-	WriteByte(c byte) error
-	Write(data []byte) (n int, err error)
-	Configure(config UARTConfig) error
-	Buffered() int
-	ReadByte() (byte, error)
-	DTR() bool
-	RTS() bool
-}
-
-func initUSB() {
-	// nothing to do here
-}
-
-// USB Serial/JTAG Controller
-// See esp32-c3_technical_reference_manual_en.pdf
-// pg. 736
-type USB_DEVICE struct {
-	Bus *esp.USB_DEVICE_Type
-}
-
-var (
-	_USBCDC = &USB_DEVICE{
-		Bus: esp.USB_DEVICE,
-	}
-
-	USBCDC Serialer = _USBCDC
-)
-
-var (
-	errUSBWrongSize            = errors.New("USB: invalid write size")
-	errUSBCouldNotWriteAllData = errors.New("USB: could not write all data")
-	errUSBBufferEmpty          = errors.New("USB: read buffer empty")
-)
-
-func (usbdev *USB_DEVICE) Configure(config UARTConfig) error {
-	return nil
-}
-
-func (usbdev *USB_DEVICE) WriteByte(c byte) error {
-	if usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE() == 0 {
-		return errUSBCouldNotWriteAllData
-	}
-
-	usbdev.Bus.SetEP1_RDWR_BYTE(uint32(c))
-	usbdev.flush()
-
-	return nil
-}
-
-func (usbdev *USB_DEVICE) Write(data []byte) (n int, err error) {
-	if len(data) == 0 || len(data) > 64 {
-		return 0, errUSBWrongSize
-	}
-
-	for i, c := range data {
-		if usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE() == 0 {
-			if i > 0 {
-				usbdev.flush()
-			}
-
-			return i, errUSBCouldNotWriteAllData
-		}
-		usbdev.Bus.SetEP1_RDWR_BYTE(uint32(c))
-	}
-
-	usbdev.flush()
-	return len(data), nil
-}
-
-func (usbdev *USB_DEVICE) Buffered() int {
-	return int(usbdev.Bus.GetEP1_CONF_SERIAL_OUT_EP_DATA_AVAIL())
-}
-
-func (usbdev *USB_DEVICE) ReadByte() (byte, error) {
-	if usbdev.Bus.GetEP1_CONF_SERIAL_OUT_EP_DATA_AVAIL() != 0 {
-		return byte(usbdev.Bus.GetEP1_RDWR_BYTE()), nil
-	}
-
-	return 0, nil
-}
-
-func (usbdev *USB_DEVICE) DTR() bool {
-	return false
-}
-
-func (usbdev *USB_DEVICE) RTS() bool {
-	return false
-}
-
-func (usbdev *USB_DEVICE) flush() {
-	usbdev.Bus.SetEP1_CONF_WR_DONE(1)
-	for usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE() == 0 {
-	}
-}
-
 // GetRNG returns 32-bit random numbers using the ESP32-C3 true random number generator,
 // Random numbers are generated based on the thermal noise in the system and the
 // asynchronous clock mismatch.
diff --git a/src/machine/machine_esp32s3.go b/src/machine/machine_esp32s3.go
index 65261bd3c8..f3ac520d2b 100644
--- a/src/machine/machine_esp32s3.go
+++ b/src/machine/machine_esp32s3.go
@@ -308,5 +308,3 @@ func (uart *UART) writeByte(b byte) error {
 }
 
 func (uart *UART) flush() {}
-
-// TODO: SPI
diff --git a/src/machine/machine_esp32s3_spi.go b/src/machine/machine_esp32s3_spi.go
new file mode 100644
index 0000000000..8c1618fc39
--- /dev/null
+++ b/src/machine/machine_esp32s3_spi.go
@@ -0,0 +1,460 @@
+//go:build esp32s3
+
+package machine
+
+// ESP32-S3 SPI support based on ESP-IDF HAL
+// Simple but correct implementation following spi_ll.h
+// SPI0 = hardware SPI2 (FSPI), SPI1 = hardware SPI3 (HSPI)
+// https://docs.espressif.com/projects/esp-idf/en/latest/esp32s3/api-reference/peripherals/spi_master.html
+
+import (
+	"device/esp"
+	"errors"
+	"runtime/volatile"
+	"unsafe"
+)
+
+const (
+	SPI_MODE0 = uint8(0)
+	SPI_MODE1 = uint8(1)
+	SPI_MODE2 = uint8(2)
+	SPI_MODE3 = uint8(3)
+
+	// Source clock frequency for the SPI divider (80 MHz, same as ESP32-C3); freqToClockDiv treats it as the APB clock
+	pplClockFreq = 80e6
+
+	// Default SPI frequency, used by Configure when SPIConfig.Frequency is 0
+	SPI_DEFAULT_FREQUENCY = 80e6 // 80MHz
+)
+
+const (
+	// IO MUX function number that Configure passes to Pin.configure for the direct SPI connection
+	SPI_IOMUX_FUNC = 4
+)
+
+// ESP32-S3 GPIO Matrix signal indices for SPI, taken from ESP-IDF gpio_sig_map.h
+const (
+	// SPI2 (FSPI) signals - Hardware SPI2 (used by SPI0)
+	SPI2_CLK_OUT_IDX = uint32(101) // FSPICLK_OUT_IDX
+	SPI2_CLK_IN_IDX  = uint32(101) // FSPICLK_IN_IDX
+	SPI2_Q_OUT_IDX   = uint32(102) // FSPIQ_OUT_IDX (MISO)
+	SPI2_Q_IN_IDX    = uint32(102) // FSPIQ_IN_IDX
+	SPI2_D_OUT_IDX   = uint32(103) // FSPID_OUT_IDX (MOSI)
+	SPI2_D_IN_IDX    = uint32(103) // FSPID_IN_IDX
+	SPI2_CS0_OUT_IDX = uint32(110) // FSPICS0_OUT_IDX
+
+	// SPI3 (HSPI) signals - Hardware SPI3 (used by SPI1)
+	// Source: /esp-idf/components/soc/esp32s3/include/soc/gpio_sig_map.h
+	SPI3_CLK_OUT_IDX = uint32(66) // Line 136: SPI3_CLK_OUT_IDX
+	SPI3_CLK_IN_IDX  = uint32(66) // Line 135: SPI3_CLK_IN_IDX
+	SPI3_Q_OUT_IDX   = uint32(67) // Line 138: SPI3_Q_OUT_IDX (MISO)
+	SPI3_Q_IN_IDX    = uint32(67) // Line 137: SPI3_Q_IN_IDX
+	SPI3_D_OUT_IDX   = uint32(68) // Line 140: SPI3_D_OUT_IDX (MOSI)
+	SPI3_D_IN_IDX    = uint32(68) // Line 139: SPI3_D_IN_IDX
+	SPI3_CS0_OUT_IDX = uint32(71) // Line 146: SPI3_CS0_OUT_IDX
+)
+
+type SPI struct {
+	Bus   interface{} // must hold a *esp.SPI2_Type; Configure, Transfer and Tx assert this type
+	busID uint8       // hardware SPI controller number: 2 (FSPI) or 3 (HSPI)
+}
+
+var (
+	SPI0 = &SPI{Bus: esp.SPI2, busID: 2} // Primary SPI: hardware SPI2 (FSPI)
+	SPI1 = &SPI{Bus: esp.SPI3, busID: 3} // Secondary SPI: hardware SPI3 (HSPI)
+)
+
+type SPIConfig struct {
+	Frequency uint32 // SCK frequency in Hz; 0 selects SPI_DEFAULT_FREQUENCY
+	SCK       Pin    // Serial Clock; 0 selects the bus default pin
+	SDO       Pin    // Serial Data Out (MOSI); 0 selects the bus default pin
+	SDI       Pin    // Serial Data In  (MISO); 0 selects the bus default pin
+	CS        Pin    // Chip Select; pass NoPin to skip CS setup
+	LSBFirst  bool   // MSB is default
+	Mode      uint8  // SPI_MODE0 is default
+}
+
+// Configure and make the SPI peripheral ready to use.
+// Implementation following ESP-IDF HAL with GPIO Matrix routing. Zero-valued
+// Frequency, SCK, SDO and SDI fall back to defaults; LSBFirst and Mode are applied.
+func (spi *SPI) Configure(config SPIConfig) error {
+	// Set default
+	if config.Frequency == 0 {
+		config.Frequency = SPI_DEFAULT_FREQUENCY
+	}
+
+	switch spi.busID {
+	case 2: // SPI2 (FSPI)
+		if config.SCK == 0 {
+			config.SCK = SPI1_SCK_PIN
+		}
+		if config.SDO == 0 {
+			config.SDO = SPI1_MOSI_PIN
+		}
+		if config.SDI == 0 {
+			config.SDI = SPI1_MISO_PIN
+		}
+	case 3: // SPI3 (HSPI)
+		if config.SCK == 0 {
+			config.SCK = SPI2_SCK_PIN
+		}
+		if config.SDO == 0 {
+			config.SDO = SPI2_MOSI_PIN
+		}
+		if config.SDI == 0 {
+			config.SDI = SPI2_MISO_PIN
+		}
+	}
+
+	// Get GPIO Matrix signal indices for this SPI bus
+	var sckOutIdx, mosiOutIdx, misoInIdx, csOutIdx uint32
+	switch spi.busID {
+	case 2: // SPI2 (FSPI)
+		sckOutIdx = SPI2_CLK_OUT_IDX
+		mosiOutIdx = SPI2_D_OUT_IDX
+		misoInIdx = SPI2_Q_IN_IDX
+		csOutIdx = SPI2_CS0_OUT_IDX
+	case 3: // SPI3 (HSPI)
+		sckOutIdx = SPI3_CLK_OUT_IDX
+		mosiOutIdx = SPI3_D_OUT_IDX
+		misoInIdx = SPI3_Q_IN_IDX
+		csOutIdx = SPI3_CS0_OUT_IDX
+	default:
+		return ErrInvalidSPIBus
+	}
+
+	// Check if we can use IO MUX direct connection for better performance
+	if isDefaultSPIPins(spi.busID, config) {
+		// Use IO MUX direct connection (SPI function) - better signal quality and performance
+		if config.SCK != NoPin {
+			config.SCK.configure(PinConfig{Mode: PinOutput}, SPI_IOMUX_FUNC)
+		}
+		if config.SDO != NoPin {
+			config.SDO.configure(PinConfig{Mode: PinOutput}, SPI_IOMUX_FUNC)
+		}
+		if config.SDI != NoPin {
+			config.SDI.configure(PinConfig{Mode: PinInput}, SPI_IOMUX_FUNC)
+		}
+		if config.CS != NoPin {
+			config.CS.configure(PinConfig{Mode: PinOutput}, SPI_IOMUX_FUNC)
+		}
+	} else {
+		// Use GPIO Matrix routing - more flexible but slightly slower. Configure SDI (MISO) pin first
+		if config.SDI != NoPin {
+			config.SDI.Configure(PinConfig{Mode: PinInput})
+			inFunc(misoInIdx).Set(esp.GPIO_FUNC_IN_SEL_CFG_SEL | uint32(config.SDI))
+		}
+
+		// Configure SDO (MOSI) pin
+		if config.SDO != NoPin {
+			config.SDO.Configure(PinConfig{Mode: PinOutput})
+			config.SDO.outFunc().Set(mosiOutIdx)
+		}
+
+		// Configure SCK (Clock) pin
+		if config.SCK != NoPin {
+			config.SCK.Configure(PinConfig{Mode: PinOutput})
+			config.SCK.outFunc().Set(sckOutIdx)
+		}
+
+		// Configure CS (Chip Select) pin
+		if config.CS != NoPin {
+			config.CS.Configure(PinConfig{Mode: PinOutput})
+			config.CS.outFunc().Set(csOutIdx)
+		}
+	}
+
+	// Enable peripheral clock and reset (explicit, since no bootloader does it for us)
+	switch spi.busID {
+	case 2: // Hardware SPI2 (FSPI)
+		esp.SYSTEM.SetPERIP_CLK_EN0_SPI2_CLK_EN(1)
+		esp.SYSTEM.SetPERIP_RST_EN0_SPI2_RST(1)
+		esp.SYSTEM.SetPERIP_RST_EN0_SPI2_RST(0)
+	case 3: // Hardware SPI3 (HSPI)
+		esp.SYSTEM.SetPERIP_CLK_EN0_SPI3_CLK_EN(1)
+		esp.SYSTEM.SetPERIP_RST_EN0_SPI3_RST(1)
+		esp.SYSTEM.SetPERIP_RST_EN0_SPI3_RST(0)
+	}
+
+	// Get bus handle - both SPI2 and SPI3 use SPI2_Type
+	bus, ok := spi.Bus.(*esp.SPI2_Type)
+	if !ok {
+		return ErrInvalidSPIBus
+	}
+
+	// Reset timing: cs_setup_time = 0, cs_hold_time = 0
+	bus.USER1.Set(0)
+
+	// Use all 64 bytes of the buffer
+	bus.SetUSER_USR_MISO_HIGHPART(0)
+	bus.SetUSER_USR_MOSI_HIGHPART(0)
+
+	// Disable unneeded interrupts and clear all USER bits first
+	bus.SLAVE.Set(0)
+	bus.USER.Set(0)
+
+	// Clear other important registers like ESP32-C3
+	bus.MISC.Set(0)
+	bus.CTRL.Set(0)
+	bus.CLOCK.Set(0)
+
+	// Clear data buffers like ESP32-C3
+	bus.W0.Set(0)
+	bus.W1.Set(0)
+	bus.W2.Set(0)
+	bus.W3.Set(0)
+
+	// Configure master clock gate - CRITICAL: need CLK_EN bit!
+	bus.SetCLK_GATE_CLK_EN(1)         // Enable basic SPI clock (bit 0)
+	bus.SetCLK_GATE_MST_CLK_ACTIVE(1) // Enable master clock (bit 1)
+	bus.SetCLK_GATE_MST_CLK_SEL(1)    // Select master clock (bit 2)
+
+	// Configure DMA following ESP-IDF HAL: reset DMA configuration
+	bus.DMA_CONF.Set(0)
+	// Set DMA segment transaction clear enable bits
+	bus.SetDMA_CONF_SLV_TX_SEG_TRANS_CLR_EN(1)
+	bus.SetDMA_CONF_SLV_RX_SEG_TRANS_CLR_EN(1)
+	// dma_seg_trans_en = 0 (already 0 from DMA_CONF.Set(0))
+
+	// Configure master mode
+	bus.SetUSER_USR_MOSI(1) // Enable MOSI
+	bus.SetUSER_USR_MISO(1) // Enable MISO
+	bus.SetUSER_DOUTDIN(1)  // Full-duplex mode
+	var bitOrder uint32     // 0 = MSB first (default)
+	if config.LSBFirst {
+		bitOrder = 1 // LSB first, as requested in the config
+	}
+	bus.SetCTRL_WR_BIT_ORDER(bitOrder)
+	bus.SetCTRL_RD_BIT_ORDER(bitOrder)
+
+	// CRITICAL: Enable clock output (from working test)
+	bus.SetMISC_CK_DIS(0) // Enable CLK output - THIS IS KEY!
+
+	// Configure SPI mode (CPOL/CPHA) following ESP-IDF HAL
+	switch config.Mode {
+	case SPI_MODE0:
+		// CPOL=0, CPHA=0 (default)
+	case SPI_MODE1:
+		bus.SetUSER_CK_OUT_EDGE(1) // CPHA=1
+	case SPI_MODE2:
+		bus.SetMISC_CK_IDLE_EDGE(1) // CPOL=1
+		bus.SetUSER_CK_OUT_EDGE(1)  // CPHA=1
+	case SPI_MODE3:
+		bus.SetMISC_CK_IDLE_EDGE(1) // CPOL=1
+	}
+
+	// Configure SPI bus clock using ESP32-C3 algorithm for better accuracy
+	bus.CLOCK.Set(freqToClockDiv(config.Frequency))
+
+	return nil
+}
+
+// Transfer writes/reads a single byte using the SPI interface, following ESP-IDF HAL spi_ll_user_start.
+// It returns ErrInvalidSPIBus for an unexpected Bus type and an error if the transaction does not finish in time.
+func (spi *SPI) Transfer(w byte) (byte, error) {
+	// Both SPI2 and SPI3 use SPI2_Type
+	bus, ok := spi.Bus.(*esp.SPI2_Type)
+	if !ok {
+		return 0, ErrInvalidSPIBus
+	}
+
+	// Set transfer length (8 bits = 7 in register)
+	bus.SetMS_DLEN_MS_DATA_BITLEN(7)
+
+	// Clear any pending interrupt flags BEFORE starting transaction
+	bus.SetDMA_INT_CLR_TRANS_DONE_INT_CLR(1)
+
+	// Write data to buffer (use W0 register)
+	bus.W0.Set(uint32(w))
+
+	// CRITICAL: Apply configuration before transmission (like ESP-IDF spi_ll_apply_config)
+	bus.SetCMD_UPDATE(1)
+	for bus.GetCMD_UPDATE() != 0 {
+		// Wait for config to be applied
+	}
+
+	// Start transaction following ESP-IDF HAL spi_ll_user_start
+	bus.SetCMD_USR(1)
+
+	// Wait for completion using CMD_USR flag (like ESP32-C3 approach)
+	// Hardware clears CMD_USR when transaction is complete
+	timeout := 100000
+	for bus.GetCMD_USR() != 0 && timeout > 0 {
+		timeout--
+	}
+
+	// Decide from the hardware flag itself: the transfer may complete on the very last poll
+	if bus.GetCMD_USR() != 0 {
+		return 0, errors.New("SPI transfer timeout")
+	}
+
+	// Read received data from W0 register
+	result := byte(bus.W0.Get() & 0xFF)
+	return result, nil
+}
+
+// Tx handles read/write operation for SPI interface. Since SPI is a synchronous write/read
+// interface, there must always be the same number of bytes written as bytes read.
+// This is accomplished by sending zero bits if r is bigger than w or discarding
+// the incoming data if w is bigger than r.
+// Ported from the ESP32-C3 implementation. Returns ErrInvalidSPIBus if Bus is not a *esp.SPI2_Type.
+func (spi *SPI) Tx(w, r []byte) error {
+	toTransfer := len(w)
+	if len(r) > toTransfer {
+		toTransfer = len(r)
+	}
+
+	// Get bus handle - both SPI2 and SPI3 use SPI2_Type
+	bus, ok := spi.Bus.(*esp.SPI2_Type)
+	if !ok {
+		return ErrInvalidSPIBus
+	}
+
+	for toTransfer > 0 {
+		// Chunk 64 bytes at a time.
+		chunkSize := toTransfer
+		if chunkSize > 64 {
+			chunkSize = 64
+		}
+
+		// Fill tx buffer. The 16 registers starting at W0 are viewed as one array of words.
+		transferWords := (*[16]volatile.Register32)(unsafe.Add(unsafe.Pointer(&bus.W0), 0))
+		if len(w) >= 64 {
+			// We can fill the entire 64-byte transfer buffer with data.
+			// This loop is slightly faster than the loop below.
+			for i := 0; i < 16; i++ {
+				word := uint32(w[i*4]) | uint32(w[i*4+1])<<8 | uint32(w[i*4+2])<<16 | uint32(w[i*4+3])<<24
+				transferWords[i].Set(word)
+			}
+		} else {
+			// We can't fill the entire transfer buffer, so we need to be a bit
+			// more careful.
+			// Note that parts of the transfer buffer that aren't used still
+			// need to be set to zero, otherwise we might be transferring
+			// garbage from a previous transmission if w is smaller than r.
+			for i := 0; i < 16; i++ {
+				var word uint32
+				if i*4+3 < len(w) {
+					word |= uint32(w[i*4+3]) << 24
+				}
+				if i*4+2 < len(w) {
+					word |= uint32(w[i*4+2]) << 16
+				}
+				if i*4+1 < len(w) {
+					word |= uint32(w[i*4+1]) << 8
+				}
+				if i*4+0 < len(w) {
+					word |= uint32(w[i*4+0]) << 0
+				}
+				transferWords[i].Set(word)
+			}
+		}
+
+		// Do the transfer. The length register takes the bit count minus one.
+		bus.SetMS_DLEN_MS_DATA_BITLEN(uint32(chunkSize)*8 - 1)
+
+		bus.SetCMD_UPDATE(1)
+		for bus.GetCMD_UPDATE() != 0 {
+		}
+
+		bus.SetCMD_USR(1)
+		for bus.GetCMD_USR() != 0 { // NOTE: unlike Transfer, this wait has no timeout
+		}
+
+		// Read rx buffer.
+		rxSize := chunkSize
+		if rxSize > len(r) {
+			rxSize = len(r)
+		}
+		for i := 0; i < rxSize; i++ {
+			r[i] = byte(transferWords[i/4].Get() >> ((i % 4) * 8))
+		}
+
+		// Cut off some part of the output buffer so the next iteration we will
+		// only send the remaining bytes.
+		if len(w) < chunkSize {
+			w = nil
+		} else {
+			w = w[chunkSize:]
+		}
+		if len(r) < chunkSize {
+			r = nil
+		} else {
+			r = r[chunkSize:]
+		}
+		toTransfer -= chunkSize
+	}
+
+	return nil
+}
+
+// freqToClockDiv computes the value Configure writes to the SPI CLOCK register for a bus frequency of hz.
+// Note: APB clock is always 80MHz on ESP32-S3, independent of CPU frequency.
+// Ported from ESP32-C3 implementation for better accuracy.
+func freqToClockDiv(hz uint32) uint32 {
+	// Use APB clock frequency (80MHz), not CPU frequency!
+	// SPI peripheral is connected to APB bus which stays at 80MHz
+	const apbFreq = pplClockFreq // 80MHz
+
+	if hz >= apbFreq { // maximum frequency
+		return 1 << 31 // presumably the "run at the source clock, undivided" bit — confirm against the CLOCK register layout
+	}
+	if hz < (apbFreq / (16 * 64)) { // minimum frequency
+		return 15<<18 | 63<<12 | 31<<6 | 63 // pre=15, n=63
+	}
+
+	// iterate looking for an exact match
+	// or iterate all 16 prescaler options
+	// looking for the smallest error
+	var bestPre, bestN, bestErr uint32
+	bestN = 1
+	bestErr = 0xffffffff
+	q := uint32(float32(apbFreq)/float32(hz) + float32(0.5)) // total division ratio, rounded to nearest
+	for p := uint32(0); p < 16; p++ {
+		n := q/(p+1) - 1
+		if n < 1 { // prescaler became too large, stop enum
+			break
+		}
+		if n > 63 { // prescaler too small, skip to next
+			continue
+		}
+
+		freq := apbFreq / ((p + 1) * (n + 1))
+		if freq == hz { // exact match
+			return p<<18 | n<<12 | (n/2)<<6 | n // fields at bits 18, 12, 6 and 0: p, n, n/2, n
+		}
+
+		var err uint32
+		if freq < hz {
+			err = hz - freq
+		} else {
+			err = freq - hz
+		}
+		if err < bestErr {
+			bestErr = err
+			bestPre = p
+			bestN = n
+		}
+	}
+
+	return bestPre<<18 | bestN<<12 | (bestN/2)<<6 | bestN
+}
+
+// isDefaultSPIPins reports whether config uses exactly the default pins of
+// the given bus, which is what allows the IO MUX direct connection.
+func isDefaultSPIPins(busID uint8, config SPIConfig) bool {
+	var sck, mosi, miso, cs Pin
+	switch busID {
+	case 2: // SPI2 (FSPI)
+		sck, mosi, miso, cs = SPI1_SCK_PIN, SPI1_MOSI_PIN, SPI1_MISO_PIN, SPI1_CS_PIN
+	case 3: // SPI3 (HSPI)
+		sck, mosi, miso, cs = SPI2_SCK_PIN, SPI2_MOSI_PIN, SPI2_MISO_PIN, SPI2_CS_PIN
+	default:
+		return false
+	}
+	if config.SCK != sck || config.SDO != mosi || config.SDI != miso {
+		return false
+	}
+	// CS may be either the default CS pin or left out entirely.
+	return config.CS == cs || config.CS == NoPin
+}
diff --git a/src/machine/machine_esp32xx_usb.go b/src/machine/machine_esp32xx_usb.go
new file mode 100644
index 0000000000..bdd1281a81
--- /dev/null
+++ b/src/machine/machine_esp32xx_usb.go
@@ -0,0 +1,102 @@
+//go:build esp32s3 || esp32c3
+
+package machine
+
+import (
+	"device/esp"
+	"errors"
+)
+
+// USB_DEVICE drives the USB Serial/JTAG Controller.
+// See esp32-c3_technical_reference_manual_en.pdf
+// pg. 736
+type USB_DEVICE struct {
+	Bus *esp.USB_DEVICE_Type // peripheral registers accessed by every USB_DEVICE method
+}
+
+var (
+	_USBCDC = &USB_DEVICE{ // the controller instance, bound to esp.USB_DEVICE
+		Bus: esp.USB_DEVICE,
+	}
+
+	USBCDC Serialer = _USBCDC // _USBCDC exposed through the Serialer interface
+)
+
+var ( // error values for the USB_DEVICE methods
+	errUSBWrongSize            = errors.New("USB: invalid write size")
+	errUSBCouldNotWriteAllData = errors.New("USB: could not write all data")
+	errUSBBufferEmpty          = errors.New("USB: read buffer empty")
+)
+
+type Serialer interface { // method set through which USBCDC is exposed; *USB_DEVICE implements it
+	WriteByte(c byte) error
+	Write(data []byte) (n int, err error)
+	Configure(config UARTConfig) error
+	Buffered() int
+	ReadByte() (byte, error)
+	DTR() bool
+	RTS() bool
+}
+
+func initUSB() {} // no-op: performs no setup
+
+func (usbdev *USB_DEVICE) Configure(config UARTConfig) error { // config is ignored
+	return nil // always succeeds; no register is touched
+}
+
+func (usbdev *USB_DEVICE) WriteByte(c byte) error {
+	// Only write while the IN endpoint still reports free space.
+	if free := usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE(); free != 0 {
+		usbdev.Bus.SetEP1_RDWR_BYTE(uint32(c))
+		usbdev.flush() // set WR_DONE and wait until free space is reported again
+		return nil
+	}
+
+	return errUSBCouldNotWriteAllData
+}
+
+func (usbdev *USB_DEVICE) Write(data []byte) (n int, err error) {
+	if size := len(data); size == 0 || size > 64 {
+		return 0, errUSBWrongSize
+	}
+
+	for n = 0; n < len(data); n++ {
+		if usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE() == 0 {
+			// No free space left: flush what was queued so far, if anything.
+			if n > 0 {
+				usbdev.flush()
+			}
+			return n, errUSBCouldNotWriteAllData
+		}
+		usbdev.Bus.SetEP1_RDWR_BYTE(uint32(data[n]))
+	}
+
+	usbdev.flush()
+	return n, nil
+}
+
+func (usbdev *USB_DEVICE) Buffered() int { // returns the raw SERIAL_OUT_EP_DATA_AVAIL field
+	return int(usbdev.Bus.GetEP1_CONF_SERIAL_OUT_EP_DATA_AVAIL()) // NOTE(review): presumably a 0/1 flag, not a byte count — confirm
+}
+
+func (usbdev *USB_DEVICE) ReadByte() (byte, error) {
+	if usbdev.Bus.GetEP1_CONF_SERIAL_OUT_EP_DATA_AVAIL() != 0 {
+		return byte(usbdev.Bus.GetEP1_RDWR_BYTE()), nil
+	}
+	// Nothing received: report it, so callers can tell "no data" from a 0x00 byte.
+	return 0, errUSBBufferEmpty
+}
+
+func (usbdev *USB_DEVICE) DTR() bool {
+	return false // constant: no DTR state is read from the controller here
+}
+
+func (usbdev *USB_DEVICE) RTS() bool {
+	return false // constant: no RTS state is read from the controller here
+}
+
+func (usbdev *USB_DEVICE) flush() {
+	usbdev.Bus.SetEP1_CONF_WR_DONE(1) // set WR_DONE (presumably hands the queued bytes over for sending — confirm)
+	for usbdev.Bus.GetEP1_CONF_SERIAL_IN_EP_DATA_FREE() == 0 { // busy-wait with no timeout until free space is reported
+	}
+}
diff --git a/src/machine/machine_rp2_adc.go b/src/machine/machine_rp2_adc.go
index e0d6a459a9..12ff152dc9 100644
--- a/src/machine/machine_rp2_adc.go
+++ b/src/machine/machine_rp2_adc.go
@@ -19,10 +19,8 @@ var adcAref uint32
 
 // InitADC resets the ADC peripheral.
 func InitADC() {
-	rp.RESETS.RESET.SetBits(rp.RESETS_RESET_ADC)
-	rp.RESETS.RESET.ClearBits(rp.RESETS_RESET_ADC)
-	for !rp.RESETS.RESET_DONE.HasBits(rp.RESETS_RESET_ADC) {
-	}
+	resetBlock(rp.RESETS_RESET_ADC)
+	unresetBlockWait(rp.RESETS_RESET_ADC)
 	// enable ADC
 	rp.ADC.CS.Set(rp.ADC_CS_EN)
 	adcAref = 3300
diff --git a/src/machine/machine_rp2_i2c.go b/src/machine/machine_rp2_i2c.go
index 54a5e5357b..e4de7a783b 100644
--- a/src/machine/machine_rp2_i2c.go
+++ b/src/machine/machine_rp2_i2c.go
@@ -259,10 +259,7 @@ func (i2c *I2C) init(config I2CConfig) error {
 //go:inline
 func (i2c *I2C) reset() {
 	resetVal := i2c.deinit()
-	rp.RESETS.RESET.ClearBits(resetVal)
-	// Wait until reset is done.
-	for !rp.RESETS.RESET_DONE.HasBits(resetVal) {
-	}
+	unresetBlockWait(resetVal)
 }
 
 // deinit sets reset bit for I2C. Must call reset to reenable I2C after deinit.
@@ -276,15 +273,13 @@ func (i2c *I2C) deinit() (resetVal uint32) {
 		resetVal = rp.RESETS_RESET_I2C1
 	}
 	// Perform I2C reset.
-	rp.RESETS.RESET.SetBits(resetVal)
+	resetBlock(resetVal)
 
 	return resetVal
 }
 
 // tx performs blocking write followed by read to I2C bus.
 func (i2c *I2C) tx(addr uint8, tx, rx []byte) (err error) {
-	const timeout_us = 4_000
-	deadline := ticks() + timeout_us
 	if addr >= 0x80 || isReservedI2CAddr(addr) {
 		return errInvalidTgtAddr
 	}
@@ -295,6 +290,14 @@ func (i2c *I2C) tx(addr uint8, tx, rx []byte) (err error) {
 		return nil
 	}
 
+	// Base 4ms for small register pokes.
+	// Add per-byte budget. 100us/byte is conservative at 400kHz and still ok at 100kHz for modest sizes.
+	timeout_us := uint64(4_000) + uint64(txlen+rxlen)*100
+	// Cap so it doesn't go insane:
+	timeout_us = min(timeout_us, 500_000)
+
+	deadline := ticks() + timeout_us
+
 	err = i2c.disable()
 	if err != nil {
 		return err
diff --git a/src/machine/machine_rp2_spi.go b/src/machine/machine_rp2_spi.go
index 75e4f86b7b..f3fb256f61 100644
--- a/src/machine/machine_rp2_spi.go
+++ b/src/machine/machine_rp2_spi.go
@@ -212,10 +212,7 @@ func (spi *SPI) setFormat(mode uint8) {
 //go:inline
 func (spi *SPI) reset() {
 	resetVal := spi.deinit()
-	rp.RESETS.RESET.ClearBits(resetVal)
-	// Wait until reset is done.
-	for !rp.RESETS.RESET_DONE.HasBits(resetVal) {
-	}
+	unresetBlockWait(resetVal)
 }
 
 //go:inline
@@ -227,7 +224,7 @@ func (spi *SPI) deinit() (resetVal uint32) {
 		resetVal = rp.RESETS_RESET_SPI1
 	}
 	// Perform SPI reset.
-	rp.RESETS.RESET.SetBits(resetVal)
+	resetBlock(resetVal)
 	return resetVal
 }
 
diff --git a/src/machine/machine_rp2_uart.go b/src/machine/machine_rp2_uart.go
index 872418a766..37e2ca9c2a 100644
--- a/src/machine/machine_rp2_uart.go
+++ b/src/machine/machine_rp2_uart.go
@@ -73,6 +73,27 @@ func (uart *UART) Configure(config UARTConfig) error {
 	return nil
 }
 
+// Close shuts the UART down: its interrupt is disabled and the block is put in reset.
+func (uart *UART) Close() error {
+	uart.Interrupt.Disable()
+
+	// Clear the UART enable bit before resetting the block.
+	uart.Bus.UARTCR.ClearBits(rp.UART0_UARTCR_UARTEN)
+
+	// Pick the reset bit that belongs to this bus; stays 0 for an unknown bus.
+	var resetVal uint32
+	switch uart.Bus {
+	case rp.UART0:
+		resetVal = rp.RESETS_RESET_UART0
+	case rp.UART1:
+		resetVal = rp.RESETS_RESET_UART1
+	}
+	// Only resetBlock is called here, with no matching unresetBlockWait.
+	resetBlock(resetVal)
+
+	return nil
+}
+
 // SetBaudRate sets the baudrate to be used for the UART.
 func (uart *UART) SetBaudRate(br uint32) {
 	div := 8 * CPUFrequency() / br
@@ -148,10 +169,8 @@ func initUART(uart *UART) {
 	}
 
 	// reset UART
-	rp.RESETS.RESET.SetBits(resetVal)
-	rp.RESETS.RESET.ClearBits(resetVal)
-	for !rp.RESETS.RESET_DONE.HasBits(resetVal) {
-	}
+	resetBlock(resetVal)
+	unresetBlockWait(resetVal)
 }
 
 // handleInterrupt should be called from the appropriate interrupt handler for
diff --git a/src/machine/machine_stm32g0_can.go b/src/machine/machine_stm32g0_can.go
index 01bf523df8..c02338e7ba 100644
--- a/src/machine/machine_stm32g0_can.go
+++ b/src/machine/machine_stm32g0_can.go
@@ -9,6 +9,8 @@ import (
 	"unsafe"
 )
 
+// Exported API in src/machine/can.go
+
 // FDCAN Message RAM configuration
 // STM32G0B1 SRAMCAN base address: 0x4000B400
 // Each FDCAN instance has its own message RAM area
@@ -74,78 +76,53 @@ const (
 	FDCAN_IT_ERROR_PASSIVE        = 0x00800000
 )
 
-// FDCAN represents an FDCAN peripheral
-type FDCAN struct {
+// CAN is a STM32G0's CAN/FDCAN peripheral.
+type CAN struct {
 	Bus             *stm32.FDCAN_Type
 	TxAltFuncSelect uint8
 	RxAltFuncSelect uint8
 	Interrupt       interrupt.Interrupt
 	instance        uint8
+	alwaysFD        bool
+	rxInterrupt     bool
 }
 
-// FDCANTransferRate represents CAN bus transfer rates
-type FDCANTransferRate uint32
+// CANTransferRate represents CAN bus transfer rates
+type CANTransferRate uint32
 
 const (
-	FDCANTransferRate125kbps  FDCANTransferRate = 125000
-	FDCANTransferRate250kbps  FDCANTransferRate = 250000
-	FDCANTransferRate500kbps  FDCANTransferRate = 500000
-	FDCANTransferRate1000kbps FDCANTransferRate = 1000000
-	FDCANTransferRate2000kbps FDCANTransferRate = 2000000 // FD only
-	FDCANTransferRate4000kbps FDCANTransferRate = 4000000 // FD only
+	FDCANTransferRate125kbps  CANTransferRate = 125000
+	FDCANTransferRate250kbps  CANTransferRate = 250000
+	FDCANTransferRate500kbps  CANTransferRate = 500000
+	FDCANTransferRate1000kbps CANTransferRate = 1000000
+	FDCANTransferRate2000kbps CANTransferRate = 2000000 // FD only
+	FDCANTransferRate4000kbps CANTransferRate = 4000000 // FD only
 )
 
-// FDCANMode represents the FDCAN operating mode
-type FDCANMode uint8
+// CANMode represents the FDCAN operating mode
+type CANMode uint8
 
 const (
-	FDCANModeNormal           FDCANMode = 0
-	FDCANModeBusMonitoring    FDCANMode = 1
-	FDCANModeInternalLoopback FDCANMode = 2
-	FDCANModeExternalLoopback FDCANMode = 3
+	CANModeNormal           CANMode = 0
+	CANModeBusMonitoring    CANMode = 1
+	CANModeInternalLoopback CANMode = 2
+	CANModeExternalLoopback CANMode = 3
 )
 
-// FDCANConfig holds FDCAN configuration parameters
-type FDCANConfig struct {
-	TransferRate   FDCANTransferRate // Nominal bit rate (arbitration phase)
-	TransferRateFD FDCANTransferRate // Data bit rate (data phase), must be >= TransferRate
-	Mode           FDCANMode
-	Tx             Pin
-	Rx             Pin
-	Standby        Pin // Optional standby pin for CAN transceiver (set to NoPin if not used)
-}
-
-// FDCANTxBufferElement represents a transmit buffer element
-type FDCANTxBufferElement struct {
-	ESI bool     // Error State Indicator
-	XTD bool     // Extended ID flag
-	RTR bool     // Remote Transmission Request
-	ID  uint32   // CAN identifier (11-bit or 29-bit)
-	MM  uint8    // Message Marker
-	EFC bool     // Event FIFO Control
-	FDF bool     // FD Frame indicator
-	BRS bool     // Bit Rate Switch
-	DLC uint8    // Data Length Code (0-15)
-	DB  [64]byte // Data buffer
-}
-
-// FDCANRxBufferElement represents a receive buffer element
-type FDCANRxBufferElement struct {
-	ESI  bool     // Error State Indicator
-	XTD  bool     // Extended ID flag
-	RTR  bool     // Remote Transmission Request
-	ID   uint32   // CAN identifier
-	ANMF bool     // Accepted Non-matching Frame
-	FIDX uint8    // Filter Index
-	FDF  bool     // FD Frame
-	BRS  bool     // Bit Rate Switch
-	DLC  uint8    // Data Length Code
-	RXTS uint16   // RX Timestamp
-	DB   [64]byte // Data buffer
+// CANConfig holds FDCAN configuration parameters
+type CANConfig struct {
+	TransferRate      CANTransferRate // Nominal bit rate (arbitration phase)
+	TransferRateFD    CANTransferRate // Data bit rate (data phase), must be >= TransferRate
+	Mode              CANMode
+	Tx                Pin
+	Rx                Pin
+	Standby           Pin  // Optional standby pin for CAN transceiver (set to NoPin if not used)
+	AlwaysFD          bool // Always transmit as FD frames, even when data fits in classic CAN
+	EnableRxInterrupt bool // Enable interrupt-driven receive (messages delivered via SetRxCallback)
 }
 
-// FDCANFilterConfig represents a filter configuration
-type FDCANFilterConfig struct {
+// CANFilterConfig represents a message filter configuration
+type CANFilterConfig struct {
 	Index        uint8  // Filter index (0-27 for standard, 0-7 for extended)
 	Type         uint8  // 0=Range, 1=Dual, 2=Classic (ID/Mask)
 	Config       uint8  // 0=Disable, 1=FIFO0, 2=FIFO1, 3=Reject
@@ -155,401 +132,350 @@ type FDCANFilterConfig struct {
 }
 
 var (
-	errFDCANInvalidTransferRate   = errors.New("FDCAN: invalid TransferRate")
-	errFDCANInvalidTransferRateFD = errors.New("FDCAN: invalid TransferRateFD")
-	errFDCANTimeout               = errors.New("FDCAN: timeout")
-	errFDCANTxFifoFull            = errors.New("FDCAN: Tx FIFO full")
-	errFDCANRxFifoEmpty           = errors.New("FDCAN: Rx FIFO empty")
-	errFDCANNotStarted            = errors.New("FDCAN: not started")
+	errCANInvalidTransferRate   = errors.New("CAN: invalid TransferRate")
+	errCANInvalidTransferRateFD = errors.New("CAN: invalid TransferRateFD")
+	errCANTimeout               = errors.New("CAN: timeout")
+	errCANTxFifoFull            = errors.New("CAN: Tx FIFO full")
 )
 
-// DLC to bytes lookup table
-var dlcToBytes = [16]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 16, 20, 24, 32, 48, 64}
+// enableFDCANClock enables the FDCAN peripheral clock.
+func enableFDCANClock() {
+	// FDCAN clock is on APB1
+	stm32.RCC.SetAPBENR1_FDCANEN(1) // write 1 to the FDCANEN field of RCC APBENR1
+}
+
+// canRxCB holds the receive callback of each CAN instance; flags implemented as described in [CAN.SetRxCallback]
+var canRxCB [2]canRxCallback
+
+// canInstances tracks CAN peripherals with interrupt-driven RX enabled.
+// A non-nil entry means setRxCallback was called with a non-nil callback.
+var canInstances [2]*CAN
+
+// Configure initializes the FDCAN peripheral and starts it.
+func (can *CAN) Configure(config CANConfig) error {
+	can.alwaysFD = config.AlwaysFD
 
-// Configure initializes the FDCAN peripheral
-func (can *FDCAN) Configure(config FDCANConfig) error {
-	// Configure standby pin if specified (for CAN transceivers with standby control)
-	// Setting it low enables the transceiver
 	if config.Standby != NoPin {
 		config.Standby.Configure(PinConfig{Mode: PinOutput})
 		config.Standby.Low()
 	}
 
-	// Enable FDCAN clock
 	enableFDCANClock()
 
-	// Configure TX and RX pins
 	config.Tx.ConfigureAltFunc(PinConfig{Mode: PinOutput}, can.TxAltFuncSelect)
 	config.Rx.ConfigureAltFunc(PinConfig{Mode: PinInputFloating}, can.RxAltFuncSelect)
 
-	// Exit from sleep mode
+	// Exit sleep mode.
 	can.Bus.SetCCCR_CSR(0)
-
-	// Wait for sleep mode exit
 	timeout := 10000
 	for can.Bus.GetCCCR_CSA() != 0 {
 		timeout--
 		if timeout == 0 {
-			return errFDCANTimeout
+			return errCANTimeout
 		}
 	}
 
-	// Request initialization
+	// Request initialization.
 	can.Bus.SetCCCR_INIT(1)
-
-	// Wait for init mode
 	timeout = 10000
 	for can.Bus.GetCCCR_INIT() == 0 {
 		timeout--
 		if timeout == 0 {
-			return errFDCANTimeout
+			return errCANTimeout
 		}
 	}
 
-	// Enable configuration change
+	// Enable configuration change.
 	can.Bus.SetCCCR_CCE(1)
 
-	// Configure clock divider (only for FDCAN1)
 	if can.Bus == stm32.FDCAN1 {
-		can.Bus.SetCKDIV_PDIV(0)
-		//can.Bus.CKDIV.Set(0) // No division
+		can.Bus.SetCKDIV_PDIV(0) // No clock division.
 	}
 
-	// Enable automatic retransmission
-	can.Bus.SetCCCR_DAR(0)
-
-	// Disable transmit pause
-	can.Bus.SetCCCR_TXP(0)
-
-	// Enable protocol exception handling
-	can.Bus.SetCCCR_PXHD(0)
+	can.Bus.SetCCCR_DAR(0)  // Enable auto retransmission.
+	can.Bus.SetCCCR_TXP(0)  // Disable transmit pause.
+	can.Bus.SetCCCR_PXHD(0) // Enable protocol exception handling.
+	can.Bus.SetCCCR_FDOE(1) // FD operation.
+	can.Bus.SetCCCR_BRSE(1) // Bit rate switching.
 
-	// Enable FD mode with bit rate switching
-	can.Bus.SetCCCR_FDOE(1)
-	can.Bus.SetCCCR_BRSE(1)
-
-	// Configure operating mode
+	// Reset mode bits, then apply requested mode.
 	can.Bus.SetCCCR_TEST(0)
 	can.Bus.SetCCCR_MON(0)
 	can.Bus.SetCCCR_ASM(0)
 	can.Bus.SetTEST_LBCK(0)
-
 	switch config.Mode {
-	case FDCANModeBusMonitoring:
+	case CANModeBusMonitoring:
 		can.Bus.SetCCCR_MON(1)
-	case FDCANModeInternalLoopback:
+	case CANModeInternalLoopback:
 		can.Bus.SetCCCR_TEST(1)
 		can.Bus.SetCCCR_MON(1)
 		can.Bus.SetTEST_LBCK(1)
-	case FDCANModeExternalLoopback:
+	case CANModeExternalLoopback:
 		can.Bus.SetCCCR_TEST(1)
 		can.Bus.SetTEST_LBCK(1)
 	}
 
-	// Set nominal bit timing
-	// STM32G0 runs at 64MHz, FDCAN clock = PCLK = 64MHz
-	// Bit time = (1 + NTSEG1 + NTSEG2) * tq
-	// tq = (NBRP + 1) / fCAN_CLK
+	// Nominal bit timing (64 MHz FDCAN clock, 16 tq/bit, ~80% sample point).
 	if config.TransferRate == 0 {
 		config.TransferRate = FDCANTransferRate500kbps
 	}
-
-	nbrp, ntseg1, ntseg2, nsjw, err := can.calculateNominalBitTiming(config.TransferRate)
+	nbrp, ntseg1, ntseg2, nsjw, err := fdcanNominalBitTiming(config.TransferRate)
 	if err != nil {
 		return err
 	}
 	can.Bus.NBTP.Set(((nsjw - 1) << 25) | ((nbrp - 1) << 16) | ((ntseg1 - 1) << 8) | (ntseg2 - 1))
 
-	// Set data bit timing (for FD mode)
+	// Data bit timing (FD phase).
 	if config.TransferRateFD == 0 {
 		config.TransferRateFD = FDCANTransferRate1000kbps
 	}
 	if config.TransferRateFD < config.TransferRate {
-		return errFDCANInvalidTransferRateFD
+		return errCANInvalidTransferRateFD
 	}
-
-	dbrp, dtseg1, dtseg2, dsjw, err := can.calculateDataBitTiming(config.TransferRateFD)
+	dbrp, dtseg1, dtseg2, dsjw, err := fdcanDataBitTiming(config.TransferRateFD)
 	if err != nil {
 		return err
 	}
 	can.Bus.DBTP.Set(((dbrp - 1) << 16) | ((dtseg1 - 1) << 8) | ((dtseg2 - 1) << 4) | (dsjw - 1))
 
-	// Configure message RAM
-	can.configureMessageRAM()
+	// Enable timestamp counter (internal, prescaler=1).
+	can.Bus.TSCC.Set(1)
 
-	return nil
-}
+	// Clear message RAM.
+	base := can.sramBase()
+	for addr := base; addr < base+sramcanSize; addr += 4 {
+		*(*uint32)(unsafe.Pointer(addr)) = 0
+	}
 
-// Start enables the FDCAN peripheral for communication
-func (can *FDCAN) Start() error {
-	// Disable configuration change
-	can.Bus.SetCCCR_CCE(0)
+	// Set filter list sizes: LSS[20:16], LSE[27:24].
+	rxgfc := can.Bus.RXGFC.Get()
+	rxgfc &= ^uint32(0x0F1F0000)
+	rxgfc |= uint32(sramcanFLSNbr) << 16
+	rxgfc |= uint32(sramcanFLENbr) << 24
+	can.Bus.RXGFC.Set(rxgfc)
 
-	// Exit initialization mode
+	// Start peripheral.
+	can.Bus.SetCCCR_CCE(0)
 	can.Bus.SetCCCR_INIT(0)
-
-	// Wait for normal operation
-	timeout := 10000
-
+	timeout = 10000
 	for can.Bus.GetCCCR_INIT() != 0 {
 		timeout--
 		if timeout == 0 {
-			return errFDCANTimeout
+			return errCANTimeout
 		}
 	}
 
 	return nil
 }
 
-// Stop disables the FDCAN peripheral
-func (can *FDCAN) Stop() error {
-	// Request initialization
+// Stop puts the FDCAN peripheral back into initialization mode.
+func (can *CAN) Stop() error {
 	can.Bus.SetCCCR_INIT(1)
-
-	// Wait for init mode
 	timeout := 10000
 	for can.Bus.GetCCCR_INIT() == 0 {
 		timeout--
 		if timeout == 0 {
-			return errFDCANTimeout
+			return errCANTimeout
 		}
 	}
-
-	// Enable configuration change
 	can.Bus.SetCCCR_CCE(1)
-
 	return nil
 }
 
-// TxFifoIsFull returns true if the TX FIFO is full
-func (can *FDCAN) TxFifoIsFull() bool {
-	return (can.Bus.TXFQS.Get() & 0x00200000) != 0 // TFQF bit
-}
-
-// TxFifoFreeLevel returns the number of free TX FIFO elements
-func (can *FDCAN) TxFifoFreeLevel() int {
-	return int(can.Bus.TXFQS.Get() & 0x07) // TFFL[2:0]
+// txFIFOLevel implements [CAN.TxFIFOLevel]. It returns (sramcanTFQNbr - free, sramcanTFQNbr).
+func (can *CAN) txFIFOLevel() (int, int) {
+	free := int(can.Bus.TXFQS.Get() & 0x07) // TFFL[2:0]
+	return sramcanTFQNbr - free, sramcanTFQNbr // occupied elements, then presumably the FIFO capacity — confirm
 }
 
-// RxFifoSize returns the number of messages in RX FIFO 0
-func (can *FDCAN) RxFifoSize() int {
-	return int(can.Bus.RXF0S.Get() & 0x0F) // F0FL[3:0]
-}
-
-// RxFifoIsEmpty returns true if RX FIFO 0 is empty
-func (can *FDCAN) RxFifoIsEmpty() bool {
-	return (can.Bus.RXF0S.Get() & 0x0F) == 0
-}
+// tx implements [CAN.Tx]: it queues one frame in the Tx FIFO, or returns errCANTxFifoFull when the FIFO is full.
+func (can *CAN) tx(id canID, flags canFlags, data []byte) error {
+	if can.Bus.TXFQS.Get()&0x00200000 != 0 { // TFQF bit
+		return errCANTxFifoFull
+	}
 
-// TxRaw transmits a CAN frame using the raw buffer element structure
-func (can *FDCAN) TxRaw(e *FDCANTxBufferElement) error {
-	// Check if TX FIFO is full
-	if can.TxFifoIsFull() {
-		return errFDCANTxFifoFull
+	length := byte(64) // clamp in int space: byte(len(data)) would wrap for len(data) >= 256
+	if len(data) < 64 {
+		length = byte(len(data))
 	}
 
-	// Get put index
-	putIndex := (can.Bus.TXFQS.Get() >> 16) & 0x03 // TFQPI[1:0]
+	// Use FD framing when configured to always use FD, when the caller set canFlagFDF, or when data exceeds classic CAN max.
+	isFD := can.alwaysFD || flags&canFlagFDF != 0 || length > 8
 
-	// Calculate TX buffer address
-	sramBase := can.getSRAMBase()
-	txAddress := sramBase + sramcanTFQSA + (uintptr(putIndex) * sramcanTFQSize)
+	putIndex := (can.Bus.TXFQS.Get() >> 16) & 0x03 // TFQPI[1:0]
+	txAddr := can.sramBase() + sramcanTFQSA + uintptr(putIndex)*sramcanTFQSize
 
-	// Build first word
+	// Header word 1: identifier; canFlagIDE selects the 29-bit extended format (XTD), otherwise an 11-bit ID.
 	var w1 uint32
-	id := e.ID
-	if !e.XTD {
-		// Standard ID - shift to bits [28:18]
-		id = (id & 0x7FF) << 18
-	}
-	w1 = id & 0x1FFFFFFF
-	if e.ESI {
-		w1 |= fdcanElementMaskESI
-	}
-	if e.XTD {
-		w1 |= fdcanElementMaskXTD
-	}
-	if e.RTR {
-		w1 |= fdcanElementMaskRTR
+	if flags&canFlagIDE != 0 {
+		w1 = (id & 0x1FFFFFFF) | fdcanElementMaskXTD
+	} else {
+		w1 = (id & 0x7FF) << 18
 	}
 
-	// Build second word
-	var w2 uint32
-	w2 = uint32(e.DLC) << 16
-	if e.FDF {
-		w2 |= fdcanElementMaskFDF
-	}
-	if e.BRS {
-		w2 |= fdcanElementMaskBRS
+	// Header word 2: DLC, FD/BRS flags.
+	dlc := lengthToDLC(length)
+	w2 := uint32(dlc) << 16
+	if isFD {
+		w2 |= fdcanElementMaskFDF | fdcanElementMaskBRS
 	}
-	if e.EFC {
-		w2 |= fdcanElementMaskEFC
-	}
-	w2 |= uint32(e.MM) << 24
 
-	// Write to message RAM
-	*(*uint32)(unsafe.Pointer(txAddress)) = w1
-	*(*uint32)(unsafe.Pointer(txAddress + 4)) = w2
+	*(*uint32)(unsafe.Pointer(txAddr)) = w1
+	*(*uint32)(unsafe.Pointer(txAddr + 4)) = w2
 
-	// Copy data bytes - must use 32-bit word access on Cortex-M0+
-	dataLen := dlcToBytes[e.DLC&0x0F]
-	numWords := (dataLen + 3) / 4
-	for w := byte(0); w < numWords; w++ {
+	// Copy data with 32-bit word access (Cortex-M0+).
+	for w := byte(0); w < (length+3)/4; w++ {
 		var word uint32
-		baseIdx := w * 4
-		for b := byte(0); b < 4 && baseIdx+b < dataLen; b++ {
-			word |= uint32(e.DB[baseIdx+b]) << (b * 8)
+		base := w * 4
+		for b := byte(0); b < 4 && base+b < length; b++ {
+			word |= uint32(data[base+b]) << (b * 8)
 		}
-		*(*uint32)(unsafe.Pointer(txAddress + 8 + uintptr(w)*4)) = word
+		*(*uint32)(unsafe.Pointer(txAddr + 8 + uintptr(w)*4)) = word
 	}
 
-	// Request transmission
 	can.Bus.TXBAR.Set(1 << putIndex)
-
 	return nil
 }
 
-// Tx transmits a CAN frame with the specified ID and data
-func (can *FDCAN) Tx(id uint32, data []byte, isFD, isExtendedID bool) error {
-	length := byte(len(data))
-	if length > 64 {
-		length = 64
-	}
-	if !isFD && length > 8 {
-		length = 8
-	}
-
-	e := FDCANTxBufferElement{
-		ESI: false,
-		XTD: isExtendedID,
-		RTR: false,
-		ID:  id,
-		MM:  0,
-		EFC: false,
-		FDF: isFD,
-		BRS: isFD,
-		DLC: FDCANLengthToDlc(length, isFD),
-	}
-
-	for i := byte(0); i < length; i++ {
-		e.DB[i] = data[i]
+// rxFIFOLevel implements [CAN.RxFIFOLevel].
+// Returns 0,0 when interrupt-driven (messages delivered via callback).
+func (can *CAN) rxFIFOLevel() (int, int) {
+	if canInstances[can.instance] != nil {
+		return 0, 0
 	}
-
-	return can.TxRaw(&e)
+	level := int(can.Bus.RXF0S.Get() & 0x0F) // F0FL[3:0]
+	return level, sramcanRF0Nbr
 }
 
-// RxRaw receives a CAN frame into the raw buffer element structure
-func (can *FDCAN) RxRaw(e *FDCANRxBufferElement) error {
-	if can.RxFifoIsEmpty() {
-		return errFDCANRxFifoEmpty
-	}
-
-	// Get get index
-	getIndex := (can.Bus.RXF0S.Get() >> 8) & 0x03 // F0GI[1:0]
-
-	// Calculate RX buffer address
-	sramBase := can.getSRAMBase()
-	rxAddress := sramBase + sramcanRF0SA + (uintptr(getIndex) * sramcanRF0Size)
-
-	// Read first word
-	w1 := *(*uint32)(unsafe.Pointer(rxAddress))
-	e.ESI = (w1 & fdcanElementMaskESI) != 0
-	e.XTD = (w1 & fdcanElementMaskXTD) != 0
-	e.RTR = (w1 & fdcanElementMaskRTR) != 0
-
-	if e.XTD {
-		e.ID = w1 & fdcanElementMaskEXTID
+// setRxCallback implements [CAN.SetRxCallback].
+// When cb is non-nil, interrupt-driven receive is enabled on RX FIFO 0.
+// The CAN.Interrupt field must be initialized with interrupt.New in the board file.
+func (can *CAN) setRxCallback(cb canRxCallback) {
+	canRxCB[can.instance] = cb
+	if cb != nil {
+		canInstances[can.instance] = can
+		// Enable RX FIFO 0 new message interrupt, routed to interrupt line 0.
+		can.Bus.SetIE_RF0NE(1)
+		can.Bus.SetILS_RxFIFO0(0)
+		can.Bus.SetILE_EINT0(1)
+		can.Interrupt.Enable()
 	} else {
-		e.ID = (w1 & fdcanElementMaskSTDID) >> 18
+		can.Bus.SetIE_RF0NE(0)
+		canInstances[can.instance] = nil
 	}
+}
 
-	// Read second word
-	w2 := *(*uint32)(unsafe.Pointer(rxAddress + 4))
-	e.RXTS = uint16(w2 & fdcanElementMaskTS)
-	e.DLC = uint8((w2 & fdcanElementMaskDLC) >> 16)
-	e.BRS = (w2 & fdcanElementMaskBRS) != 0
-	e.FDF = (w2 & fdcanElementMaskFDF) != 0
-	e.FIDX = uint8((w2 & fdcanElementMaskFIDX) >> 24)
-	e.ANMF = (w2 & fdcanElementMaskANMF) != 0
-
-	// Copy data bytes - must use 32-bit word access on Cortex-M0+
-	dataLen := dlcToBytes[e.DLC&0x0F]
-	numWords := (dataLen + 3) / 4
-	for w := byte(0); w < numWords; w++ {
-		word := *(*uint32)(unsafe.Pointer(rxAddress + 8 + uintptr(w)*4))
-		baseIdx := w * 4
-		for b := byte(0); b < 4 && baseIdx+b < dataLen; b++ {
-			e.DB[baseIdx+b] = byte(word >> (b * 8))
-		}
+// rxPoll implements [CAN.RxPoll]. No-op when interrupt-driven receive is active.
+// NOTE(review): setRxCallback sets canInstances for every non-nil cb, so as far as this file shows the drain below is never reached — confirm.
+func (can *CAN) rxPoll() error {
+	if canInstances[can.instance] != nil { // interrupt mode: canHandleInterrupt drains the FIFO instead
+		return nil
 	}
-
-	// Acknowledge the read
-	can.Bus.RXF0A.Set(uint32(getIndex))
-
+	cb := canRxCB[can.instance]
+	if cb == nil { // no callback registered: nothing to deliver messages to
+		return nil
+	}
+	processRxFIFO0(can, cb) // drain RX FIFO 0, invoking cb once per message
 	return nil
 }
 
-// Rx receives a CAN frame and returns its components
-func (can *FDCAN) Rx() (id uint32, dlc byte, data []byte, isFD, isExtendedID bool, err error) {
-	e := FDCANRxBufferElement{}
-	err = can.RxRaw(&e)
-	if err != nil {
-		return 0, 0, nil, false, false, err
-	}
+// processRxFIFO0 drains RX FIFO 0 and delivers each message to cb.
+// Used by both rxPoll (poll mode) and canHandleInterrupt (interrupt mode).
+func processRxFIFO0(can *CAN, cb canRxCallback) {
+	for can.Bus.RXF0S.Get()&0x0F != 0 {
+		getIndex := (can.Bus.RXF0S.Get() >> 8) & 0x03 // F0GI[1:0]
+		rxAddr := can.sramBase() + sramcanRF0SA + uintptr(getIndex)*sramcanRF0Size
+
+		w1 := *(*uint32)(unsafe.Pointer(rxAddr))
+		w2 := *(*uint32)(unsafe.Pointer(rxAddr + 4))
+
+		extendedID := w1&fdcanElementMaskXTD != 0
+		var id uint32
+		var flags uint32
+		if extendedID {
+			flags |= canFlagIDE
+			id = w1 & fdcanElementMaskEXTID
+		} else {
+			id = (w1 & fdcanElementMaskSTDID) >> 18
+		}
 
-	length := FDCANDlcToLength(e.DLC, e.FDF)
-	return e.ID, length, e.DB[:length], e.FDF, e.XTD, nil
-}
+		timestamp := w2 & fdcanElementMaskTS
+		dlc := byte((w2 & fdcanElementMaskDLC) >> 16)
+		isFD := w2&fdcanElementMaskFDF != 0
 
-// SetInterrupt configures interrupt handling for the FDCAN peripheral
-func (can *FDCAN) SetInterrupt(ie uint32, callback func(*FDCAN)) error {
-	if callback == nil {
-		can.Bus.IE.ClearBits(ie)
-		return nil
-	}
+		if isFD {
+			flags |= canFlagFDF
+		}
+		if w1&fdcanElementMaskRTR != 0 {
+			flags |= canFlagRTR
+		}
+		if w2&fdcanElementMaskBRS != 0 {
+			flags |= canFlagBRS
+		}
+		if w1&fdcanElementMaskESI != 0 {
+			flags |= canFlagESI
+		}
 
-	can.Bus.IE.SetBits(ie)
+		dataLen := dlcToLength(dlc)
+		if !isFD && dataLen > 8 {
+			dataLen = 8
+		}
+		var buf [64]byte
+		for w := byte(0); w < (dataLen+3)/4; w++ {
+			word := *(*uint32)(unsafe.Pointer(rxAddr + 8 + uintptr(w)*4))
+			base := w * 4
+			for b := byte(0); b < 4 && base+b < dataLen; b++ {
+				buf[base+b] = byte(word >> (b * 8))
+			}
+		}
 
-	idx := can.instance
-	fdcanInstances[idx] = can
+		// Acknowledge before callback so the FIFO slot is freed.
+		can.Bus.RXF0A.Set(uint32(getIndex))
+		cb(buf[:dataLen], id, timestamp, flags)
+	}
+}
 
-	for i := uint(0); i < 32; i++ {
-		if ie&(1<<i) != 0 {
-			fdcanCallbacks[idx][i] = callback
+// canHandleInterrupt is the shared interrupt handler for FDCAN interrupt line 0 (IRQ_TIM16).
+// Both FDCAN1 and FDCAN2 share this IRQ vector.
+func canHandleInterrupt(interrupt.Interrupt) {
+	for i := range canInstances {
+		can := canInstances[i]
+		if can == nil {
+			continue
+		}
+		ir := can.Bus.IR.Get()
+		if ir&FDCAN_IT_RX_FIFO0_NEW_MESSAGE != 0 {
+			can.Bus.IR.Set(FDCAN_IT_RX_FIFO0_NEW_MESSAGE) // Write 1 to clear
+			if cb := canRxCB[i]; cb != nil {
+				processRxFIFO0(can, cb)
+			}
 		}
 	}
-
-	can.Interrupt.Enable()
-	return nil
 }
 
-// ConfigureFilter configures a message filter
-func (can *FDCAN) ConfigureFilter(config FDCANFilterConfig) error {
-	sramBase := can.getSRAMBase()
+// ConfigureFilter configures a message acceptance filter.
+func (can *CAN) ConfigureFilter(config CANFilterConfig) error {
+	base := can.sramBase()
 
 	if config.IsExtendedID {
-		// Extended filter
 		if config.Index >= sramcanFLENbr {
-			return errors.New("FDCAN: filter index out of range")
+			return errors.New("CAN: filter index out of range")
 		}
 
-		filterAddr := sramBase + sramcanFLESA + (uintptr(config.Index) * sramcanFLESize)
+		filterAddr := base + sramcanFLESA + (uintptr(config.Index) * sramcanFLESize)
 
-		// Build filter elements
 		w1 := (uint32(config.Config) << 29) | (config.ID1 & 0x1FFFFFFF)
 		w2 := (uint32(config.Type) << 30) | (config.ID2 & 0x1FFFFFFF)
 
 		*(*uint32)(unsafe.Pointer(filterAddr)) = w1
 		*(*uint32)(unsafe.Pointer(filterAddr + 4)) = w2
 	} else {
-		// Standard filter
 		if config.Index >= sramcanFLSNbr {
-			return errors.New("FDCAN: filter index out of range")
+			return errors.New("CAN: filter index out of range")
 		}
 
-		filterAddr := sramBase + sramcanFLSSA + (uintptr(config.Index) * sramcanFLSSize)
+		filterAddr := base + sramcanFLSSA + (uintptr(config.Index) * sramcanFLSSize)
 
-		// Build filter element
 		w := (uint32(config.Type) << 30) |
 			(uint32(config.Config) << 27) |
 			((config.ID1 & 0x7FF) << 16) |
@@ -561,56 +487,32 @@ func (can *FDCAN) ConfigureFilter(config FDCANFilterConfig) error {
 	return nil
 }
 
-func (can *FDCAN) getSRAMBase() uintptr {
-	base := uintptr(sramcanBase)
+func (can *CAN) sramBase() uintptr {
 	if can.Bus == stm32.FDCAN2 {
-		base += sramcanSize
+		return uintptr(sramcanBase) + sramcanSize
 	}
-	return base
+	return uintptr(sramcanBase)
 }
 
-func (can *FDCAN) configureMessageRAM() {
-	sramBase := can.getSRAMBase()
-
-	// Clear message RAM
-	for addr := sramBase; addr < sramBase+sramcanSize; addr += 4 {
-		*(*uint32)(unsafe.Pointer(addr)) = 0
-	}
-
-	// Configure filter counts (using RXGFC register)
-	// LSS = number of standard filters, LSE = number of extended filters
-	rxgfc := can.Bus.RXGFC.Get()
-	rxgfc &= ^uint32(0xFF000000)            // Clear LSS and LSE
-	rxgfc |= (sramcanFLSNbr << 24)          // Standard filters
-	rxgfc |= (sramcanFLENbr << 24) & 0xFF00 // Extended filters (shifted)
-	can.Bus.RXGFC.Set(rxgfc)
-}
-
-func (can *FDCAN) calculateNominalBitTiming(rate FDCANTransferRate) (brp, tseg1, tseg2, sjw uint32, err error) {
-	// STM32G0 FDCAN clock = 64MHz
-	// Target: 80% sample point
-	// Bit time = (1 + TSEG1 + TSEG2) time quanta
+// fdcanNominalBitTiming returns prescaler and segment values for the nominal (arbitration) phase.
+// STM32G0 FDCAN clock = 64 MHz, 16 time quanta per bit, ~80% sample point.
+func fdcanNominalBitTiming(rate CANTransferRate) (brp, tseg1, tseg2, sjw uint32, err error) {
 	switch rate {
 	case FDCANTransferRate125kbps:
-		// 64MHz / 32 = 2MHz, 16 tq per bit = 125kbps
 		return 32, 13, 2, 4, nil
 	case FDCANTransferRate250kbps:
-		// 64MHz / 16 = 4MHz, 16 tq per bit = 250kbps
 		return 16, 13, 2, 4, nil
 	case FDCANTransferRate500kbps:
-		// 64MHz / 8 = 8MHz, 16 tq per bit = 500kbps
 		return 8, 13, 2, 4, nil
 	case FDCANTransferRate1000kbps:
-		// 64MHz / 4 = 16MHz, 16 tq per bit = 1Mbps
 		return 4, 13, 2, 4, nil
 	default:
-		return 0, 0, 0, 0, errFDCANInvalidTransferRate
+		return 0, 0, 0, 0, errCANInvalidTransferRate
 	}
 }
 
-func (can *FDCAN) calculateDataBitTiming(rate FDCANTransferRate) (brp, tseg1, tseg2, sjw uint32, err error) {
-	// STM32G0 FDCAN clock = 64MHz
-	// For data phase, we need higher bit rates
+// fdcanDataBitTiming returns prescaler and segment values for the data phase (FD).
+func fdcanDataBitTiming(rate CANTransferRate) (brp, tseg1, tseg2, sjw uint32, err error) {
 	switch rate {
 	case FDCANTransferRate125kbps:
 		return 32, 13, 2, 4, nil
@@ -621,91 +523,10 @@ func (can *FDCAN) calculateDataBitTiming(rate FDCANTransferRate) (brp, tseg1, ts
 	case FDCANTransferRate1000kbps:
 		return 4, 13, 2, 4, nil
 	case FDCANTransferRate2000kbps:
-		// 64MHz / 2 = 32MHz, 16 tq per bit = 2Mbps
 		return 2, 13, 2, 4, nil
 	case FDCANTransferRate4000kbps:
-		// 64MHz / 1 = 64MHz, 16 tq per bit = 4Mbps
 		return 1, 13, 2, 4, nil
 	default:
-		return 0, 0, 0, 0, errFDCANInvalidTransferRateFD
-	}
-}
-
-// FDCANDlcToLength converts a DLC value to actual byte length
-func FDCANDlcToLength(dlc byte, isFD bool) byte {
-	if dlc > 15 {
-		dlc = 15
-	}
-	length := dlcToBytes[dlc]
-	if !isFD && length > 8 {
-		return 8
-	}
-	return length
-}
-
-// FDCANLengthToDlc converts a byte length to DLC value
-func FDCANLengthToDlc(length byte, isFD bool) byte {
-	if !isFD {
-		if length > 8 {
-			return 8
-		}
-		return length
-	}
-
-	switch {
-	case length <= 8:
-		return length
-	case length <= 12:
-		return 9
-	case length <= 16:
-		return 10
-	case length <= 20:
-		return 11
-	case length <= 24:
-		return 12
-	case length <= 32:
-		return 13
-	case length <= 48:
-		return 14
-	default:
-		return 15
-	}
-}
-
-// Interrupt handling
-var (
-	fdcanInstances [2]*FDCAN
-	fdcanCallbacks [2][32]func(*FDCAN)
-)
-
-func fdcanHandleInterrupt(idx int) {
-	if fdcanInstances[idx] == nil {
-		return
-	}
-
-	can := fdcanInstances[idx]
-	ir := can.Bus.IR.Get()
-	can.Bus.IR.Set(ir) // Clear interrupt flags
-
-	for i := uint(0); i < 32; i++ {
-		if ir&(1<<i) != 0 && fdcanCallbacks[idx][i] != nil {
-			fdcanCallbacks[idx][i](can)
-		}
+		return 0, 0, 0, 0, errCANInvalidTransferRateFD
 	}
 }
-
-// Data returns the received data as a slice
-func (e *FDCANRxBufferElement) Data() []byte {
-	return e.DB[:FDCANDlcToLength(e.DLC, e.FDF)]
-}
-
-// Length returns the actual data length
-func (e *FDCANRxBufferElement) Length() byte {
-	return FDCANDlcToLength(e.DLC, e.FDF)
-}
-
-// enableFDCANClock enables the FDCAN peripheral clock
-func enableFDCANClock() {
-	// FDCAN clock is on APB1
-	stm32.RCC.SetAPBENR1_FDCANEN(1)
-}
diff --git a/src/machine/spi.go b/src/machine/spi.go
index 9a1033ca7d..fa507b961d 100644
--- a/src/machine/spi.go
+++ b/src/machine/spi.go
@@ -1,4 +1,4 @@
-//go:build !baremetal || atmega || esp32 || fe310 || k210 || nrf || (nxp && !mk66f18) || rp2040 || rp2350 || sam || (stm32 && !stm32f7x2 && !stm32l5x2)
+//go:build !baremetal || atmega || attiny85 || esp32 || fe310 || k210 || nrf || (nxp && !mk66f18) || rp2040 || rp2350 || sam || (stm32 && !stm32f7x2 && !stm32l5x2)
 
 package machine
 
diff --git a/src/machine/spi_tx.go b/src/machine/spi_tx.go
index 97385bb596..aec3f52fe1 100644
--- a/src/machine/spi_tx.go
+++ b/src/machine/spi_tx.go
@@ -1,4 +1,4 @@
-//go:build atmega || fe310 || k210 || (nxp && !mk66f18) || (stm32 && !stm32f7x2 && !stm32l5x2)
+//go:build atmega || attiny85 || fe310 || k210 || (nxp && !mk66f18) || (stm32 && !stm32f7x2 && !stm32l5x2)
 
 // This file implements the SPI Tx function for targets that don't have a custom
 // (faster) implementation for it.
diff --git a/src/runtime/float.go b/src/runtime/float.go
index c80c8b7abf..b5fee4c5c4 100644
--- a/src/runtime/float.go
+++ b/src/runtime/float.go
@@ -52,3 +52,130 @@ func float64bits(f float64) uint64 {
 func float64frombits(b uint64) float64 {
 	return *(*float64)(unsafe.Pointer(&b))
 }
+
+// The fminimum/fmaximum functions are missing from most libm implementations.
+// Just define them ourselves.
+
+//export fminimum
+func fminimum(x, y float64) float64 {
+	return minimumFloat64(x, y)
+}
+
+//export fminimumf
+func fminimumf(x, y float32) float32 {
+	return minimumFloat32(x, y)
+}
+
+//export fmaximum
+func fmaximum(x, y float64) float64 {
+	return maximumFloat64(x, y)
+}
+
+//export fmaximumf
+func fmaximumf(x, y float32) float32 {
+	return maximumFloat32(x, y)
+}
+
+// Create separate copies of the functions that are not exported.
+// This is necessary so that LLVM does not recognize them as builtins.
+// If tests called the builtins, LLVM would just override them on most platforms.
+
+func minimumFloat32(x, y float32) float32 {
+	return minimumFloat[float32, int32](x, y, minPosNaN32, magMask32)
+}
+
+func minimumFloat64(x, y float64) float64 {
+	return minimumFloat[float64, int64](x, y, minPosNaN64, magMask64)
+}
+
+func maximumFloat32(x, y float32) float32 {
+	return maximumFloat[float32, int32](x, y, minPosNaN32, magMask32)
+}
+
+func maximumFloat64(x, y float64) float64 {
+	return maximumFloat[float64, int64](x, y, minPosNaN64, magMask64)
+}
+
+// minimumFloat is a generic implementation of the floating-point minimum operation.
+// This implementation uses integer operations because this is mainly used for platforms without an FPU.
+func minimumFloat[T float, I floatInt](x, y T, minPosNaN, magMask I) T {
+	xBits := *(*I)(unsafe.Pointer(&x))
+	yBits := *(*I)(unsafe.Pointer(&y))
+
+	// Handle the special case of a positive NaN value.
+	switch {
+	case xBits >= minPosNaN:
+		return x
+	case yBits >= minPosNaN:
+		return y
+	}
+
+	// The exponent-mantissa portion of the float is comparable via unsigned comparison (excluding the NaN case).
+	// We can turn a float into a signed-comparable value by reversing the comparison order of negative values.
+	// We can reverse the order by inverting the bits.
+	// This also ensures that positive zero compares greater than negative zero (as required by the spec).
+	// Negative NaN values will compare less than any other value, so they require no special handling to propagate.
+	if xBits < 0 {
+		xBits ^= magMask
+	}
+	if yBits < 0 {
+		yBits ^= magMask
+	}
+	if xBits <= yBits {
+		return x
+	} else {
+		return y
+	}
+}
+
+// maximumFloat is a generic implementation of the floating-point maximum operation.
+// This implementation uses integer operations because this is mainly used for platforms without an FPU.
+func maximumFloat[T float, I floatInt](x, y T, minPosNaN, magMask I) T {
+	xBits := *(*I)(unsafe.Pointer(&x))
+	yBits := *(*I)(unsafe.Pointer(&y))
+
+	// The exponent-mantissa portion of the float is comparable via unsigned comparison (excluding the NaN case).
+	// We can turn a float into a signed-comparable value by reversing the comparison order of negative values.
+	// We can reverse the order by inverting the bits.
+	// This also ensures that positive zero compares greater than negative zero (as required by the spec).
+	// Positive NaN values will compare greater than any other value, so they require no special handling to propagate.
+	if xBits < 0 {
+		xBits ^= magMask
+	}
+	if yBits < 0 {
+		yBits ^= magMask
+	}
+	// Handle the special case of a negative NaN value.
+	maxNegNaN := ^minPosNaN
+	switch {
+	case xBits <= maxNegNaN:
+		return x
+	case yBits <= maxNegNaN:
+		return y
+	}
+	if xBits >= yBits {
+		return x
+	} else {
+		return y
+	}
+}
+
+const (
+	signPos64     = 63
+	exponentPos64 = 52
+	minPosNaN64   = ((1 << signPos64) - (1 << exponentPos64)) + 1
+	magMask64     = 1<<signPos64 - 1
+
+	signPos32     = 31
+	exponentPos32 = 23
+	minPosNaN32   = ((1 << signPos32) - (1 << exponentPos32)) + 1
+	magMask32     = 1<<signPos32 - 1
+)
+
+type float interface {
+	float32 | float64
+}
+
+type floatInt interface {
+	int32 | int64
+}
diff --git a/src/runtime/float_test.go b/src/runtime/float_test.go
new file mode 100644
index 0000000000..49c3880db5
--- /dev/null
+++ b/src/runtime/float_test.go
@@ -0,0 +1,227 @@
+package runtime_test
+
+import (
+	"math"
+	"testing"
+	_ "unsafe"
+)
+
+func TestFloatMinMax32(t *testing.T) {
+	t.Parallel()
+
+	for _, c := range []struct {
+		x   float32
+		y   float32
+		min float32
+		max float32
+	}{
+		{
+			x:   0,
+			y:   0,
+			min: 0,
+			max: 0,
+		},
+		{
+			x:   -12,
+			y:   2,
+			min: -12,
+			max: 2,
+		},
+		{
+			x:   2,
+			y:   -12,
+			min: -12,
+			max: 2,
+		},
+		{
+			x:   float32(math.Copysign(0, -1)),
+			y:   0,
+			min: float32(math.Copysign(0, -1)),
+			max: 0,
+		},
+		{
+			x:   0,
+			y:   float32(math.Copysign(0, -1)),
+			min: float32(math.Copysign(0, -1)),
+			max: 0,
+		},
+		{
+			x:   float32(math.Inf(-1)),
+			y:   float32(math.Inf(1)),
+			min: float32(math.Inf(-1)),
+			max: float32(math.Inf(1)),
+		},
+		{
+			x:   math.MaxFloat32,
+			y:   math.SmallestNonzeroFloat32,
+			min: math.SmallestNonzeroFloat32,
+			max: math.MaxFloat32,
+		},
+		{
+			x:   math.Float32frombits(float32PositiveNaN),
+			y:   0,
+			min: math.Float32frombits(float32PositiveNaN),
+			max: math.Float32frombits(float32PositiveNaN),
+		},
+		{
+			x:   0,
+			y:   math.Float32frombits(float32PositiveNaN),
+			min: math.Float32frombits(float32PositiveNaN),
+			max: math.Float32frombits(float32PositiveNaN),
+		},
+		{
+			x:   math.Float32frombits(float32PositiveNaN),
+			y:   math.Float32frombits(float32PositiveNaN),
+			min: math.Float32frombits(float32PositiveNaN),
+			max: math.Float32frombits(float32PositiveNaN),
+		},
+		{
+			x:   math.Float32frombits(float32NegativeNaN),
+			y:   0,
+			min: math.Float32frombits(float32NegativeNaN),
+			max: math.Float32frombits(float32NegativeNaN),
+		},
+		{
+			x:   0,
+			y:   math.Float32frombits(float32NegativeNaN),
+			min: math.Float32frombits(float32NegativeNaN),
+			max: math.Float32frombits(float32NegativeNaN),
+		},
+		{
+			x:   math.Float32frombits(float32NegativeNaN),
+			y:   math.Float32frombits(float32NegativeNaN),
+			min: math.Float32frombits(float32NegativeNaN),
+			max: math.Float32frombits(float32NegativeNaN),
+		},
+	} {
+		if min := minimumFloat32(c.x, c.y); math.Float32bits(min) != math.Float32bits(c.min) {
+			t.Errorf("minimumFloat32(%f, %f) = %f (expected %f)", c.x, c.y, min, c.min)
+		}
+		if max := maximumFloat32(c.x, c.y); math.Float32bits(max) != math.Float32bits(c.max) {
+			t.Errorf("maximumFloat32(%f, %f) = %f (expected %f)", c.x, c.y, max, c.max)
+		}
+	}
+}
+
+const (
+	// float32PositiveNaN is a positive quiet NaN with a payload of 1 for a float32.
+	float32PositiveNaN = 0x7FC00001
+	// float32NegativeNaN is a negative quiet NaN with a payload of 1 for a float32.
+	float32NegativeNaN = 0xFFC00001
+)
+
+//go:linkname minimumFloat32 runtime.minimumFloat32
+func minimumFloat32(x, y float32) float32
+
+//go:linkname maximumFloat32 runtime.maximumFloat32
+func maximumFloat32(x, y float32) float32
+
+func TestFloatMinMax64(t *testing.T) {
+	t.Parallel()
+
+	for _, c := range []struct {
+		x   float64
+		y   float64
+		min float64
+		max float64
+	}{
+		{
+			x:   0,
+			y:   0,
+			min: 0,
+			max: 0,
+		},
+		{
+			x:   -12,
+			y:   2,
+			min: -12,
+			max: 2,
+		},
+		{
+			x:   2,
+			y:   -12,
+			min: -12,
+			max: 2,
+		},
+		{
+			x:   math.Copysign(0, -1),
+			y:   0,
+			min: math.Copysign(0, -1),
+			max: 0,
+		},
+		{
+			x:   0,
+			y:   math.Copysign(0, -1),
+			min: math.Copysign(0, -1),
+			max: 0,
+		},
+		{
+			x:   math.Inf(-1),
+			y:   math.Inf(1),
+			min: math.Inf(-1),
+			max: math.Inf(1),
+		},
+		{
+			x:   math.MaxFloat64,
+			y:   math.SmallestNonzeroFloat64,
+			min: math.SmallestNonzeroFloat64,
+			max: math.MaxFloat64,
+		},
+		{
+			x:   math.Float64frombits(float64PositiveNaN),
+			y:   0,
+			min: math.Float64frombits(float64PositiveNaN),
+			max: math.Float64frombits(float64PositiveNaN),
+		},
+		{
+			x:   0,
+			y:   math.Float64frombits(float64PositiveNaN),
+			min: math.Float64frombits(float64PositiveNaN),
+			max: math.Float64frombits(float64PositiveNaN),
+		},
+		{
+			x:   math.Float64frombits(float64PositiveNaN),
+			y:   math.Float64frombits(float64PositiveNaN),
+			min: math.Float64frombits(float64PositiveNaN),
+			max: math.Float64frombits(float64PositiveNaN),
+		},
+		{
+			x:   math.Float64frombits(float64NegativeNaN),
+			y:   0,
+			min: math.Float64frombits(float64NegativeNaN),
+			max: math.Float64frombits(float64NegativeNaN),
+		},
+		{
+			x:   0,
+			y:   math.Float64frombits(float64NegativeNaN),
+			min: math.Float64frombits(float64NegativeNaN),
+			max: math.Float64frombits(float64NegativeNaN),
+		},
+		{ // Both operands are negative NaN (mirrors the final float32 case).
+			x:   math.Float64frombits(float64NegativeNaN),
+			y:   math.Float64frombits(float64NegativeNaN),
+			min: math.Float64frombits(float64NegativeNaN),
+			max: math.Float64frombits(float64NegativeNaN),
+		},
+	} {
+		if min := minimumFloat64(c.x, c.y); math.Float64bits(min) != math.Float64bits(c.min) {
+			t.Errorf("minimumFloat64(%f, %f) = %f (expected %f)", c.x, c.y, min, c.min)
+		}
+		if max := maximumFloat64(c.x, c.y); math.Float64bits(max) != math.Float64bits(c.max) {
+			t.Errorf("maximumFloat64(%f, %f) = %f (expected %f)", c.x, c.y, max, c.max)
+		}
+	}
+}
+
+const (
+	// float64PositiveNaN is a positive quiet NaN with a payload of 1 for a float64.
+	float64PositiveNaN = 0x7FF8000000000001
+	// float64NegativeNaN is a negative quiet NaN with a payload of 1 for a float64.
+	float64NegativeNaN = 0xFFF8000000000001
+)
+
+//go:linkname minimumFloat64 runtime.minimumFloat64
+func minimumFloat64(x, y float64) float64
+
+//go:linkname maximumFloat64 runtime.maximumFloat64
+func maximumFloat64(x, y float64) float64
diff --git a/src/runtime/runtime_esp32c3.go b/src/runtime/runtime_esp32c3.go
index 013c939246..f85f7dec78 100644
--- a/src/runtime/runtime_esp32c3.go
+++ b/src/runtime/runtime_esp32c3.go
@@ -54,10 +54,6 @@ func main() {
 	// Configure interrupt handler
 	interruptInit()
 
-	// Initialize UART.
-	machine.USBCDC.Configure(machine.UARTConfig{})
-	machine.InitSerial()
-
 	// Initialize main system timer used for time.Now.
 	initTimer()
 
@@ -68,6 +64,11 @@ func main() {
 	exit(0)
 }
 
+func init() {
+	// Initialize UART.
+	machine.InitSerial()
+}
+
 func abort() {
 	// lock up forever
 	for {
diff --git a/src/runtime/runtime_esp32s3.go b/src/runtime/runtime_esp32s3.go
index 35cd26da85..b469ddf9e9 100644
--- a/src/runtime/runtime_esp32s3.go
+++ b/src/runtime/runtime_esp32s3.go
@@ -4,6 +4,7 @@ package runtime
 
 import (
 	"device/esp"
+	"machine"
 )
 
 // This is the function called on startup after the flash (IROM/DROM) is
@@ -49,8 +50,22 @@ func main() {
 	// Change CPU frequency from 80MHz to 240MHz by setting SYSTEM_PLL_FREQ_SEL to
 	// 1 and SYSTEM_CPUPERIOD_SEL to 2 (see table "CPU Clock Frequency" in the
 	// reference manual).
+	// We do this gradually to allow PLL and system to stabilize.
 	esp.SYSTEM.SetCPU_PER_CONF_PLL_FREQ_SEL(1)
+
+	// First switch to 160MHz (intermediate step)
+	esp.SYSTEM.SetCPU_PER_CONF_CPUPERIOD_SEL(1)
+	// Small delay to let PLL stabilize at 160MHz
+	for i := 0; i < 1000; i++ {
+		_ = esp.SYSTEM.CPU_PER_CONF.Get()
+	}
+
+	// Now switch to 240MHz
 	esp.SYSTEM.SetCPU_PER_CONF_CPUPERIOD_SEL(2)
+	// Small delay to let PLL stabilize at 240MHz
+	for i := 0; i < 1000; i++ {
+		_ = esp.SYSTEM.CPU_PER_CONF.Get()
+	}
 
 	// Clear bss. Repeat many times while we wait for cpu/clock to stabilize
 	for x := 0; x < 30; x++ {
@@ -67,6 +82,11 @@ func main() {
 	exit(0)
 }
 
+func init() {
+	// Initialize UART.
+	machine.InitSerial()
+}
+
 func abort() {
 	// lock up forever
 	print("abort called\n")
diff --git a/src/runtime/runtime_wasmentry.go b/src/runtime/runtime_wasmentry.go
index 005b58641e..59cacb3b04 100644
--- a/src/runtime/runtime_wasmentry.go
+++ b/src/runtime/runtime_wasmentry.go
@@ -34,8 +34,8 @@ func wasmEntryReactor() {
 	// Initialize the heap.
 	heapStart = uintptr(unsafe.Pointer(&heapStartSymbol))
 	heapEnd = uintptr(wasm_memory_size(0) * wasmPageSize)
-	initRand()
 	initHeap()
+	initRand()
 
 	if hasScheduler {
 		// A package initializer might do funky stuff like start a goroutine and
diff --git a/targets/esp32s3-wroom1.json b/targets/esp32s3-wroom1.json
new file mode 100644
index 0000000000..56f8d57d21
--- /dev/null
+++ b/targets/esp32s3-wroom1.json
@@ -0,0 +1,4 @@
+{
+	"inherits": ["esp32s3"],
+	"build-tags": ["esp32s3_wroom1"]
+}
\ No newline at end of file
diff --git a/targets/esp32s3.json b/targets/esp32s3.json
index f245b82ab8..051cc85420 100644
--- a/targets/esp32s3.json
+++ b/targets/esp32s3.json
@@ -4,7 +4,7 @@
 	"features": "+atomctl,+bool,+clamps,+coprocessor,+debug,+density,+div32,+esp32s3,+exception,+fp,+highpriinterrupts,+interrupt,+loop,+mac16,+memctl,+minmax,+miscsr,+mul32,+mul32high,+nsa,+prid,+regprotect,+rvector,+s32c1i,+sext,+threadptr,+timerint,+windowed",
 	"build-tags": ["esp32s3", "esp"],
 	"scheduler": "tasks",
-	"serial": "uart",
+	"serial": "usb",
 	"linker": "ld.lld",
 	"default-stack-size": 2048,
 	"rtlib": "compiler-rt",
diff --git a/targets/vicharak_shrike-lite.json b/targets/vicharak_shrike-lite.json
new file mode 100644
index 0000000000..c801b2c27b
--- /dev/null
+++ b/targets/vicharak_shrike-lite.json
@@ -0,0 +1,14 @@
+{
+    "inherits": [
+        "rp2040"
+    ],
+    "serial-port": ["2e8a:0003"],
+    "default-stack-size": 8192,
+    "build-tags": ["vicharak_shrike_lite"],
+    "ldflags": [
+        "--defsym=__flash_size=4M"
+    ],
+    "extra-files": [
+        "targets/pico-boot-stage2.S"
+    ]
+}
diff --git a/testdata/corpus.yaml b/testdata/corpus.yaml
index 0ed29adbe4..36ac337cf6 100644
--- a/testdata/corpus.yaml
+++ b/testdata/corpus.yaml
@@ -29,8 +29,7 @@
 - repo: github.com/dgryski/go-camellia
 - repo: github.com/dgryski/go-change
 - repo: github.com/dgryski/go-chaskey
-  tags: appengine noasm
-  skipwasi: true # siphash has build tag issues
+  tags: appengine noasm # for dchest/siphash
 - repo: github.com/dgryski/go-clefia
 - repo: github.com/dgryski/go-clockpro
 - repo: github.com/dgryski/go-cobs
@@ -56,7 +55,6 @@
 - repo: github.com/dgryski/go-linlog
 - repo: github.com/dgryski/go-maglev
   tags: appengine # for dchest/siphash
-  skipwasi: true
 - repo: github.com/dgryski/go-marvin32
 - repo: github.com/dgryski/go-md5crypt
 - repo: github.com/dgryski/go-metro
@@ -66,7 +64,6 @@
   tags: noasm
 - repo: github.com/dgryski/go-mpchash
   tags: appengine # for dchest/siphash
-  skipwasi: true
 - repo: github.com/dgryski/go-neeva
 - repo: github.com/dgryski/go-nibz
 - repo: github.com/dgryski/go-nibblesort
@@ -289,3 +286,8 @@
 - repo: github.com/philhofer/fwd
 - repo: github.com/blevesearch/sear
 - repo: github.com/steveyen/gtreap
+- repo: github.com/orsinium-labs/tinymath
+- repo: github.com/orsinium-labs/jsony
+- repo: github.com/tidwall/gjson
+- repo: github.com/dchest/siphash
+  tags: appengine