Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -896,6 +896,10 @@ endif
@$(MD5SUM) test.hex
$(TINYGO) build -size short -o test.hex -target=digispark examples/blinky1
@$(MD5SUM) test.hex
$(TINYGO) build -size short -o test.hex -target=digispark examples/pwm
@$(MD5SUM) test.hex
$(TINYGO) build -size short -o test.hex -target=digispark examples/mcp3008
@$(MD5SUM) test.hex
$(TINYGO) build -size short -o test.hex -target=digispark -gc=leaking examples/blinky1
@$(MD5SUM) test.hex
ifneq ($(XTENSA), 0)
Expand Down
3 changes: 3 additions & 0 deletions compiler/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,9 @@ func typeHasPointers(t llvm.Type) bool {
}
return false
case llvm.ArrayTypeKind:
if t.ArrayLength() == 0 {
return false
}
if typeHasPointers(t.ElementType()) {
return true
}
Expand Down
48 changes: 46 additions & 2 deletions compiler/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"go/types"
"strconv"
"strings"
"sync"

"golang.org/x/tools/go/ssa"
"tinygo.org/x/go-llvm"
Expand Down Expand Up @@ -511,14 +512,57 @@ var basicTypeNames = [...]string{
types.UnsafePointer: "unsafe.Pointer",
}

var scopeIDCache = struct {
sync.Mutex
scopeid map[*types.Scope]string
}{
sync.Mutex{},
make(map[*types.Scope]string),
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this into the compiler context to get rid of the lock and the memory leak.

}

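// scopeID returns a string identifying scope within the package scope pkg:
// the child index of each scope on the path from scope up to pkg, each
// followed by ':'. Results are cached per scope.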
func scopeID(pkg *types.Scope, scope *types.Scope) string {
scopeIDCache.Lock()
defer scopeIDCache.Unlock()

if id := scopeIDCache.scopeid[scope]; id != "" {
return id
}

entry := scope

var ids []int
for scope != pkg {
parent := scope.Parent()
for i := range parent.NumChildren() {
if parent.Child(i) == scope {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can result in O(n^2) overhead. Maybe build a map with everything in the package once and then look up from it?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This code only runs for types declared in a local scope. I'll add a scope -> name cache for this function but walking the entire list of scopes looking for named types before we need them seems like overkill.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You still have not fixed this issue. You are still iterating over every child of every parent for every child. This index is what you should be caching.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want the id cached for only the matching child (so I don't need to iterate next time), or for every child? (I'm concerned the "every child" case will just bloat the cache with entries that will never be queried. This is already only walking up the scopes from the scope the type is defined in up to the parent scope, so we're only looking at the children of parent nodes on the path to the root.)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit e56e3e5 caches the index of each scope that we encounter on the path to the root. We might have to double scan a particular parent if there are multiple scopes with types defined in them. (I'm going to run this over the test corpus and see if I can quantify the loops -- the numbers we're dealing with are pretty small.)

Copy link
Copy Markdown
Member

@niaow niaow Jan 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or for every child?

Yes. The cache does not actually solve anything unless you do this.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some logging to getScopeID and this is the output for running the compiler over the entire test corpus (180 packages, ~1.5 million lines of Go code):

~/go/src/github.com/dgryski/tinygo-test-corpus $ rg === corpus.out |sed 's/.*===//'
scopeIdx: id=46:10: loops=58
scopeIdx: id=46:10: loops=0
scopeIdx: id=46:10: loops=0
scopeIdx: id=25:4: loops=31
scopeIdx: id=25:4: loops=0
scopeIdx: id=25:4: loops=0
scopeIdx: id=25:4: loops=0
scopeIdx: id=25:4: loops=0
scopeIdx: id=25:4: loops=0
scopeIdx: id=4:1: loops=7
scopeIdx: id=4:1: loops=0
scopeIdx: id=4:1: loops=0
scopeIdx: id=4:1: loops=0
scopeIdx: id=0:3: loops=5
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=5
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=0:3: loops=0
scopeIdx: id=8:0: loops=10
scopeIdx: id=8:0: loops=0
scopeIdx: id=8:0: loops=0
scopeIdx: id=10:0: loops=11
scopeIdx: id=10:0: loops=0
scopeIdx: id=10:0: loops=0
scopeIdx: id=0:9:0:5:1:0: loops=21
scopeIdx: id=0:9:0:5:1:0: loops=0
scopeIdx: id=0:9:0:5:1:0: loops=0

Even when adding in only the matching child, the number of iterations is very small, and this code just isn't called that frequently. Adding in all the children will just increase the memory usage for no benefit, since none of them are likely to have named types in them that we're going to query.

ids = append(ids, i)
scope = scope.Parent()
break
}
}
}

var buf []byte
for _, v := range ids {
buf = strconv.AppendInt(buf, int64(v), 10)
buf = append(buf, ':')
}

id := string(buf)
scopeIDCache.scopeid[entry] = id
return id
}

// getTypeCodeName returns a name for this type that can be used in the
// interface lowering pass to assign type codes as expected by the reflect
// package. See getTypeCodeNum.
func getTypeCodeName(t types.Type) (string, bool) {
switch t := types.Unalias(t).(type) {
case *types.Named:
if t.Obj().Parent() != t.Obj().Pkg().Scope() {
return "named:" + t.String() + "$local", true
parent, pkg := t.Obj().Parent(), t.Obj().Pkg().Scope()
if parent != pkg {
return fmt.Sprintf("named:%s$local:%s", t.String(), scopeID(pkg, parent)), true
}
return "named:" + t.String(), false
case *types.Array:
Expand Down
182 changes: 91 additions & 91 deletions compiler/llvm.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package compiler

import (
"encoding/binary"
"fmt"
"go/token"
"go/types"
"math/big"
"strings"

"github.com/tinygo-org/tinygo/compileopts"
Expand Down Expand Up @@ -231,6 +231,12 @@ func (c *compilerContext) makeGlobalArray(buf []byte, name string, elementType l
//
// For details on what's in this value, see src/runtime/gc_precise.go.
func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Value {
if !typeHasPointers(t) {
// There are no pointers in this type, so we can simplify the layout.
layout := (uint64(1) << 1) | 1
return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
}

// Use the element type for arrays. This works even for nested arrays.
for {
kind := t.TypeKind()
Expand All @@ -248,87 +254,62 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
break
}

// Do a few checks to see whether we need to generate any object layout
// information at all.
// Create the pointer bitmap.
objectSizeBytes := c.targetData.TypeAllocSize(t)
pointerSize := c.targetData.TypeAllocSize(c.dataPtrType)
pointerAlignment := c.targetData.PrefTypeAlignment(c.dataPtrType)
if objectSizeBytes < pointerSize {
// Too small to contain a pointer.
layout := (uint64(1) << 1) | 1
return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
}
bitmap := c.getPointerBitmap(t, pos)
if bitmap.BitLen() == 0 {
// There are no pointers in this type, so we can simplify the layout.
// TODO: this can be done in many other cases, e.g. when allocating an
// array (like [4][]byte, which repeats a slice 4 times).
layout := (uint64(1) << 1) | 1
return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
}
if objectSizeBytes%uint64(pointerAlignment) != 0 {
// This shouldn't happen except for packed structs, which aren't
// currently used.
c.addError(pos, "internal error: unexpected object size for object with pointer field")
return llvm.ConstNull(c.dataPtrType)
}
objectSizeWords := objectSizeBytes / uint64(pointerAlignment)
pointerAlignment := uint64(c.targetData.PrefTypeAlignment(c.dataPtrType))
bitmapLen := objectSizeBytes / pointerAlignment
bitmapBytes := (bitmapLen + 7) / 8
bitmap := make([]byte, bitmapBytes, max(bitmapBytes, 8))
c.buildPointerBitmap(bitmap, pointerAlignment, pos, t, 0)

// Try to encode the layout inline.
pointerSize := c.targetData.TypeAllocSize(c.dataPtrType)
pointerBits := pointerSize * 8
var sizeFieldBits uint64
switch pointerBits {
case 16:
sizeFieldBits = 4
case 32:
sizeFieldBits = 5
case 64:
sizeFieldBits = 6
default:
panic("unknown pointer size")
}
layoutFieldBits := pointerBits - 1 - sizeFieldBits

// Try to emit the value as an inline integer. This is possible in most
// cases.
if objectSizeWords < layoutFieldBits {
// If it can be stored directly in the pointer value, do so.
// The runtime knows that if the least significant bit of the pointer is
// set, the pointer contains the value itself.
layout := bitmap.Uint64()<<(sizeFieldBits+1) | (objectSizeWords << 1) | 1
return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
if bitmapLen < pointerBits {
rawMask := binary.LittleEndian.Uint64(bitmap[0:8])
layout := rawMask*pointerBits + bitmapLen
layout <<= 1
layout |= 1

// Check if the layout fits.
layout &= 1<<pointerBits - 1
if (layout>>1)/pointerBits == rawMask {
// No set bits were shifted off.
return llvm.ConstIntToPtr(llvm.ConstInt(c.uintptrType, layout, false), c.dataPtrType)
}
}

// Unfortunately, the object layout is too big to fit in a pointer-sized
// integer. Store it in a global instead.

// Try first whether the global already exists. All objects with a
// particular name have the same type, so this is possible.
globalName := "runtime/gc.layout:" + fmt.Sprintf("%d-%0*x", objectSizeWords, (objectSizeWords+15)/16, bitmap)
globalName := "runtime/gc.layout:" + fmt.Sprintf("%d-%0*x", bitmapLen, (bitmapLen+15)/16, bitmap)
global := c.mod.NamedGlobal(globalName)
if !global.IsNil() {
return global
}

// Create the global initializer.
bitmapBytes := make([]byte, int(objectSizeWords+7)/8)
bitmap.FillBytes(bitmapBytes)
reverseBytes(bitmapBytes) // big-endian to little-endian
var bitmapByteValues []llvm.Value
for _, b := range bitmapBytes {
bitmapByteValues = append(bitmapByteValues, llvm.ConstInt(c.ctx.Int8Type(), uint64(b), false))
bitmapByteValues := make([]llvm.Value, bitmapBytes)
i8 := c.ctx.Int8Type()
for i, b := range bitmap {
bitmapByteValues[i] = llvm.ConstInt(i8, uint64(b), false)
}
initializer := c.ctx.ConstStruct([]llvm.Value{
llvm.ConstInt(c.uintptrType, objectSizeWords, false),
llvm.ConstArray(c.ctx.Int8Type(), bitmapByteValues),
llvm.ConstInt(c.uintptrType, bitmapLen, false),
llvm.ConstArray(i8, bitmapByteValues),
}, false)

// Create the actual global.
global = llvm.AddGlobal(c.mod, initializer.Type(), globalName)
global.SetInitializer(initializer)
global.SetUnnamedAddr(true)
global.SetGlobalConstant(true)
global.SetLinkage(llvm.LinkOnceODRLinkage)
if c.targetData.PrefTypeAlignment(c.uintptrType) < 2 {
// AVR doesn't have alignment by default.
// The lowest bit must be unset to distinguish this from an inline layout.
global.SetAlignment(2)
}
if c.Debug && pos != token.NoPos {
Expand Down Expand Up @@ -360,52 +341,71 @@ func (c *compilerContext) createObjectLayout(t llvm.Type, pos token.Pos) llvm.Va
return global
}

// getPointerBitmap scans the given LLVM type for pointers and sets bits in a
// bigint at the word offset that contains a pointer. This scan is recursive.
func (c *compilerContext) getPointerBitmap(typ llvm.Type, pos token.Pos) *big.Int {
alignment := c.targetData.PrefTypeAlignment(c.dataPtrType)
switch typ.TypeKind() {
// buildPointerBitmap scans the given LLVM type for pointers and sets bits in a
// bitmap at the word offset that contains a pointer. This scan is recursive.
func (c *compilerContext) buildPointerBitmap(
dst []byte,
ptrAlign uint64,
pos token.Pos,
t llvm.Type,
offset uint64,
) {
switch t.TypeKind() {
case llvm.IntegerTypeKind, llvm.FloatTypeKind, llvm.DoubleTypeKind:
return big.NewInt(0)
// These types do not contain pointers.

case llvm.PointerTypeKind:
return big.NewInt(1)
// Set the corresponding position in the bitmap.
dst[offset/8] |= 1 << (offset % 8)

case llvm.StructTypeKind:
ptrs := big.NewInt(0)
for i, subtyp := range typ.StructElementTypes() {
subptrs := c.getPointerBitmap(subtyp, pos)
if subptrs.BitLen() == 0 {
continue
}
offset := c.targetData.ElementOffset(typ, i)
if offset%uint64(alignment) != 0 {
// This error will let the compilation fail, but by continuing
// the error can still easily be shown.
c.addError(pos, "internal error: allocated struct contains unaligned pointer")
// Recurse over struct elements.
for i, et := range t.StructElementTypes() {
eo := c.targetData.ElementOffset(t, i)
if eo%uint64(ptrAlign) != 0 {
if typeHasPointers(et) {
// This error will let the compilation fail, but by continuing
// the error can still easily be shown.
c.addError(pos, "internal error: allocated struct contains unaligned pointer")
}
continue
}
subptrs.Lsh(subptrs, uint(offset)/uint(alignment))
ptrs.Or(ptrs, subptrs)
c.buildPointerBitmap(
dst,
ptrAlign,
pos,
et,
offset+(eo/ptrAlign),
)
}
return ptrs

case llvm.ArrayTypeKind:
subtyp := typ.ElementType()
subptrs := c.getPointerBitmap(subtyp, pos)
ptrs := big.NewInt(0)
if subptrs.BitLen() == 0 {
return ptrs
// Recurse over array elements.
len := t.ArrayLength()
if len <= 0 {
return
}
elementSize := c.targetData.TypeAllocSize(subtyp)
if elementSize%uint64(alignment) != 0 {
// This error will let the compilation fail (but continues so that
// other errors can be shown).
c.addError(pos, "internal error: allocated array contains unaligned pointer")
return ptrs
et := t.ElementType()
elementSize := c.targetData.TypeAllocSize(et)
if elementSize%ptrAlign != 0 {
if typeHasPointers(et) {
// This error will let the compilation fail (but continues so that
// other errors can be shown).
c.addError(pos, "internal error: allocated array contains unaligned pointer")
}
return
}
for i := 0; i < typ.ArrayLength(); i++ {
ptrs.Lsh(ptrs, uint(elementSize)/uint(alignment))
ptrs.Or(ptrs, subptrs)
elementSize /= ptrAlign
for i := 0; i < len; i++ {
c.buildPointerBitmap(
dst,
ptrAlign,
pos,
et,
offset+uint64(i)*elementSize,
)
}
return ptrs

default:
// Should not happen.
panic("unknown LLVM type")
Expand Down
8 changes: 8 additions & 0 deletions compiler/testdata/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ var (
x *byte
y [61]uintptr
}
struct5 *struct {
x *byte
y [30]uintptr
}

slice1 []byte
slice2 []*int
Expand Down Expand Up @@ -58,6 +62,10 @@ func newStruct() {
x *byte
y [61]uintptr
})
struct5 = new(struct {
x *byte
y [30]uintptr
})
}

func newFuncValue() *func() {
Expand Down
12 changes: 8 additions & 4 deletions compiler/testdata/gc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ target triple = "wasm32-unknown-wasi"
@main.struct2 = hidden global ptr null, align 4
@main.struct3 = hidden global ptr null, align 4
@main.struct4 = hidden global ptr null, align 4
@main.struct5 = hidden global ptr null, align 4
@main.slice1 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
@main.slice2 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
@main.slice3 = hidden global { ptr, i32, i32 } zeroinitializer, align 4
@"runtime/gc.layout:62-2000000000000001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00 " }
@"runtime/gc.layout:62-0001" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00\00" }
@"runtime/gc.layout:62-0100000000000020" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00 " }
@"runtime/gc.layout:62-0100000000000000" = linkonce_odr unnamed_addr constant { i32, [8 x i8] } { i32 62, [8 x i8] c"\01\00\00\00\00\00\00\00" }
@"reflect/types.type:basic:complex128" = linkonce_odr constant { i8, ptr } { i8 80, ptr @"reflect/types.type:pointer:basic:complex128" }, align 4
@"reflect/types.type:pointer:basic:complex128" = linkonce_odr constant { i8, i16, ptr } { i8 -43, i16 0, ptr @"reflect/types.type:basic:complex128" }, align 4

Expand Down Expand Up @@ -80,12 +81,15 @@ entry:
%new1 = call align 4 dereferenceable(8) ptr @runtime.alloc(i32 8, ptr nonnull inttoptr (i32 3 to ptr), ptr undef) #3
call void @runtime.trackPointer(ptr nonnull %new1, ptr nonnull %stackalloc, ptr undef) #3
store ptr %new1, ptr @main.struct2, align 4
%new2 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-2000000000000001", ptr undef) #3
%new2 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0100000000000020", ptr undef) #3
call void @runtime.trackPointer(ptr nonnull %new2, ptr nonnull %stackalloc, ptr undef) #3
store ptr %new2, ptr @main.struct3, align 4
%new3 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0001", ptr undef) #3
%new3 = call align 4 dereferenceable(248) ptr @runtime.alloc(i32 248, ptr nonnull @"runtime/gc.layout:62-0100000000000000", ptr undef) #3
call void @runtime.trackPointer(ptr nonnull %new3, ptr nonnull %stackalloc, ptr undef) #3
store ptr %new3, ptr @main.struct4, align 4
%new4 = call align 4 dereferenceable(124) ptr @runtime.alloc(i32 124, ptr nonnull inttoptr (i32 127 to ptr), ptr undef) #3
call void @runtime.trackPointer(ptr nonnull %new4, ptr nonnull %stackalloc, ptr undef) #3
store ptr %new4, ptr @main.struct5, align 4
ret void
}

Expand Down
Loading
Loading