Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Android.bp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ common_cflags = [
"-DN_ARENA=1",
"-DCONFIG_STATS=true",
"-DCONFIG_SELF_INIT=false",
"-DCONFIG_PAGE_SIZE=4096",
]

cc_defaults {
Expand Down
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@ endif

OBJECTS := $(addprefix $(OUT)/,$(OBJECTS))

# Fail the build early unless CONFIG_PAGE_SIZE is one of the supported page
# sizes (4096 or 16384); an empty or unset value is rejected as well.
ifeq (,$(filter $(CONFIG_PAGE_SIZE),4096 16384))
$(error CONFIG_PAGE_SIZE must be 4096 or 16384)
endif

ifeq (,$(filter $(CONFIG_SEAL_METADATA),true false))
$(error CONFIG_SEAL_METADATA must be true or false)
endif
Expand Down Expand Up @@ -108,7 +112,8 @@ CPPFLAGS += \
-DCONFIG_CLASS_REGION_SIZE=$(CONFIG_CLASS_REGION_SIZE) \
-DN_ARENA=$(CONFIG_N_ARENA) \
-DCONFIG_STATS=$(CONFIG_STATS) \
-DCONFIG_SELF_INIT=$(CONFIG_SELF_INIT)
-DCONFIG_SELF_INIT=$(CONFIG_SELF_INIT) \
-DCONFIG_PAGE_SIZE=$(CONFIG_PAGE_SIZE)

$(OUT)/libhardened_malloc$(SUFFIX).so: $(OBJECTS) | $(OUT)
$(CC) $(CFLAGS) $(LDFLAGS) -shared $^ $(LDLIBS) -o $@
Expand Down
31 changes: 19 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,9 @@ large number of guard pages created by hardened\_malloc. As an example, in
This is unnecessary if you set `CONFIG_GUARD_SLABS_INTERVAL` to a very large
value in the build configuration.

On arm64, make sure your kernel is configured to use 4k pages since we haven't
yet added support for 16k and 64k pages. The kernel also has to be configured
to use 4 level page tables for the full 48 bit address space instead of only
having a 39 bit address space for the default hardened\_malloc configuration.
On arm64, the kernel has to be configured to use 4 level page tables for the
full 48 bit address space instead of only having a 39 bit address space for
the default hardened\_malloc configuration.
It's possible to reduce the class region size substantially to make a 39 bit
address space workable but the defaults won't work.

Expand Down Expand Up @@ -334,6 +333,14 @@ The following integer configuration options are available:
granularity. See the [section on size classes](#size-classes) below for
details.

* `CONFIG_PAGE_SIZE`: `4096` (default) to set the page size used by the
allocator. Supported values are `4096` and `16384`. This must match the page
size of the kernel the library will run on. On arm64, kernels may be
configured for 4k, 16k or 64k pages; 64k pages are not supported. The
allocator verifies at runtime that the compile-time page size matches the
kernel page size and will abort if they differ. The slab slot counts are
tuned per page size to minimize internal fragmentation for slabs.

There will be more control over enabled features in the future along with
control over fairly arbitrarily chosen values like the size of empty slab
caches (making them smaller improves security and reduces memory usage while
Expand Down Expand Up @@ -537,11 +544,11 @@ classes for each doubling in size.

The slot counts tied to the size classes are specific to this allocator rather
than being taken from jemalloc. Slabs are always a span of pages so the slot
count needs to be tuned to minimize waste due to rounding to the page size. For
now, this allocator is set up only for 4096 byte pages as a small page size is
desirable for finer-grained memory protection and randomization. It could be
ported to larger page sizes in the future. The current slot counts are only a
preliminary set of values.
count needs to be tuned to minimize waste due to rounding to the page size.
Tuned slot counts are provided for 4096 and 16384 byte page sizes, selectable
via `CONFIG_PAGE_SIZE`. A smaller page size is desirable for
finer-grained memory protection and randomization. The tables below show the
default slot counts for 4096 byte pages.

| size class | worst case internal fragmentation | slab slots | slab size | internal fragmentation for slabs |
| - | - | - | - | - |
Expand Down Expand Up @@ -584,7 +591,7 @@ preliminary set of values.

The slab allocation size classes end at 16384 since that's the final size for
2048 byte spacing and the next spacing class matches the page size of 4096
bytes on the target platforms. This is the minimum set of small size classes
bytes when using 4k pages. This is the minimum set of small size classes
required to avoid substantial waste from rounding.

The `CONFIG_EXTENDED_SIZE_CLASSES` option extends the size classes up to
Expand Down Expand Up @@ -620,8 +627,8 @@ the same size class scheme providing 4 size classes for every doubling of size.
It increases virtual memory consumption but drastically improves performance
where realloc is used without proper growth factors, which is fairly common and
destroys performance in some commonly used programs. If large size classes are
disabled, the granularity is instead the page size, which is currently always
4096 bytes on supported platforms.
disabled, the granularity is instead the page size (4096 or 16384 bytes
depending on `CONFIG_PAGE_SIZE`).

## Scalability

Expand Down
73 changes: 58 additions & 15 deletions calculate-waste
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#!/usr/bin/env python3

from sys import argv
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--pagesize", default=4096, type=int)
args = parser.parse_args()
page_size = args.pagesize

size_classes = [
16, 32, 48, 64, 80, 96, 112, 128,
Expand All @@ -16,7 +21,7 @@ size_classes = [
81920, 98304, 114688, 131072,
]

size_class_slots = [
size_class_slots_4k = [
256, 128, 85, 64, 51, 42, 36, 64,
51, 64, 54, 64,
64, 64, 64, 64,
Expand All @@ -30,46 +35,84 @@ size_class_slots = [
1, 1, 1, 1,
]

size_class_slots_16k = [
256, 256, 256, 256,
204, 170, 146, 256,
204, 256, 219, 256,
256, 256, 256, 256,
256, 256, 256, 256,
192, 160, 128, 128,
96, 80, 64, 64,
48, 40, 32, 32,
24, 20, 16, 16,
12, 10, 8, 8,
6, 5, 4, 4,
3, 2, 2, 2,
]

if page_size == 16384:
size_class_slots = size_class_slots_16k
else:
size_class_slots = size_class_slots_4k

fragmentation = [100 - 1 / 16 * 100]

for i in range(len(size_classes) - 1):
size_class = size_classes[i + 1]
worst_case = size_classes[i] + 1
used = worst_case / size_class
fragmentation.append(100 - used * 100);
fragmentation.append(100 - used * 100)


def page_align(size):
return (size + 4095) & ~4095
mask = page_size - 1
return (size + mask) & ~mask


print(f"Page size: {page_size}")
print()
print("| ", end="")
print(
"size class",
"worst case internal fragmentation",
"slab slots",
"slab size",
"internal fragmentation for slabs",
sep=" | ",
end=" |\n",
)
print("| ", end="")
print("size class", "worst case internal fragmentation", "slab slots", "slab size", "internal fragmentation for slabs", sep=" | ", end=" |\n")
print("| ", end='')
print("-", "-", "-", "-", "-", sep=" | ", end=" |\n")
for size, slots, fragmentation in zip(size_classes, size_class_slots, fragmentation):
used = size * slots
real = page_align(used)
print("| ", end='')
print(size, f"{fragmentation:.4}%", slots, real, str(100 - used / real * 100) + "%", sep=" | ", end=" |\n")

if len(argv) < 2:
exit()
print("| ", end="")
print(
size,
f"{fragmentation:.4}%",
slots,
real,
str(100 - used / real * 100) + "%",
sep=" | ",
end=" |\n",
)

max_bits = 256
max_bits = 512
max_page_span = 16

print()

print("maximum bitmap size is {}-bit".format(max_bits))
print("maximum page span size is {} ({})".format(max_page_span, max_page_span * 4096))
print("maximum page span size is {} ({})".format(max_page_span, max_page_span * page_size))

for size_class in size_classes:
choices = []
for bits in range(1, max_bits + 1):
used = size_class * bits
real = page_align(used)
if real > 65536:
if real > max_page_span * page_size:
continue
pages = real / 4096
pages = real / page_size
efficiency = used / real * 100
choices.append((bits, used, real, pages, efficiency))

Expand Down
1 change: 1 addition & 0 deletions config/default.mk
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ CONFIG_CLASS_REGION_SIZE := 34359738368 # 32GiB
CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true
CONFIG_PAGE_SIZE := 4096
1 change: 1 addition & 0 deletions config/light.mk
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ CONFIG_CLASS_REGION_SIZE := 34359738368 # 32GiB
CONFIG_N_ARENA := 4
CONFIG_STATS := false
CONFIG_SELF_INIT := true
CONFIG_PAGE_SIZE := 4096
57 changes: 41 additions & 16 deletions h_malloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,11 @@ static bool memory_map_fixed_tagged(void *ptr, size_t size) {
#define SLAB_METADATA_COUNT

struct slab_metadata {
#if CONFIG_PAGE_SIZE == 16384
u64 bitmap[8];
#else
u64 bitmap[4];
#endif
struct slab_metadata *next;
struct slab_metadata *prev;
#if SLAB_CANARY
Expand All @@ -125,8 +129,12 @@ struct slab_metadata {
u16 count;
#endif
#if SLAB_QUARANTINE
#if CONFIG_PAGE_SIZE == 16384
u64 quarantine_bitmap[8];
#else
u64 quarantine_bitmap[4];
#endif
#endif /* CONFIG_PAGE_SIZE */
#endif
#ifdef HAS_ARM_MTE
// arm_mte_tags is used as a u4 array (MTE tags are 4-bit wide)
//
Expand Down Expand Up @@ -178,6 +186,22 @@ static const u32 size_classes[] = {
};

static const u16 size_class_slots[] = {
#if CONFIG_PAGE_SIZE == 16384
/* 0 */ 256,
/* 16 */ 256, 256, 256, 256, 204, 170, 146, 256,
/* 32 */ 204, 256, 219, 256,
/* 64 */ 256, 256, 256, 256,
/* 128 */ 256, 256, 256, 256,
/* 256 */ 192, 160, 128, 128,
/* 512 */ 96, 80, 64, 64,
/* 1024 */ 48, 40, 32, 32,
/* 2048 */ 24, 20, 16, 16,
#if CONFIG_EXTENDED_SIZE_CLASSES
/* 4096 */ 12, 10, 8, 8,
/* 8192 */ 6, 5, 4, 4,
/* 16384 */ 3, 2, 2, 2,
#endif
#else /* 4k pages */
/* 0 */ 256,
/* 16 */ 256, 128, 85, 64, 51, 42, 36, 64,
/* 32 */ 51, 64, 54, 64,
Expand All @@ -192,6 +216,7 @@ static const u16 size_class_slots[] = {
/* 8192 */ 1, 1, 1, 1,
/* 16384 */ 1, 1, 1, 1,
#endif
#endif
};

static size_t get_slots(unsigned class) {
Expand Down Expand Up @@ -321,7 +346,8 @@ struct __attribute__((aligned(CACHELINE_SIZE))) size_class {
#define REAL_CLASS_REGION_SIZE (CLASS_REGION_SIZE * 2)
#define ARENA_SIZE (REAL_CLASS_REGION_SIZE * N_SIZE_CLASSES)
static const size_t slab_region_size = ARENA_SIZE * N_ARENA;
static_assert(PAGE_SIZE == 4096, "bitmap handling will need adjustment for other page sizes");
static_assert(PAGE_SIZE == 4096 || PAGE_SIZE == 16384,
"page size must be 4096 or 16384");

static void *get_slab(const struct size_class *c, size_t slab_size, const struct slab_metadata *metadata) {
size_t index = metadata - c->slab_info;
Expand Down Expand Up @@ -449,20 +475,14 @@ static bool has_free_slots(size_t slots, const struct slab_metadata *metadata) {
#ifdef SLAB_METADATA_COUNT
return metadata->count < slots;
#else
if (slots <= U64_WIDTH) {
u64 masked = metadata->bitmap[0] | get_mask(slots);
return masked != ~0UL;
}
if (slots <= U64_WIDTH * 2) {
u64 masked = metadata->bitmap[1] | get_mask(slots - U64_WIDTH);
return metadata->bitmap[0] != ~0UL || masked != ~0UL;
}
if (slots <= U64_WIDTH * 3) {
u64 masked = metadata->bitmap[2] | get_mask(slots - U64_WIDTH * 2);
return metadata->bitmap[0] != ~0UL || metadata->bitmap[1] != ~0UL || masked != ~0UL;
size_t last = (slots - 1) / U64_WIDTH;
for (size_t i = 0; i < last; i++) {
if (metadata->bitmap[i] != ~0UL) {
return true;
}
}
u64 masked = metadata->bitmap[3] | get_mask(slots - U64_WIDTH * 3);
return metadata->bitmap[0] != ~0UL || metadata->bitmap[1] != ~0UL || metadata->bitmap[2] != ~0UL || masked != ~0UL;
u64 masked = metadata->bitmap[last] | get_mask(slots - last * U64_WIDTH);
return masked != ~0UL;
#endif
}

Expand All @@ -471,7 +491,12 @@ static bool is_free_slab(const struct slab_metadata *metadata) {
return !metadata->count;
#else
return !metadata->bitmap[0] && !metadata->bitmap[1] && !metadata->bitmap[2] &&
!metadata->bitmap[3];
!metadata->bitmap[3]
#if CONFIG_PAGE_SIZE == 16384
&& !metadata->bitmap[4] && !metadata->bitmap[5] && !metadata->bitmap[6]
&& !metadata->bitmap[7]
#endif
;
#endif
}

Expand Down
7 changes: 7 additions & 0 deletions pages.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,14 @@

#include "util.h"

// PAGE_SHIFT is derived from the build-time CONFIG_PAGE_SIZE unless the build
// already supplies it. An undefined CONFIG_PAGE_SIZE keeps the 4k default;
// any other unsupported value is rejected at compile time instead of being
// silently treated as 4k.
#ifndef PAGE_SHIFT
#if !defined(CONFIG_PAGE_SIZE) || CONFIG_PAGE_SIZE == 4096
#define PAGE_SHIFT 12
#elif CONFIG_PAGE_SIZE == 16384
#define PAGE_SHIFT 14
#else
#error "CONFIG_PAGE_SIZE must be 4096 or 16384"
#endif
#endif

// PAGE_SIZE in bytes, computed from PAGE_SHIFT unless already defined.
#ifndef PAGE_SIZE
#define PAGE_SIZE ((size_t)1 << PAGE_SHIFT)
#endif
Expand Down